├── Lecture 1 - Linear Regression and Gradient Descent
│   ├── LinReg_and_Gradient_Descent.pdf
│   ├── auto-mpg.data
│   ├── gradient.py
│   └── linreg-sklearn.py
├── Lecture 2 - Gradient Descent and Normal Equations
│   ├── Gradient_Descent_and_Normal_Equations.pdf
│   ├── descent-normal-autompg.py
│   ├── error_surface.py
│   ├── gradient.py
│   └── linreg-normal_equations.py
├── Lecture 3 - Curve Fitting and Model Validation
│   ├── Lecture3.pdf
│   ├── README.md
│   ├── polyfit-auto-mpg-cv.py
│   ├── polyfit-auto-mpg-t-test.py
│   ├── polyfit-auto-mpg.py
│   ├── polyfit-generalisation.py
│   ├── polyfit.py
│   ├── residuals-auto-mpg.py
│   ├── residuals-random.py
│   └── residuals-vs-fitted.py
├── Lecture 4 - Decision Trees
│   ├── Decision_Trees.pdf
│   ├── README.md
│   ├── auto-mpg-modified.data
│   ├── dt-credit.py
│   ├── entropy.py
│   ├── overfit_demo.py
│   └── scikit-dt-auto-mpg.py
├── Lecture 5 - Probabilities and Logistic Regression
│   ├── Probabilities_and_Logistic_Regression.pdf
│   ├── README.md
│   ├── auto-mpg.data
│   ├── linreg-normal_equations.py
│   ├── logreg-hp-origin.py
│   ├── logreg_gradient.py
│   ├── logreg_gradient_2_variables.py
│   └── logreg_gradient_2_variables_iris.py
├── Lecture 6 - Naive Bayes
│   ├── Naive_Bayes.pdf
│   ├── README.md
│   ├── auto-mpg.data
│   ├── gender_height_weight.csv
│   ├── naive_bayes_autompg.py
│   └── naive_bayes_mf.py
├── Lecture 7 - Text Classification
│   ├── README.md
│   ├── data
│   │   └── SMSSpamCollection
│   ├── predict.py
│   └── transform.py
└── README.md

/Lecture 1 - Linear Regression and Gradient Descent/LinReg_and_Gradient_Descent.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 1 - Linear Regression and Gradient Descent/LinReg_and_Gradient_Descent.pdf
--------------------------------------------------------------------------------
/Lecture 1 - Linear Regression and Gradient Descent/auto-mpg.data:
--------------------------------------------------------------------------------
1 | 18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu" 2 | 15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320" 3 | 18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite" 4 | 16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst" 5 | 17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino" 6 | 15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500" 7 | 14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala" 8 | 14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii" 9 | 14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina" 10 | 15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl" 11 | 15.0 8 383.0 170.0 3563. 10.0 70 1 "dodge challenger se" 12 | 14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340" 13 | 15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo" 14 | 14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)" 15 | 24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii" 16 | 22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster" 17 | 18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet" 18 | 21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick" 19 | 27.0 4 97.00 88.00 2130. 14.5 70 3 "datsun pl510" 20 | 26.0 4 97.00 46.00 1835. 20.5 70 2 "volkswagen 1131 deluxe sedan" 21 | 25.0 4 110.0 87.00 2672. 17.5 70 2 "peugeot 504" 22 | 24.0 4 107.0 90.00 2430. 14.5 70 2 "audi 100 ls" 23 | 25.0 4 104.0 95.00 2375. 17.5 70 2 "saab 99e" 24 | 26.0 4 121.0 113.0 2234. 12.5 70 2 "bmw 2002" 25 | 21.0 6 199.0 90.00 2648. 15.0 70 1 "amc gremlin" 26 | 10.0 8 360.0 215.0 4615. 14.0 70 1 "ford f250" 27 | 10.0 8 307.0 200.0 4376. 15.0 70 1 "chevy c20" 28 | 11.0 8 318.0 210.0 4382.
13.5 70 1 "dodge d200" 29 | 9.0 8 304.0 193.0 4732. 18.5 70 1 "hi 1200d" 30 | 27.0 4 97.00 88.00 2130. 14.5 71 3 "datsun pl510" 31 | 28.0 4 140.0 90.00 2264. 15.5 71 1 "chevrolet vega 2300" 32 | 25.0 4 113.0 95.00 2228. 14.0 71 3 "toyota corona" 33 | 25.0 4 98.00 ? 2046. 19.0 71 1 "ford pinto" 34 | 19.0 6 232.0 100.0 2634. 13.0 71 1 "amc gremlin" 35 | 16.0 6 225.0 105.0 3439. 15.5 71 1 "plymouth satellite custom" 36 | 17.0 6 250.0 100.0 3329. 15.5 71 1 "chevrolet chevelle malibu" 37 | 19.0 6 250.0 88.00 3302. 15.5 71 1 "ford torino 500" 38 | 18.0 6 232.0 100.0 3288. 15.5 71 1 "amc matador" 39 | 14.0 8 350.0 165.0 4209. 12.0 71 1 "chevrolet impala" 40 | 14.0 8 400.0 175.0 4464. 11.5 71 1 "pontiac catalina brougham" 41 | 14.0 8 351.0 153.0 4154. 13.5 71 1 "ford galaxie 500" 42 | 14.0 8 318.0 150.0 4096. 13.0 71 1 "plymouth fury iii" 43 | 12.0 8 383.0 180.0 4955. 11.5 71 1 "dodge monaco (sw)" 44 | 13.0 8 400.0 170.0 4746. 12.0 71 1 "ford country squire (sw)" 45 | 13.0 8 400.0 175.0 5140. 12.0 71 1 "pontiac safari (sw)" 46 | 18.0 6 258.0 110.0 2962. 13.5 71 1 "amc hornet sportabout (sw)" 47 | 22.0 4 140.0 72.00 2408. 19.0 71 1 "chevrolet vega (sw)" 48 | 19.0 6 250.0 100.0 3282. 15.0 71 1 "pontiac firebird" 49 | 18.0 6 250.0 88.00 3139. 14.5 71 1 "ford mustang" 50 | 23.0 4 122.0 86.00 2220. 14.0 71 1 "mercury capri 2000" 51 | 28.0 4 116.0 90.00 2123. 14.0 71 2 "opel 1900" 52 | 30.0 4 79.00 70.00 2074. 19.5 71 2 "peugeot 304" 53 | 30.0 4 88.00 76.00 2065. 14.5 71 2 "fiat 124b" 54 | 31.0 4 71.00 65.00 1773. 19.0 71 3 "toyota corolla 1200" 55 | 35.0 4 72.00 69.00 1613. 18.0 71 3 "datsun 1200" 56 | 27.0 4 97.00 60.00 1834. 19.0 71 2 "volkswagen model 111" 57 | 26.0 4 91.00 70.00 1955. 20.5 71 1 "plymouth cricket" 58 | 24.0 4 113.0 95.00 2278. 15.5 72 3 "toyota corona hardtop" 59 | 25.0 4 97.50 80.00 2126. 17.0 72 1 "dodge colt hardtop" 60 | 23.0 4 97.00 54.00 2254. 23.5 72 2 "volkswagen type 3" 61 | 20.0 4 140.0 90.00 2408. 19.5 72 1 "chevrolet vega" 62 | 21.0 4 122.0 86.00 2226. 16.5 72 1 "ford pinto runabout" 63 | 13.0 8 350.0 165.0 4274. 12.0 72 1 "chevrolet impala" 64 | 14.0 8 400.0 175.0 4385. 12.0 72 1 "pontiac catalina" 65 | 15.0 8 318.0 150.0 4135. 13.5 72 1 "plymouth fury iii" 66 | 14.0 8 351.0 153.0 4129. 13.0 72 1 "ford galaxie 500" 67 | 17.0 8 304.0 150.0 3672. 11.5 72 1 "amc ambassador sst" 68 | 11.0 8 429.0 208.0 4633. 11.0 72 1 "mercury marquis" 69 | 13.0 8 350.0 155.0 4502. 13.5 72 1 "buick lesabre custom" 70 | 12.0 8 350.0 160.0 4456. 13.5 72 1 "oldsmobile delta 88 royale" 71 | 13.0 8 400.0 190.0 4422. 12.5 72 1 "chrysler newport royal" 72 | 19.0 3 70.00 97.00 2330. 13.5 72 3 "mazda rx2 coupe" 73 | 15.0 8 304.0 150.0 3892. 12.5 72 1 "amc matador (sw)" 74 | 13.0 8 307.0 130.0 4098. 14.0 72 1 "chevrolet chevelle concours (sw)" 75 | 13.0 8 302.0 140.0 4294. 16.0 72 1 "ford gran torino (sw)" 76 | 14.0 8 318.0 150.0 4077. 14.0 72 1 "plymouth satellite custom (sw)" 77 | 18.0 4 121.0 112.0 2933. 14.5 72 2 "volvo 145e (sw)" 78 | 22.0 4 121.0 76.00 2511. 18.0 72 2 "volkswagen 411 (sw)" 79 | 21.0 4 120.0 87.00 2979. 19.5 72 2 "peugeot 504 (sw)" 80 | 26.0 4 96.00 69.00 2189. 18.0 72 2 "renault 12 (sw)" 81 | 22.0 4 122.0 86.00 2395. 16.0 72 1 "ford pinto (sw)" 82 | 28.0 4 97.00 92.00 2288. 17.0 72 3 "datsun 510 (sw)" 83 | 23.0 4 120.0 97.00 2506. 14.5 72 3 "toyouta corona mark ii (sw)" 84 | 28.0 4 98.00 80.00 2164. 15.0 72 1 "dodge colt (sw)" 85 | 27.0 4 97.00 88.00 2100. 16.5 72 3 "toyota corolla 1600 (sw)" 86 | 13.0 8 350.0 175.0 4100. 
13.0 73 1 "buick century 350" 87 | 14.0 8 304.0 150.0 3672. 11.5 73 1 "amc matador" 88 | 13.0 8 350.0 145.0 3988. 13.0 73 1 "chevrolet malibu" 89 | 14.0 8 302.0 137.0 4042. 14.5 73 1 "ford gran torino" 90 | 15.0 8 318.0 150.0 3777. 12.5 73 1 "dodge coronet custom" 91 | 12.0 8 429.0 198.0 4952. 11.5 73 1 "mercury marquis brougham" 92 | 13.0 8 400.0 150.0 4464. 12.0 73 1 "chevrolet caprice classic" 93 | 13.0 8 351.0 158.0 4363. 13.0 73 1 "ford ltd" 94 | 14.0 8 318.0 150.0 4237. 14.5 73 1 "plymouth fury gran sedan" 95 | 13.0 8 440.0 215.0 4735. 11.0 73 1 "chrysler new yorker brougham" 96 | 12.0 8 455.0 225.0 4951. 11.0 73 1 "buick electra 225 custom" 97 | 13.0 8 360.0 175.0 3821. 11.0 73 1 "amc ambassador brougham" 98 | 18.0 6 225.0 105.0 3121. 16.5 73 1 "plymouth valiant" 99 | 16.0 6 250.0 100.0 3278. 18.0 73 1 "chevrolet nova custom" 100 | 18.0 6 232.0 100.0 2945. 16.0 73 1 "amc hornet" 101 | 18.0 6 250.0 88.00 3021. 16.5 73 1 "ford maverick" 102 | 23.0 6 198.0 95.00 2904. 16.0 73 1 "plymouth duster" 103 | 26.0 4 97.00 46.00 1950. 21.0 73 2 "volkswagen super beetle" 104 | 11.0 8 400.0 150.0 4997. 14.0 73 1 "chevrolet impala" 105 | 12.0 8 400.0 167.0 4906. 12.5 73 1 "ford country" 106 | 13.0 8 360.0 170.0 4654. 13.0 73 1 "plymouth custom suburb" 107 | 12.0 8 350.0 180.0 4499. 12.5 73 1 "oldsmobile vista cruiser" 108 | 18.0 6 232.0 100.0 2789. 15.0 73 1 "amc gremlin" 109 | 20.0 4 97.00 88.00 2279. 19.0 73 3 "toyota carina" 110 | 21.0 4 140.0 72.00 2401. 19.5 73 1 "chevrolet vega" 111 | 22.0 4 108.0 94.00 2379. 16.5 73 3 "datsun 610" 112 | 18.0 3 70.00 90.00 2124. 13.5 73 3 "maxda rx3" 113 | 19.0 4 122.0 85.00 2310. 18.5 73 1 "ford pinto" 114 | 21.0 6 155.0 107.0 2472. 14.0 73 1 "mercury capri v6" 115 | 26.0 4 98.00 90.00 2265. 15.5 73 2 "fiat 124 sport coupe" 116 | 15.0 8 350.0 145.0 4082. 13.0 73 1 "chevrolet monte carlo s" 117 | 16.0 8 400.0 230.0 4278. 9.50 73 1 "pontiac grand prix" 118 | 29.0 4 68.00 49.00 1867. 19.5 73 2 "fiat 128" 119 | 24.0 4 116.0 75.00 2158. 15.5 73 2 "opel manta" 120 | 20.0 4 114.0 91.00 2582. 14.0 73 2 "audi 100ls" 121 | 19.0 4 121.0 112.0 2868. 15.5 73 2 "volvo 144ea" 122 | 15.0 8 318.0 150.0 3399. 11.0 73 1 "dodge dart custom" 123 | 24.0 4 121.0 110.0 2660. 14.0 73 2 "saab 99le" 124 | 20.0 6 156.0 122.0 2807. 13.5 73 3 "toyota mark ii" 125 | 11.0 8 350.0 180.0 3664. 11.0 73 1 "oldsmobile omega" 126 | 20.0 6 198.0 95.00 3102. 16.5 74 1 "plymouth duster" 127 | 21.0 6 200.0 ? 2875. 17.0 74 1 "ford maverick" 128 | 19.0 6 232.0 100.0 2901. 16.0 74 1 "amc hornet" 129 | 15.0 6 250.0 100.0 3336. 17.0 74 1 "chevrolet nova" 130 | 31.0 4 79.00 67.00 1950. 19.0 74 3 "datsun b210" 131 | 26.0 4 122.0 80.00 2451. 16.5 74 1 "ford pinto" 132 | 32.0 4 71.00 65.00 1836. 21.0 74 3 "toyota corolla 1200" 133 | 25.0 4 140.0 75.00 2542. 17.0 74 1 "chevrolet vega" 134 | 16.0 6 250.0 100.0 3781. 17.0 74 1 "chevrolet chevelle malibu classic" 135 | 16.0 6 258.0 110.0 3632. 18.0 74 1 "amc matador" 136 | 18.0 6 225.0 105.0 3613. 16.5 74 1 "plymouth satellite sebring" 137 | 16.0 8 302.0 140.0 4141. 14.0 74 1 "ford gran torino" 138 | 13.0 8 350.0 150.0 4699. 14.5 74 1 "buick century luxus (sw)" 139 | 14.0 8 318.0 150.0 4457. 13.5 74 1 "dodge coronet custom (sw)" 140 | 14.0 8 302.0 140.0 4638. 16.0 74 1 "ford gran torino (sw)" 141 | 14.0 8 304.0 150.0 4257. 15.5 74 1 "amc matador (sw)" 142 | 29.0 4 98.00 83.00 2219. 16.5 74 2 "audi fox" 143 | 26.0 4 79.00 67.00 1963. 15.5 74 2 "volkswagen dasher" 144 | 26.0 4 97.00 78.00 2300. 14.5 74 2 "opel manta" 145 | 31.0 4 76.00 52.00 1649. 
16.5 74 3 "toyota corona" 146 | 32.0 4 83.00 61.00 2003. 19.0 74 3 "datsun 710" 147 | 28.0 4 90.00 75.00 2125. 14.5 74 1 "dodge colt" 148 | 24.0 4 90.00 75.00 2108. 15.5 74 2 "fiat 128" 149 | 26.0 4 116.0 75.00 2246. 14.0 74 2 "fiat 124 tc" 150 | 24.0 4 120.0 97.00 2489. 15.0 74 3 "honda civic" 151 | 26.0 4 108.0 93.00 2391. 15.5 74 3 "subaru" 152 | 31.0 4 79.00 67.00 2000. 16.0 74 2 "fiat x1.9" 153 | 19.0 6 225.0 95.00 3264. 16.0 75 1 "plymouth valiant custom" 154 | 18.0 6 250.0 105.0 3459. 16.0 75 1 "chevrolet nova" 155 | 15.0 6 250.0 72.00 3432. 21.0 75 1 "mercury monarch" 156 | 15.0 6 250.0 72.00 3158. 19.5 75 1 "ford maverick" 157 | 16.0 8 400.0 170.0 4668. 11.5 75 1 "pontiac catalina" 158 | 15.0 8 350.0 145.0 4440. 14.0 75 1 "chevrolet bel air" 159 | 16.0 8 318.0 150.0 4498. 14.5 75 1 "plymouth grand fury" 160 | 14.0 8 351.0 148.0 4657. 13.5 75 1 "ford ltd" 161 | 17.0 6 231.0 110.0 3907. 21.0 75 1 "buick century" 162 | 16.0 6 250.0 105.0 3897. 18.5 75 1 "chevroelt chevelle malibu" 163 | 15.0 6 258.0 110.0 3730. 19.0 75 1 "amc matador" 164 | 18.0 6 225.0 95.00 3785. 19.0 75 1 "plymouth fury" 165 | 21.0 6 231.0 110.0 3039. 15.0 75 1 "buick skyhawk" 166 | 20.0 8 262.0 110.0 3221. 13.5 75 1 "chevrolet monza 2+2" 167 | 13.0 8 302.0 129.0 3169. 12.0 75 1 "ford mustang ii" 168 | 29.0 4 97.00 75.00 2171. 16.0 75 3 "toyota corolla" 169 | 23.0 4 140.0 83.00 2639. 17.0 75 1 "ford pinto" 170 | 20.0 6 232.0 100.0 2914. 16.0 75 1 "amc gremlin" 171 | 23.0 4 140.0 78.00 2592. 18.5 75 1 "pontiac astro" 172 | 24.0 4 134.0 96.00 2702. 13.5 75 3 "toyota corona" 173 | 25.0 4 90.00 71.00 2223. 16.5 75 2 "volkswagen dasher" 174 | 24.0 4 119.0 97.00 2545. 17.0 75 3 "datsun 710" 175 | 18.0 6 171.0 97.00 2984. 14.5 75 1 "ford pinto" 176 | 29.0 4 90.00 70.00 1937. 14.0 75 2 "volkswagen rabbit" 177 | 19.0 6 232.0 90.00 3211. 17.0 75 1 "amc pacer" 178 | 23.0 4 115.0 95.00 2694. 15.0 75 2 "audi 100ls" 179 | 23.0 4 120.0 88.00 2957. 17.0 75 2 "peugeot 504" 180 | 22.0 4 121.0 98.00 2945. 14.5 75 2 "volvo 244dl" 181 | 25.0 4 121.0 115.0 2671. 13.5 75 2 "saab 99le" 182 | 33.0 4 91.00 53.00 1795. 17.5 75 3 "honda civic cvcc" 183 | 28.0 4 107.0 86.00 2464. 15.5 76 2 "fiat 131" 184 | 25.0 4 116.0 81.00 2220. 16.9 76 2 "opel 1900" 185 | 25.0 4 140.0 92.00 2572. 14.9 76 1 "capri ii" 186 | 26.0 4 98.00 79.00 2255. 17.7 76 1 "dodge colt" 187 | 27.0 4 101.0 83.00 2202. 15.3 76 2 "renault 12tl" 188 | 17.5 8 305.0 140.0 4215. 13.0 76 1 "chevrolet chevelle malibu classic" 189 | 16.0 8 318.0 150.0 4190. 13.0 76 1 "dodge coronet brougham" 190 | 15.5 8 304.0 120.0 3962. 13.9 76 1 "amc matador" 191 | 14.5 8 351.0 152.0 4215. 12.8 76 1 "ford gran torino" 192 | 22.0 6 225.0 100.0 3233. 15.4 76 1 "plymouth valiant" 193 | 22.0 6 250.0 105.0 3353. 14.5 76 1 "chevrolet nova" 194 | 24.0 6 200.0 81.00 3012. 17.6 76 1 "ford maverick" 195 | 22.5 6 232.0 90.00 3085. 17.6 76 1 "amc hornet" 196 | 29.0 4 85.00 52.00 2035. 22.2 76 1 "chevrolet chevette" 197 | 24.5 4 98.00 60.00 2164. 22.1 76 1 "chevrolet woody" 198 | 29.0 4 90.00 70.00 1937. 14.2 76 2 "vw rabbit" 199 | 33.0 4 91.00 53.00 1795. 17.4 76 3 "honda civic" 200 | 20.0 6 225.0 100.0 3651. 17.7 76 1 "dodge aspen se" 201 | 18.0 6 250.0 78.00 3574. 21.0 76 1 "ford granada ghia" 202 | 18.5 6 250.0 110.0 3645. 16.2 76 1 "pontiac ventura sj" 203 | 17.5 6 258.0 95.00 3193. 17.8 76 1 "amc pacer d/l" 204 | 29.5 4 97.00 71.00 1825. 12.2 76 2 "volkswagen rabbit" 205 | 32.0 4 85.00 70.00 1990. 17.0 76 3 "datsun b-210" 206 | 28.0 4 97.00 75.00 2155. 
16.4 76 3 "toyota corolla" 207 | 26.5 4 140.0 72.00 2565. 13.6 76 1 "ford pinto" 208 | 20.0 4 130.0 102.0 3150. 15.7 76 2 "volvo 245" 209 | 13.0 8 318.0 150.0 3940. 13.2 76 1 "plymouth volare premier v8" 210 | 19.0 4 120.0 88.00 3270. 21.9 76 2 "peugeot 504" 211 | 19.0 6 156.0 108.0 2930. 15.5 76 3 "toyota mark ii" 212 | 16.5 6 168.0 120.0 3820. 16.7 76 2 "mercedes-benz 280s" 213 | 16.5 8 350.0 180.0 4380. 12.1 76 1 "cadillac seville" 214 | 13.0 8 350.0 145.0 4055. 12.0 76 1 "chevy c10" 215 | 13.0 8 302.0 130.0 3870. 15.0 76 1 "ford f108" 216 | 13.0 8 318.0 150.0 3755. 14.0 76 1 "dodge d100" 217 | 31.5 4 98.00 68.00 2045. 18.5 77 3 "honda accord cvcc" 218 | 30.0 4 111.0 80.00 2155. 14.8 77 1 "buick opel isuzu deluxe" 219 | 36.0 4 79.00 58.00 1825. 18.6 77 2 "renault 5 gtl" 220 | 25.5 4 122.0 96.00 2300. 15.5 77 1 "plymouth arrow gs" 221 | 33.5 4 85.00 70.00 1945. 16.8 77 3 "datsun f-10 hatchback" 222 | 17.5 8 305.0 145.0 3880. 12.5 77 1 "chevrolet caprice classic" 223 | 17.0 8 260.0 110.0 4060. 19.0 77 1 "oldsmobile cutlass supreme" 224 | 15.5 8 318.0 145.0 4140. 13.7 77 1 "dodge monaco brougham" 225 | 15.0 8 302.0 130.0 4295. 14.9 77 1 "mercury cougar brougham" 226 | 17.5 6 250.0 110.0 3520. 16.4 77 1 "chevrolet concours" 227 | 20.5 6 231.0 105.0 3425. 16.9 77 1 "buick skylark" 228 | 19.0 6 225.0 100.0 3630. 17.7 77 1 "plymouth volare custom" 229 | 18.5 6 250.0 98.00 3525. 19.0 77 1 "ford granada" 230 | 16.0 8 400.0 180.0 4220. 11.1 77 1 "pontiac grand prix lj" 231 | 15.5 8 350.0 170.0 4165. 11.4 77 1 "chevrolet monte carlo landau" 232 | 15.5 8 400.0 190.0 4325. 12.2 77 1 "chrysler cordoba" 233 | 16.0 8 351.0 149.0 4335. 14.5 77 1 "ford thunderbird" 234 | 29.0 4 97.00 78.00 1940. 14.5 77 2 "volkswagen rabbit custom" 235 | 24.5 4 151.0 88.00 2740. 16.0 77 1 "pontiac sunbird coupe" 236 | 26.0 4 97.00 75.00 2265. 18.2 77 3 "toyota corolla liftback" 237 | 25.5 4 140.0 89.00 2755. 15.8 77 1 "ford mustang ii 2+2" 238 | 30.5 4 98.00 63.00 2051. 17.0 77 1 "chevrolet chevette" 239 | 33.5 4 98.00 83.00 2075. 15.9 77 1 "dodge colt m/m" 240 | 30.0 4 97.00 67.00 1985. 16.4 77 3 "subaru dl" 241 | 30.5 4 97.00 78.00 2190. 14.1 77 2 "volkswagen dasher" 242 | 22.0 6 146.0 97.00 2815. 14.5 77 3 "datsun 810" 243 | 21.5 4 121.0 110.0 2600. 12.8 77 2 "bmw 320i" 244 | 21.5 3 80.00 110.0 2720. 13.5 77 3 "mazda rx-4" 245 | 43.1 4 90.00 48.00 1985. 21.5 78 2 "volkswagen rabbit custom diesel" 246 | 36.1 4 98.00 66.00 1800. 14.4 78 1 "ford fiesta" 247 | 32.8 4 78.00 52.00 1985. 19.4 78 3 "mazda glc deluxe" 248 | 39.4 4 85.00 70.00 2070. 18.6 78 3 "datsun b210 gx" 249 | 36.1 4 91.00 60.00 1800. 16.4 78 3 "honda civic cvcc" 250 | 19.9 8 260.0 110.0 3365. 15.5 78 1 "oldsmobile cutlass salon brougham" 251 | 19.4 8 318.0 140.0 3735. 13.2 78 1 "dodge diplomat" 252 | 20.2 8 302.0 139.0 3570. 12.8 78 1 "mercury monarch ghia" 253 | 19.2 6 231.0 105.0 3535. 19.2 78 1 "pontiac phoenix lj" 254 | 20.5 6 200.0 95.00 3155. 18.2 78 1 "chevrolet malibu" 255 | 20.2 6 200.0 85.00 2965. 15.8 78 1 "ford fairmont (auto)" 256 | 25.1 4 140.0 88.00 2720. 15.4 78 1 "ford fairmont (man)" 257 | 20.5 6 225.0 100.0 3430. 17.2 78 1 "plymouth volare" 258 | 19.4 6 232.0 90.00 3210. 17.2 78 1 "amc concord" 259 | 20.6 6 231.0 105.0 3380. 15.8 78 1 "buick century special" 260 | 20.8 6 200.0 85.00 3070. 16.7 78 1 "mercury zephyr" 261 | 18.6 6 225.0 110.0 3620. 18.7 78 1 "dodge aspen" 262 | 18.1 6 258.0 120.0 3410. 15.1 78 1 "amc concord d/l" 263 | 19.2 8 305.0 145.0 3425. 13.2 78 1 "chevrolet monte carlo landau" 264 | 17.7 6 231.0 165.0 3445. 
13.4 78 1 "buick regal sport coupe (turbo)" 265 | 18.1 8 302.0 139.0 3205. 11.2 78 1 "ford futura" 266 | 17.5 8 318.0 140.0 4080. 13.7 78 1 "dodge magnum xe" 267 | 30.0 4 98.00 68.00 2155. 16.5 78 1 "chevrolet chevette" 268 | 27.5 4 134.0 95.00 2560. 14.2 78 3 "toyota corona" 269 | 27.2 4 119.0 97.00 2300. 14.7 78 3 "datsun 510" 270 | 30.9 4 105.0 75.00 2230. 14.5 78 1 "dodge omni" 271 | 21.1 4 134.0 95.00 2515. 14.8 78 3 "toyota celica gt liftback" 272 | 23.2 4 156.0 105.0 2745. 16.7 78 1 "plymouth sapporo" 273 | 23.8 4 151.0 85.00 2855. 17.6 78 1 "oldsmobile starfire sx" 274 | 23.9 4 119.0 97.00 2405. 14.9 78 3 "datsun 200-sx" 275 | 20.3 5 131.0 103.0 2830. 15.9 78 2 "audi 5000" 276 | 17.0 6 163.0 125.0 3140. 13.6 78 2 "volvo 264gl" 277 | 21.6 4 121.0 115.0 2795. 15.7 78 2 "saab 99gle" 278 | 16.2 6 163.0 133.0 3410. 15.8 78 2 "peugeot 604sl" 279 | 31.5 4 89.00 71.00 1990. 14.9 78 2 "volkswagen scirocco" 280 | 29.5 4 98.00 68.00 2135. 16.6 78 3 "honda accord lx" 281 | 21.5 6 231.0 115.0 3245. 15.4 79 1 "pontiac lemans v6" 282 | 19.8 6 200.0 85.00 2990. 18.2 79 1 "mercury zephyr 6" 283 | 22.3 4 140.0 88.00 2890. 17.3 79 1 "ford fairmont 4" 284 | 20.2 6 232.0 90.00 3265. 18.2 79 1 "amc concord dl 6" 285 | 20.6 6 225.0 110.0 3360. 16.6 79 1 "dodge aspen 6" 286 | 17.0 8 305.0 130.0 3840. 15.4 79 1 "chevrolet caprice classic" 287 | 17.6 8 302.0 129.0 3725. 13.4 79 1 "ford ltd landau" 288 | 16.5 8 351.0 138.0 3955. 13.2 79 1 "mercury grand marquis" 289 | 18.2 8 318.0 135.0 3830. 15.2 79 1 "dodge st. regis" 290 | 16.9 8 350.0 155.0 4360. 14.9 79 1 "buick estate wagon (sw)" 291 | 15.5 8 351.0 142.0 4054. 14.3 79 1 "ford country squire (sw)" 292 | 19.2 8 267.0 125.0 3605. 15.0 79 1 "chevrolet malibu classic (sw)" 293 | 18.5 8 360.0 150.0 3940. 13.0 79 1 "chrysler lebaron town @ country (sw)" 294 | 31.9 4 89.00 71.00 1925. 14.0 79 2 "vw rabbit custom" 295 | 34.1 4 86.00 65.00 1975. 15.2 79 3 "maxda glc deluxe" 296 | 35.7 4 98.00 80.00 1915. 14.4 79 1 "dodge colt hatchback custom" 297 | 27.4 4 121.0 80.00 2670. 15.0 79 1 "amc spirit dl" 298 | 25.4 5 183.0 77.00 3530. 20.1 79 2 "mercedes benz 300d" 299 | 23.0 8 350.0 125.0 3900. 17.4 79 1 "cadillac eldorado" 300 | 27.2 4 141.0 71.00 3190. 24.8 79 2 "peugeot 504" 301 | 23.9 8 260.0 90.00 3420. 22.2 79 1 "oldsmobile cutlass salon brougham" 302 | 34.2 4 105.0 70.00 2200. 13.2 79 1 "plymouth horizon" 303 | 34.5 4 105.0 70.00 2150. 14.9 79 1 "plymouth horizon tc3" 304 | 31.8 4 85.00 65.00 2020. 19.2 79 3 "datsun 210" 305 | 37.3 4 91.00 69.00 2130. 14.7 79 2 "fiat strada custom" 306 | 28.4 4 151.0 90.00 2670. 16.0 79 1 "buick skylark limited" 307 | 28.8 6 173.0 115.0 2595. 11.3 79 1 "chevrolet citation" 308 | 26.8 6 173.0 115.0 2700. 12.9 79 1 "oldsmobile omega brougham" 309 | 33.5 4 151.0 90.00 2556. 13.2 79 1 "pontiac phoenix" 310 | 41.5 4 98.00 76.00 2144. 14.7 80 2 "vw rabbit" 311 | 38.1 4 89.00 60.00 1968. 18.8 80 3 "toyota corolla tercel" 312 | 32.1 4 98.00 70.00 2120. 15.5 80 1 "chevrolet chevette" 313 | 37.2 4 86.00 65.00 2019. 16.4 80 3 "datsun 310" 314 | 28.0 4 151.0 90.00 2678. 16.5 80 1 "chevrolet citation" 315 | 26.4 4 140.0 88.00 2870. 18.1 80 1 "ford fairmont" 316 | 24.3 4 151.0 90.00 3003. 20.1 80 1 "amc concord" 317 | 19.1 6 225.0 90.00 3381. 18.7 80 1 "dodge aspen" 318 | 34.3 4 97.00 78.00 2188. 15.8 80 2 "audi 4000" 319 | 29.8 4 134.0 90.00 2711. 15.5 80 3 "toyota corona liftback" 320 | 31.3 4 120.0 75.00 2542. 17.5 80 3 "mazda 626" 321 | 37.0 4 119.0 92.00 2434. 15.0 80 3 "datsun 510 hatchback" 322 | 32.2 4 108.0 75.00 2265. 
15.2 80 3 "toyota corolla" 323 | 46.6 4 86.00 65.00 2110. 17.9 80 3 "mazda glc" 324 | 27.9 4 156.0 105.0 2800. 14.4 80 1 "dodge colt" 325 | 40.8 4 85.00 65.00 2110. 19.2 80 3 "datsun 210" 326 | 44.3 4 90.00 48.00 2085. 21.7 80 2 "vw rabbit c (diesel)" 327 | 43.4 4 90.00 48.00 2335. 23.7 80 2 "vw dasher (diesel)" 328 | 36.4 5 121.0 67.00 2950. 19.9 80 2 "audi 5000s (diesel)" 329 | 30.0 4 146.0 67.00 3250. 21.8 80 2 "mercedes-benz 240d" 330 | 44.6 4 91.00 67.00 1850. 13.8 80 3 "honda civic 1500 gl" 331 | 40.9 4 85.00 ? 1835. 17.3 80 2 "renault lecar deluxe" 332 | 33.8 4 97.00 67.00 2145. 18.0 80 3 "subaru dl" 333 | 29.8 4 89.00 62.00 1845. 15.3 80 2 "vokswagen rabbit" 334 | 32.7 6 168.0 132.0 2910. 11.4 80 3 "datsun 280-zx" 335 | 23.7 3 70.00 100.0 2420. 12.5 80 3 "mazda rx-7 gs" 336 | 35.0 4 122.0 88.00 2500. 15.1 80 2 "triumph tr7 coupe" 337 | 23.6 4 140.0 ? 2905. 14.3 80 1 "ford mustang cobra" 338 | 32.4 4 107.0 72.00 2290. 17.0 80 3 "honda accord" 339 | 27.2 4 135.0 84.00 2490. 15.7 81 1 "plymouth reliant" 340 | 26.6 4 151.0 84.00 2635. 16.4 81 1 "buick skylark" 341 | 25.8 4 156.0 92.00 2620. 14.4 81 1 "dodge aries wagon (sw)" 342 | 23.5 6 173.0 110.0 2725. 12.6 81 1 "chevrolet citation" 343 | 30.0 4 135.0 84.00 2385. 12.9 81 1 "plymouth reliant" 344 | 39.1 4 79.00 58.00 1755. 16.9 81 3 "toyota starlet" 345 | 39.0 4 86.00 64.00 1875. 16.4 81 1 "plymouth champ" 346 | 35.1 4 81.00 60.00 1760. 16.1 81 3 "honda civic 1300" 347 | 32.3 4 97.00 67.00 2065. 17.8 81 3 "subaru" 348 | 37.0 4 85.00 65.00 1975. 19.4 81 3 "datsun 210 mpg" 349 | 37.7 4 89.00 62.00 2050. 17.3 81 3 "toyota tercel" 350 | 34.1 4 91.00 68.00 1985. 16.0 81 3 "mazda glc 4" 351 | 34.7 4 105.0 63.00 2215. 14.9 81 1 "plymouth horizon 4" 352 | 34.4 4 98.00 65.00 2045. 16.2 81 1 "ford escort 4w" 353 | 29.9 4 98.00 65.00 2380. 20.7 81 1 "ford escort 2h" 354 | 33.0 4 105.0 74.00 2190. 14.2 81 2 "volkswagen jetta" 355 | 34.5 4 100.0 ? 2320. 15.8 81 2 "renault 18i" 356 | 33.7 4 107.0 75.00 2210. 14.4 81 3 "honda prelude" 357 | 32.4 4 108.0 75.00 2350. 16.8 81 3 "toyota corolla" 358 | 32.9 4 119.0 100.0 2615. 14.8 81 3 "datsun 200sx" 359 | 31.6 4 120.0 74.00 2635. 18.3 81 3 "mazda 626" 360 | 28.1 4 141.0 80.00 3230. 20.4 81 2 "peugeot 505s turbo diesel" 361 | 30.7 6 145.0 76.00 3160. 19.6 81 2 "volvo diesel" 362 | 25.4 6 168.0 116.0 2900. 12.6 81 3 "toyota cressida" 363 | 24.2 6 146.0 120.0 2930. 13.8 81 3 "datsun 810 maxima" 364 | 22.4 6 231.0 110.0 3415. 15.8 81 1 "buick century" 365 | 26.6 8 350.0 105.0 3725. 19.0 81 1 "oldsmobile cutlass ls" 366 | 20.2 6 200.0 88.00 3060. 17.1 81 1 "ford granada gl" 367 | 17.6 6 225.0 85.00 3465. 16.6 81 1 "chrysler lebaron salon" 368 | 28.0 4 112.0 88.00 2605. 19.6 82 1 "chevrolet cavalier" 369 | 27.0 4 112.0 88.00 2640. 18.6 82 1 "chevrolet cavalier wagon" 370 | 34.0 4 112.0 88.00 2395. 18.0 82 1 "chevrolet cavalier 2-door" 371 | 31.0 4 112.0 85.00 2575. 16.2 82 1 "pontiac j2000 se hatchback" 372 | 29.0 4 135.0 84.00 2525. 16.0 82 1 "dodge aries se" 373 | 27.0 4 151.0 90.00 2735. 18.0 82 1 "pontiac phoenix" 374 | 24.0 4 140.0 92.00 2865. 16.4 82 1 "ford fairmont futura" 375 | 23.0 4 151.0 ? 3035. 20.5 82 1 "amc concord dl" 376 | 36.0 4 105.0 74.00 1980. 15.3 82 2 "volkswagen rabbit l" 377 | 37.0 4 91.00 68.00 2025. 18.2 82 3 "mazda glc custom l" 378 | 31.0 4 91.00 68.00 1970. 17.6 82 3 "mazda glc custom" 379 | 38.0 4 105.0 63.00 2125. 14.7 82 1 "plymouth horizon miser" 380 | 36.0 4 98.00 70.00 2125. 17.3 82 1 "mercury lynx l" 381 | 36.0 4 120.0 88.00 2160. 
14.5 82 3 "nissan stanza xe" 382 | 36.0 4 107.0 75.00 2205. 14.5 82 3 "honda accord" 383 | 34.0 4 108.0 70.00 2245 16.9 82 3 "toyota corolla" 384 | 38.0 4 91.00 67.00 1965. 15.0 82 3 "honda civic" 385 | 32.0 4 91.00 67.00 1965. 15.7 82 3 "honda civic (auto)" 386 | 38.0 4 91.00 67.00 1995. 16.2 82 3 "datsun 310 gx" 387 | 25.0 6 181.0 110.0 2945. 16.4 82 1 "buick century limited" 388 | 38.0 6 262.0 85.00 3015. 17.0 82 1 "oldsmobile cutlass ciera (diesel)" 389 | 26.0 4 156.0 92.00 2585. 14.5 82 1 "chrysler lebaron medallion" 390 | 22.0 6 232.0 112.0 2835 14.7 82 1 "ford granada l" 391 | 32.0 4 144.0 96.00 2665. 13.9 82 3 "toyota celica gt" 392 | 36.0 4 135.0 84.00 2370. 13.0 82 1 "dodge charger 2.2" 393 | 27.0 4 151.0 90.00 2950. 17.3 82 1 "chevrolet camaro" 394 | 27.0 4 140.0 86.00 2790. 15.6 82 1 "ford mustang gl" 395 | 44.0 4 97.00 52.00 2130. 24.6 82 2 "vw pickup" 396 | 32.0 4 135.0 84.00 2295. 11.6 82 1 "dodge rampage" 397 | 28.0 4 120.0 79.00 2625. 18.6 82 1 "ford ranger" 398 | 31.0 4 119.0 82.00 2720. 19.4 82 1 "chevy s-10" 399 |
--------------------------------------------------------------------------------
/Lecture 1 - Linear Regression and Gradient Descent/gradient.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt

import numpy as np

def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
    """
    Generates a data set of (x,y) pairs with an underlying regularity. y is a
    function of x in the form of

    y = f(x) + e

    Where f(x) is specified by the *func* argument and e is a random Gaussian
    noise specified by *mu* and *sigma*.
    """

    # Generate x
    x = np.arange(start, stop, step)

    # Generate random noise
    e = np.random.normal(mu, sigma, x.size)

    # Generate y values as y = func(x) + e
    y = np.zeros(x.size)

    for ind in range(x.size):
        y[ind] = func(x[ind]) + e[ind]

    return (x,y)

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """

    return x.dot(w)

def gradient_descent(x, y, w, max_iter, alpha = 0.05):
    """
    Performs gradient descent to optimise w.

    Keyword arguments:

    *x* : Numpy array
        matrix of independent variables

    *y* : Numpy array
        columnar vector of target values

    *w* : Numpy array
        initial model parameters

    *max_iter* : int
        maximum number of iterations

    *alpha* : float, optional
        learning rate (defaults to 0.05)

    Returns:

    *J_hist* : Numpy array
        values of J(w) at each iteration

    *w* : Numpy array
        estimated model parameters
    """

    N = y.shape[0]

    J_hist = np.zeros(max_iter)

    print("\nGradient descent starts\n")

    for i in range(0, max_iter):

        J = np.sum( (y_hat(x, w) - y) ** 2 ) / (2 * N)

        J_hist[i] = J

        print("Iteration %d, J(w): %f\n" % (i, J))

        # Gradient of the cost function: X^T (Xw - y) / N
        gradient = np.dot(x.T, y_hat(x, w) - y) / N

        w = w - alpha * gradient

    print("Gradient descent finished.\n")

    return (J_hist, w)

def main():

    # Initialise the data set

    np.random.seed(100)

    (x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x))

    x = np.array([x]).T
    y = np.array([y]).T

    # Add ones for w_0
    mat_ones = np.ones(shape=(x.shape[0], 2))
    mat_ones[:,1] = x[:,0]
    x = mat_ones

    # Print the X and y
    print("X:")
    print(x)

    print("\nY:")
    print(y)

    m,n = np.shape(x)

    # Initialise w with ones
    w = np.array([np.ones(n)]).T

    # Perform gradient descent
    (j_hist, w) = gradient_descent(x, y, w, 10)

    print("Model parameters:\n")
    print(w)

    # Plot X and y
    f, (ax1,ax2) = plt.subplots(1, 2, figsize=(7,7))
    ax1.scatter(x[:,1], y)

    # Plot the regression line
    ax1.plot(x[:,1], y_hat(x, w), color='r')
    ax1.grid(True)

    # Plot the change of J(w)
    x = np.arange(1,j_hist.size + 1)
    y = j_hist

    ax2.plot(x, j_hist)
    ax2.grid(True)

    plt.show()

if __name__ == "__main__":
    main()
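
The update implemented by gradient_descent() above is w <- w - alpha * grad J(w), with grad J(w) = X^T (Xw - y) / N. Because J is quadratic, a central finite-difference approximation should agree with the analytic gradient to machine precision. A minimal self-contained sketch (not part of the lecture code) that checks this on a toy problem:

import numpy as np

# Toy problem: 5 points, intercept + slope
X = np.hstack((np.ones((5, 1)), np.arange(5.0).reshape(5, 1)))
y = np.array([[1.0], [3.0], [5.0], [7.0], [9.0]])
w = np.array([[0.5], [0.5]])
N = y.shape[0]

J = lambda w: np.sum((X.dot(w) - y) ** 2) / (2 * N)

# Analytic gradient, exactly as in gradient_descent()
analytic = X.T.dot(X.dot(w) - y) / N

# Central finite differences
eps = 1e-6
numeric = np.zeros_like(w)
for i in range(w.size):
    d = np.zeros_like(w)
    d[i] = eps
    numeric[i] = (J(w + d) - J(w - d)) / (2 * eps)

print(np.allclose(analytic, numeric))   # expect True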
--------------------------------------------------------------------------------
/Lecture 1 - Linear Regression and Gradient Descent/linreg-sklearn.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import numpy as np

from sklearn import linear_model

def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
    """
    Generates a data set of (x,y) pairs with an underlying regularity. y is a
    function of x in the form of

    y = f(x) + e

    Where f(x) is specified by the *func* argument and e is a random Gaussian
    noise specified by *mu* and *sigma*.
    """

    # Generate x
    x = np.arange(start, stop, step)

    # Generate random noise
    e = np.random.normal(mu, sigma, x.size)

    # Generate y values as y = func(x) + e
    y = np.zeros(x.size)

    for ind in range(x.size):
        y[ind] = func(x[ind]) + e[ind]

    return (x,y)

def main():

    # Initialise the data set
    np.random.seed(100)

    (x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x))

    x = np.array([x]).T
    y = np.array([y]).T

    # Fit a scikit-learn linear model
    regr = linear_model.LinearRegression()

    regr.fit(x, y)

    # Print model parameters
    print("Model parameters:\n")
    print(regr.intercept_)
    print(regr.coef_)

if __name__ == "__main__":
    main()
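
scikit-learn's LinearRegression solves the same ordinary least squares problem, so its intercept_ and coef_ can be cross-checked against numpy's least-squares solver. A short sketch that regenerates the same synthetic data (same seed and same recipe as corr_vars above):

import numpy as np

np.random.seed(100)
x = np.arange(1, 10)
y = 4 * np.log2(x) + np.random.normal(0, 2, x.size)

# Design matrix with a leading column of ones for the intercept
X = np.hstack((np.ones((x.size, 1)), x.reshape(-1, 1)))

w, *_ = np.linalg.lstsq(X, y, rcond=None)
print(w)   # w[0] should match regr.intercept_ and w[1] regr.coef_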
--------------------------------------------------------------------------------
/Lecture 2 - Gradient Descent and Normal Equations/Gradient_Descent_and_Normal_Equations.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 2 - Gradient Descent and Normal Equations/Gradient_Descent_and_Normal_Equations.pdf
--------------------------------------------------------------------------------
/Lecture 2 - Gradient Descent and Normal Equations/descent-normal-autompg.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt

import numpy as np

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """
    return x.dot(w)

def gradient_descent(x, y, w, max_iter, alpha = 0.05):

    N = y.shape[0]

    J_hist = np.zeros(max_iter)

    print("\nGradient descent starts\n")

    for i in range(0, max_iter):

        J = np.sum( (y_hat(x, w) - y) ** 2 ) / (2 * N)

        J_hist[i] = J

        print("Iteration %d, J(w): %f\n" % (i, J))

        gradient = np.dot(x.T, y_hat(x, w) - y) / N

        w = w - alpha * gradient

    print("Gradient descent finished.\n")

    return (J_hist, w)

def main():

    np.random.seed(100)

    # Load the data set
    # We use Auto MPG from UCI Machine Learning Repository
    # https://archive.ics.uci.edu/ml/datasets/Auto+MPG

    car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))

    # Drop rows with missing values ("?" becomes NaN)
    car_data = car_data[~np.isnan(car_data).any(axis=1)]

    # Assign Horsepower attribute to x and MPG to y
    x = car_data[:,1]
    y = car_data[:,0]

    x = np.array([x]).T
    y = np.array([y]).T

    # Normalize (z-score standardisation)
    x = (x - np.mean(x)) / np.std(x)

    # Add ones for w_0
    x = np.hstack((np.array([np.ones(x.shape[0])]).T, x))

    # Initialise model parameters
    w = np.array([np.zeros(x.shape[1])]).T

    (j_hist, w) = gradient_descent(x, y, w, 20, 0.5)

    print("Gradient Descent Model parameters:\n")
    print(w, '\n')

    # Normal equations method: w = (X^T X)^-1 X^T y
    xTx = np.linalg.inv(x.T.dot(x))
    xTy = x.T.dot(y)
    w = xTx.dot(xTy)

    print("Normal Equations Model parameters:\n")
    print(w)

    f, (ax1,ax2) = plt.subplots(1, 2, figsize=(12,8))
    ax1.scatter(x[:,1], y)
    ax1.plot(x[:,1], y_hat(x, w), color='r')
    ax1.set_title("Horsepower vs MPG")
    ax1.grid(True)

    x = np.arange(1,j_hist.size + 1)
    y = j_hist

    ax2.plot(x, j_hist)
    ax2.set_title("J(w)")
    ax2.grid(True)

    plt.show()

if __name__ == "__main__":
    main()
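
The z-score standardisation above is what lets the relatively large learning rate of 0.5 converge: it puts the columns of X on a comparable scale, so the error surface is far less elongated. One way to see this, sketched below on synthetic values standing in for the raw horsepower column (an assumption for illustration, not the actual data load), is the condition number of X^T X before and after standardisation:

import numpy as np

np.random.seed(0)
hp = np.random.uniform(50, 230, size=(200, 1))   # stand-in for horsepower

def design(col):
    # Prepend the column of ones for w_0
    return np.hstack((np.ones((col.shape[0], 1)), col))

raw = design(hp)
std = design((hp - hp.mean()) / hp.std())

print(np.linalg.cond(raw.T.dot(raw)))   # large: requires a tiny alpha
print(np.linalg.cond(std.T.dot(std)))   # close to 1: alpha = 0.5 is fine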
--------------------------------------------------------------------------------
/Lecture 2 - Gradient Descent and Normal Equations/error_surface.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt

import numpy as np

from mpl_toolkits.mplot3d import Axes3D

def J(x,y,w_0,w_1):
    """
    Sum of squared errors of the model y_hat = w_0 + w_1 * x, averaged over
    the N samples (the scale only affects the z axis of the plot).
    """
    N = y.shape[0]
    J = 0
    for i in range(0, len(x)):
        J = J + ((w_0 + w_1 * x[i]) - y[i] ) ** 2
    return J / N

def main():
    # Initialise the data set

    car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))
    car_data = car_data[~np.isnan(car_data).any(axis=1)]

    # Assign Horsepower attribute to x and MPG to y
    x = car_data[:,1]
    y = car_data[:,0]

    x = np.array([x]).T
    y = np.array([y]).T

    # Normalize
    x = (x - np.mean(x)) / np.std(x)

    # Evaluate J(w_0, w_1) over a 50x50 grid of parameter values
    w_0 = np.linspace(-300.0, 300.0, 50)
    w_1 = np.linspace(-300.0, 300.0, 50)

    W0, W1 = np.meshgrid(w_0, w_1)

    E = np.array([J(x, y, w_0, w_1) for w_0, w_1 in zip(np.ravel(W0), np.ravel(W1))])

    E = E.reshape(W0.shape)

    fig = plt.figure(figsize=(7,6))
    ax = fig.add_subplot(111, projection='3d')

    ax.plot_surface(W0, W1, E, rstride=1, cstride=1, color='b', alpha=0.5)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_zticklabels([])

    ax.set_xlabel('$w_0$', fontsize=16)
    ax.set_ylabel('$w_1$', fontsize=16)
    ax.set_zlabel('$J(w_0, w_1)$', fontsize=16)

    plt.show()

    '''
    # Optional: contour plot of the same error surface
    plt.figure()
    CS = plt.contour(W0, W1, E)
    plt.clabel(CS, inline=1, fontsize=10)
    plt.title('Error surface contours')
    '''

if __name__ == "__main__":
    main()
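
The bottom of the bowl plotted by error_surface.py coincides with the least-squares solution, so the grid cell with the smallest error should land near the normal-equations parameters (up to the 50x50 grid resolution). A quick check, assuming the W0, W1 and E arrays from main() are available, for example in an interactive session:

import numpy as np

# Assumes W0, W1 and E from error_surface.py are in scope
i, j = np.unravel_index(np.argmin(E), E.shape)
print("Grid minimum near w_0 = %.1f, w_1 = %.1f" % (W0[i, j], W1[i, j]))
# With standardised x the optimum is w_0 = mean(y) and w_1 = cov(x, y) / var(x)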
--------------------------------------------------------------------------------
/Lecture 2 - Gradient Descent and Normal Equations/gradient.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt

import numpy as np

def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
    """
    Generates a data set of (x,y) pairs with an underlying regularity. y is a
    function of x in the form of

    y = f(x) + e

    Where f(x) is specified by the *func* argument and e is a random Gaussian
    noise specified by *mu* and *sigma*.
    """

    # Generate x
    x = np.arange(start, stop, step)

    # Generate random noise
    e = np.random.normal(mu, sigma, x.size)

    # Generate y values as y = func(x) + e
    y = np.zeros(x.size)

    for ind in range(x.size):
        y[ind] = func(x[ind]) + e[ind]

    return (x,y)

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """

    return x.dot(w)

def gradient_descent(x, y, w, max_iter, alpha = 0.001):
    """
    Performs gradient descent to optimise w.

    Keyword arguments:

    *x* : Numpy array
        matrix of independent variables

    *y* : Numpy array
        columnar vector of target values

    *w* : Numpy array
        initial model parameters

    *max_iter* : int
        maximum number of iterations

    *alpha* : float, optional
        learning rate (defaults to 0.001)

    Returns:

    *J_hist* : Numpy array
        values of J(w) at each iteration

    *w* : Numpy array
        estimated model parameters
    """

    N = y.shape[0]

    J_hist = np.zeros(max_iter)

    print("\nGradient descent starts\n")

    for i in range(0, max_iter):

        J = np.sum( (y_hat(x, w) - y) ** 2 ) / (2 * N)

        J_hist[i] = J

        print("Iteration %d, J(w): %f\n" % (i, J))

        gradient = np.dot(x.T, y_hat(x, w) - y) / N

        w = w - alpha * gradient

    print("Gradient descent finished.\n")

    return (J_hist, w)

def main():

    # Initialise the data set

    np.random.seed(100)

    (x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x))

    x = np.array([x]).T
    y = np.array([y]).T

    # Add ones for w_0
    mat_ones = np.ones(shape=(x.shape[0], 2))
    mat_ones[:,1] = x[:,0]
    x = mat_ones

    # Print the X and y
    print("X:")
    print(x)

    print("\nY:")
    print(y)

    m,n = np.shape(x)

    # Initialise w with ones
    w = np.array([np.ones(n)]).T

    # Perform gradient descent with a larger learning rate
    # (watch how J(w) evolves in the right-hand plot)
    (j_hist, w) = gradient_descent(x, y, w, 10, 0.1)

    print("Model parameters:\n")
    print(w)

    # Plot X and y
    f, (ax1,ax2) = plt.subplots(1, 2, figsize=(7,7))
    ax1.scatter(x[:,1], y)

    # Plot the regression line
    ax1.plot(x[:,1], y_hat(x, w), color='r')
    ax1.grid(True)

    # Plot the change of J(w)
    x = np.arange(1,j_hist.size + 1)
    y = j_hist

    ax2.plot(x, j_hist)
    ax2.grid(True)

    plt.show()

if __name__ == "__main__":
    main()
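
The only functional difference from the Lecture 1 copy of this script is the learning rate: the default drops from 0.05 to 0.001, and the call in main() uses 0.1. On this data the largest eigenvalue of X^T X / N is roughly 32, so the descent is only stable for alpha below about 2/32 = 0.06; 0.1 overshoots and J(w) grows. A small self-contained sweep (not part of the lecture code) that makes the sensitivity visible:

import numpy as np

np.random.seed(100)
x = np.arange(1, 10)
y = (4 * np.log2(x) + np.random.normal(0, 2, x.size)).reshape(-1, 1)

X = np.hstack((np.ones((x.size, 1)), x.reshape(-1, 1)))
N = y.shape[0]

for alpha in (0.001, 0.01, 0.05, 0.1):
    w = np.ones((2, 1))
    for _ in range(10):
        w = w - alpha * X.T.dot(X.dot(w) - y) / N
    J = np.sum((X.dot(w) - y) ** 2) / (2 * N)
    print("alpha = %.3f  ->  J(w) after 10 iterations: %g" % (alpha, J))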
--------------------------------------------------------------------------------
/Lecture 2 - Gradient Descent and Normal Equations/linreg-normal_equations.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import numpy as np

def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
    """
    Generates a data set of (x,y) pairs with an underlying regularity. y is a
    function of x in the form of

    y = f(x) + e

    Where f(x) is specified by the *func* argument and e is a random Gaussian
    noise specified by *mu* and *sigma*.
    """

    # Generate x
    x = np.arange(start, stop, step)

    # Generate random noise
    e = np.random.normal(mu, sigma, x.size)

    # Generate y values as y = func(x) + e
    y = np.zeros(x.size)

    for ind in range(x.size):
        y[ind] = func(x[ind]) + e[ind]

    return (x,y)

def main():

    # Initialise the data set
    np.random.seed(100)

    (x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x))

    x = np.array([x]).T
    y = np.array([y]).T

    # Add ones for w_0
    mat_ones = np.ones(shape=(x.shape[0], 2))
    mat_ones[:,1] = x[:,0]
    x = mat_ones

    # Normal equations method: w = (X^T X)^-1 X^T y
    xTx_inv = np.linalg.inv(x.T.dot(x))
    xTy = x.T.dot(y)
    w = xTx_inv.dot(xTy)

    print("Model parameters:\n")
    print(w)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/Lecture3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 3 - Curve Fitting and Model Validation/Lecture3.pdf
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/README.md:
--------------------------------------------------------------------------------
## Curve fitting and cross validation examples

Code examples used in Lecture 3. The scripts were written against scikit-learn 0.17; a compatibility note for newer versions follows this README.

* polyfit.py - Polynomial regression against linear data with Gaussian noise
* polyfit-auto-mpg.py - Polynomial regression against the Auto MPG data set
* polyfit-auto-mpg-cv.py - k-fold Cross Validation for a polynomial regression model using the Auto MPG data set
* polyfit-auto-mpg-t-test.py - T test for a polynomial regression model using the Auto MPG data set
* polyfit-generalisation.py - Hold out validation for a polynomial regression model using the Auto MPG data set
* residuals-auto-mpg.py - Residuals plots for a polynomial regression model using the Auto MPG data set
* residuals-random.py - Residuals histogram for linear data with Gaussian noise
* residuals-vs-fitted.py - Fitted vs. residuals plot for linear data with Gaussian noise

This repository contains materials from the London Machine Learning Study Group Meetups.

The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group).

(C) 2016 Nikolay Manchev, London Machine Learning Study Group

This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0).
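
A compatibility note for the scripts in this lecture: they import from sklearn.cross_validation, the module path in scikit-learn 0.17. That module was renamed to sklearn.model_selection in scikit-learn 0.18 and removed in 0.20, and the KFold constructor changed shape. A minimal sketch of the modern equivalents of the two calls used in this lecture:

import numpy as np
from sklearn.model_selection import KFold, train_test_split

x = np.arange(20).reshape(-1, 1)
y = np.arange(20)

# Old API: KFold(n, n_folds=10, shuffle=True), iterated directly
kf = KFold(n_splits=10, shuffle=True)
for train_index, test_index in kf.split(x):
    pass   # x[train_index], x[test_index], ...

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)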
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/polyfit-auto-mpg-cv.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import numpy as np

# (scikit-learn 0.17 API; in 0.18+ this module is sklearn.model_selection)
from sklearn.cross_validation import KFold


def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """
    return x.dot(w)

def polyMatrix(v, order):
    """
    Given a nx1 vector v, the function generates a matrix of the form:

    [ v[0] v[0]^2 ... v[0]^order ]
    [ v[1] v[1]^2 ... v[1]^order ]
    [ ...                        ]
    [ v[n] v[n]^2 ... v[n]^order ]

    """
    vector = v
    v_pow = 2

    while v_pow <= order:
        v = np.hstack((v, np.power(vector, v_pow)))
        v_pow = v_pow + 1

    return v

# Load the data set
# We use Auto MPG from UCI Machine Learning Repository
# https://archive.ics.uci.edu/ml/datasets/Auto+MPG

car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))
car_data = car_data[~np.isnan(car_data).any(axis=1)]

# Assign Horsepower attribute to x and MPG to y
x = car_data[:,1]
y = car_data[:,0]

x = np.array([x]).T
y = np.array([y]).T

# Set the order of the model and generate the X matrix
k = 1
x = polyMatrix(x, k)

# Set the number of folds
folds = 10

# Get the fold indices
kf = KFold(x.shape[0], n_folds = folds, shuffle = True)

# Initialise an array to keep the errors from each iteration
sse = np.zeros(folds)

fold_index = 0

# Perform k-fold cross validation
for train_index, test_index in kf:

    # Get the training and test subsets
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Add ones for w_0
    x_train = np.hstack((np.array([np.ones(x_train.shape[0])]).T, x_train))
    x_test = np.hstack((np.array([np.ones(x_test.shape[0])]).T, x_test))

    # Initialise model parameters
    w = np.array([np.zeros(x_train.shape[1])]).T

    # Normal equations method
    xTx = np.linalg.inv(x_train.T.dot(x_train))
    xTy = x_train.T.dot(y_train)
    w = xTx.dot(xTy)

    # Compute error sum of squares on the held-out fold
    sse[fold_index] = np.sum( (y_hat(x_test, w) - y_test) ** 2)
    print("SSE[%i]: %.2f" % (fold_index, sse[fold_index]))

    fold_index = fold_index + 1

# Print the average error from all folds
print("Average SSE : %.2f" % (np.average(sse)))
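
The same 10-fold estimate can be produced with much less plumbing through scikit-learn's estimator API. A sketch, assuming a modern scikit-learn (LinearRegression adds the intercept itself, so no column of ones is needed):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))
car_data = car_data[~np.isnan(car_data).any(axis=1)]
x = car_data[:, 1].reshape(-1, 1)
y = car_data[:, 0]

# Scores are negated per-sample MSEs rather than per-fold SSEs
scores = cross_val_score(LinearRegression(), x, y, cv=10,
                         scoring="neg_mean_squared_error")
print("Average MSE: %.2f" % -scores.mean())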
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/polyfit-auto-mpg-t-test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt
import numpy as np

from numpy.polynomial.polynomial import polyval
from scipy import stats

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """
    return x.dot(w)

def polyMatrix(v, order):
    """
    Given a nx1 vector v, the function generates a matrix of the form:

    [ v[0] v[0]^2 ... v[0]^order ]
    [ v[1] v[1]^2 ... v[1]^order ]
    [ ...                        ]
    [ v[n] v[n]^2 ... v[n]^order ]

    """
    vector = v
    v_pow = 2

    while v_pow <= order:
        v = np.hstack((v, np.power(vector, v_pow)))
        v_pow = v_pow + 1

    return v

# Load the data set
# We use Auto MPG from UCI Machine Learning Repository
# https://archive.ics.uci.edu/ml/datasets/Auto+MPG

car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))
car_data = car_data[~np.isnan(car_data).any(axis=1)]

# Assign Horsepower attribute to x and MPG to y
x = car_data[:,1]
y = car_data[:,0]

x = np.array([x]).T
y = np.array([y]).T

# Set the order of the model and get the X matrix
k = 1
x = polyMatrix(x, k)

# Add ones for w_0
x = np.hstack((np.array([np.ones(x.shape[0])]).T, x))

# Initialise model parameters
w = np.array([np.zeros(x.shape[1])]).T

# Normal equations method
xTx = np.linalg.inv(x.T.dot(x))
xTy = x.T.dot(y)
w = xTx.dot(xTy)

print("Normal Equations Model parameters:\n")
print(w)

# Plot the data points
f, ax1 = plt.subplots(1, 1, figsize=(7,7))
ax1.scatter(x[:,1], y)

# Plot a smooth curve using the fitted coefficients
x_smooth = np.linspace(x[:,1].min(), x[:,1].max(), 200)
y_smooth = np.squeeze(polyval(x_smooth, w))
ax1.plot(x_smooth, y_smooth, color='r')

ax1.set_title('Auto MPG - MPG vs Horsepower')
ax1.grid(True)

# Compute the p-values
print("\n")
print("Individual Regression Coefficients t Test:\n")

# Compute sum of squared errors
sse = np.sum( (y_hat(x, w) - y) ** 2)
print("SSE:", sse)

# Compute the sample variance s^2 (SSE / degrees of freedom)
n = x.shape[0]
sigma = sse / (n - (k+1))
print("sigma:", sigma)

# Covariance matrix of the coefficient estimates
C = sigma * np.linalg.inv(np.dot(x.T, x))

# Standard errors and test statistics for the coefficients
se = np.sqrt(C.diagonal())

t0w = np.zeros(len(se))
for i in range(len(se)):
    t0w[i] = w[i] / se[i]

# Two-sided p-values for H0: w_i = 0
p_values = 2 * (1 - stats.t.cdf(abs(t0w), float(n - (k+1))))

print("P values:")
for p in p_values:
    print('%f' % p)

plt.show()
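
The t statistics and two-sided p-values computed above can be cross-checked against statsmodels, which reports the same quantities in its OLS results. A sketch, assuming statsmodels is installed and that x and y are the arrays built above (x already includes the column of ones, so no add_constant() call is needed):

import statsmodels.api as sm

model = sm.OLS(y, x).fit()
print(model.tvalues)   # should match t0w
print(model.pvalues)   # two-sided p-values for H0: w_i = 0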
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/polyfit-auto-mpg.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt
import numpy as np

from numpy.polynomial.polynomial import polyval

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """
    return x.dot(w)

def polyMatrix(v, order):
    """
    Given a nx1 vector v, the function generates a matrix of the form:

    [ v[0] v[0]^2 ... v[0]^order ]
    [ v[1] v[1]^2 ... v[1]^order ]
    [ ...                        ]
    [ v[n] v[n]^2 ... v[n]^order ]

    """
    vector = v
    v_pow = 2

    while v_pow <= order:
        v = np.hstack((v, np.power(vector, v_pow)))
        v_pow = v_pow + 1

    return v

# Load the data set
# We use Auto MPG from UCI Machine Learning Repository
# https://archive.ics.uci.edu/ml/datasets/Auto+MPG

car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3))
car_data = car_data[~np.isnan(car_data).any(axis=1)]

# Assign Horsepower attribute to x and MPG to y
x = car_data[:,1]
y = car_data[:,0]

x = np.array([x]).T
y = np.array([y]).T

# Set the order of the model and get the X matrix
k = 1
x = polyMatrix(x, k)

# Add ones for w_0
x = np.hstack((np.array([np.ones(x.shape[0])]).T, x))

# Initialise model parameters
w = np.array([np.zeros(x.shape[1])]).T

# Normal equations method
xTx = np.linalg.inv(x.T.dot(x))
xTy = x.T.dot(y)
w = xTx.dot(xTy)

print("Normal Equations Model parameters:\n")
print(w)

# Plot the data points
f, ax1 = plt.subplots(1, 1, figsize=(7,7))
ax1.scatter(x[:,1], y)

# Plot a smooth curve using the fitted coefficients
x_smooth = np.linspace(x[:,1].min(), x[:,1].max(), 200)
y_smooth = np.squeeze(polyval(x_smooth, w))
ax1.plot(x_smooth, y_smooth, color='r')

ax1.set_title('Auto MPG - MPG vs Horsepower')
ax1.grid(True)

plt.show()
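
polyMatrix() plus the manually prepended column of ones builds an ordinary Vandermonde design matrix, which numpy can generate in one call. A small equivalence sketch:

import numpy as np

v = np.array([[2.0], [3.0], [4.0]])

# polyMatrix(v, 3) with a ones column prepended...
manual = np.hstack((np.ones((3, 1)), v, v ** 2, v ** 3))

# ...is np.vander with increasing powers
vander = np.vander(v.ravel(), 4, increasing=True)

print(np.array_equal(manual, vander))   # True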
--------------------------------------------------------------------------------
/Lecture 3 - Curve Fitting and Model Validation/polyfit-generalisation.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""
(C) 2016 Nikolay Manchev, London Machine Learning Study Group

http://www.meetup.com/London-Machine-Learning-Study-Group/

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import matplotlib.pyplot as plt
import numpy as np

# (scikit-learn 0.17 API; in 0.18+ this module is sklearn.model_selection)
from sklearn.cross_validation import train_test_split

def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):

    # Generate x
    x = np.arange(start, stop, step)

    # Generate random noise
    e = np.random.normal(mu, sigma, x.size)

    # Generate y values as y = func(x) + e
    y = np.zeros(x.size)

    for ind in range(x.size):
        y[ind] = func(x[ind]) + e[ind]

    return (x,y)

def y_hat(x, w):
    """
    Linear regression hypothesis: y_hat = X.w
    """
    return x.dot(w)

def polyMatrix(v, order):
    """
    Given a nx1 vector v, the function generates a matrix of the form:

    [ v[0] v[0]^2 ... v[0]^order ]
    [ v[1] v[1]^2 ... v[1]^order ]
    [ ...                        ]
    [ v[n] v[n]^2 ... v[n]^order ]

    """
    vector = v
    v_pow = 2

    while v_pow <= order:
        v = np.hstack((v, np.power(vector, v_pow)))
        v_pow = v_pow + 1

    return v

def trainPolyFit(x, y, order):
    x = polyMatrix(x, order)

    # Add ones for w_0
    x = np.hstack((np.array([np.ones(x.shape[0])]).T, x))

    # Hold out 20% of the data for testing
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

    # Initialise model parameters
    w = np.array([np.zeros(x.shape[1])]).T

    # Normal equations method
    xTx = np.linalg.inv(x_train.T.dot(x_train))
    xTy = x_train.T.dot(y_train)
    w = xTx.dot(xTy)

    # Compute the errors, normalising each by its own sample count
    N_train = y_train.shape[0]
    N_test = y_test.shape[0]

    train_err = np.sum( (y_hat(x_train, w) - y_train) ** 2 ) / (2 * N_train)
    test_err = np.sum( (y_hat(x_test, w) - y_test) ** 2 ) / (2 * N_test)

    return train_err, test_err


np.random.seed(100)

(x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x))

x = np.array([x]).T
y = np.array([y]).T

# Vary the order of the model and compute the
# training and test errors
errors = np.zeros([5,2])
for order in range(1,len(errors) + 1):
    errors[order-1, ] = trainPolyFit(x, y, order)

print("Training and test errors:\n")
print(errors)

# Set up the figure
f, ax1 = plt.subplots(figsize=(7,7))

# Plot the training and test error curves
x_plot = np.arange(1, len(errors)+1)
ax1.plot(x_plot, errors[:,0], color='b', label="Training")
ax1.plot(x_plot, errors[:,1], color='r', label="Test")
ax1.grid(True)

ax1.legend(loc="upper right", shadow=True)

plt.show()
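
The same vary-the-degree experiment can be phrased with scikit-learn's PolynomialFeatures inside a pipeline, which avoids building the design matrix by hand. A sketch under the same synthetic-data assumptions as the script above:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

np.random.seed(100)
x = np.arange(1, 10).reshape(-1, 1)
y = 4 * np.log2(x.ravel()) + np.random.normal(0, 2, x.shape[0])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

for order in range(1, 6):
    model = make_pipeline(PolynomialFeatures(order), LinearRegression())
    model.fit(x_train, y_train)
    print(order,
          mean_squared_error(y_train, model.predict(x_train)),
          mean_squared_error(y_test, model.predict(x_test)))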
x[n]^order ] 48 | 49 | """ 50 | vector = x 51 | x_pow = 2 52 | 53 | while x_pow <= order: 54 | x = np.hstack((x, np.power(vector, x_pow))) 55 | x_pow = x_pow + 1 56 | 57 | return x 58 | 59 | np.random.seed(100) 60 | 61 | (x,y) = corr_vars(sigma=2, func=lambda x: 4*np.log2(x)) 62 | 63 | x = np.array([x]).T 64 | y = np.array([y]).T 65 | 66 | # Set the order of the model and get the X matrix 67 | k = 1 68 | x = polyMatrix(x, k) 69 | 70 | # Add ones for w_0 71 | x = np.hstack((np.array([np.ones(x.shape[0])]).T, x)) 72 | 73 | # Initialise model parameters 74 | w = np.array([np.zeros(x.shape[1])]).T 75 | 76 | # Print X and y 77 | print(x,'\n') 78 | print(y,'\n') 79 | 80 | # Normal equations method 81 | xTx = np.linalg.inv(x.T.dot(x)) 82 | xTy = x.T.dot(y) 83 | w = xTx.dot(xTy) 84 | 85 | # Print the model parameters 86 | print("Normal Equations Model parameters:\n") 87 | print(w) 88 | 89 | # Plot the data points 90 | f, ax1 = plt.subplots(1, 1, figsize=(7,7)) 91 | ax1.scatter(x[:,1], y) 92 | 93 | # Plot a smooth curve using the fitted coefficients 94 | x_smooth = np.linspace(x[:,1].min(), x[:,1].max(), 200) 95 | f = np.squeeze(polyval(x_smooth, w)) 96 | ax1.plot(x_smooth, f, color='r') 97 | 98 | ax1.set_title('y = 4*log2(x) + e') 99 | ax1.grid(True) 100 | 101 | -------------------------------------------------------------------------------- /Lecture 3 - Curve Fitting and Model Validation/residuals-auto-mpg.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2016 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 
8 | """ 9 | 10 | import numpy as np 11 | 12 | import matplotlib.pyplot as plt 13 | 14 | # Load the data set 15 | # We use Auto MPG from UCI Machine Learning Repository 16 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG 17 | 18 | car_data = np.genfromtxt("auto-mpg.data", usecols=(0, 3)) 19 | car_data = car_data[~np.isnan(car_data).any(axis=1)] 20 | 21 | # Assign Horsepower attribute to x and MPG to y 22 | x = car_data[:,1] 23 | y = car_data[:,0] 24 | 25 | x = np.array([x]).T 26 | y = np.array([y]).T 27 | 28 | # Add ones for w_0 29 | mat_ones = np.ones(shape=(x.shape[0], 2)) 30 | mat_ones[:,1] = x[:,0] 31 | x = mat_ones 32 | 33 | # Normal equations method 34 | xTx_inv = np.linalg.inv(x.T.dot(x)) 35 | xTy = x.T.dot(y) 36 | w = xTx_inv.dot(xTy) 37 | 38 | # Print intercept and slope 39 | print("Model parameters:\n") 40 | print(w) 41 | 42 | # Make predictions on the training set 43 | y_hat = w[0] + w[1]*x[:,1] 44 | 45 | # Get the residuals 46 | y_hat = y_hat.reshape(y_hat.shape[0],-1) 47 | residuals = np.subtract(y_hat, y) 48 | 49 | # Plot a histogram of the residuals 50 | plt.figure(figsize=(10,8)) 51 | n, bins, patches = plt.hist(residuals, 30, facecolor='green', alpha=0.75) 52 | 53 | plt.title("Histogram of Residuals") 54 | plt.grid(True) 55 | plt.show() 56 | 57 | # Plot residuals vs predictions 58 | plt.rcParams.update({'font.size': 15}) 59 | 60 | f, ax = plt.subplots( figsize=(10,8)) 61 | 62 | ax.scatter(y_hat, residuals, s=10) 63 | ax.axhline(0, color='red') 64 | 65 | ax.set_title("Residuals vs fitted") 66 | ax.set_ylabel("Residuals") 67 | ax.set_xlabel("$\hat y$",fontsize=20) 68 | ax.grid(True) 69 | -------------------------------------------------------------------------------- /Lecture 3 - Curve Fitting and Model Validation/residuals-random.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2016 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 
8 | """
9 |
10 | import numpy as np
11 |
12 | import matplotlib.pyplot as plt
13 | from scipy.stats import norm
14 |
15 | def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
16 |
17 | # Generate x
18 | x = np.arange(start, stop, step)
19 |
20 | # Generate random noise
21 | e = np.random.normal(mu, sigma, x.size)
22 |
23 | # Generate y values as y = func(x) + e
24 | y = np.zeros(x.size)
25 |
26 | for ind in range(x.size):
27 | y[ind] = func(x[ind]) + e[ind]
28 |
29 | return (x,y)
30 |
31 | # Populate the data set
32 | np.random.seed(100)
33 |
34 | # Select a linear or non-linear function for the data generation
35 | (x,y) = corr_vars(start=-3, stop=3, sigma=5, step = 0.005, func=lambda x:np.power(x,4) - 3*np.power(x,3) +8*np.power(x,2) + 7*x)
36 | #(x,y) = corr_vars(start=-3, stop=3, sigma=5, step = 0.005, func=lambda x:7+6*x)
37 |
38 | x = np.array([x]).T
39 | y = np.array([y]).T
40 |
41 | # Add ones for w_0
42 | mat_ones = np.ones(shape=(x.size, 2))
43 | mat_ones[:,1] = x[:,0]
44 | x = mat_ones
45 |
46 | # Normal equations method
47 | xTx_inv = np.linalg.inv(x.T.dot(x))
48 | xTy = x.T.dot(y)
49 | w = xTx_inv.dot(xTy)
50 |
51 | print("Model parameters:\n")
52 | print(w)
53 |
54 | # Make predictions on the training set
55 | y_hat = w[0] + w[1]*x[:,1]
56 |
57 | # Get the residuals
58 | y_hat = y_hat.reshape(y_hat.shape[0],-1)
59 | residuals = np.subtract(y_hat, y)
60 |
61 | # Plot the data and the fitted line
62 |
63 | f, (ax1, ax2) = plt.subplots(2, figsize=(10,8))
64 | f.subplots_adjust(hspace=.5)
65 |
66 | ax1.scatter(x[:,1], y, s=10)
67 | ax1.plot(x[:,1], y_hat, color='r')
68 |
69 | ax1.set_title("Data and fitted line")
70 | ax1.set_xlabel("$x$")
71 | ax1.set_ylabel("$y$")
72 | ax1.grid(True)
73 |
74 | # Plot residuals
75 |
76 | n, bins, patches = ax2.hist(residuals, 20, density=True, facecolor='green', alpha=0.75)
77 |
78 | # Plot expected distribution
79 | x_exp = np.linspace(-100, 100)
80 | y_exp = norm.pdf(x_exp, np.mean(residuals), np.std(residuals))
81 | l = ax2.plot(x_exp, y_exp, 'r--', linewidth=1)
82 |
83 | ax2.set_title("Histogram of Residuals")
84 | ax2.grid(True)
85 | ax2.set_xlim([-100,100])
86 |
87 |
88 |
89 |
90 |
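91 | # NOTE (addition, not part of the original lecture script): a quick numerical
92 | # companion to the histogram -- for normally distributed residuals roughly 68%
93 | # should fall within one standard deviation of zero:
94 | within_one_sd = np.mean(np.abs(residuals) <= np.std(residuals))
95 | print("Share of residuals within one std (normal ~ 0.68):", within_one_sd)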
8 | """
9 |
10 | import numpy as np
11 |
12 | import matplotlib.pyplot as plt
13 |
14 | def corr_vars( start=1, stop=10, step=1, mu=0, sigma=3, func=lambda x: x ):
15 |
16 | # Generate x
17 | x = np.arange(start, stop, step)
18 |
19 | # Generate random noise
20 | e = np.random.normal(mu, sigma, x.size)
21 |
22 | # Generate y values as y = func(x) + e
23 | y = np.zeros(x.size)
24 |
25 | for ind in range(x.size):
26 | y[ind] = func(x[ind]) + e[ind]
27 |
28 | return (x,y)
29 |
30 | # Initialise the data set
31 | np.random.seed(100)
32 |
33 | # Generate data using one of the two functions (linear and non-linear)
34 | #(x,y) = corr_vars(start=-3, stop=3, sigma=5, step = 0.005, func=lambda x:np.power(x,4) - 3*np.power(x,3) +8*np.power(x,2) + 7*x)
35 | (x,y) = corr_vars(start=-3, stop=3, sigma=5, step = 0.005, func=lambda x:7+6*x)
36 |
37 | x = np.array([x]).T
38 | y = np.array([y]).T
39 |
40 | # Add ones for w_0
41 | mat_ones = np.ones(shape=(x.size, 2))
42 | mat_ones[:,1] = x[:,0]
43 | x = mat_ones
44 |
45 | # Normal equations method
46 | xTx_inv = np.linalg.inv(x.T.dot(x))
47 | xTy = x.T.dot(y)
48 | w = xTx_inv.dot(xTy)
49 |
50 | print("Model parameters:\n")
51 | print(w)
52 |
53 | # Make predictions on the training set
54 | y_hat = w[0] + w[1]*x[:,1]
55 |
56 | # Get the residuals
57 | y_hat = y_hat.reshape(y_hat.shape[0],-1)
58 | residuals = np.subtract(y_hat, y)
59 |
60 | # Plot residuals vs predictions
61 | plt.rcParams.update({'font.size': 15})
62 |
63 | f, ax = plt.subplots( figsize=(10,8))
64 |
65 | ax.scatter(y_hat, residuals, s=10)
66 | ax.axhline(0, color='red')
67 |
68 | ax.set_title("Residuals vs fitted")
69 | ax.set_ylabel("Residuals")
70 | ax.set_xlabel("$\hat y$",fontsize=20)
71 | ax.grid(True)
72 |
--------------------------------------------------------------------------------
/Lecture 4 - Decision Trees/Decision_Trees.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 4 - Decision Trees/Decision_Trees.pdf
--------------------------------------------------------------------------------
/Lecture 4 - Decision Trees/README.md:
--------------------------------------------------------------------------------
1 | ## Decision Trees
2 |
3 | Code examples used in Lecture 4
4 |
5 | * auto-mpg-modified.data - A modified version of the Auto MPG data set from UCI Machine Learning Repository, with the continuous MPG attribute partitioned as follows:
6 | * [9;19) - BAD
7 | * [19;26] - OK
8 | * (26;47] - GOOD
9 | * entropy.py - Splits a data set by attribute and threshold value and computes the entropy for each split
10 | * dt-credit.py - An implementation of a decision tree algorithm against the credit rating data set
11 | * scikit-dt-auto-mpg.py - A decision tree trained on the modified Auto MPG data set, using DecisionTreeClassifier from scikit-learn
12 | * overfit_demo.py - Accuracy score for training/test subset against the modified Auto MPG data set, using DecisionTreeClassifier from scikit-learn
13 |
14 | This repository contains materials from the London Machine Learning Study Group Meetups
15 |
16 | The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group).
17 |
18 | (C) 2016 Nikolay Manchev, London Machine Learning Study Group
19 |
20 | This work is licensed under the Creative Commons Attribution 4.0 International License.
To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0). 21 | -------------------------------------------------------------------------------- /Lecture 4 - Decision Trees/auto-mpg-modified.data: -------------------------------------------------------------------------------- 1 | 0 8 304 193 4732 18.5 70 1 hi 1200d 2 | 0 8 360 215 4615 14 70 1 ford f250 3 | 0 8 307 200 4376 15 70 1 chevy c20 4 | 0 8 318 210 4382 13.5 70 1 dodge d200 5 | 0 8 429 208 4633 11 72 1 mercury marquis 6 | 0 8 400 150 4997 14 73 1 chevrolet impala 7 | 0 8 350 180 3664 11 73 1 oldsmobile omega 8 | 0 8 383 180 4955 11.5 71 1 dodge monaco (sw) 9 | 0 8 350 160 4456 13.5 72 1 oldsmobile delta 88 royale 10 | 0 8 429 198 4952 11.5 73 1 mercury marquis brougham 11 | 0 8 455 225 4951 11 73 1 buick electra 225 custom 12 | 0 8 400 167 4906 12.5 73 1 ford country 13 | 0 8 350 180 4499 12.5 73 1 oldsmobile vista cruiser 14 | 0 8 400 170 4746 12 71 1 ford country squire (sw) 15 | 0 8 400 175 5140 12 71 1 pontiac safari (sw) 16 | 0 8 350 165 4274 12 72 1 chevrolet impala 17 | 0 8 350 155 4502 13.5 72 1 buick lesabre custom 18 | 0 8 400 190 4422 12.5 72 1 chrysler newport royal 19 | 0 8 307 130 4098 14 72 1 chevrolet chevelle concours (sw) 20 | 0 8 302 140 4294 16 72 1 ford gran torino (sw) 21 | 0 8 350 175 4100 13 73 1 buick century 350 22 | 0 8 350 145 3988 13 73 1 chevrolet malibu 23 | 0 8 400 150 4464 12 73 1 chevrolet caprice classic 24 | 0 8 351 158 4363 13 73 1 ford ltd 25 | 0 8 440 215 4735 11 73 1 chrysler new yorker brougham 26 | 0 8 360 175 3821 11 73 1 amc ambassador brougham 27 | 0 8 360 170 4654 13 73 1 plymouth custom suburb 28 | 0 8 350 150 4699 14.5 74 1 buick century luxus (sw) 29 | 0 8 302 129 3169 12 75 1 ford mustang ii 30 | 0 8 318 150 3940 13.2 76 1 plymouth volare premier v8 31 | 0 8 350 145 4055 12 76 1 chevy c10 32 | 0 8 302 130 3870 15 76 1 ford f108 33 | 0 8 318 150 3755 14 76 1 dodge d100 34 | 0 8 454 220 4354 9 70 1 chevrolet impala 35 | 0 8 440 215 4312 8.5 70 1 plymouth fury iii 36 | 0 8 455 225 4425 10 70 1 pontiac catalina 37 | 0 8 340 160 3609 8 70 1 plymouth 'cuda 340 38 | 0 8 455 225 3086 10 70 1 buick estate wagon (sw) 39 | 0 8 350 165 4209 12 71 1 chevrolet impala 40 | 0 8 400 175 4464 11.5 71 1 pontiac catalina brougham 41 | 0 8 351 153 4154 13.5 71 1 ford galaxie 500 42 | 0 8 318 150 4096 13 71 1 plymouth fury iii 43 | 0 8 400 175 4385 12 72 1 pontiac catalina 44 | 0 8 351 153 4129 13 72 1 ford galaxie 500 45 | 0 8 318 150 4077 14 72 1 plymouth satellite custom (sw) 46 | 0 8 304 150 3672 11.5 73 1 amc matador 47 | 0 8 302 137 4042 14.5 73 1 ford gran torino 48 | 0 8 318 150 4237 14.5 73 1 plymouth fury gran sedan 49 | 0 8 318 150 4457 13.5 74 1 dodge coronet custom (sw) 50 | 0 8 302 140 4638 16 74 1 ford gran torino (sw) 51 | 0 8 304 150 4257 15.5 74 1 amc matador (sw) 52 | 0 8 351 148 4657 13.5 75 1 ford ltd 53 | 0 8 351 152 4215 12.8 76 1 ford gran torino 54 | 0 8 350 165 3693 11.5 70 1 buick skylark 320 55 | 0 8 429 198 4341 10 70 1 ford galaxie 500 56 | 0 8 390 190 3850 8.5 70 1 amc ambassador dpl 57 | 0 8 383 170 3563 10 70 1 dodge challenger se 58 | 0 8 400 150 3761 9.5 70 1 chevrolet monte carlo 59 | 0 8 318 150 4135 13.5 72 1 plymouth fury iii 60 | 0 8 304 150 3892 12.5 72 1 amc matador (sw) 61 | 0 8 318 150 3777 12.5 73 1 dodge coronet custom 62 | 0 8 350 145 4082 13 73 1 chevrolet monte carlo s 63 | 0 8 318 150 3399 11 73 1 dodge dart custom 64 | 0 6 250 100 3336 17 74 1 chevrolet nova 65 | 0 6 250 72 3432 21 75 
1 mercury monarch 66 | 0 6 250 72 3158 19.5 75 1 ford maverick 67 | 0 8 350 145 4440 14 75 1 chevrolet bel air 68 | 0 6 258 110 3730 19 75 1 amc matador 69 | 0 8 302 130 4295 14.9 77 1 mercury cougar brougham 70 | 0 8 304 120 3962 13.9 76 1 amc matador 71 | 0 8 318 145 4140 13.7 77 1 dodge monaco brougham 72 | 0 8 350 170 4165 11.4 77 1 chevrolet monte carlo landau 73 | 0 8 400 190 4325 12.2 77 1 chrysler cordoba 74 | 0 8 351 142 4054 14.3 79 1 ford country squire (sw) 75 | 0 8 304 150 3433 12 70 1 amc rebel sst 76 | 0 6 225 105 3439 15.5 71 1 plymouth satellite custom 77 | 0 6 250 100 3278 18 73 1 chevrolet nova custom 78 | 0 8 400 230 4278 9.5 73 1 pontiac grand prix 79 | 0 6 250 100 3781 17 74 1 chevrolet chevelle malibu classic 80 | 0 6 258 110 3632 18 74 1 amc matador 81 | 0 8 302 140 4141 14 74 1 ford gran torino 82 | 0 8 400 170 4668 11.5 75 1 pontiac catalina 83 | 0 8 318 150 4498 14.5 75 1 plymouth grand fury 84 | 0 6 250 105 3897 18.5 75 1 chevroelt chevelle malibu 85 | 0 8 318 150 4190 13 76 1 dodge coronet brougham 86 | 0 8 400 180 4220 11.1 77 1 pontiac grand prix lj 87 | 0 8 351 149 4335 14.5 77 1 ford thunderbird 88 | 0 6 163 133 3410 15.8 78 2 peugeot 604sl 89 | 0 6 168 120 3820 16.7 76 2 mercedes-benz 280s 90 | 0 8 350 180 4380 12.1 76 1 cadillac seville 91 | 0 8 351 138 3955 13.2 79 1 mercury grand marquis 92 | 0 8 350 155 4360 14.9 79 1 buick estate wagon (sw) 93 | 0 8 302 140 3449 10.5 70 1 ford torino 94 | 0 6 250 100 3329 15.5 71 1 chevrolet chevelle malibu 95 | 0 8 304 150 3672 11.5 72 1 amc ambassador sst 96 | 0 6 231 110 3907 21 75 1 buick century 97 | 0 8 260 110 4060 19 77 1 oldsmobile cutlass supreme 98 | 0 6 163 125 3140 13.6 78 2 volvo 264gl 99 | 0 8 305 130 3840 15.4 79 1 chevrolet caprice classic 100 | 0 8 305 140 4215 13 76 1 chevrolet chevelle malibu classic 101 | 0 6 258 95 3193 17.8 76 1 amc pacer d/l 102 | 0 8 305 145 3880 12.5 77 1 chevrolet caprice classic 103 | 0 6 250 110 3520 16.4 77 1 chevrolet concours 104 | 0 8 318 140 4080 13.7 78 1 dodge magnum xe 105 | 0 8 302 129 3725 13.4 79 1 ford ltd landau 106 | 0 6 225 85 3465 16.6 81 1 chrysler lebaron salon 107 | 0 6 231 165 3445 13.4 78 1 buick regal sport coupe (turbo) 108 | 0 8 307 130 3504 12 70 1 chevrolet chevelle malibu 109 | 0 8 318 150 3436 11 70 1 plymouth satellite 110 | 0 6 199 97 2774 15.5 70 1 amc hornet 111 | 0 6 232 100 3288 15.5 71 1 amc matador 112 | 0 6 258 110 2962 13.5 71 1 amc hornet sportabout (sw) 113 | 0 6 250 88 3139 14.5 71 1 ford mustang 114 | 0 4 121 112 2933 14.5 72 2 volvo 145e (sw) 115 | 0 6 225 105 3121 16.5 73 1 plymouth valiant 116 | 0 6 232 100 2945 16 73 1 amc hornet 117 | 0 6 250 88 3021 16.5 73 1 ford maverick 118 | 0 6 232 100 2789 15 73 1 amc gremlin 119 | 0 3 70 90 2124 13.5 73 3 maxda rx3 120 | 0 6 225 105 3613 16.5 74 1 plymouth satellite sebring 121 | 0 6 250 105 3459 16 75 1 chevrolet nova 122 | 0 6 225 95 3785 19 75 1 plymouth fury 123 | 0 6 171 97 2984 14.5 75 1 ford pinto 124 | 0 6 250 78 3574 21 76 1 ford granada ghia 125 | 0 6 258 120 3410 15.1 78 1 amc concord d/l 126 | 0 8 302 139 3205 11.2 78 1 ford futura 127 | 0 8 318 135 3830 15.2 79 1 dodge st. 
regis 128 | 0 6 250 110 3645 16.2 76 1 pontiac ventura sj 129 | 0 6 250 98 3525 19 77 1 ford granada 130 | 0 8 360 150 3940 13 79 1 chrysler lebaron town @ country (sw) 131 | 0 6 225 110 3620 18.7 78 1 dodge aspen 132 | 1 6 232 100 2634 13 71 1 amc gremlin 133 | 1 6 250 88 3302 15.5 71 1 ford torino 500 134 | 1 6 250 100 3282 15 71 1 pontiac firebird 135 | 1 3 70 97 2330 13.5 72 3 mazda rx2 coupe 136 | 1 4 122 85 2310 18.5 73 1 ford pinto 137 | 1 4 121 112 2868 15.5 73 2 volvo 144ea 138 | 1 6 232 100 2901 16 74 1 amc hornet 139 | 1 6 225 95 3264 16 75 1 plymouth valiant custom 140 | 1 6 232 90 3211 17 75 1 amc pacer 141 | 1 4 120 88 3270 21.9 76 2 peugeot 504 142 | 1 6 156 108 2930 15.5 76 3 toyota mark ii 143 | 1 6 225 100 3630 17.7 77 1 plymouth volare custom 144 | 1 6 225 90 3381 18.7 80 1 dodge aspen 145 | 1 6 231 105 3535 19.2 78 1 pontiac phoenix lj 146 | 1 8 305 145 3425 13.2 78 1 chevrolet monte carlo landau 147 | 1 8 267 125 3605 15 79 1 chevrolet malibu classic (sw) 148 | 1 8 318 140 3735 13.2 78 1 dodge diplomat 149 | 1 6 232 90 3210 17.2 78 1 amc concord 150 | 1 6 200 85 2990 18.2 79 1 mercury zephyr 6 151 | 1 8 260 110 3365 15.5 78 1 oldsmobile cutlass salon brougham 152 | 1 4 140 90 2408 19.5 72 1 chevrolet vega 153 | 1 4 97 88 2279 19 73 3 toyota carina 154 | 1 4 114 91 2582 14 73 2 audi 100ls 155 | 1 6 156 122 2807 13.5 73 3 toyota mark ii 156 | 1 6 198 95 3102 16.5 74 1 plymouth duster 157 | 1 8 262 110 3221 13.5 75 1 chevrolet monza 2+2 158 | 1 6 232 100 2914 16 75 1 amc gremlin 159 | 1 6 225 100 3651 17.7 76 1 dodge aspen se 160 | 1 4 130 102 3150 15.7 76 2 volvo 245 161 | 1 8 302 139 3570 12.8 78 1 mercury monarch ghia 162 | 1 6 200 85 2965 15.8 78 1 ford fairmont (auto) 163 | 1 6 232 90 3265 18.2 79 1 amc concord dl 6 164 | 1 6 200 88 3060 17.1 81 1 ford granada gl 165 | 1 5 131 103 2830 15.9 78 2 audi 5000 166 | 1 6 231 105 3425 16.9 77 1 buick skylark 167 | 1 6 200 95 3155 18.2 78 1 chevrolet malibu 168 | 1 6 225 100 3430 17.2 78 1 plymouth volare 169 | 1 6 231 105 3380 15.8 78 1 buick century special 170 | 1 6 225 110 3360 16.6 79 1 dodge aspen 6 171 | 1 6 200 85 3070 16.7 78 1 mercury zephyr 172 | 1 6 200 85 2587 16 70 1 ford maverick 173 | 1 6 199 90 2648 15 70 1 amc gremlin 174 | 1 4 122 86 2226 16.5 72 1 ford pinto runabout 175 | 1 4 120 87 2979 19.5 72 2 peugeot 504 (sw) 176 | 1 4 140 72 2401 19.5 73 1 chevrolet vega 177 | 1 6 155 107 2472 14 73 1 mercury capri v6 178 | 1 6 200 ? 
2875 17 74 1 ford maverick 179 | 1 6 231 110 3039 15 75 1 buick skyhawk 180 | 1 4 134 95 2515 14.8 78 3 toyota celica gt liftback 181 | 1 4 121 110 2600 12.8 77 2 bmw 320i 182 | 1 3 80 110 2720 13.5 77 3 mazda rx-4 183 | 1 6 231 115 3245 15.4 79 1 pontiac lemans v6 184 | 1 4 121 115 2795 15.7 78 2 saab 99gle 185 | 1 6 198 95 2833 15.5 70 1 plymouth duster 186 | 1 4 140 72 2408 19 71 1 chevrolet vega (sw) 187 | 1 4 121 76 2511 18 72 2 volkswagen 411 (sw) 188 | 1 4 122 86 2395 16 72 1 ford pinto (sw) 189 | 1 4 108 94 2379 16.5 73 3 datsun 610 190 | 1 4 121 98 2945 14.5 75 2 volvo 244dl 191 | 1 6 225 100 3233 15.4 76 1 plymouth valiant 192 | 1 6 250 105 3353 14.5 76 1 chevrolet nova 193 | 1 6 146 97 2815 14.5 77 3 datsun 810 194 | 1 6 232 112 2835 14.7 82 1 ford granada l 195 | 1 4 140 88 2890 17.3 79 1 ford fairmont 4 196 | 1 6 231 110 3415 15.8 81 1 buick century 197 | 1 6 232 90 3085 17.6 76 1 amc hornet 198 | 1 4 122 86 2220 14 71 1 mercury capri 2000 199 | 1 4 97 54 2254 23.5 72 2 volkswagen type 3 200 | 1 4 120 97 2506 14.5 72 3 toyouta corona mark ii (sw) 201 | 1 6 198 95 2904 16 73 1 plymouth duster 202 | 1 4 140 83 2639 17 75 1 ford pinto 203 | 1 4 140 78 2592 18.5 75 1 pontiac astro 204 | 1 4 115 95 2694 15 75 2 audi 100ls 205 | 1 4 120 88 2957 17 75 2 peugeot 504 206 | 1 8 350 125 3900 17.4 79 1 cadillac eldorado 207 | 1 4 151 ? 3035 20.5 82 1 amc concord dl 208 | 1 4 156 105 2745 16.7 78 1 plymouth sapporo 209 | 1 6 173 110 2725 12.6 81 1 chevrolet citation 210 | 1 4 140 ? 2905 14.3 80 1 ford mustang cobra 211 | 1 3 70 100 2420 12.5 80 3 mazda rx-7 gs 212 | 1 4 151 85 2855 17.6 78 1 oldsmobile starfire sx 213 | 1 4 119 97 2405 14.9 78 3 datsun 200-sx 214 | 1 8 260 90 3420 22.2 79 1 oldsmobile cutlass salon brougham 215 | 1 4 113 95 2372 15 70 3 toyota corona mark ii 216 | 1 4 107 90 2430 14.5 70 2 audi 100 ls 217 | 1 4 113 95 2278 15.5 72 3 toyota corona hardtop 218 | 1 4 116 75 2158 15.5 73 2 opel manta 219 | 1 4 121 110 2660 14 73 2 saab 99le 220 | 1 4 90 75 2108 15.5 74 2 fiat 128 221 | 1 4 120 97 2489 15 74 3 honda civic 222 | 1 4 134 96 2702 13.5 75 3 toyota corona 223 | 1 4 119 97 2545 17 75 3 datsun 710 224 | 1 6 200 81 3012 17.6 76 1 ford maverick 225 | 1 4 140 92 2865 16.4 82 1 ford fairmont futura 226 | 1 6 146 120 2930 13.8 81 3 datsun 810 maxima 227 | 1 4 151 90 3003 20.1 80 1 amc concord 228 | 1 4 98 60 2164 22.1 76 1 chevrolet woody 229 | 1 4 151 88 2740 16 77 1 pontiac sunbird coupe 230 | 1 4 110 87 2672 17.5 70 2 peugeot 504 231 | 1 4 104 95 2375 17.5 70 2 saab 99e 232 | 1 4 113 95 2228 14 71 3 toyota corona 233 | 1 4 98 ? 
2046 19 71 1 ford pinto 234 | 1 4 97.5 80 2126 17 72 1 dodge colt hardtop 235 | 1 4 140 75 2542 17 74 1 chevrolet vega 236 | 1 4 90 71 2223 16.5 75 2 volkswagen dasher 237 | 1 4 121 115 2671 13.5 75 2 saab 99le 238 | 1 4 116 81 2220 16.9 76 2 opel 1900 239 | 1 4 140 92 2572 14.9 76 1 capri ii 240 | 1 6 181 110 2945 16.4 82 1 buick century limited 241 | 1 4 140 88 2720 15.4 78 1 ford fairmont (man) 242 | 1 5 183 77 3530 20.1 79 2 mercedes benz 300d 243 | 1 6 168 116 2900 12.6 81 3 toyota cressida 244 | 1 4 122 96 2300 15.5 77 1 plymouth arrow gs 245 | 1 4 140 89 2755 15.8 77 1 ford mustang ii 2+2 246 | 1 4 156 92 2620 14.4 81 1 dodge aries wagon (sw) 247 | 1 4 97 46 1835 20.5 70 2 volkswagen 1131 deluxe sedan 248 | 1 4 121 113 2234 12.5 70 2 bmw 2002 249 | 1 4 91 70 1955 20.5 71 1 plymouth cricket 250 | 1 4 96 69 2189 18 72 2 renault 12 (sw) 251 | 1 4 97 46 1950 21 73 2 volkswagen super beetle 252 | 1 4 98 90 2265 15.5 73 2 fiat 124 sport coupe 253 | 1 4 122 80 2451 16.5 74 1 ford pinto 254 | 1 4 79 67 1963 15.5 74 2 volkswagen dasher 255 | 1 4 97 78 2300 14.5 74 2 opel manta 256 | 1 4 116 75 2246 14 74 2 fiat 124 tc 257 | 1 4 108 93 2391 15.5 74 3 subaru 258 | 1 4 98 79 2255 17.7 76 1 dodge colt 259 | 1 4 97 75 2265 18.2 77 3 toyota corolla liftback 260 | 1 4 156 92 2585 14.5 82 1 chrysler lebaron medallion 261 | 2 4 140 88 2870 18.1 80 1 ford fairmont 262 | 2 4 140 72 2565 13.6 76 1 ford pinto 263 | 2 4 151 84 2635 16.4 81 1 buick skylark 264 | 2 8 350 105 3725 19 81 1 oldsmobile cutlass ls 265 | 2 6 173 115 2700 12.9 79 1 oldsmobile omega brougham 266 | 2 4 97 88 2130 14.5 70 3 datsun pl510 267 | 2 4 97 88 2130 14.5 71 3 datsun pl510 268 | 2 4 97 60 1834 19 71 2 volkswagen model 111 269 | 2 4 97 88 2100 16.5 72 3 toyota corolla 1600 (sw) 270 | 2 4 101 83 2202 15.3 76 2 renault 12tl 271 | 2 4 112 88 2640 18.6 82 1 chevrolet cavalier wagon 272 | 2 4 151 90 2735 18 82 1 pontiac phoenix 273 | 2 4 151 90 2950 17.3 82 1 chevrolet camaro 274 | 2 4 140 86 2790 15.6 82 1 ford mustang gl 275 | 2 4 119 97 2300 14.7 78 3 datsun 510 276 | 2 4 141 71 3190 24.8 79 2 peugeot 504 277 | 2 4 135 84 2490 15.7 81 1 plymouth reliant 278 | 2 4 121 80 2670 15 79 1 amc spirit dl 279 | 2 4 134 95 2560 14.2 78 3 toyota corona 280 | 2 4 156 105 2800 14.4 80 1 dodge colt 281 | 2 4 140 90 2264 15.5 71 1 chevrolet vega 2300 282 | 2 4 116 90 2123 14 71 2 opel 1900 283 | 2 4 97 92 2288 17 72 3 datsun 510 (sw) 284 | 2 4 98 80 2164 15 72 1 dodge colt (sw) 285 | 2 4 90 75 2125 14.5 74 1 dodge colt 286 | 2 4 107 86 2464 15.5 76 2 fiat 131 287 | 2 4 97 75 2155 16.4 76 3 toyota corolla 288 | 2 4 151 90 2678 16.5 80 1 chevrolet citation 289 | 2 4 112 88 2605 19.6 82 1 chevrolet cavalier 290 | 2 4 120 79 2625 18.6 82 1 ford ranger 291 | 2 4 141 80 3230 20.4 81 2 peugeot 505s turbo diesel 292 | 2 4 151 90 2670 16 79 1 buick skylark limited 293 | 2 6 173 115 2595 11.3 79 1 chevrolet citation 294 | 2 4 68 49 1867 19.5 73 2 fiat 128 295 | 2 4 98 83 2219 16.5 74 2 audi fox 296 | 2 4 97 75 2171 16 75 3 toyota corolla 297 | 2 4 90 70 1937 14 75 2 volkswagen rabbit 298 | 2 4 85 52 2035 22.2 76 1 chevrolet chevette 299 | 2 4 90 70 1937 14.2 76 2 vw rabbit 300 | 2 4 97 78 1940 14.5 77 2 volkswagen rabbit custom 301 | 2 4 135 84 2525 16 82 1 dodge aries se 302 | 2 4 97 71 1825 12.2 76 2 volkswagen rabbit 303 | 2 4 98 68 2135 16.6 78 3 honda accord lx 304 | 2 4 134 90 2711 15.5 80 3 toyota corona liftback 305 | 2 4 89 62 1845 15.3 80 2 vokswagen rabbit 306 | 2 4 98 65 2380 20.7 81 1 ford escort 2h 307 | 2 4 79 70 2074 19.5 71 2 peugeot 304 
308 | 2 4 88 76 2065 14.5 71 2 fiat 124b 309 | 2 4 111 80 2155 14.8 77 1 buick opel isuzu deluxe 310 | 2 4 97 67 1985 16.4 77 3 subaru dl 311 | 2 4 98 68 2155 16.5 78 1 chevrolet chevette 312 | 2 4 146 67 3250 21.8 80 2 mercedes-benz 240d 313 | 2 4 135 84 2385 12.9 81 1 plymouth reliant 314 | 2 4 98 63 2051 17 77 1 chevrolet chevette 315 | 2 4 97 78 2190 14.1 77 2 volkswagen dasher 316 | 2 6 145 76 3160 19.6 81 2 volvo diesel 317 | 2 4 105 75 2230 14.5 78 1 dodge omni 318 | 2 4 71 65 1773 19 71 3 toyota corolla 1200 319 | 2 4 79 67 1950 19 74 3 datsun b210 320 | 2 4 76 52 1649 16.5 74 3 toyota corona 321 | 2 4 79 67 2000 16 74 2 fiat x1.9 322 | 2 4 112 85 2575 16.2 82 1 pontiac j2000 se hatchback 323 | 2 4 91 68 1970 17.6 82 3 mazda glc custom 324 | 2 4 119 82 2720 19.4 82 1 chevy s-10 325 | 2 4 120 75 2542 17.5 80 3 mazda 626 326 | 2 4 98 68 2045 18.5 77 3 honda accord cvcc 327 | 2 4 89 71 1990 14.9 78 2 volkswagen scirocco 328 | 2 4 120 74 2635 18.3 81 3 mazda 626 329 | 2 4 85 65 2020 19.2 79 3 datsun 210 330 | 2 4 89 71 1925 14 79 2 vw rabbit custom 331 | 2 4 71 65 1836 21 74 3 toyota corolla 1200 332 | 2 4 83 61 2003 19 74 3 datsun 710 333 | 2 4 85 70 1990 17 76 3 datsun b-210 334 | 2 4 91 67 1965 15.7 82 3 honda civic (auto) 335 | 2 4 144 96 2665 13.9 82 3 toyota celica gt 336 | 2 4 135 84 2295 11.6 82 1 dodge rampage 337 | 2 4 98 70 2120 15.5 80 1 chevrolet chevette 338 | 2 4 108 75 2265 15.2 80 3 toyota corolla 339 | 2 4 97 67 2065 17.8 81 3 subaru 340 | 2 4 107 72 2290 17 80 3 honda accord 341 | 2 4 108 75 2350 16.8 81 3 toyota corolla 342 | 2 6 168 132 2910 11.4 80 3 datsun 280-zx 343 | 2 4 78 52 1985 19.4 78 3 mazda glc deluxe 344 | 2 4 119 100 2615 14.8 81 3 datsun 200sx 345 | 2 4 91 53 1795 17.5 75 3 honda civic cvcc 346 | 2 4 91 53 1795 17.4 76 3 honda civic 347 | 2 4 105 74 2190 14.2 81 2 volkswagen jetta 348 | 2 4 85 70 1945 16.8 77 3 datsun f-10 hatchback 349 | 2 4 98 83 2075 15.9 77 1 dodge colt m/m 350 | 2 4 151 90 2556 13.2 79 1 pontiac phoenix 351 | 2 4 107 75 2210 14.4 81 3 honda prelude 352 | 2 4 97 67 2145 18 80 3 subaru dl 353 | 2 4 112 88 2395 18 82 1 chevrolet cavalier 2-door 354 | 2 4 108 70 2245 16.9 82 3 toyota corolla 355 | 2 4 86 65 1975 15.2 79 3 maxda glc deluxe 356 | 2 4 91 68 1985 16 81 3 mazda glc 4 357 | 2 4 105 70 2200 13.2 79 1 plymouth horizon 358 | 2 4 97 78 2188 15.8 80 2 audi 4000 359 | 2 4 98 65 2045 16.2 81 1 ford escort 4w 360 | 2 4 105 70 2150 14.9 79 1 plymouth horizon tc3 361 | 2 4 100 ? 
2320 15.8 81 2 renault 18i
362 | 2 4 105 63 2215 14.9 81 1 plymouth horizon 4
363 | 2 4 72 69 1613 18 71 3 datsun 1200
364 | 2 4 122 88 2500 15.1 80 2 triumph tr7 coupe
365 | 2 4 81 60 1760 16.1 81 3 honda civic 1300
366 | 2 4 98 80 1915 14.4 79 1 dodge colt hatchback custom
367 | 2 4 79 58 1825 18.6 77 2 renault 5 gtl
368 | 2 4 105 74 1980 15.3 82 2 volkswagen rabbit l
369 | 2 4 98 70 2125 17.3 82 1 mercury lynx l
370 | 2 4 120 88 2160 14.5 82 3 nissan stanza xe
371 | 2 4 107 75 2205 14.5 82 3 honda accord
372 | 2 4 135 84 2370 13 82 1 dodge charger 2.2
373 | 2 4 98 66 1800 14.4 78 1 ford fiesta
374 | 2 4 91 60 1800 16.4 78 3 honda civic cvcc
375 | 2 5 121 67 2950 19.9 80 2 audi 5000s (diesel)
376 | 2 4 119 92 2434 15 80 3 datsun 510 hatchback
377 | 2 4 85 65 1975 19.4 81 3 datsun 210 mpg
378 | 2 4 91 68 2025 18.2 82 3 mazda glc custom l
379 | 2 4 86 65 2019 16.4 80 3 datsun 310
380 | 2 4 91 69 2130 14.7 79 2 fiat strada custom
381 | 2 4 89 62 2050 17.3 81 3 toyota tercel
382 | 2 4 105 63 2125 14.7 82 1 plymouth horizon miser
383 | 2 4 91 67 1965 15 82 3 honda civic
384 | 2 4 91 67 1995 16.2 82 3 datsun 310 gx
385 | 2 6 262 85 3015 17 82 1 oldsmobile cutlass ciera (diesel)
386 | 2 4 89 60 1968 18.8 80 3 toyota corolla tercel
387 | 2 4 86 64 1875 16.4 81 1 plymouth champ
388 | 2 4 79 58 1755 16.9 81 3 toyota starlet
389 | 2 4 85 70 2070 18.6 78 3 datsun b210 gx
390 | 2 4 85 65 2110 19.2 80 3 datsun 210
391 | 2 4 85 ? 1835 17.3 80 2 renault lecar deluxe
392 | 2 4 98 76 2144 14.7 80 2 vw rabbit
393 | 2 4 90 48 1985 21.5 78 2 volkswagen rabbit custom diesel
394 | 2 4 90 48 2335 23.7 80 2 vw dasher (diesel)
395 | 2 4 97 52 2130 24.6 82 2 vw pickup
396 | 2 4 90 48 2085 21.7 80 2 vw rabbit c (diesel)
397 | 2 4 91 67 1850 13.8 80 3 honda civic 1500 gl
398 | 2 4 86 65 2110 17.9 80 3 mazda glc
399 |
--------------------------------------------------------------------------------
/Lecture 4 - Decision Trees/dt-credit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | (C) 2016 Nikolay Manchev, London Machine Learning Study Group
4 |
5 | http://www.meetup.com/London-Machine-Learning-Study-Group/
6 |
7 | This work is licensed under the Creative Commons Attribution 4.0 International
8 | License. To view a copy of this license, visit
9 | http://creativecommons.org/licenses/by/4.0/.
10 | """
11 |
12 | import numpy as np
13 | import math
14 |
15 |
16 | def split(dataset, attribute, value):
17 | """
18 | Split a dataset in two by value of an attribute
19 |
20 | Parameters
21 | ----------
22 | dataset : dataset for the split
23 | attribute : attribute to split on
24 | value : threshold value for the split
25 |
26 | Returns
27 | -------
28 | a tuple containing the two splits
29 | """
30 | set_one = dataset[dataset[:, attribute] > value]
31 | set_two = dataset[dataset[:, attribute] <= value]
32 | return (set_one, set_two)
33 |
34 |
35 | def entropy(dataset):
36 | """
37 | Computes the entropy for a dataset. The entropy is computed as
38 |
39 | H = - sum_{i} p(x_i) log_2 p(x_i)
40 |
41 | The sum is taken over all unique values in the set. The
42 | probability p(x_i) is computed as
43 |
44 | p(x_i) = (frequency of occurrence of x_i) / (size of the dataset)
45 |
46 | Parameters
47 | ----------
48 | dataset : a list of values
49 |
50 | Returns
51 | -------
52 | the entropy of the set
53 | """
54 | H = 0
55 |
56 | for freq in count_distinct(dataset).values():
57 | H += (-freq/len(dataset)) * math.log(freq/len(dataset), 2)
58 |
59 | return H
60 |
61 |
62 | def mode(dataset):
63 | """
64 | Computes the mode (i.e. most frequent value) of the dataset
65 |
66 | Parameters
67 | ----------
68 | dataset : a list of values
69 |
70 | Returns
71 | -------
72 | the distinct value with highest frequency of occurrence
73 | """
74 | counts = count_distinct(dataset)
75 | return max(counts, key=counts.get)
76 |
77 |
78 | def count_distinct(dataset):
79 | """
80 | Gets a list of unique values in a dataset and computes the
81 | frequency of occurrence for each unique value.
82 |
83 | Parameters
84 | ----------
85 | dataset : a list of values
86 |
87 | Returns
88 | -------
89 | a dictionary of unique values and their respective frequency
90 | of occurrence
91 | """
92 | counts = {}
93 |
94 | # Loop over all elements of the dataset
95 | for item in dataset:
96 | if (item in counts):
97 | # This value is already in the dictionary.
98 | # Increase its count.
99 | counts[item] = counts[item] + 1
100 | else:
101 | # This is the first occurrence of the value.
102 | # Add it to the dictionary and set its count to 1
103 | counts[item] = 1
104 | return counts
105 |
106 | def IG(dataset, attr_index, labels):
107 | """
108 | Computes the expected reduction of entropy if the dataset is
109 | split by a specific attribute.
110 |
111 | IG(dataset, attribute) = H(labels) - H(labels|attribute)
112 |
113 | Parameters
114 | ----------
115 | dataset : a list of values
116 | attr_index : index of an attribute to split on
117 | labels : class labels for the examples in dataset
118 |
119 | Returns
120 | -------
121 | IG(dataset, attribute)
122 | """
123 | # Get the dataset distinct values and their respective
124 | # frequency of occurrence
125 | dataset_attributes = count_distinct(dataset[:,attr_index])
126 |
127 | # Start with 0 entropy
128 | I = 0
129 |
130 | # Compute the entropy of the split
131 | # I(X, A) = \sum_{i=1}^{m} \frac{|X_i|}{|X|} \times H(X_i)
132 | for key in dataset_attributes.keys():
133 |
134 | # Compute the weighted average \frac{|X_i|}{|X|}
135 | p = dataset_attributes[key] / sum(dataset_attributes.values())
136 |
137 | # Get the class labels for X_i
138 | subset_labels = labels[dataset[:,attr_index] == key]
139 |
140 | # Add \frac{|X_i|}{|X|} \times H(X_i) to I(X,A)
141 | I = I + p * entropy(subset_labels)
142 |
143 | # Return H(labels) - I(X, A), i.e. the gain with respect to the class labels
144 | return entropy(labels) - I
145 |
146 |
147 | def select_best(dataset, attributes, labels):
148 | """
149 | Selects the best attribute to split on based on reduction of entropy.
150 | 151 | Parameters 152 | ---------- 153 | dataset : a list of values 154 | attributes : names of the attributes in the dataset 155 | labels : class labels for the examples in dataset 156 | 157 | Returns 158 | ------- 159 | The attribute that maximizes the decrease of entropy after 160 | splitting 161 | """ 162 | best_IG = 0 163 | best_attr = None 164 | 165 | # Go over all attributes of the set 166 | for attr in attributes: 167 | # Compute the expected Information Gain if we split on 168 | # that attribute 169 | gain = IG(dataset, attributes.index(attr), labels) 170 | # If the gain is higher than what we have so far select that attribute 171 | if (gain >= best_IG): 172 | best_IG = gain 173 | best_attr = attr 174 | 175 | # Return the attribute that produces the highest gain 176 | return best_attr 177 | 178 | def build_tree(dataset, attributes, labels, default, verbose = False): 179 | 180 | if verbose: 181 | print("*****************") 182 | print("INPUT ATTRIBUTES:", attributes) 183 | 184 | # No data? Return default classification 185 | if dataset.size == 0: 186 | return default 187 | 188 | # All examples have the same classification? Return this label 189 | if len(set(labels)) <= 1: 190 | 191 | if verbose: 192 | print("SAME CLASS :", labels[0]) 193 | print("*****************") 194 | 195 | return labels[0] 196 | 197 | # Attributes empty? Return MODE 198 | if len(attributes) <= 1: 199 | return default 200 | 201 | # Choose best attribute 202 | attr = select_best(dataset, attributes, labels) 203 | 204 | if (attr == None): 205 | if verbose: 206 | print("NO ATTRIBUTE TO SPLIT ON") 207 | print("************************") 208 | return default 209 | 210 | if verbose: 211 | print("SPLITTING ON :", attr) 212 | print("*****************") 213 | 214 | 215 | # Get distinct attribute index and values 216 | attr_index = attributes.index(attr) 217 | attr_values = count_distinct(dataset[:,attributes.index(attr)]).keys() 218 | 219 | # Remove the selected attribute from the list of remaining attributes 220 | attributes = [x for x in attributes if x != attr] 221 | 222 | # Add a node for that attribute 223 | tree = {attr:{}} 224 | 225 | for v in attr_values: 226 | 227 | # Get the indexes of all examples that have value v for the 228 | # chosen attribute 229 | indexes = dataset[:, attr_index] == v 230 | 231 | # Get all examples and their respective labels 232 | subtree_dataset = dataset[indexes] 233 | subtree_labels = labels[indexes] 234 | 235 | # Build a subtree using the selected examples 236 | subtree = build_tree(subtree_dataset, attributes, 237 | subtree_labels, mode(subtree_labels)) 238 | 239 | # Attach the subtree 240 | tree[attr][v] = subtree 241 | 242 | return tree 243 | 244 | def predict(tree, attributes, example): 245 | """ 246 | Traverse a tree to make a prediction. 247 | 248 | Parameters 249 | ---------- 250 | tree : a dictionary containing a decision tree 251 | attributes : names of the attributes in the dataset 252 | example : example to classify 253 | 254 | Returns 255 | ------- 256 | The class label for this example. 257 | If the example cannot be classified, this function returns None. 258 | """ 259 | # Get the attribute at the tree root 260 | for attr, value in tree.items(): 261 | attr_index = attributes.index(attr) 262 | try: 263 | # Get the node that has the same value as in the example 264 | node = tree[attr][example[attr_index]] 265 | except KeyError: 266 | # No such node exists? 
We can't classify the example then 267 | return None 268 | if isinstance(node, dict): 269 | # Node exists, but it is a subtree. Traverse recursively. 270 | return predict(node, attributes, example) 271 | else: 272 | # Node exists and is a terminal node. Its value is the class label. 273 | return node 274 | 275 | def printTree(tree, attributes, offset = "|->"): 276 | """ 277 | Prints a decision tree from dictionary. 278 | 279 | Parameters 280 | ---------- 281 | tree : a dictionary containing a decision tree 282 | attributes : names of the attributes in the dataset 283 | """ 284 | for attr, value in tree.items(): 285 | node = tree[attr] 286 | if isinstance(node, dict): 287 | print(offset,attr) 288 | printTree(node, attributes, (" " + offset)) 289 | else: 290 | print(offset,attr, "->", value) 291 | 292 | 293 | # Load the data set 294 | 295 | data = np.array([[0,0,0],[1,0,1],[0,0,0],[0,0,0],[0,1,1],[1,0,0],[0,1,0],[1,1,1],[1,0,0],[1,0,0]]) 296 | #data = np.array([[1,1,1],[2,1,2],[1,1,1],[1,1,1],[1,2,2],[2,1,1],[1,2,1],[2,2,2],[2,1,1],[2,1,1]]) 297 | #data = np.array([[0,0,0],['A',0,'A'],[0,0,0],[0,0,0],['A','A','A'],['A',0,0],[0,'A',0],[0,'A','A'],['A',0,0],['A',0,0]]) 298 | labels = np.array([0, 0, 0, 0, 1, 0, 0, 1, 0, 0]) 299 | 300 | #noise = np.random.normal(0, 0.5, len(data)) 301 | #data[:,1] += noise.astype(int) 302 | 303 | # Set attribute names 304 | attributes = ["<2 YRS JOB", "MISSED PMNTS", "DEFAULTED"] 305 | class_labels = ["GOOD", "BAD"] 306 | 307 | # Get the most frequent label 308 | default = mode(labels) 309 | 310 | tree = build_tree(data, attributes, labels, default) 311 | 312 | printTree(tree, attributes) 313 | #print(predict(tree, attributes, [1,0,1])) 314 | -------------------------------------------------------------------------------- /Lecture 4 - Decision Trees/entropy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | (C) 2016 Nikolay Manchev, London Machine Learning Study Group 4 | 5 | http://www.meetup.com/London-Machine-Learning-Study-Group/ 6 | 7 | This work is licensed under the Creative Commons Attribution 4.0 International 8 | License. To view a copy of this license, visit 9 | http://creativecommons.org/licenses/by/4.0/. 10 | """ 11 | 12 | import numpy as np 13 | import math 14 | 15 | 16 | def split(dataset, attribute, value): 17 | """ 18 | Split a dataset in two by value of an attribute 19 | 20 | Parameters 21 | ---------- 22 | dataset : dataset for the split 23 | attribute : attribute to split on 24 | value : threshold value for the split 25 | 26 | Returns 27 | ------- 28 | a tuple containing the two splits 29 | """ 30 | set_one = dataset[dataset[:, attribute] > value] 31 | set_two = dataset[dataset[:, attribute] <= value] 32 | return (set_one, set_two) 33 | 34 | 35 | def count_distinct(dataset): 36 | """ 37 | Gets a list of unique values in a dataset and computes the 38 | frequency of occurrence for each unique value. 39 | 40 | Parameters 41 | ---------- 42 | dataset : a list of values 43 | 44 | Returns 45 | ------- 46 | a dictionary of unique values and their respective frequency 47 | of occurrence 48 | """ 49 | counts = {} 50 | 51 | # Loop over all elements of the dataset 52 | for item in dataset: 53 | if (item in counts): 54 | # This value is already in the dictionary. 55 | # Increase its count. 56 | counts[item] = counts[item] + 1 57 | else: 58 | # This is the first occurrence of the word. 
59 | # Add it to the dictionary and set its count to 1
60 | counts[item] = 1
61 | return counts
62 |
63 |
64 | def entropy(dataset):
65 | """
66 | Computes the entropy for a dataset. The entropy is computed as
67 |
68 | H = - sum_{i} p(x_i) log_2 p(x_i)
69 |
70 | The sum is taken over all unique values in the set. The
71 | probability p(x_i) is computed as
72 |
73 | p(x_i) = (frequency of occurrence of x_i) / (size of the dataset)
74 |
75 | Parameters
76 | ----------
77 | dataset : a list of values
78 |
79 | Returns
80 | -------
81 | the entropy of the set
82 | """
83 | H = 0
84 |
85 | for freq in count_distinct(dataset).values():
86 | H += (-freq/len(dataset)) * math.log(freq/len(dataset), 2)
87 |
88 | return H
89 |
90 |
91 | def show_split_entropy(dataset, attr_index, split_value):
92 | """
93 | Splits a dataset on attribute and prints the frequency of occurrence
94 | and the entropy for each split.
95 |
96 | Parameters
97 | ----------
98 | dataset : a list of values
99 | attr_index : index of an attribute to split on
100 | split_value : threshold value for the split
101 |
102 | """
103 | # Split the dataset in two subsets
104 | (x1, x2) = split(dataset,attr_index,split_value)
105 |
106 | # Print the frequencies and entropy for the first subset
107 | print("First split")
108 | print("**************")
109 | print("Value counts: ", count_distinct(x1[:,attr_index]))
110 | print("Entropy: ", entropy(x1[:,attr_index]), "\n")
111 |
112 | # Print the frequencies and entropy for the second subset
113 | print("Second split")
114 | print("**************")
115 | print("Value counts: ", count_distinct(x2[:,attr_index]))
116 | print("Entropy: ", entropy(x2[:,attr_index]))
117 |
118 |
119 | # Load the data set
120 | # We use a modified version of the Auto MPG from UCI Machine Learning
121 | # Repository where the continuous MPG attribute has been converted to
122 | # categorical as follows:
123 | #
124 | # [9;19) - BAD
125 | # [19;26] - OK
126 | # (26;47] - GOOD
127 | #
128 | # The original dataset is available at
129 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG
130 |
131 | car_data = np.genfromtxt("auto-mpg-modified.data", usecols = range(8))
132 | car_data = car_data[~np.isnan(car_data).any(axis = 1)]
133 |
134 | # Assign MPG to y and all other attributes to x
135 | data = car_data[:,1:]
136 | labels = car_data[:,0]
137 |
138 | # Set attribute names
139 | attributes = ["CYLINDERS", "DISPLACEMENT", "HORSEPOWER", "WEIGHT", "ACCELERATION", "MODEL_YEAR", "ORIGIN"]
140 | class_labels = ["BAD", "OK", "GOOD"]
141 |
142 | # Look at the unique values for the MODEL_YEAR attribute
143 | print("Unique values for MODEL_YEAR: ", count_distinct(data[:,5]), "\n")
144 |
145 | # Split the dataset at the value 75
146 | show_split_entropy(data, 5, 75)
147 |
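148 | # NOTE (addition, not part of the original lecture script): the information
149 | # gain of this split is the entropy of the class labels minus the
150 | # size-weighted average entropy of the labels in each subset:
151 | p1 = np.sum(data[:,5] > 75) / len(data)
152 | I = p1 * entropy(labels[data[:,5] > 75]) + (1 - p1) * entropy(labels[data[:,5] <= 75])
153 | print("\nInformation gain for the split:", entropy(labels) - I)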
--------------------------------------------------------------------------------
/Lecture 4 - Decision Trees/overfit_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | (C) 2016 Nikolay Manchev, London Machine Learning Study Group
4 |
5 | http://www.meetup.com/London-Machine-Learning-Study-Group/
6 |
7 | This work is licensed under the Creative Commons Attribution 4.0 International
8 | License. To view a copy of this license, visit
9 | http://creativecommons.org/licenses/by/4.0/.
10 | """
11 |
12 | import numpy as np
13 |
14 | from sklearn import tree
15 |
16 | from sklearn.model_selection import train_test_split
17 | from sklearn.metrics import accuracy_score
18 |
19 | # Load the data set
20 | # We use a modified version of the Auto MPG from UCI Machine Learning
21 | # Repository where the continuous MPG attribute has been converted to
22 | # categorical as follows:
23 | #
24 | # [9;19) - BAD
25 | # [19;26] - OK
26 | # (26;47] - GOOD
27 | #
28 | # The original dataset is available at
29 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG
30 |
31 | car_data = np.genfromtxt("auto-mpg-modified.data", usecols = range(8))
32 | car_data = car_data[~np.isnan(car_data).any(axis = 1)]
33 |
34 | # Assign MPG to y and all other attributes to x
35 | data = car_data[:,1:]
36 | labels = car_data[:,0]
37 |
38 | # Split the data into test/train subsets
39 | x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.1)
40 |
41 | # Train an unconstrained decision tree (no depth limit), which will overfit
42 | dt = tree.DecisionTreeClassifier(criterion='entropy')
43 | dt = dt.fit(x_train, y_train)
44 | pred_train = dt.predict(x_train)
45 | pred_test = dt.predict(x_test)
46 | print("Prediction on training data :", accuracy_score(y_train, pred_train))
47 | print("Prediction on test data :", accuracy_score(y_test, pred_test))
48 |
49 |
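50 | # NOTE (addition, not part of the original lecture script): for comparison,
51 | # a depth-limited tree usually shows a much smaller gap between training and
52 | # test accuracy, which is the overfitting point this demo makes:
53 | dt_small = tree.DecisionTreeClassifier(criterion='entropy', max_depth=3)
54 | dt_small = dt_small.fit(x_train, y_train)
55 | print("max_depth=3 on training data :", accuracy_score(y_train, dt_small.predict(x_train)))
56 | print("max_depth=3 on test data :", accuracy_score(y_test, dt_small.predict(x_test)))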
--------------------------------------------------------------------------------
/Lecture 4 - Decision Trees/scikit-dt-auto-mpg.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | (C) 2016 Nikolay Manchev, London Machine Learning Study Group
4 |
5 | http://www.meetup.com/London-Machine-Learning-Study-Group/
6 |
7 | This work is licensed under the Creative Commons Attribution 4.0 International
8 | License. To view a copy of this license, visit
9 | http://creativecommons.org/licenses/by/4.0/.
10 | """
11 |
12 | import numpy as np
13 | import pydotplus
14 |
15 | from sklearn import tree
16 | from io import StringIO
17 |
18 | # Load the data set
19 | # We use a modified version of the Auto MPG from UCI Machine Learning
20 | # Repository where the continuous MPG attribute has been converted to
21 | # categorical as follows:
22 | #
23 | # [9;19) - BAD
24 | # [19;26] - OK
25 | # (26;47] - GOOD
26 | #
27 | # The original dataset is available at
28 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG
29 |
30 | car_data = np.genfromtxt("auto-mpg-modified.data", usecols = range(8))
31 | car_data = car_data[~np.isnan(car_data).any(axis = 1)]
32 |
33 | # Assign MPG to y and all other attributes to x
34 | data = car_data[:,1:]
35 | labels = car_data[:,0]
36 |
37 | # Uncomment to add some noise to the data
38 | #noise = np.random.normal(0, 10, len(data))
39 | #data[:,5] += noise.astype(int)
40 |
41 | dt = tree.DecisionTreeClassifier(criterion = "entropy", max_depth=3)
42 | dt = dt.fit(data, labels)
43 |
44 | attributes = ["CYLINDERS", "DISPLACEMENT", "HORSEPOWER", "WEIGHT", "ACCELERATION", "MODEL_YEAR", "ORIGIN"]
45 | class_labels = ["BAD", "OK", "GOOD"]
46 |
47 | out = StringIO()
48 | tree.export_graphviz(dt,out_file=out,
49 | feature_names = attributes,
50 | class_names = class_labels,
51 | filled=True,
52 | impurity = False)
53 |
54 | pydotplus.graph_from_dot_data(out.getvalue()).write_png("dtree.png")
55 |
--------------------------------------------------------------------------------
/Lecture 5 - Probabilities and Logistic Regression/Probabilities_and_Logistic_Regression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 5 - Probabilities and Logistic Regression/Probabilities_and_Logistic_Regression.pdf
--------------------------------------------------------------------------------
/Lecture 5 - Probabilities and Logistic Regression/README.md:
--------------------------------------------------------------------------------
1 | ## Probabilities and Logistic Regression
2 |
3 | Code examples used in Lecture 5
4 |
5 | * auto-mpg.data - The [Auto MPG](https://archive.ics.uci.edu/ml/datasets/Auto+MPG) from UCI Machine Learning Repository
6 | * logreg_gradient.py - Binary Logistic Regression with made up data (see the sketch at the end of this README)
7 | * linreg-normal_equations.py - Linear Regression (normal equations) with the made up data
8 | * logreg-hp-origin.py - Binary Logistic Regression using the Auto MPG dataset (one input variable)
9 | * logreg_gradient_2_variables.py - Binary Logistic Regression using the Auto MPG dataset (two input variables)
10 | * logreg_gradient_2_variables_iris.py - Binary Logistic Regression using linearly separable classes from the Iris dataset
11 |
12 | This repository contains materials from the London Machine Learning Study Group Meetups
13 |
14 | The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group).
15 |
16 | (C) 2017 Nikolay Manchev, London Machine Learning Study Group
17 |
18 | This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0).
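
### The gradient update at a glance

A minimal sketch (an addition to this README, not one of the lecture scripts) of the update rule the logreg_gradient*.py examples are built around: with the sigmoid hypothesis h = sigmoid(Xw), batch gradient descent on the negative log-likelihood moves the weights by w := w - alpha * X^T (h - y). The data below is made up purely for illustration.

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Made-up data: a column of ones for the bias term plus one input variable
x = np.array([[1.0, 0.5], [1.0, 1.5], [1.0, 3.0], [1.0, 4.5]])
y = np.array([[0.0], [0.0], [1.0], [1.0]])

w = np.zeros((2, 1))   # initial parameters
alpha = 0.1            # learning rate

for _ in range(5000):
    # w := w - alpha * X^T (sigmoid(Xw) - y)
    w -= alpha * x.T.dot(sigmoid(x.dot(w)) - y)

print(w)                  # fitted parameters
print(sigmoid(x.dot(w)))  # predicted class probabilities
```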
19 | -------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/auto-mpg.data: -------------------------------------------------------------------------------- 1 | 18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu" 2 | 15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320" 3 | 18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite" 4 | 16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst" 5 | 17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino" 6 | 15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500" 7 | 14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala" 8 | 14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii" 9 | 14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina" 10 | 15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl" 11 | 15.0 8 383.0 170.0 3563. 10.0 70 1 "dodge challenger se" 12 | 14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340" 13 | 15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo" 14 | 14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)" 15 | 24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii" 16 | 22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster" 17 | 18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet" 18 | 21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick" 19 | 27.0 4 97.00 88.00 2130. 14.5 70 3 "datsun pl510" 20 | 26.0 4 97.00 46.00 1835. 20.5 70 2 "volkswagen 1131 deluxe sedan" 21 | 25.0 4 110.0 87.00 2672. 17.5 70 2 "peugeot 504" 22 | 24.0 4 107.0 90.00 2430. 14.5 70 2 "audi 100 ls" 23 | 25.0 4 104.0 95.00 2375. 17.5 70 2 "saab 99e" 24 | 26.0 4 121.0 113.0 2234. 12.5 70 2 "bmw 2002" 25 | 21.0 6 199.0 90.00 2648. 15.0 70 1 "amc gremlin" 26 | 10.0 8 360.0 215.0 4615. 14.0 70 1 "ford f250" 27 | 10.0 8 307.0 200.0 4376. 15.0 70 1 "chevy c20" 28 | 11.0 8 318.0 210.0 4382. 13.5 70 1 "dodge d200" 29 | 9.0 8 304.0 193.0 4732. 18.5 70 1 "hi 1200d" 30 | 27.0 4 97.00 88.00 2130. 14.5 71 3 "datsun pl510" 31 | 28.0 4 140.0 90.00 2264. 15.5 71 1 "chevrolet vega 2300" 32 | 25.0 4 113.0 95.00 2228. 14.0 71 3 "toyota corona" 33 | 25.0 4 98.00 ? 2046. 19.0 71 1 "ford pinto" 34 | 19.0 6 232.0 100.0 2634. 13.0 71 1 "amc gremlin" 35 | 16.0 6 225.0 105.0 3439. 15.5 71 1 "plymouth satellite custom" 36 | 17.0 6 250.0 100.0 3329. 15.5 71 1 "chevrolet chevelle malibu" 37 | 19.0 6 250.0 88.00 3302. 15.5 71 1 "ford torino 500" 38 | 18.0 6 232.0 100.0 3288. 15.5 71 1 "amc matador" 39 | 14.0 8 350.0 165.0 4209. 12.0 71 1 "chevrolet impala" 40 | 14.0 8 400.0 175.0 4464. 11.5 71 1 "pontiac catalina brougham" 41 | 14.0 8 351.0 153.0 4154. 13.5 71 1 "ford galaxie 500" 42 | 14.0 8 318.0 150.0 4096. 13.0 71 1 "plymouth fury iii" 43 | 12.0 8 383.0 180.0 4955. 11.5 71 1 "dodge monaco (sw)" 44 | 13.0 8 400.0 170.0 4746. 12.0 71 1 "ford country squire (sw)" 45 | 13.0 8 400.0 175.0 5140. 12.0 71 1 "pontiac safari (sw)" 46 | 18.0 6 258.0 110.0 2962. 13.5 71 1 "amc hornet sportabout (sw)" 47 | 22.0 4 140.0 72.00 2408. 19.0 71 1 "chevrolet vega (sw)" 48 | 19.0 6 250.0 100.0 3282. 15.0 71 1 "pontiac firebird" 49 | 18.0 6 250.0 88.00 3139. 14.5 71 1 "ford mustang" 50 | 23.0 4 122.0 86.00 2220. 14.0 71 1 "mercury capri 2000" 51 | 28.0 4 116.0 90.00 2123. 14.0 71 2 "opel 1900" 52 | 30.0 4 79.00 70.00 2074. 19.5 71 2 "peugeot 304" 53 | 30.0 4 88.00 76.00 2065. 14.5 71 2 "fiat 124b" 54 | 31.0 4 71.00 65.00 1773. 19.0 71 3 "toyota corolla 1200" 55 | 35.0 4 72.00 69.00 1613. 18.0 71 3 "datsun 1200" 56 | 27.0 4 97.00 60.00 1834. 19.0 71 2 "volkswagen model 111" 57 | 26.0 4 91.00 70.00 1955. 
20.5 71 1 "plymouth cricket" 58 | 24.0 4 113.0 95.00 2278. 15.5 72 3 "toyota corona hardtop" 59 | 25.0 4 97.50 80.00 2126. 17.0 72 1 "dodge colt hardtop" 60 | 23.0 4 97.00 54.00 2254. 23.5 72 2 "volkswagen type 3" 61 | 20.0 4 140.0 90.00 2408. 19.5 72 1 "chevrolet vega" 62 | 21.0 4 122.0 86.00 2226. 16.5 72 1 "ford pinto runabout" 63 | 13.0 8 350.0 165.0 4274. 12.0 72 1 "chevrolet impala" 64 | 14.0 8 400.0 175.0 4385. 12.0 72 1 "pontiac catalina" 65 | 15.0 8 318.0 150.0 4135. 13.5 72 1 "plymouth fury iii" 66 | 14.0 8 351.0 153.0 4129. 13.0 72 1 "ford galaxie 500" 67 | 17.0 8 304.0 150.0 3672. 11.5 72 1 "amc ambassador sst" 68 | 11.0 8 429.0 208.0 4633. 11.0 72 1 "mercury marquis" 69 | 13.0 8 350.0 155.0 4502. 13.5 72 1 "buick lesabre custom" 70 | 12.0 8 350.0 160.0 4456. 13.5 72 1 "oldsmobile delta 88 royale" 71 | 13.0 8 400.0 190.0 4422. 12.5 72 1 "chrysler newport royal" 72 | 19.0 3 70.00 97.00 2330. 13.5 72 3 "mazda rx2 coupe" 73 | 15.0 8 304.0 150.0 3892. 12.5 72 1 "amc matador (sw)" 74 | 13.0 8 307.0 130.0 4098. 14.0 72 1 "chevrolet chevelle concours (sw)" 75 | 13.0 8 302.0 140.0 4294. 16.0 72 1 "ford gran torino (sw)" 76 | 14.0 8 318.0 150.0 4077. 14.0 72 1 "plymouth satellite custom (sw)" 77 | 18.0 4 121.0 112.0 2933. 14.5 72 2 "volvo 145e (sw)" 78 | 22.0 4 121.0 76.00 2511. 18.0 72 2 "volkswagen 411 (sw)" 79 | 21.0 4 120.0 87.00 2979. 19.5 72 2 "peugeot 504 (sw)" 80 | 26.0 4 96.00 69.00 2189. 18.0 72 2 "renault 12 (sw)" 81 | 22.0 4 122.0 86.00 2395. 16.0 72 1 "ford pinto (sw)" 82 | 28.0 4 97.00 92.00 2288. 17.0 72 3 "datsun 510 (sw)" 83 | 23.0 4 120.0 97.00 2506. 14.5 72 3 "toyouta corona mark ii (sw)" 84 | 28.0 4 98.00 80.00 2164. 15.0 72 1 "dodge colt (sw)" 85 | 27.0 4 97.00 88.00 2100. 16.5 72 3 "toyota corolla 1600 (sw)" 86 | 13.0 8 350.0 175.0 4100. 13.0 73 1 "buick century 350" 87 | 14.0 8 304.0 150.0 3672. 11.5 73 1 "amc matador" 88 | 13.0 8 350.0 145.0 3988. 13.0 73 1 "chevrolet malibu" 89 | 14.0 8 302.0 137.0 4042. 14.5 73 1 "ford gran torino" 90 | 15.0 8 318.0 150.0 3777. 12.5 73 1 "dodge coronet custom" 91 | 12.0 8 429.0 198.0 4952. 11.5 73 1 "mercury marquis brougham" 92 | 13.0 8 400.0 150.0 4464. 12.0 73 1 "chevrolet caprice classic" 93 | 13.0 8 351.0 158.0 4363. 13.0 73 1 "ford ltd" 94 | 14.0 8 318.0 150.0 4237. 14.5 73 1 "plymouth fury gran sedan" 95 | 13.0 8 440.0 215.0 4735. 11.0 73 1 "chrysler new yorker brougham" 96 | 12.0 8 455.0 225.0 4951. 11.0 73 1 "buick electra 225 custom" 97 | 13.0 8 360.0 175.0 3821. 11.0 73 1 "amc ambassador brougham" 98 | 18.0 6 225.0 105.0 3121. 16.5 73 1 "plymouth valiant" 99 | 16.0 6 250.0 100.0 3278. 18.0 73 1 "chevrolet nova custom" 100 | 18.0 6 232.0 100.0 2945. 16.0 73 1 "amc hornet" 101 | 18.0 6 250.0 88.00 3021. 16.5 73 1 "ford maverick" 102 | 23.0 6 198.0 95.00 2904. 16.0 73 1 "plymouth duster" 103 | 26.0 4 97.00 46.00 1950. 21.0 73 2 "volkswagen super beetle" 104 | 11.0 8 400.0 150.0 4997. 14.0 73 1 "chevrolet impala" 105 | 12.0 8 400.0 167.0 4906. 12.5 73 1 "ford country" 106 | 13.0 8 360.0 170.0 4654. 13.0 73 1 "plymouth custom suburb" 107 | 12.0 8 350.0 180.0 4499. 12.5 73 1 "oldsmobile vista cruiser" 108 | 18.0 6 232.0 100.0 2789. 15.0 73 1 "amc gremlin" 109 | 20.0 4 97.00 88.00 2279. 19.0 73 3 "toyota carina" 110 | 21.0 4 140.0 72.00 2401. 19.5 73 1 "chevrolet vega" 111 | 22.0 4 108.0 94.00 2379. 16.5 73 3 "datsun 610" 112 | 18.0 3 70.00 90.00 2124. 13.5 73 3 "maxda rx3" 113 | 19.0 4 122.0 85.00 2310. 18.5 73 1 "ford pinto" 114 | 21.0 6 155.0 107.0 2472. 14.0 73 1 "mercury capri v6" 115 | 26.0 4 98.00 90.00 2265. 
15.5 73 2 "fiat 124 sport coupe" 116 | 15.0 8 350.0 145.0 4082. 13.0 73 1 "chevrolet monte carlo s" 117 | 16.0 8 400.0 230.0 4278. 9.50 73 1 "pontiac grand prix" 118 | 29.0 4 68.00 49.00 1867. 19.5 73 2 "fiat 128" 119 | 24.0 4 116.0 75.00 2158. 15.5 73 2 "opel manta" 120 | 20.0 4 114.0 91.00 2582. 14.0 73 2 "audi 100ls" 121 | 19.0 4 121.0 112.0 2868. 15.5 73 2 "volvo 144ea" 122 | 15.0 8 318.0 150.0 3399. 11.0 73 1 "dodge dart custom" 123 | 24.0 4 121.0 110.0 2660. 14.0 73 2 "saab 99le" 124 | 20.0 6 156.0 122.0 2807. 13.5 73 3 "toyota mark ii" 125 | 11.0 8 350.0 180.0 3664. 11.0 73 1 "oldsmobile omega" 126 | 20.0 6 198.0 95.00 3102. 16.5 74 1 "plymouth duster" 127 | 21.0 6 200.0 ? 2875. 17.0 74 1 "ford maverick" 128 | 19.0 6 232.0 100.0 2901. 16.0 74 1 "amc hornet" 129 | 15.0 6 250.0 100.0 3336. 17.0 74 1 "chevrolet nova" 130 | 31.0 4 79.00 67.00 1950. 19.0 74 3 "datsun b210" 131 | 26.0 4 122.0 80.00 2451. 16.5 74 1 "ford pinto" 132 | 32.0 4 71.00 65.00 1836. 21.0 74 3 "toyota corolla 1200" 133 | 25.0 4 140.0 75.00 2542. 17.0 74 1 "chevrolet vega" 134 | 16.0 6 250.0 100.0 3781. 17.0 74 1 "chevrolet chevelle malibu classic" 135 | 16.0 6 258.0 110.0 3632. 18.0 74 1 "amc matador" 136 | 18.0 6 225.0 105.0 3613. 16.5 74 1 "plymouth satellite sebring" 137 | 16.0 8 302.0 140.0 4141. 14.0 74 1 "ford gran torino" 138 | 13.0 8 350.0 150.0 4699. 14.5 74 1 "buick century luxus (sw)" 139 | 14.0 8 318.0 150.0 4457. 13.5 74 1 "dodge coronet custom (sw)" 140 | 14.0 8 302.0 140.0 4638. 16.0 74 1 "ford gran torino (sw)" 141 | 14.0 8 304.0 150.0 4257. 15.5 74 1 "amc matador (sw)" 142 | 29.0 4 98.00 83.00 2219. 16.5 74 2 "audi fox" 143 | 26.0 4 79.00 67.00 1963. 15.5 74 2 "volkswagen dasher" 144 | 26.0 4 97.00 78.00 2300. 14.5 74 2 "opel manta" 145 | 31.0 4 76.00 52.00 1649. 16.5 74 3 "toyota corona" 146 | 32.0 4 83.00 61.00 2003. 19.0 74 3 "datsun 710" 147 | 28.0 4 90.00 75.00 2125. 14.5 74 1 "dodge colt" 148 | 24.0 4 90.00 75.00 2108. 15.5 74 2 "fiat 128" 149 | 26.0 4 116.0 75.00 2246. 14.0 74 2 "fiat 124 tc" 150 | 24.0 4 120.0 97.00 2489. 15.0 74 3 "honda civic" 151 | 26.0 4 108.0 93.00 2391. 15.5 74 3 "subaru" 152 | 31.0 4 79.00 67.00 2000. 16.0 74 2 "fiat x1.9" 153 | 19.0 6 225.0 95.00 3264. 16.0 75 1 "plymouth valiant custom" 154 | 18.0 6 250.0 105.0 3459. 16.0 75 1 "chevrolet nova" 155 | 15.0 6 250.0 72.00 3432. 21.0 75 1 "mercury monarch" 156 | 15.0 6 250.0 72.00 3158. 19.5 75 1 "ford maverick" 157 | 16.0 8 400.0 170.0 4668. 11.5 75 1 "pontiac catalina" 158 | 15.0 8 350.0 145.0 4440. 14.0 75 1 "chevrolet bel air" 159 | 16.0 8 318.0 150.0 4498. 14.5 75 1 "plymouth grand fury" 160 | 14.0 8 351.0 148.0 4657. 13.5 75 1 "ford ltd" 161 | 17.0 6 231.0 110.0 3907. 21.0 75 1 "buick century" 162 | 16.0 6 250.0 105.0 3897. 18.5 75 1 "chevroelt chevelle malibu" 163 | 15.0 6 258.0 110.0 3730. 19.0 75 1 "amc matador" 164 | 18.0 6 225.0 95.00 3785. 19.0 75 1 "plymouth fury" 165 | 21.0 6 231.0 110.0 3039. 15.0 75 1 "buick skyhawk" 166 | 20.0 8 262.0 110.0 3221. 13.5 75 1 "chevrolet monza 2+2" 167 | 13.0 8 302.0 129.0 3169. 12.0 75 1 "ford mustang ii" 168 | 29.0 4 97.00 75.00 2171. 16.0 75 3 "toyota corolla" 169 | 23.0 4 140.0 83.00 2639. 17.0 75 1 "ford pinto" 170 | 20.0 6 232.0 100.0 2914. 16.0 75 1 "amc gremlin" 171 | 23.0 4 140.0 78.00 2592. 18.5 75 1 "pontiac astro" 172 | 24.0 4 134.0 96.00 2702. 13.5 75 3 "toyota corona" 173 | 25.0 4 90.00 71.00 2223. 16.5 75 2 "volkswagen dasher" 174 | 24.0 4 119.0 97.00 2545. 17.0 75 3 "datsun 710" 175 | 18.0 6 171.0 97.00 2984. 
14.5 75 1 "ford pinto" 176 | 29.0 4 90.00 70.00 1937. 14.0 75 2 "volkswagen rabbit" 177 | 19.0 6 232.0 90.00 3211. 17.0 75 1 "amc pacer" 178 | 23.0 4 115.0 95.00 2694. 15.0 75 2 "audi 100ls" 179 | 23.0 4 120.0 88.00 2957. 17.0 75 2 "peugeot 504" 180 | 22.0 4 121.0 98.00 2945. 14.5 75 2 "volvo 244dl" 181 | 25.0 4 121.0 115.0 2671. 13.5 75 2 "saab 99le" 182 | 33.0 4 91.00 53.00 1795. 17.5 75 3 "honda civic cvcc" 183 | 28.0 4 107.0 86.00 2464. 15.5 76 2 "fiat 131" 184 | 25.0 4 116.0 81.00 2220. 16.9 76 2 "opel 1900" 185 | 25.0 4 140.0 92.00 2572. 14.9 76 1 "capri ii" 186 | 26.0 4 98.00 79.00 2255. 17.7 76 1 "dodge colt" 187 | 27.0 4 101.0 83.00 2202. 15.3 76 2 "renault 12tl" 188 | 17.5 8 305.0 140.0 4215. 13.0 76 1 "chevrolet chevelle malibu classic" 189 | 16.0 8 318.0 150.0 4190. 13.0 76 1 "dodge coronet brougham" 190 | 15.5 8 304.0 120.0 3962. 13.9 76 1 "amc matador" 191 | 14.5 8 351.0 152.0 4215. 12.8 76 1 "ford gran torino" 192 | 22.0 6 225.0 100.0 3233. 15.4 76 1 "plymouth valiant" 193 | 22.0 6 250.0 105.0 3353. 14.5 76 1 "chevrolet nova" 194 | 24.0 6 200.0 81.00 3012. 17.6 76 1 "ford maverick" 195 | 22.5 6 232.0 90.00 3085. 17.6 76 1 "amc hornet" 196 | 29.0 4 85.00 52.00 2035. 22.2 76 1 "chevrolet chevette" 197 | 24.5 4 98.00 60.00 2164. 22.1 76 1 "chevrolet woody" 198 | 29.0 4 90.00 70.00 1937. 14.2 76 2 "vw rabbit" 199 | 33.0 4 91.00 53.00 1795. 17.4 76 3 "honda civic" 200 | 20.0 6 225.0 100.0 3651. 17.7 76 1 "dodge aspen se" 201 | 18.0 6 250.0 78.00 3574. 21.0 76 1 "ford granada ghia" 202 | 18.5 6 250.0 110.0 3645. 16.2 76 1 "pontiac ventura sj" 203 | 17.5 6 258.0 95.00 3193. 17.8 76 1 "amc pacer d/l" 204 | 29.5 4 97.00 71.00 1825. 12.2 76 2 "volkswagen rabbit" 205 | 32.0 4 85.00 70.00 1990. 17.0 76 3 "datsun b-210" 206 | 28.0 4 97.00 75.00 2155. 16.4 76 3 "toyota corolla" 207 | 26.5 4 140.0 72.00 2565. 13.6 76 1 "ford pinto" 208 | 20.0 4 130.0 102.0 3150. 15.7 76 2 "volvo 245" 209 | 13.0 8 318.0 150.0 3940. 13.2 76 1 "plymouth volare premier v8" 210 | 19.0 4 120.0 88.00 3270. 21.9 76 2 "peugeot 504" 211 | 19.0 6 156.0 108.0 2930. 15.5 76 3 "toyota mark ii" 212 | 16.5 6 168.0 120.0 3820. 16.7 76 2 "mercedes-benz 280s" 213 | 16.5 8 350.0 180.0 4380. 12.1 76 1 "cadillac seville" 214 | 13.0 8 350.0 145.0 4055. 12.0 76 1 "chevy c10" 215 | 13.0 8 302.0 130.0 3870. 15.0 76 1 "ford f108" 216 | 13.0 8 318.0 150.0 3755. 14.0 76 1 "dodge d100" 217 | 31.5 4 98.00 68.00 2045. 18.5 77 3 "honda accord cvcc" 218 | 30.0 4 111.0 80.00 2155. 14.8 77 1 "buick opel isuzu deluxe" 219 | 36.0 4 79.00 58.00 1825. 18.6 77 2 "renault 5 gtl" 220 | 25.5 4 122.0 96.00 2300. 15.5 77 1 "plymouth arrow gs" 221 | 33.5 4 85.00 70.00 1945. 16.8 77 3 "datsun f-10 hatchback" 222 | 17.5 8 305.0 145.0 3880. 12.5 77 1 "chevrolet caprice classic" 223 | 17.0 8 260.0 110.0 4060. 19.0 77 1 "oldsmobile cutlass supreme" 224 | 15.5 8 318.0 145.0 4140. 13.7 77 1 "dodge monaco brougham" 225 | 15.0 8 302.0 130.0 4295. 14.9 77 1 "mercury cougar brougham" 226 | 17.5 6 250.0 110.0 3520. 16.4 77 1 "chevrolet concours" 227 | 20.5 6 231.0 105.0 3425. 16.9 77 1 "buick skylark" 228 | 19.0 6 225.0 100.0 3630. 17.7 77 1 "plymouth volare custom" 229 | 18.5 6 250.0 98.00 3525. 19.0 77 1 "ford granada" 230 | 16.0 8 400.0 180.0 4220. 11.1 77 1 "pontiac grand prix lj" 231 | 15.5 8 350.0 170.0 4165. 11.4 77 1 "chevrolet monte carlo landau" 232 | 15.5 8 400.0 190.0 4325. 12.2 77 1 "chrysler cordoba" 233 | 16.0 8 351.0 149.0 4335. 14.5 77 1 "ford thunderbird" 234 | 29.0 4 97.00 78.00 1940. 
14.5 77 2 "volkswagen rabbit custom" 235 | 24.5 4 151.0 88.00 2740. 16.0 77 1 "pontiac sunbird coupe" 236 | 26.0 4 97.00 75.00 2265. 18.2 77 3 "toyota corolla liftback" 237 | 25.5 4 140.0 89.00 2755. 15.8 77 1 "ford mustang ii 2+2" 238 | 30.5 4 98.00 63.00 2051. 17.0 77 1 "chevrolet chevette" 239 | 33.5 4 98.00 83.00 2075. 15.9 77 1 "dodge colt m/m" 240 | 30.0 4 97.00 67.00 1985. 16.4 77 3 "subaru dl" 241 | 30.5 4 97.00 78.00 2190. 14.1 77 2 "volkswagen dasher" 242 | 22.0 6 146.0 97.00 2815. 14.5 77 3 "datsun 810" 243 | 21.5 4 121.0 110.0 2600. 12.8 77 2 "bmw 320i" 244 | 21.5 3 80.00 110.0 2720. 13.5 77 3 "mazda rx-4" 245 | 43.1 4 90.00 48.00 1985. 21.5 78 2 "volkswagen rabbit custom diesel" 246 | 36.1 4 98.00 66.00 1800. 14.4 78 1 "ford fiesta" 247 | 32.8 4 78.00 52.00 1985. 19.4 78 3 "mazda glc deluxe" 248 | 39.4 4 85.00 70.00 2070. 18.6 78 3 "datsun b210 gx" 249 | 36.1 4 91.00 60.00 1800. 16.4 78 3 "honda civic cvcc" 250 | 19.9 8 260.0 110.0 3365. 15.5 78 1 "oldsmobile cutlass salon brougham" 251 | 19.4 8 318.0 140.0 3735. 13.2 78 1 "dodge diplomat" 252 | 20.2 8 302.0 139.0 3570. 12.8 78 1 "mercury monarch ghia" 253 | 19.2 6 231.0 105.0 3535. 19.2 78 1 "pontiac phoenix lj" 254 | 20.5 6 200.0 95.00 3155. 18.2 78 1 "chevrolet malibu" 255 | 20.2 6 200.0 85.00 2965. 15.8 78 1 "ford fairmont (auto)" 256 | 25.1 4 140.0 88.00 2720. 15.4 78 1 "ford fairmont (man)" 257 | 20.5 6 225.0 100.0 3430. 17.2 78 1 "plymouth volare" 258 | 19.4 6 232.0 90.00 3210. 17.2 78 1 "amc concord" 259 | 20.6 6 231.0 105.0 3380. 15.8 78 1 "buick century special" 260 | 20.8 6 200.0 85.00 3070. 16.7 78 1 "mercury zephyr" 261 | 18.6 6 225.0 110.0 3620. 18.7 78 1 "dodge aspen" 262 | 18.1 6 258.0 120.0 3410. 15.1 78 1 "amc concord d/l" 263 | 19.2 8 305.0 145.0 3425. 13.2 78 1 "chevrolet monte carlo landau" 264 | 17.7 6 231.0 165.0 3445. 13.4 78 1 "buick regal sport coupe (turbo)" 265 | 18.1 8 302.0 139.0 3205. 11.2 78 1 "ford futura" 266 | 17.5 8 318.0 140.0 4080. 13.7 78 1 "dodge magnum xe" 267 | 30.0 4 98.00 68.00 2155. 16.5 78 1 "chevrolet chevette" 268 | 27.5 4 134.0 95.00 2560. 14.2 78 3 "toyota corona" 269 | 27.2 4 119.0 97.00 2300. 14.7 78 3 "datsun 510" 270 | 30.9 4 105.0 75.00 2230. 14.5 78 1 "dodge omni" 271 | 21.1 4 134.0 95.00 2515. 14.8 78 3 "toyota celica gt liftback" 272 | 23.2 4 156.0 105.0 2745. 16.7 78 1 "plymouth sapporo" 273 | 23.8 4 151.0 85.00 2855. 17.6 78 1 "oldsmobile starfire sx" 274 | 23.9 4 119.0 97.00 2405. 14.9 78 3 "datsun 200-sx" 275 | 20.3 5 131.0 103.0 2830. 15.9 78 2 "audi 5000" 276 | 17.0 6 163.0 125.0 3140. 13.6 78 2 "volvo 264gl" 277 | 21.6 4 121.0 115.0 2795. 15.7 78 2 "saab 99gle" 278 | 16.2 6 163.0 133.0 3410. 15.8 78 2 "peugeot 604sl" 279 | 31.5 4 89.00 71.00 1990. 14.9 78 2 "volkswagen scirocco" 280 | 29.5 4 98.00 68.00 2135. 16.6 78 3 "honda accord lx" 281 | 21.5 6 231.0 115.0 3245. 15.4 79 1 "pontiac lemans v6" 282 | 19.8 6 200.0 85.00 2990. 18.2 79 1 "mercury zephyr 6" 283 | 22.3 4 140.0 88.00 2890. 17.3 79 1 "ford fairmont 4" 284 | 20.2 6 232.0 90.00 3265. 18.2 79 1 "amc concord dl 6" 285 | 20.6 6 225.0 110.0 3360. 16.6 79 1 "dodge aspen 6" 286 | 17.0 8 305.0 130.0 3840. 15.4 79 1 "chevrolet caprice classic" 287 | 17.6 8 302.0 129.0 3725. 13.4 79 1 "ford ltd landau" 288 | 16.5 8 351.0 138.0 3955. 13.2 79 1 "mercury grand marquis" 289 | 18.2 8 318.0 135.0 3830. 15.2 79 1 "dodge st. regis" 290 | 16.9 8 350.0 155.0 4360. 14.9 79 1 "buick estate wagon (sw)" 291 | 15.5 8 351.0 142.0 4054. 14.3 79 1 "ford country squire (sw)" 292 | 19.2 8 267.0 125.0 3605. 
15.0 79 1 "chevrolet malibu classic (sw)" 293 | 18.5 8 360.0 150.0 3940. 13.0 79 1 "chrysler lebaron town @ country (sw)" 294 | 31.9 4 89.00 71.00 1925. 14.0 79 2 "vw rabbit custom" 295 | 34.1 4 86.00 65.00 1975. 15.2 79 3 "maxda glc deluxe" 296 | 35.7 4 98.00 80.00 1915. 14.4 79 1 "dodge colt hatchback custom" 297 | 27.4 4 121.0 80.00 2670. 15.0 79 1 "amc spirit dl" 298 | 25.4 5 183.0 77.00 3530. 20.1 79 2 "mercedes benz 300d" 299 | 23.0 8 350.0 125.0 3900. 17.4 79 1 "cadillac eldorado" 300 | 27.2 4 141.0 71.00 3190. 24.8 79 2 "peugeot 504" 301 | 23.9 8 260.0 90.00 3420. 22.2 79 1 "oldsmobile cutlass salon brougham" 302 | 34.2 4 105.0 70.00 2200. 13.2 79 1 "plymouth horizon" 303 | 34.5 4 105.0 70.00 2150. 14.9 79 1 "plymouth horizon tc3" 304 | 31.8 4 85.00 65.00 2020. 19.2 79 3 "datsun 210" 305 | 37.3 4 91.00 69.00 2130. 14.7 79 2 "fiat strada custom" 306 | 28.4 4 151.0 90.00 2670. 16.0 79 1 "buick skylark limited" 307 | 28.8 6 173.0 115.0 2595. 11.3 79 1 "chevrolet citation" 308 | 26.8 6 173.0 115.0 2700. 12.9 79 1 "oldsmobile omega brougham" 309 | 33.5 4 151.0 90.00 2556. 13.2 79 1 "pontiac phoenix" 310 | 41.5 4 98.00 76.00 2144. 14.7 80 2 "vw rabbit" 311 | 38.1 4 89.00 60.00 1968. 18.8 80 3 "toyota corolla tercel" 312 | 32.1 4 98.00 70.00 2120. 15.5 80 1 "chevrolet chevette" 313 | 37.2 4 86.00 65.00 2019. 16.4 80 3 "datsun 310" 314 | 28.0 4 151.0 90.00 2678. 16.5 80 1 "chevrolet citation" 315 | 26.4 4 140.0 88.00 2870. 18.1 80 1 "ford fairmont" 316 | 24.3 4 151.0 90.00 3003. 20.1 80 1 "amc concord" 317 | 19.1 6 225.0 90.00 3381. 18.7 80 1 "dodge aspen" 318 | 34.3 4 97.00 78.00 2188. 15.8 80 2 "audi 4000" 319 | 29.8 4 134.0 90.00 2711. 15.5 80 3 "toyota corona liftback" 320 | 31.3 4 120.0 75.00 2542. 17.5 80 3 "mazda 626" 321 | 37.0 4 119.0 92.00 2434. 15.0 80 3 "datsun 510 hatchback" 322 | 32.2 4 108.0 75.00 2265. 15.2 80 3 "toyota corolla" 323 | 46.6 4 86.00 65.00 2110. 17.9 80 3 "mazda glc" 324 | 27.9 4 156.0 105.0 2800. 14.4 80 1 "dodge colt" 325 | 40.8 4 85.00 65.00 2110. 19.2 80 3 "datsun 210" 326 | 44.3 4 90.00 48.00 2085. 21.7 80 2 "vw rabbit c (diesel)" 327 | 43.4 4 90.00 48.00 2335. 23.7 80 2 "vw dasher (diesel)" 328 | 36.4 5 121.0 67.00 2950. 19.9 80 2 "audi 5000s (diesel)" 329 | 30.0 4 146.0 67.00 3250. 21.8 80 2 "mercedes-benz 240d" 330 | 44.6 4 91.00 67.00 1850. 13.8 80 3 "honda civic 1500 gl" 331 | 40.9 4 85.00 ? 1835. 17.3 80 2 "renault lecar deluxe" 332 | 33.8 4 97.00 67.00 2145. 18.0 80 3 "subaru dl" 333 | 29.8 4 89.00 62.00 1845. 15.3 80 2 "vokswagen rabbit" 334 | 32.7 6 168.0 132.0 2910. 11.4 80 3 "datsun 280-zx" 335 | 23.7 3 70.00 100.0 2420. 12.5 80 3 "mazda rx-7 gs" 336 | 35.0 4 122.0 88.00 2500. 15.1 80 2 "triumph tr7 coupe" 337 | 23.6 4 140.0 ? 2905. 14.3 80 1 "ford mustang cobra" 338 | 32.4 4 107.0 72.00 2290. 17.0 80 3 "honda accord" 339 | 27.2 4 135.0 84.00 2490. 15.7 81 1 "plymouth reliant" 340 | 26.6 4 151.0 84.00 2635. 16.4 81 1 "buick skylark" 341 | 25.8 4 156.0 92.00 2620. 14.4 81 1 "dodge aries wagon (sw)" 342 | 23.5 6 173.0 110.0 2725. 12.6 81 1 "chevrolet citation" 343 | 30.0 4 135.0 84.00 2385. 12.9 81 1 "plymouth reliant" 344 | 39.1 4 79.00 58.00 1755. 16.9 81 3 "toyota starlet" 345 | 39.0 4 86.00 64.00 1875. 16.4 81 1 "plymouth champ" 346 | 35.1 4 81.00 60.00 1760. 16.1 81 3 "honda civic 1300" 347 | 32.3 4 97.00 67.00 2065. 17.8 81 3 "subaru" 348 | 37.0 4 85.00 65.00 1975. 19.4 81 3 "datsun 210 mpg" 349 | 37.7 4 89.00 62.00 2050. 17.3 81 3 "toyota tercel" 350 | 34.1 4 91.00 68.00 1985. 16.0 81 3 "mazda glc 4" 351 | 34.7 4 105.0 63.00 2215. 
14.9 81 1 "plymouth horizon 4" 352 | 34.4 4 98.00 65.00 2045. 16.2 81 1 "ford escort 4w" 353 | 29.9 4 98.00 65.00 2380. 20.7 81 1 "ford escort 2h" 354 | 33.0 4 105.0 74.00 2190. 14.2 81 2 "volkswagen jetta" 355 | 34.5 4 100.0 ? 2320. 15.8 81 2 "renault 18i" 356 | 33.7 4 107.0 75.00 2210. 14.4 81 3 "honda prelude" 357 | 32.4 4 108.0 75.00 2350. 16.8 81 3 "toyota corolla" 358 | 32.9 4 119.0 100.0 2615. 14.8 81 3 "datsun 200sx" 359 | 31.6 4 120.0 74.00 2635. 18.3 81 3 "mazda 626" 360 | 28.1 4 141.0 80.00 3230. 20.4 81 2 "peugeot 505s turbo diesel" 361 | 30.7 6 145.0 76.00 3160. 19.6 81 2 "volvo diesel" 362 | 25.4 6 168.0 116.0 2900. 12.6 81 3 "toyota cressida" 363 | 24.2 6 146.0 120.0 2930. 13.8 81 3 "datsun 810 maxima" 364 | 22.4 6 231.0 110.0 3415. 15.8 81 1 "buick century" 365 | 26.6 8 350.0 105.0 3725. 19.0 81 1 "oldsmobile cutlass ls" 366 | 20.2 6 200.0 88.00 3060. 17.1 81 1 "ford granada gl" 367 | 17.6 6 225.0 85.00 3465. 16.6 81 1 "chrysler lebaron salon" 368 | 28.0 4 112.0 88.00 2605. 19.6 82 1 "chevrolet cavalier" 369 | 27.0 4 112.0 88.00 2640. 18.6 82 1 "chevrolet cavalier wagon" 370 | 34.0 4 112.0 88.00 2395. 18.0 82 1 "chevrolet cavalier 2-door" 371 | 31.0 4 112.0 85.00 2575. 16.2 82 1 "pontiac j2000 se hatchback" 372 | 29.0 4 135.0 84.00 2525. 16.0 82 1 "dodge aries se" 373 | 27.0 4 151.0 90.00 2735. 18.0 82 1 "pontiac phoenix" 374 | 24.0 4 140.0 92.00 2865. 16.4 82 1 "ford fairmont futura" 375 | 23.0 4 151.0 ? 3035. 20.5 82 1 "amc concord dl" 376 | 36.0 4 105.0 74.00 1980. 15.3 82 2 "volkswagen rabbit l" 377 | 37.0 4 91.00 68.00 2025. 18.2 82 3 "mazda glc custom l" 378 | 31.0 4 91.00 68.00 1970. 17.6 82 3 "mazda glc custom" 379 | 38.0 4 105.0 63.00 2125. 14.7 82 1 "plymouth horizon miser" 380 | 36.0 4 98.00 70.00 2125. 17.3 82 1 "mercury lynx l" 381 | 36.0 4 120.0 88.00 2160. 14.5 82 3 "nissan stanza xe" 382 | 36.0 4 107.0 75.00 2205. 14.5 82 3 "honda accord" 383 | 34.0 4 108.0 70.00 2245 16.9 82 3 "toyota corolla" 384 | 38.0 4 91.00 67.00 1965. 15.0 82 3 "honda civic" 385 | 32.0 4 91.00 67.00 1965. 15.7 82 3 "honda civic (auto)" 386 | 38.0 4 91.00 67.00 1995. 16.2 82 3 "datsun 310 gx" 387 | 25.0 6 181.0 110.0 2945. 16.4 82 1 "buick century limited" 388 | 38.0 6 262.0 85.00 3015. 17.0 82 1 "oldsmobile cutlass ciera (diesel)" 389 | 26.0 4 156.0 92.00 2585. 14.5 82 1 "chrysler lebaron medallion" 390 | 22.0 6 232.0 112.0 2835 14.7 82 1 "ford granada l" 391 | 32.0 4 144.0 96.00 2665. 13.9 82 3 "toyota celica gt" 392 | 36.0 4 135.0 84.00 2370. 13.0 82 1 "dodge charger 2.2" 393 | 27.0 4 151.0 90.00 2950. 17.3 82 1 "chevrolet camaro" 394 | 27.0 4 140.0 86.00 2790. 15.6 82 1 "ford mustang gl" 395 | 44.0 4 97.00 52.00 2130. 24.6 82 2 "vw pickup" 396 | 32.0 4 135.0 84.00 2295. 11.6 82 1 "dodge rampage" 397 | 28.0 4 120.0 79.00 2625. 18.6 82 1 "ford ranger" 398 | 31.0 4 119.0 82.00 2720. 19.4 82 1 "chevy s-10" 399 | -------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/linreg-normal_equations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | (C) 2017 Nikolay Manchev, London Machine Learning Study Group 4 | 5 | http://www.meetup.com/London-Machine-Learning-Study-Group/ 6 | 7 | This work is licensed under the Creative Commons Attribution 4.0 International 8 | License. To view a copy of this license, visit 9 | http://creativecommons.org/licenses/by/4.0/. 
10 | """ 11 | 12 | import numpy as np 13 | 14 | import matplotlib.pyplot as plt 15 | 16 | x = np.array([[1,2,3,4,5,6,7,8,9,10]]).T 17 | y = np.array([[0,0,0,0,0,1,1,1,1,1]]).T 18 | 19 | # Normalize the inputs 20 | x = (x - np.mean(x)) / np.std(x) 21 | 22 | # Add ones for w_0 23 | mat_ones = np.ones(shape=(x.shape[0], 2)) 24 | mat_ones[:,1] = x[:,0] 25 | x = mat_ones 26 | 27 | # Normal equations method 28 | xTx = np.linalg.inv(x.T.dot(x)) 29 | xTy = x.T.dot(y) 30 | w = xTx.dot(xTy) 31 | 32 | print("Model parameters:\n") 33 | print(w) 34 | 35 | # Plot X and y 36 | f, ax1 = plt.subplots(1, 1, figsize=(7,7)) 37 | ax1.scatter(x[:,1], y) 38 | 39 | # Make predictions on the training set 40 | y_hat = w[0] + w[1]*x[:,1] 41 | 42 | # Plot the regression line 43 | ax1.plot(x[:,1], y_hat, color='r') 44 | ax1.grid(True) -------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/logreg-hp-origin.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2017 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 8 | """ 9 | 10 | import numpy as np 11 | 12 | import matplotlib.pyplot as plt 13 | 14 | def y_hat(x, w): 15 | """ 16 | Logistic regression hypothesis: y_hat = 1 / (1 + e^(-x*w)) 17 | """ 18 | 19 | return (1/(1+np.exp(-x.dot(w)))) 20 | 21 | def gradient_ascent(x, y, w, max_iter, alpha = 0.01): 22 | """ 23 | Performs gradient ascent to optimise L(w). 24 | 25 | Keyword arguments: 26 | 27 | *x* : Numpy array 28 | matrix of independent variables 29 | 30 | *y* : Numpy array 31 | columnar vector of target values 32 | 33 | *w* : Numpy array 34 | initial model parameters 35 | 36 | *max_iter* : int 37 | maximum number of iterations 38 | 39 | *alpha* : int, optional 40 | learning rate (defaults to 0.01) 41 | 42 | Returns: 43 | 44 | *L_hist* : Numpy array 45 | values of L(w) at each iteration 46 | 47 | *w* : Numpy array 48 | estimated model parameters 49 | """ 50 | 51 | L_hist = np.zeros(max_iter) 52 | 53 | print("\nGradient ascent starts.\n") 54 | 55 | for i in range(0, max_iter): 56 | 57 | # Likelihood function 58 | L = np.sum(y.T.dot(np.log(y_hat(x, w))) + (1-y.T).dot(np.log(1-y_hat(x, w)))) 59 | 60 | # Keep L(w) for each iteration (for the final plot) 61 | L_hist[i] = L 62 | 63 | print("Iteration %d, L(w): %f\n" % (i, L)) 64 | 65 | # Compute the gradient and adjust the model parameters 66 | gradient = np.dot(x.T, y - y_hat(x, w) ) 67 | 68 | w = w + alpha * gradient 69 | 70 | print("Gradient ascent finished.\n") 71 | 72 | return (L_hist, w) 73 | 74 | 75 | # Load the data set 76 | # We use Auto MPG from UCI Machine Learning Repository 77 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG 78 | 79 | car_data = np.genfromtxt("auto-mpg.data", usecols=(3, 7)) 80 | car_data = car_data[~np.isnan(car_data).any(axis=1)] 81 | 82 | # Remove the data for Japan and recode US and Europe as 0 and 1 83 | car_data = car_data[car_data[:,1]!=3] 84 | car_data[:,1][car_data[:,1] == 1] = 0 85 | car_data[:,1][car_data[:,1] == 2] = 1 86 | 87 | # Assign Horsepower attribute to x and Origin to y 88 | x = car_data[:,0] 89 | y = car_data[:,1] 90 | 91 | x = np.array([x]).T 92 | y = np.array([y]).T 93 | 94 | # Normalize the inputs 95 | hp_mean = np.mean(x) 96 | hp_std 
= np.std(x) 97 | x = (x - hp_mean) / hp_std 98 | 99 | # Initialise w with ones 100 | m,n=np.shape(x) 101 | w = np.array([np.ones(n)]).T 102 | 103 | # Perform gradient ascent 104 | (l_hist, w) = gradient_ascent(x, y, w, 10) 105 | 106 | print("Model parameters:\n") 107 | print(w) 108 | 109 | # Plot X and y 110 | f, (ax1,ax2) = plt.subplots(1, 2, figsize=(7,7)) 111 | ax1.scatter(x, y) 112 | 113 | # Plot the decision boundary 114 | x = np.arange(-5, 5, 1)[np.newaxis].T 115 | ax1.plot(x, y_hat(x, w), color='r') 116 | ax1.grid(True) 117 | 118 | # Plot the change of L(w) 119 | x = np.arange(1,l_hist.size + 1) 120 | y = l_hist 121 | 122 | ax2.plot(x, l_hist) 123 | ax2.grid(True) 124 | 125 | # To make predictions use 126 | # y_hat(np.array([(...hp_input...-hp_mean)/(hp_std)]), w) -------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/logreg_gradient.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2017 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 8 | """ 9 | 10 | import numpy as np 11 | 12 | import matplotlib.pyplot as plt 13 | 14 | def y_hat(x, w): 15 | """ 16 | Logistic regression hypothesis: y_hat = 1 / (1 + e^(-x*w)) 17 | """ 18 | 19 | return (1/(1+np.exp(-x.dot(w)))) 20 | 21 | def gradient_ascent(x, y, w, max_iter, alpha = 0.01): 22 | """ 23 | Performs gradient ascent to optimise L(w). 24 | 25 | Keyword arguments: 26 | 27 | *x* : Numpy array 28 | matrix of independent variables 29 | 30 | *y* : Numpy array 31 | columnar vector of target values 32 | 33 | *w* : Numpy array 34 | initial model parameters 35 | 36 | *max_iter* : int 37 | maximum number of iterations 38 | 39 | *alpha* : int, optional 40 | learning rate (defaults to 0.01) 41 | 42 | Returns: 43 | 44 | *L_hist* : Numpy array 45 | values of L(w) at each iteration 46 | 47 | *w* : Numpy array 48 | estimated model parameters 49 | """ 50 | 51 | L_hist = np.zeros(max_iter) 52 | 53 | print("\nGradient ascent starts.\n") 54 | 55 | for i in range(0, max_iter): 56 | 57 | # Likelihood function 58 | L = np.sum(y.T.dot(np.log(y_hat(x, w))) + (1-y.T).dot(np.log(1-y_hat(x, w)))) 59 | 60 | # Keep L(w) for each iteration (for the final plot) 61 | L_hist[i] = L 62 | 63 | print("Iteration %d, L(w): %f\n" % (i, L)) 64 | 65 | # Compute the gradient and adjust the model parameters 66 | gradient = np.dot(x.T, y - y_hat(x, w) ) 67 | 68 | w = w + alpha * gradient 69 | 70 | print("Gradient ascent finished.\n") 71 | 72 | return (L_hist, w) 73 | 74 | 75 | # Load the data set 76 | x = np.array([[1,2,3,4,5,6,7,8,9,10]]).T 77 | y = np.array([[0,0,0,0,0,1,1,1,1,1]]).T 78 | 79 | # Normalize the inputs 80 | x = (x - np.mean(x)) / np.std(x) 81 | 82 | # Initialise w with ones 83 | m,n=np.shape(x) 84 | w = np.array([np.ones(n)]).T 85 | 86 | # Perform gradient ascent for 25 iterations 87 | (l_hist, w) = gradient_ascent(x, y, w, 25) 88 | 89 | print("Model parameters:\n") 90 | print(w) 91 | 92 | # Plot X and y 93 | f, (ax1,ax2) = plt.subplots(1, 2, figsize=(7,7)) 94 | ax1.scatter(x, y) 95 | 96 | # Plot the decision boundary 97 | ax1.plot(x, y_hat(x, w), color='r') 98 | ax1.grid(True) 99 | 100 | # Plot the change of L(w) 101 | x = np.arange(1,l_hist.size + 1) 102 | y = l_hist 103 | 104 | 
ax2.plot(x, l_hist) 105 | ax2.grid(True) 106 | 107 | -------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/logreg_gradient_2_variables.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2017 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 8 | """ 9 | 10 | import numpy as np 11 | 12 | import matplotlib.pyplot as plt 13 | 14 | def y_hat(x, w): 15 | """ 16 | Logistic regression hypothesis: y_hat = 1 / (1 + e^(-x*w)) 17 | """ 18 | 19 | return (1/(1+np.exp(-x.dot(w)))) 20 | 21 | def gradient_ascent(x, y, w, max_iter, alpha = 0.01): 22 | """ 23 | Performs gradient ascent to optimise L(w). 24 | 25 | Keyword arguments: 26 | 27 | *x* : Numpy array 28 | matrix of independent variables 29 | 30 | *y* : Numpy array 31 | columnar vector of target values 32 | 33 | *w* : Numpy array 34 | initial model parameters 35 | 36 | *max_iter* : int 37 | maximum number of iterations 38 | 39 | *alpha* : float, optional 40 | learning rate (defaults to 0.01) 41 | 42 | Returns: 43 | 44 | *L_hist* : Numpy array 45 | values of L(w) at each iteration 46 | 47 | *w* : Numpy array 48 | estimated model parameters 49 | """ 50 | 51 | L_hist = np.zeros(max_iter) 52 | 53 | print("\nGradient ascent starts.\n") 54 | 55 | for i in range(0, max_iter): 56 | 57 | # Log-likelihood function 58 | L = np.sum(y.T.dot(np.log(y_hat(x, w))) + (1-y.T).dot(np.log(1-y_hat(x, w)))) 59 | 60 | # Keep L(w) for each iteration (for the final plot) 61 | L_hist[i] = L 62 | 63 | print("Iteration %d, L(w): %f\n" % (i, L)) 64 | 65 | # Compute the gradient and adjust the model parameters 66 | gradient = np.dot(x.T, y - y_hat(x, w)) 67 | 68 | w = w + alpha * gradient 69 | 70 | print("Gradient ascent finished.\n") 71 | 72 | return (L_hist, w) 73 | 74 | 75 | # Load the data set 76 | # We use Auto MPG from UCI Machine Learning Repository 77 | # https://archive.ics.uci.edu/ml/datasets/Auto+MPG 78 | 79 | car_data = np.genfromtxt("auto-mpg.data", usecols=(4, 3, 7)) 80 | car_data = car_data[~np.isnan(car_data).any(axis=1)] 81 | 82 | # Remove the data for Japan and recode US and Europe as 0 and 1 (origin is column 2 after usecols) 83 | car_data = car_data[car_data[:,2]!=3] 84 | car_data[:,2][car_data[:,2] == 1] = 0 85 | car_data[:,2][car_data[:,2] == 2] = 1 86 | 87 | # Assign Weight and Horsepower attributes to x and Origin to y 88 | x = car_data[:,[0,1]] 89 | y = car_data[:,2] 90 | 91 | y = np.array([y]).T 92 | 93 | # Normalize the inputs 94 | weight_mean = np.mean(x[:,0]) 95 | weight_std = np.std(x[:,0]) 96 | hp_mean = np.mean(x[:,1]) 97 | hp_std = np.std(x[:,1]) 98 | x[:,0] = (x[:,0] - weight_mean) / weight_std 99 | x[:,1] = (x[:,1] - hp_mean) / hp_std 100 | 101 | # Initialise w with ones 102 | m,n=np.shape(x) 103 | w = np.array([np.ones(n)]).T 104 | 105 | # Perform gradient ascent 106 | (l_hist, w) = gradient_ascent(x, y, w, 50) 107 | 108 | print("Model parameters:\n") 109 | print(w) 110 | 111 | # Plot the data points and a gradient for the probability 112 | # given by y_hat() 113 | f, ax1 = plt.subplots(1, 1, figsize=(7,7)) 114 | 115 | x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1 116 | y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1 117 | 118 | xx, yy = np.meshgrid(np.arange(x_min, x_max, .1), 119 | np.arange(y_min, y_max, .1)) 120 | 121 | Z = y_hat(np.c_[xx.ravel(), yy.ravel()], w) 122 | Z = Z.reshape(xx.shape) 123 | 124 | ax1.contourf(xx, yy, Z, cmap=plt.cm.Blues) 125 | ax1.scatter(x[:,0], x[:,1], c=y, cmap=plt.cm.bwr) 126 | 127 | # To make predictions use 128 | # y_hat(np.array([((...weight_input...)-weight_mean)/weight_std,((...hp_input...)-hp_mean)/hp_std]), w) 129 |
-------------------------------------------------------------------------------- /Lecture 5 - Probabilities and Logistic Regression/logreg_gradient_2_variables_iris.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2017 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 8 | """ 9 | 10 | import numpy as np 11 | 12 | import matplotlib.pyplot as plt 13 | 14 | from sklearn import datasets 15 | 16 | 17 | def y_hat(x, w): 18 | """ 19 | Logistic regression hypothesis: y_hat = 1 / (1 + e^(-x*w)) 20 | """ 21 | 22 | return (1/(1+np.exp(-x.dot(w)))) 23 | 24 | def gradient_ascent(x, y, w, max_iter, alpha = 0.01): 25 | """ 26 | Performs gradient ascent to optimise L(w). 27 | 28 | Keyword arguments: 29 | 30 | *x* : Numpy array 31 | matrix of independent variables 32 | 33 | *y* : Numpy array 34 | columnar vector of target values 35 | 36 | *w* : Numpy array 37 | initial model parameters 38 | 39 | *max_iter* : int 40 | maximum number of iterations 41 | 42 | *alpha* : float, optional 43 | learning rate (defaults to 0.01) 44 | 45 | Returns: 46 | 47 | *L_hist* : Numpy array 48 | values of L(w) at each iteration 49 | 50 | *w* : Numpy array 51 | estimated model parameters 52 | """ 53 | 54 | L_hist = np.zeros(max_iter) 55 | 56 | print("\nGradient ascent starts.\n") 57 | 58 | for i in range(0, max_iter): 59 | 60 | # Log-likelihood function 61 | L = np.sum(y.T.dot(np.log(y_hat(x, w))) + (1-y.T).dot(np.log(1-y_hat(x, w)))) 62 | 63 | # Keep L(w) for each iteration (for the final plot) 64 | L_hist[i] = L 65 | 66 | print("Iteration %d, L(w): %f\n" % (i, L)) 67 | 68 | # Compute the gradient and adjust the model parameters 69 | gradient = np.dot(x.T, y - y_hat(x, w)) 70 | 71 | w = w + alpha * gradient 72 | 73 | print("Gradient ascent finished.\n") 74 | 75 | return (L_hist, w) 76 | 77 | # Load the IRIS dataset 78 | iris = datasets.load_iris() 79 | x = iris.data[:99, :2] # we only take the first two features.
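# --- Added note (editorial sketch, not part of the original lecture script) ---
# iris.data is ordered by class, so the first 99 rows taken above contain only
# the first two classes (setosa and versicolor) and the problem stays binary.
# As an optional cross-check of the hand-rolled gradient ascent below, a
# minimal sketch using scikit-learn (already a dependency of this script) is:
#
#   from sklearn.linear_model import LogisticRegression
#   ref = LogisticRegression()
#   ref.fit(iris.data[:99, :2], iris.target[:99])
#   print(ref.coef_, ref.intercept_)
#
# LogisticRegression fits an intercept and applies L2 regularisation by
# default, while the code below fits w without a bias term on standardised
# inputs, so only the signs and rough relative magnitudes of the two weights
# are directly comparable.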
80 | y = iris.target[:99] # assign the class variable to y 81 | 82 | y = np.array([y]).T 83 | 84 | # Normalize the inputs 85 | x[:,0] = (x[:,0] - np.mean(x[:,0])) / np.std(x[:,0]) 86 | x[:,1] = (x[:,1] - np.mean(x[:,1])) / np.std(x[:,1]) 87 | 88 | # Initialise w with ones 89 | m,n=np.shape(x) 90 | w = np.array([np.ones(n)]).T 91 | 92 | # Perform gradient ascent 93 | (l_hist, w) = gradient_ascent(x, y, w, 25) 94 | 95 | print("Model parameters:\n") 96 | print(w) 97 | 98 | # Plot the classes and the probability given by y_hat() 99 | f, ax1 = plt.subplots(1, 1, figsize=(7,7)) 100 | 101 | x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1 102 | y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1 103 | 104 | xx, yy = np.meshgrid(np.arange(x_min, x_max, .1), 105 | np.arange(y_min, y_max, .1)) 106 | 107 | Z = y_hat(np.c_[xx.ravel(), yy.ravel()], w) 108 | Z = Z.reshape(xx.shape) 109 | 110 | ax1.contourf(xx, yy, Z, cmap=plt.cm.Blues) 111 | ax1.scatter(x[:,0], x[:,1], c=y, cmap=plt.cm.bwr) 112 | 113 | -------------------------------------------------------------------------------- /Lecture 6 - Naive Bayes/Naive_Bayes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nmanchev/MachineLearningStudyGroup/092f642d888f3dfb105aa8768d4a6927c93a4278/Lecture 6 - Naive Bayes/Naive_Bayes.pdf -------------------------------------------------------------------------------- /Lecture 6 - Naive Bayes/README.md: -------------------------------------------------------------------------------- 1 | ## Naive Bayes 2 | 3 | Code examples used in Lecture 6 4 | 5 | * auto-mpg.data - The [Auto MPG](https://archive.ics.uci.edu/ml/datasets/Auto+MPG) from UCI Machine Learning Repository 6 | * gender_height_weight.csv - Data from [National Longitudinal Youth Survey](http://www.bls.gov/nls/nlsy97.htm), Bureau of Labor Statistics, United States Department of Labor 7 | * naive_bayes_mf.py - Naive Bayes classification using the National Longitudinal Youth Survey 8 | * naive_bayes_autompg.py - Naive Bayes classification using the Auto MPG dataset (three target classes) 9 | 10 | This repository contains materials from the London Machine Learning Study Group Meetups 11 | 12 | The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group). 13 | 14 | (C) 2017 Nikolay Manchev, London Machine Learning Study Group 15 | 16 | This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0). 17 | -------------------------------------------------------------------------------- /Lecture 6 - Naive Bayes/auto-mpg.data: -------------------------------------------------------------------------------- 1 | 18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu" 2 | 15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320" 3 | 18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite" 4 | 16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst" 5 | 17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino" 6 | 15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500" 7 | 14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala" 8 | 14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii" 9 | 14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina" 10 | 15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl" 11 | 15.0 8 383.0 170.0 3563. 
10.0 70 1 "dodge challenger se" 12 | 14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340" 13 | 15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo" 14 | 14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)" 15 | 24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii" 16 | 22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster" 17 | 18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet" 18 | 21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick" 19 | 27.0 4 97.00 88.00 2130. 14.5 70 3 "datsun pl510" 20 | 26.0 4 97.00 46.00 1835. 20.5 70 2 "volkswagen 1131 deluxe sedan" 21 | 25.0 4 110.0 87.00 2672. 17.5 70 2 "peugeot 504" 22 | 24.0 4 107.0 90.00 2430. 14.5 70 2 "audi 100 ls" 23 | 25.0 4 104.0 95.00 2375. 17.5 70 2 "saab 99e" 24 | 26.0 4 121.0 113.0 2234. 12.5 70 2 "bmw 2002" 25 | 21.0 6 199.0 90.00 2648. 15.0 70 1 "amc gremlin" 26 | 10.0 8 360.0 215.0 4615. 14.0 70 1 "ford f250" 27 | 10.0 8 307.0 200.0 4376. 15.0 70 1 "chevy c20" 28 | 11.0 8 318.0 210.0 4382. 13.5 70 1 "dodge d200" 29 | 9.0 8 304.0 193.0 4732. 18.5 70 1 "hi 1200d" 30 | 27.0 4 97.00 88.00 2130. 14.5 71 3 "datsun pl510" 31 | 28.0 4 140.0 90.00 2264. 15.5 71 1 "chevrolet vega 2300" 32 | 25.0 4 113.0 95.00 2228. 14.0 71 3 "toyota corona" 33 | 25.0 4 98.00 ? 2046. 19.0 71 1 "ford pinto" 34 | 19.0 6 232.0 100.0 2634. 13.0 71 1 "amc gremlin" 35 | 16.0 6 225.0 105.0 3439. 15.5 71 1 "plymouth satellite custom" 36 | 17.0 6 250.0 100.0 3329. 15.5 71 1 "chevrolet chevelle malibu" 37 | 19.0 6 250.0 88.00 3302. 15.5 71 1 "ford torino 500" 38 | 18.0 6 232.0 100.0 3288. 15.5 71 1 "amc matador" 39 | 14.0 8 350.0 165.0 4209. 12.0 71 1 "chevrolet impala" 40 | 14.0 8 400.0 175.0 4464. 11.5 71 1 "pontiac catalina brougham" 41 | 14.0 8 351.0 153.0 4154. 13.5 71 1 "ford galaxie 500" 42 | 14.0 8 318.0 150.0 4096. 13.0 71 1 "plymouth fury iii" 43 | 12.0 8 383.0 180.0 4955. 11.5 71 1 "dodge monaco (sw)" 44 | 13.0 8 400.0 170.0 4746. 12.0 71 1 "ford country squire (sw)" 45 | 13.0 8 400.0 175.0 5140. 12.0 71 1 "pontiac safari (sw)" 46 | 18.0 6 258.0 110.0 2962. 13.5 71 1 "amc hornet sportabout (sw)" 47 | 22.0 4 140.0 72.00 2408. 19.0 71 1 "chevrolet vega (sw)" 48 | 19.0 6 250.0 100.0 3282. 15.0 71 1 "pontiac firebird" 49 | 18.0 6 250.0 88.00 3139. 14.5 71 1 "ford mustang" 50 | 23.0 4 122.0 86.00 2220. 14.0 71 1 "mercury capri 2000" 51 | 28.0 4 116.0 90.00 2123. 14.0 71 2 "opel 1900" 52 | 30.0 4 79.00 70.00 2074. 19.5 71 2 "peugeot 304" 53 | 30.0 4 88.00 76.00 2065. 14.5 71 2 "fiat 124b" 54 | 31.0 4 71.00 65.00 1773. 19.0 71 3 "toyota corolla 1200" 55 | 35.0 4 72.00 69.00 1613. 18.0 71 3 "datsun 1200" 56 | 27.0 4 97.00 60.00 1834. 19.0 71 2 "volkswagen model 111" 57 | 26.0 4 91.00 70.00 1955. 20.5 71 1 "plymouth cricket" 58 | 24.0 4 113.0 95.00 2278. 15.5 72 3 "toyota corona hardtop" 59 | 25.0 4 97.50 80.00 2126. 17.0 72 1 "dodge colt hardtop" 60 | 23.0 4 97.00 54.00 2254. 23.5 72 2 "volkswagen type 3" 61 | 20.0 4 140.0 90.00 2408. 19.5 72 1 "chevrolet vega" 62 | 21.0 4 122.0 86.00 2226. 16.5 72 1 "ford pinto runabout" 63 | 13.0 8 350.0 165.0 4274. 12.0 72 1 "chevrolet impala" 64 | 14.0 8 400.0 175.0 4385. 12.0 72 1 "pontiac catalina" 65 | 15.0 8 318.0 150.0 4135. 13.5 72 1 "plymouth fury iii" 66 | 14.0 8 351.0 153.0 4129. 13.0 72 1 "ford galaxie 500" 67 | 17.0 8 304.0 150.0 3672. 11.5 72 1 "amc ambassador sst" 68 | 11.0 8 429.0 208.0 4633. 11.0 72 1 "mercury marquis" 69 | 13.0 8 350.0 155.0 4502. 13.5 72 1 "buick lesabre custom" 70 | 12.0 8 350.0 160.0 4456. 13.5 72 1 "oldsmobile delta 88 royale" 71 | 13.0 8 400.0 190.0 4422. 
12.5 72 1 "chrysler newport royal" 72 | 19.0 3 70.00 97.00 2330. 13.5 72 3 "mazda rx2 coupe" 73 | 15.0 8 304.0 150.0 3892. 12.5 72 1 "amc matador (sw)" 74 | 13.0 8 307.0 130.0 4098. 14.0 72 1 "chevrolet chevelle concours (sw)" 75 | 13.0 8 302.0 140.0 4294. 16.0 72 1 "ford gran torino (sw)" 76 | 14.0 8 318.0 150.0 4077. 14.0 72 1 "plymouth satellite custom (sw)" 77 | 18.0 4 121.0 112.0 2933. 14.5 72 2 "volvo 145e (sw)" 78 | 22.0 4 121.0 76.00 2511. 18.0 72 2 "volkswagen 411 (sw)" 79 | 21.0 4 120.0 87.00 2979. 19.5 72 2 "peugeot 504 (sw)" 80 | 26.0 4 96.00 69.00 2189. 18.0 72 2 "renault 12 (sw)" 81 | 22.0 4 122.0 86.00 2395. 16.0 72 1 "ford pinto (sw)" 82 | 28.0 4 97.00 92.00 2288. 17.0 72 3 "datsun 510 (sw)" 83 | 23.0 4 120.0 97.00 2506. 14.5 72 3 "toyouta corona mark ii (sw)" 84 | 28.0 4 98.00 80.00 2164. 15.0 72 1 "dodge colt (sw)" 85 | 27.0 4 97.00 88.00 2100. 16.5 72 3 "toyota corolla 1600 (sw)" 86 | 13.0 8 350.0 175.0 4100. 13.0 73 1 "buick century 350" 87 | 14.0 8 304.0 150.0 3672. 11.5 73 1 "amc matador" 88 | 13.0 8 350.0 145.0 3988. 13.0 73 1 "chevrolet malibu" 89 | 14.0 8 302.0 137.0 4042. 14.5 73 1 "ford gran torino" 90 | 15.0 8 318.0 150.0 3777. 12.5 73 1 "dodge coronet custom" 91 | 12.0 8 429.0 198.0 4952. 11.5 73 1 "mercury marquis brougham" 92 | 13.0 8 400.0 150.0 4464. 12.0 73 1 "chevrolet caprice classic" 93 | 13.0 8 351.0 158.0 4363. 13.0 73 1 "ford ltd" 94 | 14.0 8 318.0 150.0 4237. 14.5 73 1 "plymouth fury gran sedan" 95 | 13.0 8 440.0 215.0 4735. 11.0 73 1 "chrysler new yorker brougham" 96 | 12.0 8 455.0 225.0 4951. 11.0 73 1 "buick electra 225 custom" 97 | 13.0 8 360.0 175.0 3821. 11.0 73 1 "amc ambassador brougham" 98 | 18.0 6 225.0 105.0 3121. 16.5 73 1 "plymouth valiant" 99 | 16.0 6 250.0 100.0 3278. 18.0 73 1 "chevrolet nova custom" 100 | 18.0 6 232.0 100.0 2945. 16.0 73 1 "amc hornet" 101 | 18.0 6 250.0 88.00 3021. 16.5 73 1 "ford maverick" 102 | 23.0 6 198.0 95.00 2904. 16.0 73 1 "plymouth duster" 103 | 26.0 4 97.00 46.00 1950. 21.0 73 2 "volkswagen super beetle" 104 | 11.0 8 400.0 150.0 4997. 14.0 73 1 "chevrolet impala" 105 | 12.0 8 400.0 167.0 4906. 12.5 73 1 "ford country" 106 | 13.0 8 360.0 170.0 4654. 13.0 73 1 "plymouth custom suburb" 107 | 12.0 8 350.0 180.0 4499. 12.5 73 1 "oldsmobile vista cruiser" 108 | 18.0 6 232.0 100.0 2789. 15.0 73 1 "amc gremlin" 109 | 20.0 4 97.00 88.00 2279. 19.0 73 3 "toyota carina" 110 | 21.0 4 140.0 72.00 2401. 19.5 73 1 "chevrolet vega" 111 | 22.0 4 108.0 94.00 2379. 16.5 73 3 "datsun 610" 112 | 18.0 3 70.00 90.00 2124. 13.5 73 3 "maxda rx3" 113 | 19.0 4 122.0 85.00 2310. 18.5 73 1 "ford pinto" 114 | 21.0 6 155.0 107.0 2472. 14.0 73 1 "mercury capri v6" 115 | 26.0 4 98.00 90.00 2265. 15.5 73 2 "fiat 124 sport coupe" 116 | 15.0 8 350.0 145.0 4082. 13.0 73 1 "chevrolet monte carlo s" 117 | 16.0 8 400.0 230.0 4278. 9.50 73 1 "pontiac grand prix" 118 | 29.0 4 68.00 49.00 1867. 19.5 73 2 "fiat 128" 119 | 24.0 4 116.0 75.00 2158. 15.5 73 2 "opel manta" 120 | 20.0 4 114.0 91.00 2582. 14.0 73 2 "audi 100ls" 121 | 19.0 4 121.0 112.0 2868. 15.5 73 2 "volvo 144ea" 122 | 15.0 8 318.0 150.0 3399. 11.0 73 1 "dodge dart custom" 123 | 24.0 4 121.0 110.0 2660. 14.0 73 2 "saab 99le" 124 | 20.0 6 156.0 122.0 2807. 13.5 73 3 "toyota mark ii" 125 | 11.0 8 350.0 180.0 3664. 11.0 73 1 "oldsmobile omega" 126 | 20.0 6 198.0 95.00 3102. 16.5 74 1 "plymouth duster" 127 | 21.0 6 200.0 ? 2875. 17.0 74 1 "ford maverick" 128 | 19.0 6 232.0 100.0 2901. 16.0 74 1 "amc hornet" 129 | 15.0 6 250.0 100.0 3336. 
17.0 74 1 "chevrolet nova" 130 | 31.0 4 79.00 67.00 1950. 19.0 74 3 "datsun b210" 131 | 26.0 4 122.0 80.00 2451. 16.5 74 1 "ford pinto" 132 | 32.0 4 71.00 65.00 1836. 21.0 74 3 "toyota corolla 1200" 133 | 25.0 4 140.0 75.00 2542. 17.0 74 1 "chevrolet vega" 134 | 16.0 6 250.0 100.0 3781. 17.0 74 1 "chevrolet chevelle malibu classic" 135 | 16.0 6 258.0 110.0 3632. 18.0 74 1 "amc matador" 136 | 18.0 6 225.0 105.0 3613. 16.5 74 1 "plymouth satellite sebring" 137 | 16.0 8 302.0 140.0 4141. 14.0 74 1 "ford gran torino" 138 | 13.0 8 350.0 150.0 4699. 14.5 74 1 "buick century luxus (sw)" 139 | 14.0 8 318.0 150.0 4457. 13.5 74 1 "dodge coronet custom (sw)" 140 | 14.0 8 302.0 140.0 4638. 16.0 74 1 "ford gran torino (sw)" 141 | 14.0 8 304.0 150.0 4257. 15.5 74 1 "amc matador (sw)" 142 | 29.0 4 98.00 83.00 2219. 16.5 74 2 "audi fox" 143 | 26.0 4 79.00 67.00 1963. 15.5 74 2 "volkswagen dasher" 144 | 26.0 4 97.00 78.00 2300. 14.5 74 2 "opel manta" 145 | 31.0 4 76.00 52.00 1649. 16.5 74 3 "toyota corona" 146 | 32.0 4 83.00 61.00 2003. 19.0 74 3 "datsun 710" 147 | 28.0 4 90.00 75.00 2125. 14.5 74 1 "dodge colt" 148 | 24.0 4 90.00 75.00 2108. 15.5 74 2 "fiat 128" 149 | 26.0 4 116.0 75.00 2246. 14.0 74 2 "fiat 124 tc" 150 | 24.0 4 120.0 97.00 2489. 15.0 74 3 "honda civic" 151 | 26.0 4 108.0 93.00 2391. 15.5 74 3 "subaru" 152 | 31.0 4 79.00 67.00 2000. 16.0 74 2 "fiat x1.9" 153 | 19.0 6 225.0 95.00 3264. 16.0 75 1 "plymouth valiant custom" 154 | 18.0 6 250.0 105.0 3459. 16.0 75 1 "chevrolet nova" 155 | 15.0 6 250.0 72.00 3432. 21.0 75 1 "mercury monarch" 156 | 15.0 6 250.0 72.00 3158. 19.5 75 1 "ford maverick" 157 | 16.0 8 400.0 170.0 4668. 11.5 75 1 "pontiac catalina" 158 | 15.0 8 350.0 145.0 4440. 14.0 75 1 "chevrolet bel air" 159 | 16.0 8 318.0 150.0 4498. 14.5 75 1 "plymouth grand fury" 160 | 14.0 8 351.0 148.0 4657. 13.5 75 1 "ford ltd" 161 | 17.0 6 231.0 110.0 3907. 21.0 75 1 "buick century" 162 | 16.0 6 250.0 105.0 3897. 18.5 75 1 "chevroelt chevelle malibu" 163 | 15.0 6 258.0 110.0 3730. 19.0 75 1 "amc matador" 164 | 18.0 6 225.0 95.00 3785. 19.0 75 1 "plymouth fury" 165 | 21.0 6 231.0 110.0 3039. 15.0 75 1 "buick skyhawk" 166 | 20.0 8 262.0 110.0 3221. 13.5 75 1 "chevrolet monza 2+2" 167 | 13.0 8 302.0 129.0 3169. 12.0 75 1 "ford mustang ii" 168 | 29.0 4 97.00 75.00 2171. 16.0 75 3 "toyota corolla" 169 | 23.0 4 140.0 83.00 2639. 17.0 75 1 "ford pinto" 170 | 20.0 6 232.0 100.0 2914. 16.0 75 1 "amc gremlin" 171 | 23.0 4 140.0 78.00 2592. 18.5 75 1 "pontiac astro" 172 | 24.0 4 134.0 96.00 2702. 13.5 75 3 "toyota corona" 173 | 25.0 4 90.00 71.00 2223. 16.5 75 2 "volkswagen dasher" 174 | 24.0 4 119.0 97.00 2545. 17.0 75 3 "datsun 710" 175 | 18.0 6 171.0 97.00 2984. 14.5 75 1 "ford pinto" 176 | 29.0 4 90.00 70.00 1937. 14.0 75 2 "volkswagen rabbit" 177 | 19.0 6 232.0 90.00 3211. 17.0 75 1 "amc pacer" 178 | 23.0 4 115.0 95.00 2694. 15.0 75 2 "audi 100ls" 179 | 23.0 4 120.0 88.00 2957. 17.0 75 2 "peugeot 504" 180 | 22.0 4 121.0 98.00 2945. 14.5 75 2 "volvo 244dl" 181 | 25.0 4 121.0 115.0 2671. 13.5 75 2 "saab 99le" 182 | 33.0 4 91.00 53.00 1795. 17.5 75 3 "honda civic cvcc" 183 | 28.0 4 107.0 86.00 2464. 15.5 76 2 "fiat 131" 184 | 25.0 4 116.0 81.00 2220. 16.9 76 2 "opel 1900" 185 | 25.0 4 140.0 92.00 2572. 14.9 76 1 "capri ii" 186 | 26.0 4 98.00 79.00 2255. 17.7 76 1 "dodge colt" 187 | 27.0 4 101.0 83.00 2202. 15.3 76 2 "renault 12tl" 188 | 17.5 8 305.0 140.0 4215. 13.0 76 1 "chevrolet chevelle malibu classic" 189 | 16.0 8 318.0 150.0 4190. 13.0 76 1 "dodge coronet brougham" 190 | 15.5 8 304.0 120.0 3962. 
13.9 76 1 "amc matador" 191 | 14.5 8 351.0 152.0 4215. 12.8 76 1 "ford gran torino" 192 | 22.0 6 225.0 100.0 3233. 15.4 76 1 "plymouth valiant" 193 | 22.0 6 250.0 105.0 3353. 14.5 76 1 "chevrolet nova" 194 | 24.0 6 200.0 81.00 3012. 17.6 76 1 "ford maverick" 195 | 22.5 6 232.0 90.00 3085. 17.6 76 1 "amc hornet" 196 | 29.0 4 85.00 52.00 2035. 22.2 76 1 "chevrolet chevette" 197 | 24.5 4 98.00 60.00 2164. 22.1 76 1 "chevrolet woody" 198 | 29.0 4 90.00 70.00 1937. 14.2 76 2 "vw rabbit" 199 | 33.0 4 91.00 53.00 1795. 17.4 76 3 "honda civic" 200 | 20.0 6 225.0 100.0 3651. 17.7 76 1 "dodge aspen se" 201 | 18.0 6 250.0 78.00 3574. 21.0 76 1 "ford granada ghia" 202 | 18.5 6 250.0 110.0 3645. 16.2 76 1 "pontiac ventura sj" 203 | 17.5 6 258.0 95.00 3193. 17.8 76 1 "amc pacer d/l" 204 | 29.5 4 97.00 71.00 1825. 12.2 76 2 "volkswagen rabbit" 205 | 32.0 4 85.00 70.00 1990. 17.0 76 3 "datsun b-210" 206 | 28.0 4 97.00 75.00 2155. 16.4 76 3 "toyota corolla" 207 | 26.5 4 140.0 72.00 2565. 13.6 76 1 "ford pinto" 208 | 20.0 4 130.0 102.0 3150. 15.7 76 2 "volvo 245" 209 | 13.0 8 318.0 150.0 3940. 13.2 76 1 "plymouth volare premier v8" 210 | 19.0 4 120.0 88.00 3270. 21.9 76 2 "peugeot 504" 211 | 19.0 6 156.0 108.0 2930. 15.5 76 3 "toyota mark ii" 212 | 16.5 6 168.0 120.0 3820. 16.7 76 2 "mercedes-benz 280s" 213 | 16.5 8 350.0 180.0 4380. 12.1 76 1 "cadillac seville" 214 | 13.0 8 350.0 145.0 4055. 12.0 76 1 "chevy c10" 215 | 13.0 8 302.0 130.0 3870. 15.0 76 1 "ford f108" 216 | 13.0 8 318.0 150.0 3755. 14.0 76 1 "dodge d100" 217 | 31.5 4 98.00 68.00 2045. 18.5 77 3 "honda accord cvcc" 218 | 30.0 4 111.0 80.00 2155. 14.8 77 1 "buick opel isuzu deluxe" 219 | 36.0 4 79.00 58.00 1825. 18.6 77 2 "renault 5 gtl" 220 | 25.5 4 122.0 96.00 2300. 15.5 77 1 "plymouth arrow gs" 221 | 33.5 4 85.00 70.00 1945. 16.8 77 3 "datsun f-10 hatchback" 222 | 17.5 8 305.0 145.0 3880. 12.5 77 1 "chevrolet caprice classic" 223 | 17.0 8 260.0 110.0 4060. 19.0 77 1 "oldsmobile cutlass supreme" 224 | 15.5 8 318.0 145.0 4140. 13.7 77 1 "dodge monaco brougham" 225 | 15.0 8 302.0 130.0 4295. 14.9 77 1 "mercury cougar brougham" 226 | 17.5 6 250.0 110.0 3520. 16.4 77 1 "chevrolet concours" 227 | 20.5 6 231.0 105.0 3425. 16.9 77 1 "buick skylark" 228 | 19.0 6 225.0 100.0 3630. 17.7 77 1 "plymouth volare custom" 229 | 18.5 6 250.0 98.00 3525. 19.0 77 1 "ford granada" 230 | 16.0 8 400.0 180.0 4220. 11.1 77 1 "pontiac grand prix lj" 231 | 15.5 8 350.0 170.0 4165. 11.4 77 1 "chevrolet monte carlo landau" 232 | 15.5 8 400.0 190.0 4325. 12.2 77 1 "chrysler cordoba" 233 | 16.0 8 351.0 149.0 4335. 14.5 77 1 "ford thunderbird" 234 | 29.0 4 97.00 78.00 1940. 14.5 77 2 "volkswagen rabbit custom" 235 | 24.5 4 151.0 88.00 2740. 16.0 77 1 "pontiac sunbird coupe" 236 | 26.0 4 97.00 75.00 2265. 18.2 77 3 "toyota corolla liftback" 237 | 25.5 4 140.0 89.00 2755. 15.8 77 1 "ford mustang ii 2+2" 238 | 30.5 4 98.00 63.00 2051. 17.0 77 1 "chevrolet chevette" 239 | 33.5 4 98.00 83.00 2075. 15.9 77 1 "dodge colt m/m" 240 | 30.0 4 97.00 67.00 1985. 16.4 77 3 "subaru dl" 241 | 30.5 4 97.00 78.00 2190. 14.1 77 2 "volkswagen dasher" 242 | 22.0 6 146.0 97.00 2815. 14.5 77 3 "datsun 810" 243 | 21.5 4 121.0 110.0 2600. 12.8 77 2 "bmw 320i" 244 | 21.5 3 80.00 110.0 2720. 13.5 77 3 "mazda rx-4" 245 | 43.1 4 90.00 48.00 1985. 21.5 78 2 "volkswagen rabbit custom diesel" 246 | 36.1 4 98.00 66.00 1800. 14.4 78 1 "ford fiesta" 247 | 32.8 4 78.00 52.00 1985. 19.4 78 3 "mazda glc deluxe" 248 | 39.4 4 85.00 70.00 2070. 18.6 78 3 "datsun b210 gx" 249 | 36.1 4 91.00 60.00 1800. 
16.4 78 3 "honda civic cvcc" 250 | 19.9 8 260.0 110.0 3365. 15.5 78 1 "oldsmobile cutlass salon brougham" 251 | 19.4 8 318.0 140.0 3735. 13.2 78 1 "dodge diplomat" 252 | 20.2 8 302.0 139.0 3570. 12.8 78 1 "mercury monarch ghia" 253 | 19.2 6 231.0 105.0 3535. 19.2 78 1 "pontiac phoenix lj" 254 | 20.5 6 200.0 95.00 3155. 18.2 78 1 "chevrolet malibu" 255 | 20.2 6 200.0 85.00 2965. 15.8 78 1 "ford fairmont (auto)" 256 | 25.1 4 140.0 88.00 2720. 15.4 78 1 "ford fairmont (man)" 257 | 20.5 6 225.0 100.0 3430. 17.2 78 1 "plymouth volare" 258 | 19.4 6 232.0 90.00 3210. 17.2 78 1 "amc concord" 259 | 20.6 6 231.0 105.0 3380. 15.8 78 1 "buick century special" 260 | 20.8 6 200.0 85.00 3070. 16.7 78 1 "mercury zephyr" 261 | 18.6 6 225.0 110.0 3620. 18.7 78 1 "dodge aspen" 262 | 18.1 6 258.0 120.0 3410. 15.1 78 1 "amc concord d/l" 263 | 19.2 8 305.0 145.0 3425. 13.2 78 1 "chevrolet monte carlo landau" 264 | 17.7 6 231.0 165.0 3445. 13.4 78 1 "buick regal sport coupe (turbo)" 265 | 18.1 8 302.0 139.0 3205. 11.2 78 1 "ford futura" 266 | 17.5 8 318.0 140.0 4080. 13.7 78 1 "dodge magnum xe" 267 | 30.0 4 98.00 68.00 2155. 16.5 78 1 "chevrolet chevette" 268 | 27.5 4 134.0 95.00 2560. 14.2 78 3 "toyota corona" 269 | 27.2 4 119.0 97.00 2300. 14.7 78 3 "datsun 510" 270 | 30.9 4 105.0 75.00 2230. 14.5 78 1 "dodge omni" 271 | 21.1 4 134.0 95.00 2515. 14.8 78 3 "toyota celica gt liftback" 272 | 23.2 4 156.0 105.0 2745. 16.7 78 1 "plymouth sapporo" 273 | 23.8 4 151.0 85.00 2855. 17.6 78 1 "oldsmobile starfire sx" 274 | 23.9 4 119.0 97.00 2405. 14.9 78 3 "datsun 200-sx" 275 | 20.3 5 131.0 103.0 2830. 15.9 78 2 "audi 5000" 276 | 17.0 6 163.0 125.0 3140. 13.6 78 2 "volvo 264gl" 277 | 21.6 4 121.0 115.0 2795. 15.7 78 2 "saab 99gle" 278 | 16.2 6 163.0 133.0 3410. 15.8 78 2 "peugeot 604sl" 279 | 31.5 4 89.00 71.00 1990. 14.9 78 2 "volkswagen scirocco" 280 | 29.5 4 98.00 68.00 2135. 16.6 78 3 "honda accord lx" 281 | 21.5 6 231.0 115.0 3245. 15.4 79 1 "pontiac lemans v6" 282 | 19.8 6 200.0 85.00 2990. 18.2 79 1 "mercury zephyr 6" 283 | 22.3 4 140.0 88.00 2890. 17.3 79 1 "ford fairmont 4" 284 | 20.2 6 232.0 90.00 3265. 18.2 79 1 "amc concord dl 6" 285 | 20.6 6 225.0 110.0 3360. 16.6 79 1 "dodge aspen 6" 286 | 17.0 8 305.0 130.0 3840. 15.4 79 1 "chevrolet caprice classic" 287 | 17.6 8 302.0 129.0 3725. 13.4 79 1 "ford ltd landau" 288 | 16.5 8 351.0 138.0 3955. 13.2 79 1 "mercury grand marquis" 289 | 18.2 8 318.0 135.0 3830. 15.2 79 1 "dodge st. regis" 290 | 16.9 8 350.0 155.0 4360. 14.9 79 1 "buick estate wagon (sw)" 291 | 15.5 8 351.0 142.0 4054. 14.3 79 1 "ford country squire (sw)" 292 | 19.2 8 267.0 125.0 3605. 15.0 79 1 "chevrolet malibu classic (sw)" 293 | 18.5 8 360.0 150.0 3940. 13.0 79 1 "chrysler lebaron town @ country (sw)" 294 | 31.9 4 89.00 71.00 1925. 14.0 79 2 "vw rabbit custom" 295 | 34.1 4 86.00 65.00 1975. 15.2 79 3 "maxda glc deluxe" 296 | 35.7 4 98.00 80.00 1915. 14.4 79 1 "dodge colt hatchback custom" 297 | 27.4 4 121.0 80.00 2670. 15.0 79 1 "amc spirit dl" 298 | 25.4 5 183.0 77.00 3530. 20.1 79 2 "mercedes benz 300d" 299 | 23.0 8 350.0 125.0 3900. 17.4 79 1 "cadillac eldorado" 300 | 27.2 4 141.0 71.00 3190. 24.8 79 2 "peugeot 504" 301 | 23.9 8 260.0 90.00 3420. 22.2 79 1 "oldsmobile cutlass salon brougham" 302 | 34.2 4 105.0 70.00 2200. 13.2 79 1 "plymouth horizon" 303 | 34.5 4 105.0 70.00 2150. 14.9 79 1 "plymouth horizon tc3" 304 | 31.8 4 85.00 65.00 2020. 19.2 79 3 "datsun 210" 305 | 37.3 4 91.00 69.00 2130. 14.7 79 2 "fiat strada custom" 306 | 28.4 4 151.0 90.00 2670. 
16.0 79 1 "buick skylark limited" 307 | 28.8 6 173.0 115.0 2595. 11.3 79 1 "chevrolet citation" 308 | 26.8 6 173.0 115.0 2700. 12.9 79 1 "oldsmobile omega brougham" 309 | 33.5 4 151.0 90.00 2556. 13.2 79 1 "pontiac phoenix" 310 | 41.5 4 98.00 76.00 2144. 14.7 80 2 "vw rabbit" 311 | 38.1 4 89.00 60.00 1968. 18.8 80 3 "toyota corolla tercel" 312 | 32.1 4 98.00 70.00 2120. 15.5 80 1 "chevrolet chevette" 313 | 37.2 4 86.00 65.00 2019. 16.4 80 3 "datsun 310" 314 | 28.0 4 151.0 90.00 2678. 16.5 80 1 "chevrolet citation" 315 | 26.4 4 140.0 88.00 2870. 18.1 80 1 "ford fairmont" 316 | 24.3 4 151.0 90.00 3003. 20.1 80 1 "amc concord" 317 | 19.1 6 225.0 90.00 3381. 18.7 80 1 "dodge aspen" 318 | 34.3 4 97.00 78.00 2188. 15.8 80 2 "audi 4000" 319 | 29.8 4 134.0 90.00 2711. 15.5 80 3 "toyota corona liftback" 320 | 31.3 4 120.0 75.00 2542. 17.5 80 3 "mazda 626" 321 | 37.0 4 119.0 92.00 2434. 15.0 80 3 "datsun 510 hatchback" 322 | 32.2 4 108.0 75.00 2265. 15.2 80 3 "toyota corolla" 323 | 46.6 4 86.00 65.00 2110. 17.9 80 3 "mazda glc" 324 | 27.9 4 156.0 105.0 2800. 14.4 80 1 "dodge colt" 325 | 40.8 4 85.00 65.00 2110. 19.2 80 3 "datsun 210" 326 | 44.3 4 90.00 48.00 2085. 21.7 80 2 "vw rabbit c (diesel)" 327 | 43.4 4 90.00 48.00 2335. 23.7 80 2 "vw dasher (diesel)" 328 | 36.4 5 121.0 67.00 2950. 19.9 80 2 "audi 5000s (diesel)" 329 | 30.0 4 146.0 67.00 3250. 21.8 80 2 "mercedes-benz 240d" 330 | 44.6 4 91.00 67.00 1850. 13.8 80 3 "honda civic 1500 gl" 331 | 40.9 4 85.00 ? 1835. 17.3 80 2 "renault lecar deluxe" 332 | 33.8 4 97.00 67.00 2145. 18.0 80 3 "subaru dl" 333 | 29.8 4 89.00 62.00 1845. 15.3 80 2 "vokswagen rabbit" 334 | 32.7 6 168.0 132.0 2910. 11.4 80 3 "datsun 280-zx" 335 | 23.7 3 70.00 100.0 2420. 12.5 80 3 "mazda rx-7 gs" 336 | 35.0 4 122.0 88.00 2500. 15.1 80 2 "triumph tr7 coupe" 337 | 23.6 4 140.0 ? 2905. 14.3 80 1 "ford mustang cobra" 338 | 32.4 4 107.0 72.00 2290. 17.0 80 3 "honda accord" 339 | 27.2 4 135.0 84.00 2490. 15.7 81 1 "plymouth reliant" 340 | 26.6 4 151.0 84.00 2635. 16.4 81 1 "buick skylark" 341 | 25.8 4 156.0 92.00 2620. 14.4 81 1 "dodge aries wagon (sw)" 342 | 23.5 6 173.0 110.0 2725. 12.6 81 1 "chevrolet citation" 343 | 30.0 4 135.0 84.00 2385. 12.9 81 1 "plymouth reliant" 344 | 39.1 4 79.00 58.00 1755. 16.9 81 3 "toyota starlet" 345 | 39.0 4 86.00 64.00 1875. 16.4 81 1 "plymouth champ" 346 | 35.1 4 81.00 60.00 1760. 16.1 81 3 "honda civic 1300" 347 | 32.3 4 97.00 67.00 2065. 17.8 81 3 "subaru" 348 | 37.0 4 85.00 65.00 1975. 19.4 81 3 "datsun 210 mpg" 349 | 37.7 4 89.00 62.00 2050. 17.3 81 3 "toyota tercel" 350 | 34.1 4 91.00 68.00 1985. 16.0 81 3 "mazda glc 4" 351 | 34.7 4 105.0 63.00 2215. 14.9 81 1 "plymouth horizon 4" 352 | 34.4 4 98.00 65.00 2045. 16.2 81 1 "ford escort 4w" 353 | 29.9 4 98.00 65.00 2380. 20.7 81 1 "ford escort 2h" 354 | 33.0 4 105.0 74.00 2190. 14.2 81 2 "volkswagen jetta" 355 | 34.5 4 100.0 ? 2320. 15.8 81 2 "renault 18i" 356 | 33.7 4 107.0 75.00 2210. 14.4 81 3 "honda prelude" 357 | 32.4 4 108.0 75.00 2350. 16.8 81 3 "toyota corolla" 358 | 32.9 4 119.0 100.0 2615. 14.8 81 3 "datsun 200sx" 359 | 31.6 4 120.0 74.00 2635. 18.3 81 3 "mazda 626" 360 | 28.1 4 141.0 80.00 3230. 20.4 81 2 "peugeot 505s turbo diesel" 361 | 30.7 6 145.0 76.00 3160. 19.6 81 2 "volvo diesel" 362 | 25.4 6 168.0 116.0 2900. 12.6 81 3 "toyota cressida" 363 | 24.2 6 146.0 120.0 2930. 13.8 81 3 "datsun 810 maxima" 364 | 22.4 6 231.0 110.0 3415. 15.8 81 1 "buick century" 365 | 26.6 8 350.0 105.0 3725. 19.0 81 1 "oldsmobile cutlass ls" 366 | 20.2 6 200.0 88.00 3060. 
17.1 81 1 "ford granada gl" 367 | 17.6 6 225.0 85.00 3465. 16.6 81 1 "chrysler lebaron salon" 368 | 28.0 4 112.0 88.00 2605. 19.6 82 1 "chevrolet cavalier" 369 | 27.0 4 112.0 88.00 2640. 18.6 82 1 "chevrolet cavalier wagon" 370 | 34.0 4 112.0 88.00 2395. 18.0 82 1 "chevrolet cavalier 2-door" 371 | 31.0 4 112.0 85.00 2575. 16.2 82 1 "pontiac j2000 se hatchback" 372 | 29.0 4 135.0 84.00 2525. 16.0 82 1 "dodge aries se" 373 | 27.0 4 151.0 90.00 2735. 18.0 82 1 "pontiac phoenix" 374 | 24.0 4 140.0 92.00 2865. 16.4 82 1 "ford fairmont futura" 375 | 23.0 4 151.0 ? 3035. 20.5 82 1 "amc concord dl" 376 | 36.0 4 105.0 74.00 1980. 15.3 82 2 "volkswagen rabbit l" 377 | 37.0 4 91.00 68.00 2025. 18.2 82 3 "mazda glc custom l" 378 | 31.0 4 91.00 68.00 1970. 17.6 82 3 "mazda glc custom" 379 | 38.0 4 105.0 63.00 2125. 14.7 82 1 "plymouth horizon miser" 380 | 36.0 4 98.00 70.00 2125. 17.3 82 1 "mercury lynx l" 381 | 36.0 4 120.0 88.00 2160. 14.5 82 3 "nissan stanza xe" 382 | 36.0 4 107.0 75.00 2205. 14.5 82 3 "honda accord" 383 | 34.0 4 108.0 70.00 2245 16.9 82 3 "toyota corolla" 384 | 38.0 4 91.00 67.00 1965. 15.0 82 3 "honda civic" 385 | 32.0 4 91.00 67.00 1965. 15.7 82 3 "honda civic (auto)" 386 | 38.0 4 91.00 67.00 1995. 16.2 82 3 "datsun 310 gx" 387 | 25.0 6 181.0 110.0 2945. 16.4 82 1 "buick century limited" 388 | 38.0 6 262.0 85.00 3015. 17.0 82 1 "oldsmobile cutlass ciera (diesel)" 389 | 26.0 4 156.0 92.00 2585. 14.5 82 1 "chrysler lebaron medallion" 390 | 22.0 6 232.0 112.0 2835 14.7 82 1 "ford granada l" 391 | 32.0 4 144.0 96.00 2665. 13.9 82 3 "toyota celica gt" 392 | 36.0 4 135.0 84.00 2370. 13.0 82 1 "dodge charger 2.2" 393 | 27.0 4 151.0 90.00 2950. 17.3 82 1 "chevrolet camaro" 394 | 27.0 4 140.0 86.00 2790. 15.6 82 1 "ford mustang gl" 395 | 44.0 4 97.00 52.00 2130. 24.6 82 2 "vw pickup" 396 | 32.0 4 135.0 84.00 2295. 11.6 82 1 "dodge rampage" 397 | 28.0 4 120.0 79.00 2625. 18.6 82 1 "ford ranger" 398 | 31.0 4 119.0 82.00 2720. 19.4 82 1 "chevy s-10" 399 | -------------------------------------------------------------------------------- /Lecture 6 - Naive Bayes/naive_bayes_autompg.py: -------------------------------------------------------------------------------- 1 | """ 2 | (C) 2017 Nikolay Manchev 3 | [London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/) 4 | 5 | This work is licensed under the Creative Commons Attribution 4.0 International 6 | License. To view a copy of this license, visit 7 | http://creativecommons.org/licenses/by/4.0/. 8 | """ 9 | 10 | import numpy as np 11 | 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.metrics import confusion_matrix 15 | 16 | from math import sqrt 17 | from math import pi 18 | from math import exp 19 | 20 | 21 | def getPriors(labels): 22 | """ 23 | Get the class priors by calculating the class probability from the 24 | provided set. The prior is computed as 25 | 26 | (prior for class A) = (number of class A samples) / (total number of samples) 27 | 28 | Parameters 29 | ---------- 30 | labels : target class values 31 | 32 | Returns 33 | ------- 34 | priors : A dictionary with the class priors. 35 | E.g. 
{ ClassA: prior, ClassB: prior, ...} 36 | """ 37 | priors = {} 38 | for className in labels: 39 | N = labels.size 40 | class_occurrence = (labels == className).sum() 41 | priors[className] = class_occurrence/N 42 | return priors 43 | 44 | 45 | def fit(features, labels): 46 | """ 47 | Fits coefficients for a Gaussian Naive Bayes. This method computes and 48 | returns the in-class mean and stadnard deviation for each feature in 49 | the training vectors. 50 | 51 | Parameters 52 | ---------- 53 | featires : training vectors 54 | labels : target class values 55 | 56 | Returns 57 | ------- 58 | priors : A dictionary with with the in-class mean/std for each attribute 59 | 60 | {ClassA: [(attribute1_mean, attribute1_std]), 61 | (attribute2_mean, attribute2_std],...) 62 | ClassB: [(attribute1_mean, attribute1_std]), 63 | (attribute2_mean, attribute2_std],... 64 | ...} 65 | """ 66 | # Get the unique classes from the sample 67 | uniqueClasses = np.unique(labels) 68 | coeffs = {} 69 | # Loop over the unique classes to compute the mean/std statistics 70 | for className in uniqueClasses: 71 | featuresInClass = features[labels == className] 72 | # Compute the mean/std for each input feature 73 | statsInClass = [(np.mean(feature), np.std(feature)) for feature in zip(*featuresInClass)] 74 | coeffs[className] = statsInClass 75 | 76 | return coeffs 77 | 78 | def getLikelihood(x, featureIndex, model, className): 79 | """ 80 | Computes the likelihood (i.e. the probability of the evidence given the 81 | model parameters) for a single value/class combination. The likelihood 82 | is computed using a Gaussian probability desnity function 83 | 84 | f(x|mu, sigma) = 85 | 1 / sqrt( 2 * pi * sigma^2 ) * exp ( - ( x-mu )^2 / (2 * sigma^2) ) 86 | 87 | Parameters 88 | ---------- 89 | x : observation value 90 | featureIndex : position of this attribute in the input vector. If the 91 | model was fitted against an N-dimenisonal input vector 92 | [x_0, x_1, ..., x_N], featureIndex should point to the 93 | position of x in the original vector (e.g. 0,1,...,N) 94 | model : a dictionary with with the in-class mean/std for each 95 | attribute. See the fit(features, labels) method 96 | className : class to asses the observation against 97 | 98 | 99 | Returns 100 | ------- 101 | f : the (x|className) likelihood based on the Guassian PDF 102 | """ 103 | classStats = model[className] 104 | mean = classStats[featureIndex][0] 105 | std = classStats[featureIndex][1] 106 | f = (1/(sqrt(2*pi*pow(std,2))) * exp(-pow((x-mean),2)/(2*pow(std,2)))) 107 | return f 108 | 109 | def getPosterior(x, model, priors): 110 | """ 111 | Computes the posterior using a Gaussian Naive Bayes. 112 | 113 | P(class|x = [x_1, x_2, ..., x_N]) = likelihood(x|class) * prior(class) 114 | 115 | We use the naive assumption of conditional independence between the features, 116 | which means that 117 | 118 | P([x_1, x_2, ..., x_N]|class) = P(x_1|class) * P(x_2|class) * ... * P(x_N|class) 119 | 120 | Parameters 121 | ---------- 122 | x : input vector 123 | model : a dictionary with with the in-class mean/std for each 124 | attribute. See the fit(features, labels) method 125 | priors : a dictionary with with the in-class mean/std for each attribute 126 | 127 | 128 | Returns 129 | ------- 130 | p : the posterior for all classes in priors given the input vector 131 | """ 132 | posteriors = {} 133 | # Loop over all observed classes 134 | for className in priors: 135 | # Compute p(x_1|class) * p(x_2|class) * ... 
def getPosterior(x, model, priors):
    """
    Computes the posterior using a Gaussian Naive Bayes.

    P(class|x = [x_1, x_2, ..., x_N]) = likelihood(x|class) * prior(class)

    We use the naive assumption of conditional independence between the features,
    which means that

    P([x_1, x_2, ..., x_N]|class) = P(x_1|class) * P(x_2|class) * ... * P(x_N|class)

    Parameters
    ----------
    x      : input vector
    model  : a dictionary with the in-class mean/std for each
             attribute. See the fit(features, labels) method
    priors : a dictionary with the class priors. See the getPriors(labels)
             method

    Returns
    -------
    posteriors : a dictionary with the unnormalised posterior
                 (likelihood * prior) for each class, given the input vector
    """
    posteriors = {}
    # Loop over all observed classes
    for className in priors:
        # Compute p(x_1|class) * p(x_2|class) * ... * p(x_N|class) using the
        # likelihood function, then multiply by the prior to get
        # p(class|x = [x_1, x_2, ..., x_N])
        p = 1
        for featureIndex in range(x.size):
            p = p * getLikelihood(x[featureIndex], featureIndex, model, className)
        # Multiply by the prior once, outside the loop. Multiplying inside
        # the loop would raise the prior to the power of the number of
        # features and skew the posterior.
        p = p * priors[className]
        posteriors[className] = p
    return posteriors

def classify(x, model, priors):
    """
    This method uses Maximum a posteriori (MAP) estimation to make a class
    prediction on an unseen observation.

    Class_MAP = argmax_c posterior(c|x) = argmax_c likelihood(x|c) * prior(c)

    Parameters
    ----------
    x      : input vector
    model  : a dictionary with the in-class mean/std for each
             attribute. See the fit(features, labels) method
    priors : a dictionary with the class priors. See the getPriors(labels)
             method

    Returns
    -------
    The name of the class that maximizes the posterior value
    """
    posteriors = getPosterior(x, model, priors)
    return max(posteriors, key=lambda key: posteriors[key])


# Load the data set
# We use Auto MPG from the UCI Machine Learning Repository
# https://archive.ics.uci.edu/ml/datasets/Auto+MPG

car_data = np.genfromtxt("auto-mpg.data", usecols=(4, 3, 7))
car_data = car_data[~np.isnan(car_data).any(axis=1)]
features = car_data[:,[0,1]]
labels = car_data[:,2]

# Split the data into test/train subsets
features_train, features_test, labels_train, labels_test = train_test_split(features,
                                                                             labels, test_size=0.1,
                                                                             random_state = 100)
# Fit the model
priors = getPriors(labels_train)
model = fit(features_train, labels_train)

# Make predictions on the test data
predictions = [classify(x, model, priors) for x in features_test]

# Measure accuracy
print("Prediction accuracy: %.2f\n" % accuracy_score(labels_test, predictions))
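# Optional check (a sketch; confusion_matrix is imported above but unused in
# this script - the classes here are the three origin codes from the data set):
#
#   print(confusion_matrix(labels_test, predictions))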
--------------------------------------------------------------------------------
/Lecture 6 - Naive Bayes/naive_bayes_mf.py:
--------------------------------------------------------------------------------
"""
(C) 2017 Nikolay Manchev
[London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/)

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from math import sqrt
from math import pi
from math import exp


def getPriors(labels):
    """
    Get the class priors by calculating the class probability from the
    provided set. The prior is computed as

    (prior for class A) = (number of class A samples) / (total number of samples)

    Parameters
    ----------
    labels : target class values

    Returns
    -------
    priors : A dictionary with the class priors. E.g.

             { ClassA: prior, ClassB: prior, ...}
    """
    priors = {}
    N = labels.size
    # Iterate over the unique classes only - looping over every label would
    # needlessly recompute the same prior once per sample
    for className in np.unique(labels):
        class_occurrence = (labels == className).sum()
        priors[className] = class_occurrence / N
    return priors


def fit(features, labels):
    """
    Fits the coefficients for a Gaussian Naive Bayes model. This method
    computes and returns the in-class mean and standard deviation for each
    feature in the training vectors.

    Parameters
    ----------
    features : training vectors
    labels   : target class values

    Returns
    -------
    coeffs : A dictionary with the in-class mean/std for each attribute

             {ClassA: [(attribute1_mean, attribute1_std),
                       (attribute2_mean, attribute2_std), ...],
              ClassB: [(attribute1_mean, attribute1_std),
                       (attribute2_mean, attribute2_std), ...],
              ...}
    """
    # Get the unique classes from the sample
    uniqueClasses = np.unique(labels)
    coeffs = {}
    # Loop over the unique classes to compute the mean/std statistics
    for className in uniqueClasses:
        featuresInClass = features[labels == className]
        # Compute the mean/std for each input feature
        statsInClass = [(np.mean(feature), np.std(feature)) for feature in zip(*featuresInClass)]
        coeffs[className] = statsInClass

    return coeffs

def getLikelihood(x, featureIndex, model, className):
    """
    Computes the likelihood (i.e. the probability of the evidence given the
    model parameters) for a single value/class combination. The likelihood
    is computed using a Gaussian probability density function

    f(x|mu, sigma) =
        1 / sqrt( 2 * pi * sigma^2 ) * exp ( - ( x-mu )^2 / (2 * sigma^2) )

    Parameters
    ----------
    x            : observation value
    featureIndex : position of this attribute in the input vector. If the
                   model was fitted against an N-dimensional input vector
                   [x_0, x_1, ..., x_N], featureIndex should point to the
                   position of x in the original vector (e.g. 0,1,...,N)
    model        : a dictionary with the in-class mean/std for each
                   attribute. See the fit(features, labels) method
    className    : class to assess the observation against

    Returns
    -------
    f : the (x|className) likelihood based on the Gaussian PDF
    """
    classStats = model[className]
    mean = classStats[featureIndex][0]
    std = classStats[featureIndex][1]
    f = (1/(sqrt(2*pi*pow(std,2))) * exp(-pow((x-mean),2)/(2*pow(std,2))))
    return f

def getPosterior(x, model, priors):
    """
    Computes the posterior using a Gaussian Naive Bayes.

    P(class|x = [x_1, x_2, ..., x_N]) = likelihood(x|class) * prior(class)

    We use the naive assumption of conditional independence between the features,
    which means that

    P([x_1, x_2, ..., x_N]|class) = P(x_1|class) * P(x_2|class) * ... * P(x_N|class)

    Parameters
    ----------
    x      : input vector
    model  : a dictionary with the in-class mean/std for each
             attribute. See the fit(features, labels) method
    priors : a dictionary with the class priors. See the getPriors(labels)
             method

    Returns
    -------
    posteriors : a dictionary with the unnormalised posterior
                 (likelihood * prior) for each class, given the input vector
    """
    posteriors = {}
    # Loop over all observed classes
    for className in priors:
        # Compute p(x_1|class) * p(x_2|class) * ... * p(x_N|class) using the
        # likelihood function, then multiply by the prior to get
        # p(class|x = [x_1, x_2, ..., x_N])
        p = 1
        for featureIndex in range(x.size):
            p = p * getLikelihood(x[featureIndex], featureIndex, model, className)
        # Multiply by the prior once, outside the loop. Multiplying inside
        # the loop would raise the prior to the power of the number of
        # features and skew the posterior.
        p = p * priors[className]
        posteriors[className] = p
    return posteriors

def classify(x, model, priors):
    """
    This method uses Maximum a posteriori (MAP) estimation to make a class
    prediction on an unseen observation.

    Class_MAP = argmax_c posterior(c|x) = argmax_c likelihood(x|c) * prior(c)

    Parameters
    ----------
    x      : input vector
    model  : a dictionary with the in-class mean/std for each
             attribute. See the fit(features, labels) method
    priors : a dictionary with the class priors. See the getPriors(labels)
             method

    Returns
    -------
    The name of the class that maximizes the posterior value
    """
    posteriors = getPosterior(x, model, priors)
    return max(posteriors, key=lambda key: posteriors[key])


# Data from the National Longitudinal Youth Survey, Bureau of Labor
# Statistics, United States Department of Labor
# http://www.bls.gov/nls/nlsy97.htm
data = np.genfromtxt("gender_height_weight.csv", delimiter=",", skip_header=1)

# Assign [height(inches), weight(lbs)] to features and [gender] to labels
features = data[:,[1,2]]
labels = data[:,0]

# Split the data into test/train subsets
features_train, features_test, labels_train, labels_test = train_test_split(features,
                                                                             labels, test_size=0.1,
                                                                             random_state = 100)
# Fit the model
priors = getPriors(labels_train)
model = fit(features_train, labels_train)
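# Cross-check sketch (an aside, not part of the lecture code; assumes
# scikit-learn is available - GaussianNB is the library equivalent of the
# hand-rolled functions above):
#
#   from sklearn.naive_bayes import GaussianNB
#   clf = GaussianNB().fit(features_train, labels_train)
#   print(clf.score(features_test, labels_test))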
# To see the likelihood for a certain attribute per class we can do:
# x = np.array([69])
# getLikelihood(x, 0, model, 0) <- likelihood for class 0 : 0.1171193286800898
# getLikelihood(x, 0, model, 1) <- likelihood for class 1 : 0.04168934664951199

# Make predictions on the test data
predictions = [classify(x, model, priors) for x in features_test]

# Measure accuracy
print("Prediction accuracy: %.2f\n" % accuracy_score(labels_test, predictions))

# Print confusion matrix
print("Confusion matrix:\n")
print(confusion_matrix(labels_test, predictions))
--------------------------------------------------------------------------------
/Lecture 7 - Text Classification/README.md:
--------------------------------------------------------------------------------
## Text Classification

Code examples used in Lecture 7

* data/SMSSpamCollection - The [SMS Spam Collection Data Set](https://archive.ics.uci.edu/ml/datasets/sms+spam+collection) from the UCI Machine Learning Repository
* transform.py - Generates an L2-normalised tf-idf matrix from the SMS dataset
* predict.py - Uses Multinomial Naive Bayes to predict the ham/spam class from the stored tf-idf matrix (a scikit-learn sketch of the same pipeline is shown below)
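For orientation only, here is a minimal sketch of the same two-step pipeline built on scikit-learn's `TfidfVectorizer` (an assumed comparison, not how the lecture code works - transform.py builds the tf-idf matrix by hand, and scikit-learn's idf weighting differs in detail, e.g. smoothing and natural logarithms):

```python
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

data = pd.read_csv("data/SMSSpamCollection", sep="\t", names=["Label", "Text"])

# L2-normalised tf-idf matrix (norm="l2" is the default)
X = TfidfVectorizer(norm="l2").fit_transform(data["Text"])
y = (data["Label"] == "spam").astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

clf = MultinomialNB().fit(X_train, y_train)
print(accuracy_score(y_test, clf.predict(X_test)))
```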

This repository contains materials from the London Machine Learning Study Group Meetups

The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group).

Lecture recordings are available on [YouTube](https://www.youtube.com/c/NikolayManchev)

(C) 2017 Nikolay Manchev, London Machine Learning Study Group

This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0).
--------------------------------------------------------------------------------
/Lecture 7 - Text Classification/predict.py:
--------------------------------------------------------------------------------
"""
(C) 2017 Nikolay Manchev
[London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/)

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB

import numpy as np

# Import scipy.io explicitly - a bare "import scipy" does not make the
# scipy.io submodule available
import scipy.io

import timeit

np.random.seed(1234)

labels = np.fromfile("data/labels.csv", sep='\n')

tf_idf_matrix = scipy.io.mmread("data/training.mtx").todense()

X_train, X_test, y_train, y_test = train_test_split(tf_idf_matrix, labels, test_size=0.20, random_state=1234)

start_time = timeit.default_timer()

clf = MultinomialNB()

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Elapsed time: %f sec" % (timeit.default_timer() - start_time))

print(accuracy_score(y_test, y_pred))
--------------------------------------------------------------------------------
/Lecture 7 - Text Classification/transform.py:
--------------------------------------------------------------------------------
"""
(C) 2017 Nikolay Manchev
[London Machine Learning Study Group](http://www.meetup.com/London-Machine-Learning-Study-Group/members/)

This work is licensed under the Creative Commons Attribution 4.0 International
License. To view a copy of this license, visit
http://creativecommons.org/licenses/by/4.0/.
"""

import math
import os
import pandas as pd
import numpy as np

import scipy.sparse
import scipy.io

import nltk.data
import nltk.tokenize
import nltk.stem

from nltk.corpus import stopwords

from collections import Counter

def extract_words(text, stemmer = None, remove_stopwords = False):
    """
    Extracts all words from a document. The document is first tokenized,
    morphological affixes are removed from the words, and stop words
    are excluded from the resulting list of words.

    Parameters
    ----------
    text    : input document (String)
    stemmer : NLTK stemmer for the stemming process. Must be an NLTK
              stem package class. E.g.:

              nltk.stem.porter.PorterStemmer()
              nltk.stem.lancaster.LancasterStemmer()
              nltk.stem.snowball.EnglishStemmer()

              If set to None, no stemming is performed on the input text
    remove_stopwords : If set to True, removes any stop words from the output,
                       using the nltk.corpus.stopwords corpus (English)

    Returns
    -------
    A list of words extracted from the input text.
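    Example (illustrative - the exact output depends on the chosen stemmer):

    >>> extract_words("Cats chased running mice", nltk.stem.porter.PorterStemmer())
    ['cat', 'chase', 'run', 'mice']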
    """

    # Get the stopwords corpus
    if "stopwords" not in os.listdir(nltk.data.find("corpora")):
        nltk.download("stopwords")

    # Tokenize the document
    tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(text)

    if stemmer is None:
        # No stemmer? Just convert to lower case.
        words = [token.lower() for token in tokens]
    else:
        # Apply stemming
        words = [stemmer.stem(word.lower()) for word in tokens]

    # Remove stop words
    if remove_stopwords:
        words = [word for word in words if word not in stopwords.words('english')]

    return words

def build_vocabulary(documents):
    """
    Builds a vocabulary based on all documents in the corpus.

    Parameters
    ----------
    documents : document corpus

    Returns
    -------
    A list containing all unique words from the corpus

    """
    vocabulary = set()

    # Iterate over each document in the corpus
    for doc in documents:
        # Iterate over all words in the current document and
        # add each word to the vocabulary set
        vocabulary.update([word for word in doc])

    # Convert the vocabulary to a list
    vocabulary = list(vocabulary)

    return vocabulary


def get_idfs_dict(vocabulary, documents):
    """
    Gets a dictionary containing the vocabulary words and their respective
    IDFs. This method is used for debugging purposes only.

    Parameters
    ----------
    vocabulary : vocabulary of the corpus
    documents  : all documents in the corpus

    Returns
    -------
    A dictionary in the form of {word1: word1_IDF, word2: word2_IDF, ...}

    """

    # Get the number of documents where each word from the vocabulary appears
    counts = Counter()

    # Iterate over the vocabulary and count the occurrence of each word
    for word in vocabulary:
        for doc in documents:
            if word in doc:
                counts[word] += 1

    # Get the number of documents in the corpus
    number_of_docs = len(documents)

    # Create an empty dictionary
    idfs = dict()

    # Iterate over the counts
    for term in list(counts.items()):

        # Normalise the count by the number of documents, and take the log
        # Add the (word, IDF) pair to the dictionary
        idfs[term[0]] = math.log(number_of_docs / term[1], 2)

    return idfs
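# Worked example (illustrative): in a corpus of 8 documents, a term that
# appears in 2 of them receives idf = log2(8 / 2) = 2.0, while a term that
# appears in every document receives idf = log2(8 / 8) = 0.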
def get_idfs(vocabulary, documents):
    """
    Gets a sparse diagonal matrix containing the IDFs for all words in the
    vocabulary. The IDFs are computed as the logarithmically scaled inverse
    fraction of the documents that contain the word, obtained by dividing
    the total number of documents by the number of documents containing the
    term, and then taking the logarithm of that quotient.

    Parameters
    ----------
    vocabulary : vocabulary of the corpus
    documents  : all documents in the corpus

    Returns
    -------
    A diagonal matrix of size len(vocabulary) x len(vocabulary), where the
    words' IDFs are located on the main diagonal, and all other elements in
    the matrix are 0. E.g.:

    index in the vocabulary  1           2           3           ...  N

    1                        idf(word1)  0           0           ...  0
    2                        0           idf(word2)  0           ...  0
    3                        0           0           idf(word3)  ...  0
    ...                      ...         ...         ...         ...  ...
    N                        0           0           0           ...  idf(wordN)

    where N = len(vocabulary)

    """

    # Get the number of documents where each word from the vocabulary appears
    counts = dict()

    for word in vocabulary:
        for doc in documents:
            if word in doc:
                if word in counts:
                    counts[word] += 1
                else:
                    counts[word] = 1

    # Compute the inverse document frequency
    number_of_docs = len(documents)

    # Create a list to hold all the IDFs
    idfs = []

    # Iterate over the counts
    for word in vocabulary:

        # Normalise the count by the number of documents, and take the log
        # Add the value to the list of IDFs
        idfs.append(math.log(number_of_docs / counts[word], 2))

    # Create a sparse diagonal matrix with the values from the IDFs list
    # located on the main diagonal
    idf_matrix = scipy.sparse.diags(np.squeeze(np.asarray(idfs)))

    return idf_matrix

def get_tf_vectors(vocabulary, documents):
    """
    Computes the term frequency vectors for all documents. This method uses
    the raw count of a term in a document, i.e. the number of times that
    term t occurs in document d.

    Parameters
    ----------
    vocabulary : vocabulary of the corpus
    documents  : all documents in the corpus

    Returns
    -------
    A sparse matrix of size len(documents) x len(vocabulary), containing the
    raw counts for each term. Entries in the matrix can be viewed using
    the print_sparse_row(matrix, row_index) method. Ex:

    tf_matrix.shape
    (6918, 1869)

    print_sparse_row(tf_matrix, 0)
    col[106] 1
    col[289] 1
    col[482] 1
    col[815] 1
    col[1074] 1
    col[1145] 1
    col[1232] 1
    col[1565] 1
    """

    # Document / sparse matrix row index
    row_index = 0

    # Values and indices for the sparse matrix
    rows = []
    cols = []
    values = []

    # Iterate over all documents in the corpus
    for doc in documents:
        col_index = 0

        # Iterate over all words in the vocabulary
        for word in vocabulary:

            # Is the current word in the current document?
            if word in doc:
                # Record the term frequency for this word
                rows.append(row_index)
                cols.append(col_index)
                values.append(doc.count(word))
            # Move to the next word in the vocabulary
            col_index += 1

        # Move to the next document
        row_index += 1

    # Compose a sparse matrix of size len(documents) x len(vocabulary) with
    # all term frequencies
    tf_matrix = scipy.sparse.csr_matrix((values, (rows, cols)), shape=(row_index, len(vocabulary)))

    return tf_matrix
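# Dimension check (illustrative, using the shapes quoted in the docstrings):
# with 6918 documents and a 1869-word vocabulary, tf_matrix is (6918 x 1869)
# and idf_matrix is (1869 x 1869), so tf_matrix * idf_matrix simply scales
# each term column by its IDF and keeps the shape (6918 x 1869).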
def print_sparse_row(matrix, row_index):
    """
    Prints the indices and their respective values for a sparse matrix row.
    This method is used for debugging purposes.

    Ex:

    print_sparse_row(tf_matrix, 0)
    col[106] 1
    col[289] 1
    col[482] 1
    col[815] 1
    col[1074] 1
    col[1145] 1
    col[1232] 1
    col[1565] 1

    Parameters
    ----------
    matrix    : a sparse matrix
    row_index : index of a row from the sparse matrix

    Returns
    -------

    """
    # Convert the row of interest to a Numpy array
    row = np.asarray(matrix[row_index].todense()).flatten()

    # Iterate over all columns of the row
    col = 0
    for el in row:
        if el != 0:
            # Print the column index and the respective value
            print("col[%i] %s" % (col, el))
        col += 1


def print_tfidf(matrix, row_index, idfs):
    """
    For a given row from a TF matrix, this method prints a table containing
    all words, their term frequency, IDF, and TFxIDF values. Ex:

    >>> idfs = get_idfs_dict(vocabulary, dataDF["Words"])
    >>> print_tfidf(tf_matrix, 0, idfs)

    Column  Word    TF  IDF                 TFxIDF
    ------  ----    --  ---                 ------
    106     is      1   2.2355206178166482  2.23552061782
    289     just    1   4.616587945974135   4.61658794597
    482     the     1   1.448369266482225   1.44836926648
    815     vinc    1   1.8033980511864398  1.80339805119
    1074    da      1   1.8033980511864398  1.80339805119
    1145    book    1   5.434211203485566   5.43421120349
    1232    code    1   1.801942987986053   1.80194298799
    1565    awesom  1   2.6320179865437403  2.63201798654

    Parameters
    ----------
    matrix    : matrix containing document term frequencies (see the
                get_tf_vectors method)
    row_index : index of a row from the TF matrix
    idfs      : a dictionary of per-word IDFs (see the get_idfs_dict method)

    Returns
    -------
    """

    # Get the row of interest as a Numpy array
    row = np.asarray(matrix[row_index].todense()).flatten()
    col = 0

    # Set the output header
    output = [["Column", "Word", "TF", "IDF", "TFxIDF"],
              ["------", "----", "--", "---", "------"]]

    # Go over each element of the row (i.e. each word from the document)
    for el in row:
        if el != 0:
            # Append the column index, the word, and the TF, IDF, and TFxIDF
            # values to the output
            output.append([str(col), vocabulary[col], str(el), str(idfs[vocabulary[col]]),
                           str(idfs[vocabulary[col]] * el)])
        col += 1

    # Print the output as a table
    col_width = max(len(word) for row in output for word in row) + 2  # padding
    for row in output:
        print("".join(word.ljust(col_width) for word in row))
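# Worked example (illustrative): the row [3, 4] has L2 norm
# sqrt(3^2 + 4^2) = 5, so after normalisation it becomes [0.6, 0.8].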
def l2_normalized_matrix(matrix):
    """
    Normalises a sparse matrix by scaling its rows individually to L2 unit norm

    The norm of each row is computed as

    ||x|| = sqrt(sum(x^2))

    For efficiency, the resulting new matrix is formed by computing

    normalized_matrix = transpose( transpose(matrix) * l2_norm )

    where matrix is the original sparse matrix and l2_norm is a diagonal
    matrix of the reciprocals of sqrt(sum(x^2))

    Parameters
    ----------
    matrix : a sparse matrix to be normalized

    Returns
    -------
    An L2 normalised sparse matrix based on the input matrix

    """
    # Compute the L2 norms
    l2_norm = np.sqrt(matrix.power(2).sum(axis=1))

    # Get the reciprocals
    with np.errstate(divide="ignore", invalid="ignore"):
        l2_norm = np.reciprocal(l2_norm)
        # Treat infinity and NaN as 0
        l2_norm[~np.isfinite(l2_norm)] = 0  # -inf inf NaN

    # Form a diagonal matrix of the reciprocals
    l2_norm = scipy.sparse.diags(np.squeeze(np.asarray(l2_norm)))

    # Compute the normalised matrix
    normalized_matrix = (matrix.T * l2_norm).T

    return normalized_matrix

def mtx_save(file_name, matrix):
    """
    Writes a sparse matrix to a file in Matrix Market format.

    Parameters
    ----------
    file_name : target file name
    matrix    : a sparse matrix

    Returns
    -------

    """
    scipy.io.mmwrite(file_name, matrix)

def encode_labels(labelsDF):
    """
    Encodes a string set of target classes to a Numpy array of label indices

    Parameters
    ----------
    labelsDF : a Pandas DataFrame or Numpy array containing the labels

    Returns
    -------
    An encoded Numpy array

    Ex:

    >>> A = np.array(["a", "a", "b", "a"])
    >>> encode_labels(A)
    array([0, 0, 1, 0], dtype=int8)

    """
    # Factorize the labels
    labelsDF = pd.Categorical(labelsDF)
    catLabelsDF = labelsDF.codes

    return catLabelsDF

def labels_save(file_name, labels):
    """
    Saves the target class labels to an external file.

    Parameters
    ----------
    file_name : target file name
    labels    : a Numpy array containing the labels

    Returns
    -------

    """
    labels.tofile(file_name, sep='\n')

def hash_vectors(tf_idf_matrix, vocabulary, N=8000):
    """
    Applies feature hashing (the "hashing trick") to a sparse matrix. This
    method turns features into indices in a vector or matrix. It works by
    applying a hash function to the features and using their hash values as
    indices directly, rather than looking the indices up in an associative
    array.

    Parameters
    ----------
    tf_idf_matrix : a sparse matrix of TFxIDF values
    vocabulary    : vocabulary of the corpus
    N             : size of the hashed vector

    Returns
    -------
    A sparse matrix of size tf_idf_matrix.shape[0] x N, containing the hashed
    features

    >>> tf_idf_matrix.shape
    (6918, 1869)

    >>> hash_vectors(tf_idf_matrix, vocabulary, 100).shape
    (6918, 100)

    """
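    # Illustrative arithmetic: with N = 100, a word whose hash value is 1234
    # is mapped to column 1234 % 100 = 34. Distinct words can collide on the
    # same column - the accepted trade-off for the fixed dimensionality.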

    # Make sure the input is a csr_matrix (we need to access the sparse
    # matrix elements directly)
    if not isinstance(tf_idf_matrix, scipy.sparse.csr.csr_matrix):
        print("WARN: Input is not a Compressed Sparse Row matrix. Converting...")
        tf_idf_matrix = tf_idf_matrix.tocsr()


    row_count = tf_idf_matrix.shape[0]

    hashed_rows = []
    hashed_cols = []
    hashed_data = []

    # Iterate over the matrix rows
    for row_index in range(row_count):

        # Get the current row indices
        row = tf_idf_matrix.getrow(row_index)
        col_indices = row.indices

        # Iterate over the columns
        for col_index in range(len(col_indices)):
            # Get the word and its corresponding TFxIDF value
            tf_idf_value = tf_idf_matrix[row_index, col_indices[col_index]]
            word = vocabulary[col_indices[col_index]]

            # Apply a hash function h to the features (e.g., words), then use
            # the hash values directly as feature indices and update the
            # resulting vector at those indices

            h = hash(word)
            hashed_rows.append(row_index)
            hashed_cols.append(h % N)
            hashed_data.append(tf_idf_value)

    # Create a new sparse matrix with the hashed features
    hashed_features_matrix = scipy.sparse.csr_matrix((hashed_data,
                                                      (hashed_rows, hashed_cols)),
                                                     shape=(row_count, N))

    return hashed_features_matrix


# Read the data set
dataDF = pd.read_csv("data/SMSSpamCollection",
                     sep='\t', lineterminator='\n', names = ["Label", "Text"])

# Initialise a stemmer

#porter = nltk.stem.porter.PorterStemmer()
#lancaster = nltk.stem.lancaster.LancasterStemmer()
snowball = nltk.stem.snowball.EnglishStemmer()

# Apply stemming
print("Stemming...")
dataDF["Words"] = dataDF.apply(lambda row: extract_words(row['Text'], snowball), axis=1)

# Remove empty rows - messages like ":)" are reduced to an empty word list
# by the tokenizer/stemmer
dataDF = dataDF[dataDF.astype(str)["Words"] != '[]']
dataDF = dataDF.reset_index(drop=True)
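# At this point each row of dataDF["Words"] holds a list of stemmed tokens.
# Illustrative (the exact stems depend on the stemmer): a message such as
# "Sorry, I'll call later" becomes ['sorri', 'i', 'll', 'call', 'later'].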

# Build a vocabulary
print("Building vocabulary...")
vocabulary = build_vocabulary(dataDF["Words"])

# Get the TF vectors
print("Forming the TF matrix...")
tf_matrix = get_tf_vectors(vocabulary, dataDF["Words"])

# Get the IDF matrix
print("Forming the IDF matrix...")
idf_matrix = get_idfs(vocabulary, dataDF["Words"])

# Compute the TFxIDF values
print("Computing the TFxIDF matrix...")
tf_idf_matrix = (tf_matrix * idf_matrix)
tf_idf_matrix = l2_normalized_matrix(tf_idf_matrix)

#tf_idf_matrix = hash_vectors(tf_idf_matrix, vocabulary, 125)

# Encode the labels
print("Encoding labels...")
labels = encode_labels(dataDF["Label"])

# Save the TFxIDF matrix and the corresponding labels
print("Saving features and labels...")
mtx_save("data/training.mtx", tf_idf_matrix)
labels_save("data/labels.csv", labels)

print("All done!")
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Machine Learning Study Group

This repository contains materials from the London Machine Learning Study Group Meetups

The meetup page is available at [http://www.meetup.com/London-Machine-Learning-Study-Group](http://www.meetup.com/London-Machine-Learning-Study-Group).

(C) 2016 Nikolay Manchev, London Machine Learning Study Group

This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit [http://creativecommons.org/licenses/by/4.0](http://creativecommons.org/licenses/by/4.0).
--------------------------------------------------------------------------------