├── Chapter01 ├── chapter_01.ipynb └── data │ └── web_traffic.tsv ├── Chapter02 ├── README.rst ├── chapter_02.ipynb ├── data │ └── seeds.tsv ├── load.py └── tests │ └── test_load.py ├── Chapter03 ├── chapter_03.ipynb └── data │ ├── .gitignore │ └── download.sh ├── Chapter04 ├── chapter_04.ipynb └── data │ └── download.sh ├── Chapter05 └── chapter_05.ipynb ├── Chapter06 ├── chapter_06.ipynb └── data │ └── toy │ ├── 01.txt │ ├── 02.txt │ ├── 03.txt │ ├── 04.txt │ └── 05.txt ├── Chapter07 ├── README.rst ├── Recommendations.ipynb ├── apriori │ ├── .gitignore │ ├── apriori.py │ ├── apriori_example.py │ ├── apriori_naive.py │ └── download.sh ├── data │ ├── .gitignore │ └── download.sh ├── load_ml100k.py └── stacked.py ├── Chapter08 └── chapter_08.ipynb ├── Chapter09 ├── chapter_09.ipynb ├── data │ ├── corpus.csv │ ├── missing.tsv │ └── not_authorized.tsv └── twitterauth.py ├── Chapter10 ├── README.rst ├── Topic modeling.ipynb ├── data │ ├── .gitignore │ ├── download_ap.sh │ ├── download_wp.sh │ └── preprocess-wikidata.sh ├── wikitopics_create.py └── wikitopics_create_hdp.py ├── Chapter11 └── chapter_11.ipynb ├── Chapter12 ├── Computer Vision.ipynb ├── README.rst ├── ch12_3rd │ └── chapter_12.ipynb ├── download.sh ├── forest.jpeg └── scene00.jpg ├── Chapter13 ├── chapter_13.ipynb ├── simple_breakout.py └── tf_breakout.py ├── Chapter14 ├── README.rst ├── chapter.py ├── features.py ├── image-classification.py ├── jugfile.py ├── run-image-classification.sh ├── run-jugfile.sh └── setup-aws.txt ├── LICENSE ├── README.md ├── SimpleImageDataset ├── building00.jpg ├── building01.jpg ├── building02.jpg ├── building03.jpg ├── building04.jpg ├── building05.jpg ├── building06.jpg ├── building07.jpg ├── building08.jpg ├── building09.jpg ├── building10.jpg ├── building11.jpg ├── building12.jpg ├── building13.jpg ├── building14.jpg ├── building15.jpg ├── building16.jpg ├── building17.jpg ├── building18.jpg ├── building19.jpg ├── building20.jpg ├── building21.jpg ├── building22.jpg ├── 
building23.jpg ├── building24.jpg ├── building25.jpg ├── building26.jpg ├── building27.jpg ├── building28.jpg ├── building29.jpg ├── scene00.jpg ├── scene01.jpg ├── scene02.jpg ├── scene03.jpg ├── scene04.jpg ├── scene05.jpg ├── scene06.jpg ├── scene07.jpg ├── scene08.jpg ├── scene09.jpg ├── scene10.jpg ├── scene11.jpg ├── scene12.jpg ├── scene13.jpg ├── scene14.jpg ├── scene15.jpg ├── scene16.jpg ├── scene17.jpg ├── scene18.jpg ├── scene19.jpg ├── scene20.jpg ├── scene21.jpg ├── scene22.jpg ├── scene23.jpg ├── scene24.jpg ├── scene25.jpg ├── scene26.jpg ├── scene27.jpg ├── scene28.jpg ├── scene29.jpg ├── text00.jpg ├── text01.jpg ├── text02.jpg ├── text03.jpg ├── text04.jpg ├── text05.jpg ├── text06.jpg ├── text07.jpg ├── text08.jpg ├── text09.jpg ├── text10.jpg ├── text11.jpg ├── text12.jpg ├── text13.jpg ├── text14.jpg ├── text15.jpg ├── text16.jpg ├── text17.jpg ├── text18.jpg ├── text19.jpg ├── text20.jpg ├── text21.jpg ├── text22.jpg ├── text23.jpg ├── text24.jpg ├── text25.jpg ├── text26.jpg ├── text27.jpg ├── text28.jpg └── text29.jpg └── environment.yml /Chapter01/data/web_traffic.tsv: -------------------------------------------------------------------------------- 1 | 1.000000 2273.331055 2 | 2.000000 1657.255493 3 | 3.000000 nan 4 | 4.000000 1366.846436 5 | 5.000000 1489.234375 6 | 6.000000 1338.020020 7 | 7.000000 1884.647339 8 | 8.000000 2284.754150 9 | 9.000000 1335.810913 10 | 10.000000 1025.832397 11 | 11.000000 1140.241089 12 | 12.000000 1478.341797 13 | 13.000000 1204.218384 14 | 14.000000 1312.506348 15 | 15.000000 1300.218872 16 | 16.000000 1495.334717 17 | 17.000000 1161.070801 18 | 18.000000 1366.701904 19 | 19.000000 1273.441162 20 | 20.000000 1246.935425 21 | 21.000000 1072.582886 22 | 22.000000 1877.628296 23 | 23.000000 1403.939697 24 | 24.000000 nan 25 | 25.000000 926.635559 26 | 26.000000 1534.334595 27 | 27.000000 2105.287109 28 | 28.000000 2114.336182 29 | 29.000000 1994.525146 30 | 30.000000 1046.091919 31 | 31.000000 2091.849854 32 | 
32.000000 2227.968018 33 | 33.000000 1414.702515 34 | 34.000000 1719.032471 35 | 35.000000 1722.046875 36 | 36.000000 1293.547974 37 | 37.000000 1840.270752 38 | 38.000000 2542.300781 39 | 39.000000 1609.983643 40 | 40.000000 2456.552246 41 | 41.000000 1929.899170 42 | 42.000000 1767.186646 43 | 43.000000 1204.809082 44 | 44.000000 1762.485840 45 | 45.000000 1724.805054 46 | 46.000000 2161.871338 47 | 47.000000 809.148987 48 | 48.000000 1323.292603 49 | 49.000000 nan 50 | 50.000000 1810.368774 51 | 51.000000 1934.512695 52 | 52.000000 1352.385010 53 | 53.000000 2014.328369 54 | 54.000000 1208.587036 55 | 55.000000 2171.869629 56 | 56.000000 1701.173584 57 | 57.000000 1900.717651 58 | 58.000000 1758.676025 59 | 59.000000 1477.506836 60 | 60.000000 1922.072266 61 | 61.000000 1972.815430 62 | 62.000000 1811.206665 63 | 63.000000 1367.138306 64 | 64.000000 1775.942993 65 | 65.000000 1689.120850 66 | 66.000000 1707.929565 67 | 67.000000 1354.767578 68 | 68.000000 1318.591553 69 | 69.000000 1513.624146 70 | 70.000000 2430.133789 71 | 71.000000 1788.733276 72 | 72.000000 1381.874512 73 | 73.000000 1358.690796 74 | 74.000000 991.249329 75 | 75.000000 1586.527954 76 | 76.000000 2058.635498 77 | 77.000000 1692.005859 78 | 78.000000 1459.202759 79 | 79.000000 1202.182495 80 | 80.000000 1950.823730 81 | 81.000000 1494.491699 82 | 82.000000 1654.861328 83 | 83.000000 1218.084351 84 | 84.000000 1457.957764 85 | 85.000000 1179.684082 86 | 86.000000 1484.483154 87 | 87.000000 2731.174561 88 | 88.000000 1414.573853 89 | 89.000000 1061.369995 90 | 90.000000 1573.748169 91 | 91.000000 1260.964722 92 | 92.000000 1215.403687 93 | 93.000000 981.535828 94 | 94.000000 1345.459351 95 | 95.000000 2158.874512 96 | 96.000000 nan 97 | 97.000000 730.229004 98 | 98.000000 1033.958618 99 | 99.000000 1627.994995 100 | 100.000000 1155.129639 101 | 101.000000 1305.006836 102 | 102.000000 1444.623901 103 | 103.000000 2242.751709 104 | 104.000000 1843.219116 105 | 105.000000 1211.218140 106 | 
106.000000 1384.472168 107 | 107.000000 1313.780762 108 | 108.000000 1509.269897 109 | 109.000000 1796.398926 110 | 110.000000 1265.616333 111 | 111.000000 1089.800781 112 | 112.000000 2159.838135 113 | 113.000000 1166.384277 114 | 114.000000 1391.697388 115 | 115.000000 1445.436523 116 | 116.000000 1196.357056 117 | 117.000000 1049.317017 118 | 118.000000 1999.745605 119 | 119.000000 473.342102 120 | 120.000000 1285.387329 121 | 121.000000 1737.291260 122 | 122.000000 1534.551758 123 | 123.000000 2636.690674 124 | 124.000000 1372.776123 125 | 125.000000 1325.509033 126 | 126.000000 833.302063 127 | 127.000000 1199.291992 128 | 128.000000 2431.282959 129 | 129.000000 1739.882080 130 | 130.000000 2121.373779 131 | 131.000000 1726.600342 132 | 132.000000 1343.868774 133 | 133.000000 1072.934570 134 | 134.000000 1387.351807 135 | 135.000000 1054.316284 136 | 136.000000 1051.666626 137 | 137.000000 1270.661377 138 | 138.000000 1857.948853 139 | 139.000000 1436.369629 140 | 140.000000 2016.855469 141 | 141.000000 1352.831787 142 | 142.000000 909.600891 143 | 143.000000 1761.136353 144 | 144.000000 1009.373230 145 | 145.000000 2035.223267 146 | 146.000000 1534.073975 147 | 147.000000 1708.339966 148 | 148.000000 734.669800 149 | 149.000000 1456.019043 150 | 150.000000 1332.946411 151 | 151.000000 1605.986450 152 | 152.000000 1065.177856 153 | 153.000000 1291.167480 154 | 154.000000 1370.269043 155 | 155.000000 nan 156 | 156.000000 1928.732788 157 | 157.000000 2249.301270 158 | 158.000000 988.290894 159 | 159.000000 1024.199097 160 | 160.000000 875.135132 161 | 161.000000 1568.285400 162 | 162.000000 1031.664551 163 | 163.000000 1079.630859 164 | 164.000000 1086.948853 165 | 165.000000 1152.780884 166 | 166.000000 961.387634 167 | 167.000000 1232.227417 168 | 168.000000 2189.118408 169 | 169.000000 1181.132080 170 | 170.000000 1477.397705 171 | 171.000000 1613.063110 172 | 172.000000 922.071716 173 | 173.000000 2432.531006 174 | 174.000000 1651.096313 175 | 175.000000 
1078.927734 176 | 176.000000 825.445740 177 | 177.000000 1579.604736 178 | 178.000000 1873.424316 179 | 179.000000 1671.580200 180 | 180.000000 2454.900146 181 | 181.000000 nan 182 | 182.000000 nan 183 | 183.000000 1620.557739 184 | 184.000000 896.071289 185 | 185.000000 1950.104126 186 | 186.000000 2299.738281 187 | 187.000000 2165.413818 188 | 188.000000 1108.689819 189 | 189.000000 1732.473877 190 | 190.000000 1602.138550 191 | 191.000000 1685.260254 192 | 192.000000 2026.701294 193 | 193.000000 1690.662964 194 | 194.000000 1737.694214 195 | 195.000000 1475.258423 196 | 196.000000 1770.715698 197 | 197.000000 1349.187500 198 | 198.000000 1571.474609 199 | 199.000000 1862.707397 200 | 200.000000 1459.782349 201 | 201.000000 2284.336426 202 | 202.000000 1553.837158 203 | 203.000000 2323.653320 204 | 204.000000 1204.110352 205 | 205.000000 1769.132324 206 | 206.000000 2186.001709 207 | 207.000000 1331.175537 208 | 208.000000 1781.712402 209 | 209.000000 1243.196533 210 | 210.000000 1287.143433 211 | 211.000000 nan 212 | 212.000000 1502.286255 213 | 213.000000 877.458313 214 | 214.000000 1522.805054 215 | 215.000000 2611.905029 216 | 216.000000 1949.547485 217 | 217.000000 1707.867432 218 | 218.000000 1336.154785 219 | 219.000000 2212.902832 220 | 220.000000 1358.864380 221 | 221.000000 2502.499023 222 | 222.000000 1765.352539 223 | 223.000000 1529.414673 224 | 224.000000 1422.890625 225 | 225.000000 1950.468262 226 | 226.000000 2156.668945 227 | 227.000000 1504.507324 228 | 228.000000 1659.369995 229 | 229.000000 1033.489746 230 | 230.000000 1538.519165 231 | 231.000000 1345.894897 232 | 232.000000 2022.561157 233 | 233.000000 2036.099121 234 | 234.000000 2111.207275 235 | 235.000000 1589.440796 236 | 236.000000 1667.526733 237 | 237.000000 1064.860840 238 | 238.000000 1458.587402 239 | 239.000000 2401.041992 240 | 240.000000 1449.993530 241 | 241.000000 2407.700684 242 | 242.000000 1832.315430 243 | 243.000000 1424.621704 244 | 244.000000 1756.471436 245 | 
245.000000 1642.072632 246 | 246.000000 1429.027832 247 | 247.000000 1928.955200 248 | 248.000000 1620.687744 249 | 249.000000 1362.290161 250 | 250.000000 1275.254883 251 | 251.000000 1301.666138 252 | 252.000000 998.833984 253 | 253.000000 1163.223877 254 | 254.000000 1480.306641 255 | 255.000000 2131.771240 256 | 256.000000 1833.486206 257 | 257.000000 1161.478271 258 | 258.000000 1168.261841 259 | 259.000000 1569.966431 260 | 260.000000 1675.275146 261 | 261.000000 966.771240 262 | 262.000000 1395.518433 263 | 263.000000 1638.024780 264 | 264.000000 1712.951782 265 | 265.000000 1799.802979 266 | 266.000000 1916.816895 267 | 267.000000 1895.225952 268 | 268.000000 1008.570923 269 | 269.000000 1002.869019 270 | 270.000000 1962.243896 271 | 271.000000 1729.660400 272 | 272.000000 732.257080 273 | 273.000000 2166.750244 274 | 274.000000 1060.113159 275 | 275.000000 1519.845337 276 | 276.000000 1708.907227 277 | 277.000000 1227.915405 278 | 278.000000 1085.683716 279 | 279.000000 1045.782104 280 | 280.000000 1720.696899 281 | 281.000000 1494.705444 282 | 282.000000 961.153259 283 | 283.000000 1420.741089 284 | 284.000000 1318.101196 285 | 285.000000 740.344238 286 | 286.000000 879.328247 287 | 287.000000 1358.047974 288 | 288.000000 2318.087402 289 | 289.000000 1545.019775 290 | 290.000000 1582.846069 291 | 291.000000 1693.926636 292 | 292.000000 1152.875244 293 | 293.000000 1469.117554 294 | 294.000000 2005.669189 295 | 295.000000 1113.713867 296 | 296.000000 1281.609741 297 | 297.000000 1500.906860 298 | 298.000000 1409.276733 299 | 299.000000 943.180420 300 | 300.000000 791.694214 301 | 301.000000 704.541565 302 | 302.000000 1585.458862 303 | 303.000000 1004.198181 304 | 304.000000 796.337952 305 | 305.000000 1000.802917 306 | 306.000000 2156.751465 307 | 307.000000 638.728699 308 | 308.000000 1391.960815 309 | 309.000000 1644.898071 310 | 310.000000 1398.569580 311 | 311.000000 967.325500 312 | 312.000000 1578.804077 313 | 313.000000 1068.719360 314 | 314.000000 
1418.943726 315 | 315.000000 1784.473877 316 | 316.000000 1952.727905 317 | 317.000000 997.095337 318 | 318.000000 1485.097778 319 | 319.000000 1419.496948 320 | 320.000000 1534.019897 321 | 321.000000 1633.627075 322 | 322.000000 1012.951843 323 | 323.000000 2085.274414 324 | 324.000000 3101.601562 325 | 325.000000 1858.955200 326 | 326.000000 983.584900 327 | 327.000000 2169.784180 328 | 328.000000 2086.046875 329 | 329.000000 2204.625488 330 | 330.000000 1578.105591 331 | 331.000000 1526.881104 332 | 332.000000 1725.510986 333 | 333.000000 937.253723 334 | 334.000000 1678.458130 335 | 335.000000 1572.530029 336 | 336.000000 1188.498413 337 | 337.000000 1535.775879 338 | 338.000000 1335.063721 339 | 339.000000 1702.118652 340 | 340.000000 1927.334839 341 | 341.000000 1652.505371 342 | 342.000000 1492.118774 343 | 343.000000 1801.889038 344 | 344.000000 1977.426025 345 | 345.000000 1246.210693 346 | 346.000000 2142.636719 347 | 347.000000 1352.310547 348 | 348.000000 1507.071777 349 | 349.000000 1378.349976 350 | 350.000000 2387.540283 351 | 351.000000 1306.161377 352 | 352.000000 1425.368164 353 | 353.000000 1882.434814 354 | 354.000000 2395.280762 355 | 355.000000 1600.453857 356 | 356.000000 1445.337036 357 | 357.000000 1985.960449 358 | 358.000000 1160.152100 359 | 359.000000 2099.111816 360 | 360.000000 1541.235962 361 | 361.000000 1412.315308 362 | 362.000000 2116.764404 363 | 363.000000 1279.255859 364 | 364.000000 2040.119995 365 | 365.000000 2022.776611 366 | 366.000000 1902.603638 367 | 367.000000 1140.585327 368 | 368.000000 1904.104980 369 | 369.000000 2075.255127 370 | 370.000000 3662.633301 371 | 371.000000 1800.689453 372 | 372.000000 2432.671631 373 | 373.000000 1499.937500 374 | 374.000000 1041.650879 375 | 375.000000 1826.106323 376 | 376.000000 1734.499390 377 | 377.000000 1729.217041 378 | 378.000000 1077.025391 379 | 379.000000 1599.761108 380 | 380.000000 1147.693237 381 | 381.000000 1535.584473 382 | 382.000000 1515.563477 383 | 383.000000 
1541.500366 384 | 384.000000 1446.428467 385 | 385.000000 1249.276855 386 | 386.000000 1711.814209 387 | 387.000000 2115.800293 388 | 388.000000 1817.904053 389 | 389.000000 1761.030518 390 | 390.000000 2174.820312 391 | 391.000000 1793.098755 392 | 392.000000 1711.772339 393 | 393.000000 1931.489136 394 | 394.000000 1804.897095 395 | 395.000000 1881.685181 396 | 396.000000 2290.734131 397 | 397.000000 1840.967407 398 | 398.000000 1642.179443 399 | 399.000000 1375.341309 400 | 400.000000 1524.707642 401 | 401.000000 1361.021362 402 | 402.000000 1304.565796 403 | 403.000000 1655.716919 404 | 404.000000 1930.118652 405 | 405.000000 1559.966187 406 | 406.000000 1737.071411 407 | 407.000000 1753.080200 408 | 408.000000 1043.204834 409 | 409.000000 1202.575317 410 | 410.000000 1499.095825 411 | 411.000000 2102.189453 412 | 412.000000 2390.331543 413 | 413.000000 1327.265259 414 | 414.000000 1286.826416 415 | 415.000000 1414.089966 416 | 416.000000 1971.299805 417 | 417.000000 1243.213623 418 | 418.000000 1922.367920 419 | 419.000000 1163.862671 420 | 420.000000 1651.475464 421 | 421.000000 1301.186523 422 | 422.000000 1849.299316 423 | 423.000000 1799.256348 424 | 424.000000 1703.327393 425 | 425.000000 1627.862061 426 | 426.000000 1522.336914 427 | 427.000000 1408.989502 428 | 428.000000 2630.947754 429 | 429.000000 1648.483032 430 | 430.000000 1536.905884 431 | 431.000000 1433.750366 432 | 432.000000 1748.919678 433 | 433.000000 1274.653442 434 | 434.000000 1658.341675 435 | 435.000000 1580.411011 436 | 436.000000 1607.185913 437 | 437.000000 1381.490356 438 | 438.000000 1322.875366 439 | 439.000000 1168.433716 440 | 440.000000 1067.946533 441 | 441.000000 1890.483154 442 | 442.000000 1658.906250 443 | 443.000000 1064.380005 444 | 444.000000 868.906921 445 | 445.000000 1287.892456 446 | 446.000000 2167.587646 447 | 447.000000 1383.131226 448 | 448.000000 1417.915161 449 | 449.000000 2017.528442 450 | 450.000000 1777.718750 451 | 451.000000 1596.717407 452 | 452.000000 
1421.328735 453 | 453.000000 1324.599243 454 | 454.000000 1899.612427 455 | 455.000000 1513.721191 456 | 456.000000 1683.056152 457 | 457.000000 1369.445557 458 | 458.000000 1265.907593 459 | 459.000000 1035.090088 460 | 460.000000 2046.150024 461 | 461.000000 1498.508667 462 | 462.000000 1608.036011 463 | 463.000000 1330.513794 464 | 464.000000 1132.405518 465 | 465.000000 1237.636108 466 | 466.000000 2298.409180 467 | 467.000000 1241.165283 468 | 468.000000 2039.370850 469 | 469.000000 1177.535522 470 | 470.000000 1221.716675 471 | 471.000000 1745.758301 472 | 472.000000 1917.593384 473 | 473.000000 1165.316650 474 | 474.000000 861.017334 475 | 475.000000 1830.155396 476 | 476.000000 1170.794067 477 | 477.000000 1230.492554 478 | 478.000000 1274.034912 479 | 479.000000 1899.829224 480 | 480.000000 1867.080078 481 | 481.000000 1609.885742 482 | 482.000000 1963.965942 483 | 483.000000 1669.859253 484 | 484.000000 1292.068359 485 | 485.000000 1751.724243 486 | 486.000000 1335.341431 487 | 487.000000 1323.624023 488 | 488.000000 1651.736572 489 | 489.000000 2087.386963 490 | 490.000000 1438.429565 491 | 491.000000 1731.568237 492 | 492.000000 1949.754028 493 | 493.000000 2203.080078 494 | 494.000000 2261.097168 495 | 495.000000 1580.708740 496 | 496.000000 1562.130615 497 | 497.000000 1859.436646 498 | 498.000000 1793.891113 499 | 499.000000 1001.056335 500 | 500.000000 1912.867676 501 | 501.000000 2475.812744 502 | 502.000000 2105.730469 503 | 503.000000 1732.766724 504 | 504.000000 2310.781738 505 | 505.000000 1875.141357 506 | 506.000000 1817.766724 507 | 507.000000 1097.887329 508 | 508.000000 2017.046753 509 | 509.000000 2242.245361 510 | 510.000000 2773.306641 511 | 511.000000 1321.350464 512 | 512.000000 2739.834229 513 | 513.000000 1389.539062 514 | 514.000000 2251.552490 515 | 515.000000 2169.031006 516 | 516.000000 2029.887329 517 | 517.000000 1591.404053 518 | 518.000000 2343.211182 519 | 519.000000 2012.653320 520 | 520.000000 1614.831421 521 | 521.000000 
1672.772339 522 | 522.000000 2000.651978 523 | 523.000000 2896.021973 524 | 524.000000 2637.968750 525 | 525.000000 1884.990601 526 | 526.000000 2405.921143 527 | 527.000000 2257.248779 528 | 528.000000 1961.182495 529 | 529.000000 1849.048218 530 | 530.000000 1559.181519 531 | 531.000000 1560.701660 532 | 532.000000 2041.094482 533 | 533.000000 1998.698853 534 | 534.000000 2052.123291 535 | 535.000000 1803.678223 536 | 536.000000 1970.451904 537 | 537.000000 1939.131104 538 | 538.000000 2082.247803 539 | 539.000000 1409.396606 540 | 540.000000 2733.470947 541 | 541.000000 2221.219238 542 | 542.000000 2331.755371 543 | 543.000000 2438.380615 544 | 544.000000 1917.306030 545 | 545.000000 1988.092041 546 | 546.000000 2145.496094 547 | 547.000000 2278.642578 548 | 548.000000 2159.122803 549 | 549.000000 2627.566895 550 | 550.000000 1537.308228 551 | 551.000000 1559.624634 552 | 552.000000 3045.290527 553 | 553.000000 2246.550781 554 | 554.000000 2384.003906 555 | 555.000000 2010.736084 556 | 556.000000 1972.834229 557 | 557.000000 2146.448242 558 | 558.000000 2102.908203 559 | 559.000000 2329.290527 560 | 560.000000 1733.708252 561 | 561.000000 2641.247070 562 | 562.000000 1993.119873 563 | 563.000000 2200.874268 564 | 564.000000 2394.948975 565 | 565.000000 2191.825684 566 | 566.000000 2496.806396 567 | 567.000000 2391.000732 568 | 568.000000 2436.711182 569 | 569.000000 1738.463013 570 | 570.000000 2054.031982 571 | 571.000000 2036.267822 572 | 572.000000 1836.029175 573 | 573.000000 3007.133545 574 | 574.000000 1429.928833 575 | 575.000000 2216.402588 576 | 576.000000 1904.106812 577 | 577.000000 2285.255371 578 | 578.000000 1994.338013 579 | 579.000000 2059.176758 580 | 580.000000 2171.187012 581 | 581.000000 1982.419312 582 | 582.000000 2099.515381 583 | 583.000000 2507.017334 584 | 584.000000 1913.215332 585 | 585.000000 2561.822021 586 | 586.000000 1302.399536 587 | 587.000000 1860.632202 588 | 588.000000 2287.544434 589 | 589.000000 1734.690063 590 | 
590.000000 2156.122559 591 | 591.000000 2402.931885 592 | 592.000000 2404.802734 593 | 593.000000 3244.411377 594 | 594.000000 1978.216064 595 | 595.000000 2411.874023 596 | 596.000000 2007.088379 597 | 597.000000 2014.276733 598 | 598.000000 1565.664917 599 | 599.000000 2022.515991 600 | 600.000000 1772.145020 601 | 601.000000 2583.096436 602 | 602.000000 1844.953979 603 | 603.000000 1621.984863 604 | 604.000000 1770.774658 605 | 605.000000 2020.567627 606 | 606.000000 2355.657471 607 | 607.000000 1996.695801 608 | 608.000000 2127.384277 609 | 609.000000 2114.290771 610 | 610.000000 1935.230835 611 | 611.000000 2125.324707 612 | 612.000000 1787.222656 613 | 613.000000 2276.241211 614 | 614.000000 2978.175049 615 | 615.000000 2542.808594 616 | 616.000000 2113.446289 617 | 617.000000 1968.088379 618 | 618.000000 2368.984619 619 | 619.000000 2241.410400 620 | 620.000000 2073.782227 621 | 621.000000 2121.806152 622 | 622.000000 2167.166504 623 | 623.000000 2575.725342 624 | 624.000000 2500.377930 625 | 625.000000 2181.297363 626 | 626.000000 1967.151733 627 | 627.000000 2072.927246 628 | 628.000000 2027.206543 629 | 629.000000 2345.307617 630 | 630.000000 2024.258789 631 | 631.000000 2248.471924 632 | 632.000000 2455.304688 633 | 633.000000 2265.372070 634 | 634.000000 2424.891113 635 | 635.000000 2852.068115 636 | 636.000000 1997.479370 637 | 637.000000 3298.773438 638 | 638.000000 2367.100342 639 | 639.000000 1853.859985 640 | 640.000000 2896.925537 641 | 641.000000 2537.741943 642 | 642.000000 2300.602051 643 | 643.000000 2849.175781 644 | 644.000000 2975.004150 645 | 645.000000 1931.866577 646 | 646.000000 3009.414307 647 | 647.000000 2538.314941 648 | 648.000000 2783.420410 649 | 649.000000 2490.887939 650 | 650.000000 2407.929199 651 | 651.000000 2003.909668 652 | 652.000000 2752.269531 653 | 653.000000 2576.723145 654 | 654.000000 2817.946289 655 | 655.000000 2683.553467 656 | 656.000000 2628.284424 657 | 657.000000 2995.034912 658 | 658.000000 2303.661621 659 
| 659.000000 2772.099609 660 | 660.000000 2606.699463 661 | 661.000000 2703.995361 662 | 662.000000 2840.417725 663 | 663.000000 3256.972412 664 | 664.000000 3024.288574 665 | 665.000000 2684.777588 666 | 666.000000 3006.578857 667 | 667.000000 3310.774902 668 | 668.000000 3183.121826 669 | 669.000000 2523.464600 670 | 670.000000 3401.200928 671 | 671.000000 2839.752686 672 | 672.000000 3193.740479 673 | 673.000000 2970.214355 674 | 674.000000 3338.654541 675 | 675.000000 3464.786621 676 | 676.000000 3265.989502 677 | 677.000000 3536.363037 678 | 678.000000 3090.552734 679 | 679.000000 2936.692627 680 | 680.000000 3009.171387 681 | 681.000000 4000.848389 682 | 682.000000 3490.038086 683 | 683.000000 2815.461914 684 | 684.000000 3383.949463 685 | 685.000000 2902.276611 686 | 686.000000 4261.487793 687 | 687.000000 3787.093262 688 | 688.000000 4140.599121 689 | 689.000000 3589.932617 690 | 690.000000 3345.195801 691 | 691.000000 3119.029297 692 | 692.000000 3456.691406 693 | 693.000000 4152.050293 694 | 694.000000 3828.693115 695 | 695.000000 3993.070557 696 | 696.000000 4668.495117 697 | 697.000000 3303.204834 698 | 698.000000 3932.197998 699 | 699.000000 4497.727539 700 | 700.000000 3402.736572 701 | 701.000000 3674.006592 702 | 702.000000 3551.005127 703 | 703.000000 4231.209961 704 | 704.000000 3806.804443 705 | 705.000000 3354.349121 706 | 706.000000 3603.937988 707 | 707.000000 4015.734131 708 | 708.000000 3550.420166 709 | 709.000000 3318.288818 710 | 710.000000 3933.817627 711 | 711.000000 3597.578125 712 | 712.000000 5290.647949 713 | 713.000000 3563.370850 714 | 714.000000 3991.379395 715 | 715.000000 3890.932861 716 | 716.000000 3637.689453 717 | 717.000000 3800.525146 718 | 718.000000 4190.283203 719 | 719.000000 5249.075195 720 | 720.000000 4178.081543 721 | 721.000000 4830.328125 722 | 722.000000 4347.202637 723 | 723.000000 4226.013672 724 | 724.000000 4813.762695 725 | 725.000000 3998.185547 726 | 726.000000 4358.066406 727 | 727.000000 4323.617188 
728 | 728.000000 4157.835938 729 | 729.000000 4630.654297 730 | 730.000000 4415.905273 731 | 731.000000 4411.992188 732 | 732.000000 4725.586426 733 | 733.000000 4364.381348 734 | 734.000000 4800.028809 735 | 735.000000 4749.926758 736 | 736.000000 5144.264160 737 | 737.000000 4907.322754 738 | 738.000000 4310.609375 739 | 739.000000 4971.517578 740 | 740.000000 4815.629395 741 | 741.000000 5393.541992 742 | 742.000000 5906.814941 743 | 743.000000 4883.022461 744 | -------------------------------------------------------------------------------- /Chapter02/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 2 3 | ========= 4 | 5 | Support code for *Chapter 2: Learning How to Classify with Real-world 6 | Examples*. The directory data contains the seeds dataset, originally downloaded 7 | from https://archive.ics.uci.edu/ml/datasets/seeds 8 | 9 | chapter_02.py 10 | The code from the book (with a few extras) 11 | load.py 12 | Code to load the seeds data 13 | 14 | -------------------------------------------------------------------------------- /Chapter02/data/seeds.tsv: -------------------------------------------------------------------------------- 1 | 15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama 2 | 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 Kama 3 | 14.29 14.09 0.905 5.291 3.337 2.699 4.825 Kama 4 | 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 Kama 5 | 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 Kama 6 | 14.38 14.21 0.8951 5.386 3.312 2.462 4.956 Kama 7 | 14.69 14.49 0.8799 5.563 3.259 3.586 5.219 Kama 8 | 14.11 14.1 0.8911 5.42 3.302 2.7 5.0 Kama 9 | 16.63 15.46 0.8747 6.053 3.465 2.04 5.877 Kama 10 | 16.44 15.25 0.888 5.884 3.505 1.969 5.533 Kama 11 | 15.26 14.85 0.8696 5.714 3.242 4.543 5.314 Kama 12 | 14.03 14.16 0.8796 5.438 3.201 1.717 5.001 Kama 13 | 13.89 14.02 0.888 5.439 3.199 3.986 4.738 Kama 14 | 13.78 14.06 0.8759 5.479 3.156 3.136 4.872 Kama 15 | 13.74 14.05 0.8744 5.482 3.114 2.932 4.825 
Kama 16 | 14.59 14.28 0.8993 5.351 3.333 4.185 4.781 Kama 17 | 13.99 13.83 0.9183 5.119 3.383 5.234 4.781 Kama 18 | 15.69 14.75 0.9058 5.527 3.514 1.599 5.046 Kama 19 | 14.7 14.21 0.9153 5.205 3.466 1.767 4.649 Kama 20 | 12.72 13.57 0.8686 5.226 3.049 4.102 4.914 Kama 21 | 14.16 14.4 0.8584 5.658 3.129 3.072 5.176 Kama 22 | 14.11 14.26 0.8722 5.52 3.168 2.688 5.219 Kama 23 | 15.88 14.9 0.8988 5.618 3.507 0.7651 5.091 Kama 24 | 12.08 13.23 0.8664 5.099 2.936 1.415 4.961 Kama 25 | 15.01 14.76 0.8657 5.789 3.245 1.791 5.001 Kama 26 | 16.19 15.16 0.8849 5.833 3.421 0.903 5.307 Kama 27 | 13.02 13.76 0.8641 5.395 3.026 3.373 4.825 Kama 28 | 12.74 13.67 0.8564 5.395 2.956 2.504 4.869 Kama 29 | 14.11 14.18 0.882 5.541 3.221 2.754 5.038 Kama 30 | 13.45 14.02 0.8604 5.516 3.065 3.531 5.097 Kama 31 | 13.16 13.82 0.8662 5.454 2.975 0.8551 5.056 Kama 32 | 15.49 14.94 0.8724 5.757 3.371 3.412 5.228 Kama 33 | 14.09 14.41 0.8529 5.717 3.186 3.92 5.299 Kama 34 | 13.94 14.17 0.8728 5.585 3.15 2.124 5.012 Kama 35 | 15.05 14.68 0.8779 5.712 3.328 2.129 5.36 Kama 36 | 16.12 15.0 0.9 5.709 3.485 2.27 5.443 Kama 37 | 16.2 15.27 0.8734 5.826 3.464 2.823 5.527 Kama 38 | 17.08 15.38 0.9079 5.832 3.683 2.956 5.484 Kama 39 | 14.8 14.52 0.8823 5.656 3.288 3.112 5.309 Kama 40 | 14.28 14.17 0.8944 5.397 3.298 6.685 5.001 Kama 41 | 13.54 13.85 0.8871 5.348 3.156 2.587 5.178 Kama 42 | 13.5 13.85 0.8852 5.351 3.158 2.249 5.176 Kama 43 | 13.16 13.55 0.9009 5.138 3.201 2.461 4.783 Kama 44 | 15.5 14.86 0.882 5.877 3.396 4.711 5.528 Kama 45 | 15.11 14.54 0.8986 5.579 3.462 3.128 5.18 Kama 46 | 13.8 14.04 0.8794 5.376 3.155 1.56 4.961 Kama 47 | 15.36 14.76 0.8861 5.701 3.393 1.367 5.132 Kama 48 | 14.99 14.56 0.8883 5.57 3.377 2.958 5.175 Kama 49 | 14.79 14.52 0.8819 5.545 3.291 2.704 5.111 Kama 50 | 14.86 14.67 0.8676 5.678 3.258 2.129 5.351 Kama 51 | 14.43 14.4 0.8751 5.585 3.272 3.975 5.144 Kama 52 | 15.78 14.91 0.8923 5.674 3.434 5.593 5.136 Kama 53 | 14.49 14.61 0.8538 5.715 3.113 4.116 5.396 Kama 
54 | 14.33 14.28 0.8831 5.504 3.199 3.328 5.224 Kama 55 | 14.52 14.6 0.8557 5.741 3.113 1.481 5.487 Kama 56 | 15.03 14.77 0.8658 5.702 3.212 1.933 5.439 Kama 57 | 14.46 14.35 0.8818 5.388 3.377 2.802 5.044 Kama 58 | 14.92 14.43 0.9006 5.384 3.412 1.142 5.088 Kama 59 | 15.38 14.77 0.8857 5.662 3.419 1.999 5.222 Kama 60 | 12.11 13.47 0.8392 5.159 3.032 1.502 4.519 Kama 61 | 11.42 12.86 0.8683 5.008 2.85 2.7 4.607 Kama 62 | 11.23 12.63 0.884 4.902 2.879 2.269 4.703 Kama 63 | 12.36 13.19 0.8923 5.076 3.042 3.22 4.605 Kama 64 | 13.22 13.84 0.868 5.395 3.07 4.157 5.088 Kama 65 | 12.78 13.57 0.8716 5.262 3.026 1.176 4.782 Kama 66 | 12.88 13.5 0.8879 5.139 3.119 2.352 4.607 Kama 67 | 14.34 14.37 0.8726 5.63 3.19 1.313 5.15 Kama 68 | 14.01 14.29 0.8625 5.609 3.158 2.217 5.132 Kama 69 | 14.37 14.39 0.8726 5.569 3.153 1.464 5.3 Kama 70 | 12.73 13.75 0.8458 5.412 2.882 3.533 5.067 Kama 71 | 17.63 15.98 0.8673 6.191 3.561 4.076 6.06 Rosa 72 | 16.84 15.67 0.8623 5.998 3.484 4.675 5.877 Rosa 73 | 17.26 15.73 0.8763 5.978 3.594 4.539 5.791 Rosa 74 | 19.11 16.26 0.9081 6.154 3.93 2.936 6.079 Rosa 75 | 16.82 15.51 0.8786 6.017 3.486 4.004 5.841 Rosa 76 | 16.77 15.62 0.8638 5.927 3.438 4.92 5.795 Rosa 77 | 17.32 15.91 0.8599 6.064 3.403 3.824 5.922 Rosa 78 | 20.71 17.23 0.8763 6.579 3.814 4.451 6.451 Rosa 79 | 18.94 16.49 0.875 6.445 3.639 5.064 6.362 Rosa 80 | 17.12 15.55 0.8892 5.85 3.566 2.858 5.746 Rosa 81 | 16.53 15.34 0.8823 5.875 3.467 5.532 5.88 Rosa 82 | 18.72 16.19 0.8977 6.006 3.857 5.324 5.879 Rosa 83 | 20.2 16.89 0.8894 6.285 3.864 5.173 6.187 Rosa 84 | 19.57 16.74 0.8779 6.384 3.772 1.472 6.273 Rosa 85 | 19.51 16.71 0.878 6.366 3.801 2.962 6.185 Rosa 86 | 18.27 16.09 0.887 6.173 3.651 2.443 6.197 Rosa 87 | 18.88 16.26 0.8969 6.084 3.764 1.649 6.109 Rosa 88 | 18.98 16.66 0.859 6.549 3.67 3.691 6.498 Rosa 89 | 21.18 17.21 0.8989 6.573 4.033 5.78 6.231 Rosa 90 | 20.88 17.05 0.9031 6.45 4.032 5.016 6.321 Rosa 91 | 20.1 16.99 0.8746 6.581 3.785 1.955 6.449 Rosa 92 | 18.76 
16.2 0.8984 6.172 3.796 3.12 6.053 Rosa 93 | 18.81 16.29 0.8906 6.272 3.693 3.237 6.053 Rosa 94 | 18.59 16.05 0.9066 6.037 3.86 6.001 5.877 Rosa 95 | 18.36 16.52 0.8452 6.666 3.485 4.933 6.448 Rosa 96 | 16.87 15.65 0.8648 6.139 3.463 3.696 5.967 Rosa 97 | 19.31 16.59 0.8815 6.341 3.81 3.477 6.238 Rosa 98 | 18.98 16.57 0.8687 6.449 3.552 2.144 6.453 Rosa 99 | 18.17 16.26 0.8637 6.271 3.512 2.853 6.273 Rosa 100 | 18.72 16.34 0.881 6.219 3.684 2.188 6.097 Rosa 101 | 16.41 15.25 0.8866 5.718 3.525 4.217 5.618 Rosa 102 | 17.99 15.86 0.8992 5.89 3.694 2.068 5.837 Rosa 103 | 19.46 16.5 0.8985 6.113 3.892 4.308 6.009 Rosa 104 | 19.18 16.63 0.8717 6.369 3.681 3.357 6.229 Rosa 105 | 18.95 16.42 0.8829 6.248 3.755 3.368 6.148 Rosa 106 | 18.83 16.29 0.8917 6.037 3.786 2.553 5.879 Rosa 107 | 18.85 16.17 0.9056 6.152 3.806 2.843 6.2 Rosa 108 | 17.63 15.86 0.88 6.033 3.573 3.747 5.929 Rosa 109 | 19.94 16.92 0.8752 6.675 3.763 3.252 6.55 Rosa 110 | 18.55 16.22 0.8865 6.153 3.674 1.738 5.894 Rosa 111 | 18.45 16.12 0.8921 6.107 3.769 2.235 5.794 Rosa 112 | 19.38 16.72 0.8716 6.303 3.791 3.678 5.965 Rosa 113 | 19.13 16.31 0.9035 6.183 3.902 2.109 5.924 Rosa 114 | 19.14 16.61 0.8722 6.259 3.737 6.682 6.053 Rosa 115 | 20.97 17.25 0.8859 6.563 3.991 4.677 6.316 Rosa 116 | 19.06 16.45 0.8854 6.416 3.719 2.248 6.163 Rosa 117 | 18.96 16.2 0.9077 6.051 3.897 4.334 5.75 Rosa 118 | 19.15 16.45 0.889 6.245 3.815 3.084 6.185 Rosa 119 | 18.89 16.23 0.9008 6.227 3.769 3.639 5.966 Rosa 120 | 20.03 16.9 0.8811 6.493 3.857 3.063 6.32 Rosa 121 | 20.24 16.91 0.8897 6.315 3.962 5.901 6.188 Rosa 122 | 18.14 16.12 0.8772 6.059 3.563 3.619 6.011 Rosa 123 | 16.17 15.38 0.8588 5.762 3.387 4.286 5.703 Rosa 124 | 18.43 15.97 0.9077 5.98 3.771 2.984 5.905 Rosa 125 | 15.99 14.89 0.9064 5.363 3.582 3.336 5.144 Rosa 126 | 18.75 16.18 0.8999 6.111 3.869 4.188 5.992 Rosa 127 | 18.65 16.41 0.8698 6.285 3.594 4.391 6.102 Rosa 128 | 17.98 15.85 0.8993 5.979 3.687 2.257 5.919 Rosa 129 | 20.16 17.03 0.8735 6.513 3.773 
1.91 6.185 Rosa 130 | 17.55 15.66 0.8991 5.791 3.69 5.366 5.661 Rosa 131 | 18.3 15.89 0.9108 5.979 3.755 2.837 5.962 Rosa 132 | 18.94 16.32 0.8942 6.144 3.825 2.908 5.949 Rosa 133 | 15.38 14.9 0.8706 5.884 3.268 4.462 5.795 Rosa 134 | 16.16 15.33 0.8644 5.845 3.395 4.266 5.795 Rosa 135 | 15.56 14.89 0.8823 5.776 3.408 4.972 5.847 Rosa 136 | 15.38 14.66 0.899 5.477 3.465 3.6 5.439 Rosa 137 | 17.36 15.76 0.8785 6.145 3.574 3.526 5.971 Rosa 138 | 15.57 15.15 0.8527 5.92 3.231 2.64 5.879 Rosa 139 | 15.6 15.11 0.858 5.832 3.286 2.725 5.752 Rosa 140 | 16.23 15.18 0.885 5.872 3.472 3.769 5.922 Rosa 141 | 13.07 13.92 0.848 5.472 2.994 5.304 5.395 Canadian 142 | 13.32 13.94 0.8613 5.541 3.073 7.035 5.44 Canadian 143 | 13.34 13.95 0.862 5.389 3.074 5.995 5.307 Canadian 144 | 12.22 13.32 0.8652 5.224 2.967 5.469 5.221 Canadian 145 | 11.82 13.4 0.8274 5.314 2.777 4.471 5.178 Canadian 146 | 11.21 13.13 0.8167 5.279 2.687 6.169 5.275 Canadian 147 | 11.43 13.13 0.8335 5.176 2.719 2.221 5.132 Canadian 148 | 12.49 13.46 0.8658 5.267 2.967 4.421 5.002 Canadian 149 | 12.7 13.71 0.8491 5.386 2.911 3.26 5.316 Canadian 150 | 10.79 12.93 0.8107 5.317 2.648 5.462 5.194 Canadian 151 | 11.83 13.23 0.8496 5.263 2.84 5.195 5.307 Canadian 152 | 12.01 13.52 0.8249 5.405 2.776 6.992 5.27 Canadian 153 | 12.26 13.6 0.8333 5.408 2.833 4.756 5.36 Canadian 154 | 11.18 13.04 0.8266 5.22 2.693 3.332 5.001 Canadian 155 | 11.36 13.05 0.8382 5.175 2.755 4.048 5.263 Canadian 156 | 11.19 13.05 0.8253 5.25 2.675 5.813 5.219 Canadian 157 | 11.34 12.87 0.8596 5.053 2.849 3.347 5.003 Canadian 158 | 12.13 13.73 0.8081 5.394 2.745 4.825 5.22 Canadian 159 | 11.75 13.52 0.8082 5.444 2.678 4.378 5.31 Canadian 160 | 11.49 13.22 0.8263 5.304 2.695 5.388 5.31 Canadian 161 | 12.54 13.67 0.8425 5.451 2.879 3.082 5.491 Canadian 162 | 12.02 13.33 0.8503 5.35 2.81 4.271 5.308 Canadian 163 | 12.05 13.41 0.8416 5.267 2.847 4.988 5.046 Canadian 164 | 12.55 13.57 0.8558 5.333 2.968 4.419 5.176 Canadian 165 | 11.14 12.79 0.8558 
5.011 2.794 6.388 5.049 Canadian 166 | 12.1 13.15 0.8793 5.105 2.941 2.201 5.056 Canadian 167 | 12.44 13.59 0.8462 5.319 2.897 4.924 5.27 Canadian 168 | 12.15 13.45 0.8443 5.417 2.837 3.638 5.338 Canadian 169 | 11.35 13.12 0.8291 5.176 2.668 4.337 5.132 Canadian 170 | 11.24 13.0 0.8359 5.09 2.715 3.521 5.088 Canadian 171 | 11.02 13.0 0.8189 5.325 2.701 6.735 5.163 Canadian 172 | 11.55 13.1 0.8455 5.167 2.845 6.715 4.956 Canadian 173 | 11.27 12.97 0.8419 5.088 2.763 4.309 5.0 Canadian 174 | 11.4 13.08 0.8375 5.136 2.763 5.588 5.089 Canadian 175 | 10.83 12.96 0.8099 5.278 2.641 5.182 5.185 Canadian 176 | 10.8 12.57 0.859 4.981 2.821 4.773 5.063 Canadian 177 | 11.26 13.01 0.8355 5.186 2.71 5.335 5.092 Canadian 178 | 10.74 12.73 0.8329 5.145 2.642 4.702 4.963 Canadian 179 | 11.48 13.05 0.8473 5.18 2.758 5.876 5.002 Canadian 180 | 12.21 13.47 0.8453 5.357 2.893 1.661 5.178 Canadian 181 | 11.41 12.95 0.856 5.09 2.775 4.957 4.825 Canadian 182 | 12.46 13.41 0.8706 5.236 3.017 4.987 5.147 Canadian 183 | 12.19 13.36 0.8579 5.24 2.909 4.857 5.158 Canadian 184 | 11.65 13.07 0.8575 5.108 2.85 5.209 5.135 Canadian 185 | 12.89 13.77 0.8541 5.495 3.026 6.185 5.316 Canadian 186 | 11.56 13.31 0.8198 5.363 2.683 4.062 5.182 Canadian 187 | 11.81 13.45 0.8198 5.413 2.716 4.898 5.352 Canadian 188 | 10.91 12.8 0.8372 5.088 2.675 4.179 4.956 Canadian 189 | 11.23 12.82 0.8594 5.089 2.821 7.524 4.957 Canadian 190 | 10.59 12.41 0.8648 4.899 2.787 4.975 4.794 Canadian 191 | 10.93 12.8 0.839 5.046 2.717 5.398 5.045 Canadian 192 | 11.27 12.86 0.8563 5.091 2.804 3.985 5.001 Canadian 193 | 11.87 13.02 0.8795 5.132 2.953 3.597 5.132 Canadian 194 | 10.82 12.83 0.8256 5.18 2.63 4.853 5.089 Canadian 195 | 12.11 13.27 0.8639 5.236 2.975 4.132 5.012 Canadian 196 | 12.8 13.47 0.886 5.16 3.126 4.873 4.914 Canadian 197 | 12.79 13.53 0.8786 5.224 3.054 5.483 4.958 Canadian 198 | 13.37 13.78 0.8849 5.32 3.128 4.67 5.091 Canadian 199 | 12.62 13.67 0.8481 5.41 2.911 3.306 5.231 Canadian 200 | 12.76 13.38 
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

import numpy as np


def load_dataset(dataset_name):
    '''
    data = load_dataset(dataset_name)

    Load a tab-separated dataset from ./data/<dataset_name>.tsv.

    Each non-empty line holds the numeric feature values followed by the
    class label string as the last field.

    Parameters
    ----------
    dataset_name : str
        Base name of the file (without the .tsv extension) inside ./data/

    Returns
    -------
    data : dictionary
        'features' : ndarray of shape (n_samples, n_features)
        'target_names' : sorted list of the distinct label strings
        'target' : ndarray of integer indices into target_names
    '''
    features = []
    target = []
    with open('./data/{0}.tsv'.format(dataset_name)) as ifile:
        for line in ifile:
            line = line.strip()
            # Skip blank lines (e.g. a trailing newline at end of file);
            # they would otherwise produce a ragged feature row and an
            # empty-string label, and np.array would fail on the ragged
            # nested sequence.
            if not line:
                continue
            tokens = line.split('\t')
            features.append([float(tk) for tk in tokens[:-1]])
            target.append(tokens[-1])
    features = np.array(features)

    # Map each label string to a stable integer index; sorting makes the
    # encoding independent of the order labels appear in the file.
    target_names = sorted(set(target))
    target = np.array([target_names.index(t) for t in target])
    return {
        'features': features,
        'target_names': target_names,
        'target': target,
    }
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from load import load_dataset


def test_iris():
    # BUGFIX: load_dataset returns a dictionary with three keys
    # ('features', 'target_names', 'target'), not a (features, labels)
    # tuple -- unpacking it into two names raised ValueError.
    data = load_dataset('iris')
    features = data['features']
    labels = data['target']
    # Iris has four features per sample.
    assert len(features[0]) == 4
    assert len(features)
    assert len(features) == len(labels)


def test_seeds():
    data = load_dataset('seeds')
    features = data['features']
    labels = data['target']
    # Seeds has seven features per sample.
    assert len(features[0]) == 7
    assert len(features)
    assert len(features) == len(labels)
data" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.datasets import load_boston\n", 59 | "boston = load_boston()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "The first regression attempt:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from sklearn.linear_model import LinearRegression\n", 76 | "lr = LinearRegression(fit_intercept=True)\n", 77 | "\n", 78 | "# Index number five in the number of rooms\n", 79 | "x = boston.data[:, 5]\n", 80 | "y = boston.target\n", 81 | "\n", 82 | "# lr.fit takes a two-dimensional array as input. We use np.atleast_2d\n", 83 | "# to convert from one to two dimensional, then transpose to make sure that the\n", 84 | "# format matches:\n", 85 | "x = np.transpose(np.atleast_2d(x))\n", 86 | "lr.fit(x, y)\n", 87 | "\n", 88 | "fig,ax = plt.subplots()\n", 89 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n", 90 | "ax.set_ylabel(\"House Price\")\n", 91 | "xmin = x.min()\n", 92 | "xmax = x.max()\n", 93 | "ax.plot([xmin, xmax],\n", 94 | " [lr.predict(xmin), lr.predict(xmax)],\n", 95 | " '-', lw=2, color=\"#f9a602\")\n", 96 | "ax.scatter(x, y, s=2)\n", 97 | "fig.savefig('Regression_Fig_01.png')" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "from sklearn.metrics import mean_squared_error\n", 107 | "mse = mean_squared_error(y, lr.predict(x))\n", 108 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "scrolled": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "rmse = np.sqrt(mse)\n", 120 | "print('RMSE (on training data): {}'.format(rmse))" 121 | ] 122 | }, 
123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "from sklearn.metrics import r2_score\n", 130 | "r2 = r2_score(y, lr.predict(x))\n", 131 | "print(\"R2 (on training data): {:.2}\".format(r2))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Repeat, but using all the input variables now" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "x = boston.data\n", 148 | "\n", 149 | "lr.fit(x,y)\n", 150 | "\n", 151 | "mse = mean_squared_error(y, lr.predict(x))\n", 152 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 153 | "rmse = np.sqrt(mse)\n", 154 | "print('RMSE (on training data): {}'.format(rmse))\n", 155 | "r2 = r2_score(y, lr.predict(x))\n", 156 | "print(\"R2 (on training data): {:.2}\".format(r2))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "To see how well we do, we plot _prediction vs. 
gold reality_:" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "fig,ax = plt.subplots()\n", 173 | "ax.set_xlabel('Predicted price')\n", 174 | "ax.set_ylabel('Actual price')\n", 175 | "ax.plot([y.min(), y.max()], [y.min(), y.max()], ':', lw=2, color=\"#f9a602\")\n", 176 | "ax.scatter(lr.predict(x), y, s=2)\n", 177 | "fig.savefig(\"Regression_FIG_02.png\")" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now, we will use **cross-validation** for evaluating the regression quality:" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "scrolled": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "from sklearn.model_selection import KFold, cross_val_predict\n", 196 | "kf = KFold(n_splits=5)\n", 197 | "p = cross_val_predict(lr, x, y, cv=kf)\n", 198 | "rmse_cv = np.sqrt(mean_squared_error(p, y))\n", 199 | "print('RMSE on 5-fold CV: {:.2}'.format(rmse_cv))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "We now compare a few different regression models on _both training data and using cross-validation_:" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge \n", 216 | "\n", 217 | "for name, met in [\n", 218 | " ('linear regression', LinearRegression()),\n", 219 | " ('elastic-net(.5)', ElasticNet(alpha=0.5)),\n", 220 | " ('lasso(.5)', Lasso(alpha=0.5)),\n", 221 | " ('ridge(.5)', Ridge(alpha=0.5)),\n", 222 | "]:\n", 223 | " # Fit on the whole data:\n", 224 | " met.fit(x, y)\n", 225 | "\n", 226 | " # Predict on the whole data:\n", 227 | " p = met.predict(x)\n", 228 | " r2_train = r2_score(y, p)\n", 229 | "\n", 230 | " kf = 
KFold(n_splits=5)\n", 231 | " p = np.zeros_like(y)\n", 232 | " for train, test in kf.split(x):\n", 233 | " met.fit(x[train], y[train])\n", 234 | " p[test] = met.predict(x[test])\n", 235 | "\n", 236 | " r2_cv = r2_score(y, p)\n", 237 | " print('Method: {}'.format(name))\n", 238 | " print('R2 on training: {:.2}'.format(r2_train))\n", 239 | " print('R2 on 5-fold CV: {:.2}'.format(r2_cv))\n", 240 | " print('\\n')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "las = Lasso(normalize=True) \n", 250 | "alphas = np.logspace(-5, 2, 1000) \n", 251 | "alphas, coefs, _= las.path(x, y, alphas=alphas) \n", 252 | "\n", 253 | "fig,ax = plt.subplots() \n", 254 | "ax.plot(alphas, coefs.T) \n", 255 | "ax.set_xscale('log') \n", 256 | "ax.set_xlim(alphas.max(), alphas.min()) \n", 257 | "\n", 258 | "\n", 259 | "ax.set_xlabel('Lasso coefficient path as a function of alpha') \n", 260 | "ax.set_xlabel('Alpha') \n", 261 | "ax.set_ylabel('Coefficient weight') \n", 262 | "fig.savefig('REGRESSION_FIG_03.png')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "## Linear regression with Tensorflow" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Let's try and do the same with Tensorflow." 
277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# Batch size, epochs\n", 286 | "batch_size = 100\n", 287 | "n_epochs = 50000\n", 288 | "steps = 1000" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# Creation of the scaffolding\n", 298 | "\n", 299 | "import tensorflow as tf\n", 300 | "tf.reset_default_graph()\n", 301 | "\n", 302 | "x = boston.data[:,5][:,None]\n", 303 | "y = np.reshape(boston.target, (-1, 1))\n", 304 | "\n", 305 | "nb_features = x.shape[1]\n", 306 | "\n", 307 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n", 308 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n", 309 | "\n", 310 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n", 311 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# Creation of the graph\n", 321 | "model_output = tf.matmul(X, A) + b\n", 322 | "\n", 323 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n", 324 | "\n", 325 | "# Uncomment to get Ridge or Lasso\n", 326 | "\"\"\"\n", 327 | "beta = 0.005\n", 328 | "regularizer = tf.nn.l2_loss(A)\n", 329 | "loss = loss + beta * regularizer\n", 330 | "\"\"\"\n", 331 | "\"\"\"\n", 332 | "beta = 0.5\n", 333 | "regularizer = tf.reduce_mean(tf.abs(A))\n", 334 | "loss = loss + beta * regularizer\n", 335 | "\"\"\"\n", 336 | "\n", 337 | "grad_speed = 1e-3\n", 338 | "my_opt = tf.train.GradientDescentOptimizer(grad_speed)\n", 339 | "train_step = my_opt.minimize(loss)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "# Run the optimization\n", 349 | 
"loss_vec = []\n", 350 | "with tf.Session() as sess:\n", 351 | " sess.run(tf.global_variables_initializer())\n", 352 | " for epoch in range(n_epochs):\n", 353 | " permut = np.random.permutation(len(x))\n", 354 | " for j in range(0, len(x), batch_size):\n", 355 | " batch = permut[j:j+batch_size]\n", 356 | " Xs = x[batch]\n", 357 | " Ys = y[batch]\n", 358 | "\n", 359 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n", 360 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n", 361 | " \n", 362 | " if epoch % steps == steps - 1:\n", 363 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n", 364 | " loss_vec.append(temp_loss)\n", 365 | "\n", 366 | " (A_, b_) = sess.run([A, b])\n", 367 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n", 368 | " print('Loss = %.8f' % temp_loss)\n", 369 | " print(\"\")\n", 370 | "\n", 371 | "\n", 372 | " [slope, y_intercept] = sess.run([A, b])\n", 373 | " prediction = sess.run(model_output, feed_dict={X: x})\n", 374 | " mse = mean_squared_error(y, prediction)\n", 375 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 376 | " rmse = np.sqrt(mse)\n", 377 | " print('RMSE (on training data): {}'.format(rmse))\n", 378 | " r2 = r2_score(y, prediction)\n", 379 | " print(\"R2 (on training data): {:.2}\".format(r2))\n", 380 | "\n", 381 | "best_fit = []\n", 382 | "for i in x:\n", 383 | " best_fit.append(slope[0]*i+y_intercept[0])" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "# Plot 1D best fit\n", 393 | "\n", 394 | "fig,ax = plt.subplots()\n", 395 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n", 396 | "ax.set_ylabel(\"House Price\")\n", 397 | "\n", 398 | "ax.scatter(x, y, s=2, label='Data Points')\n", 399 | "ax.plot(x, np.array(best_fit), '-', lw=2, color=\"#f9a602\", label='Best fit line')\n", 400 | "ax.legend(loc='upper left')\n", 401 | "\n", 402 | 
"fig.savefig('REGRESSION_FIG_06.png')\n", 403 | "\n", 404 | "# Plot loss over time\n", 405 | "plt.figure()\n", 406 | "fig,ax = plt.subplots()\n", 407 | "ax.set_title('Loss per Epoch')\n", 408 | "ax.set_xlabel('Epoch')\n", 409 | "ax.set_ylabel('Loss')\n", 410 | "\n", 411 | "ax.plot(loss_vec, 'k-')\n", 412 | "\n", 413 | "fig.savefig('REGRESSION_FIG_07.png')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "What happens if we move to use all the features?" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "# Creation of the scaffolding\n", 430 | "\n", 431 | "import tensorflow as tf\n", 432 | "tf.reset_default_graph()\n", 433 | "\n", 434 | "x = boston.data\n", 435 | "y = np.reshape(boston.target, (-1, 1))\n", 436 | "\n", 437 | "nb_features = x.shape[1]\n", 438 | "\n", 439 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n", 440 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n", 441 | "\n", 442 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n", 443 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "# Creation of the graph\n", 453 | "model_output = tf.matmul(X, A) + b\n", 454 | "\n", 455 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n", 456 | "\n", 457 | "# Uncomment to get Ridge or Lasso\n", 458 | "\"\"\"\n", 459 | "beta = 0.005\n", 460 | "regularizer = tf.nn.l2_loss(A)\n", 461 | "loss = loss + beta * regularizer\n", 462 | "\"\"\"\n", 463 | "\"\"\"\n", 464 | "beta = 0.5\n", 465 | "regularizer = tf.reduce_mean(tf.abs(A))\n", 466 | "loss = loss + beta * regularizer\n", 467 | "\"\"\"\n", 468 | "\n", 469 | "grad_speed = 5e-7\n", 470 | "my_opt = 
tf.train.GradientDescentOptimizer(grad_speed)\n", 471 | "train_step = my_opt.minimize(loss)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "# Run the optimization\n", 481 | "loss_vec = []\n", 482 | "with tf.Session() as sess:\n", 483 | " sess.run(tf.global_variables_initializer())\n", 484 | " for epoch in range(n_epochs):\n", 485 | " permut = np.random.permutation(len(x))\n", 486 | " for j in range(0, len(x), batch_size):\n", 487 | " batch = permut[j:j+batch_size]\n", 488 | " Xs = x[batch]\n", 489 | " Ys = y[batch]\n", 490 | "\n", 491 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n", 492 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n", 493 | " \n", 494 | " if epoch % steps == steps - 1:\n", 495 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n", 496 | " loss_vec.append(temp_loss)\n", 497 | "\n", 498 | " (A_, b_) = sess.run([A, b])\n", 499 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n", 500 | " print('Loss = %.8f' % temp_loss)\n", 501 | " print(\"\")\n", 502 | "\n", 503 | "\n", 504 | " [slope, y_intercept] = sess.run([A, b])\n", 505 | " prediction = sess.run(model_output, feed_dict={X: x})\n", 506 | " mse = mean_squared_error(y, prediction)\n", 507 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 508 | " rmse = np.sqrt(mse)\n", 509 | " print('RMSE (on training data): {}'.format(rmse))\n", 510 | " r2 = r2_score(y, prediction)\n", 511 | " print(\"R2 (on training data): {:.2}\".format(r2))\n", 512 | "\n", 513 | "best_fit = []\n", 514 | "for i in x:\n", 515 | " best_fit.append(slope[0]*i+y_intercept[0])" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "# Plot loss over time\n", 525 | "plt.figure()\n", 526 | "fig,ax = plt.subplots()\n", 527 | "ax.set_title('Loss per Epoch')\n", 528 | 
"ax.set_xlabel('Epoch')\n", 529 | "ax.set_ylabel('Loss')\n", 530 | "\n", 531 | "ax.plot(loss_vec, 'k-')\n", 532 | "\n", 533 | "fig.savefig('REGRESSION_FIG_08.png')" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": { 539 | "collapsed": true 540 | }, 541 | "source": [ 542 | "## E2006 Dataset" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "Load data:\n" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [ 558 | "from sklearn.datasets import load_svmlight_file\n", 559 | "data, target = load_svmlight_file('data/E2006.train')" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": {}, 565 | "source": [ 566 | "Compute error on training data to demonstrate that we can obtain near perfect scores:" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "lr = LinearRegression()\n", 576 | "lr.fit(data, target)\n", 577 | "pred = lr.predict(data) \n", 578 | "\n", 579 | "print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 580 | "print('R2 on training, {:.2}'.format(r2_score(target, pred)))" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "However, we do not do so well on cross-validation:" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "kf = KFold(n_splits=5)\n", 597 | "pred = cross_val_predict(lr, data, target, cv=kf)\n", 598 | "\n", 599 | "print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 600 | "print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | 
"Now, we try _an Elastic net_:" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "# Edit the lines below if you want to switch method: \n", 617 | "met = ElasticNet(alpha=0.1)\n", 618 | "met.fit(data, target)\n", 619 | "pred = met.predict(data)\n", 620 | "\n", 621 | "print('[EN 0.1] RMSE on training: {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 622 | "print('[EN 0.1] R2 on training: {:.2}'.format(r2_score(target, pred)))" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": {}, 628 | "source": [ 629 | "Not a perfect prediction on the training data anymore, but let us check the value on cross-validation:" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 639 | "\n", 640 | "print('[EN 0.1] RMSE on testing (5 fold): {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 641 | "print('[EN 0.1] R2 on testing (5 fold): {:.2}'.format(r2_score(target, pred)))" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "We now use `ElasticNetCV` to set parameters automatically:" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "from sklearn.linear_model import ElasticNetCV\n", 658 | "# Construct an ElasticNetCV object (use all available CPUs)\n", 659 | "met = ElasticNetCV(n_jobs=-1)\n", 660 | "\n", 661 | "met.fit(data, target)\n", 662 | "pred = met.predict(data)\n", 663 | "print('[EN CV] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 664 | "print('[EN CV] R2 on training, {:.2}'.format(r2_score(target, pred)))\n", 665 | "\n", 666 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 667 | "print('[EN 
CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 668 | "print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "This is a a pretty good general-purpose regression object:" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": { 682 | "scrolled": true 683 | }, 684 | "outputs": [], 685 | "source": [ 686 | "# Construct an ElasticNetCV object (use all available CPUs)\n", 687 | "met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])\n", 688 | "\n", 689 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 690 | "\n", 691 | "print('[EN CV l1_ratio] RMSE on testing(5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 692 | "print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": {}, 698 | "source": [ 699 | "Now the final result:" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [ 708 | "fig, ax = plt.subplots()\n", 709 | "ax.scatter(target, pred, c='k', s=1)\n", 710 | "ax.plot([-5,-1], [-5,-1], 'r-', lw=2)\n", 711 | "ax.set_xlabel('Actual value')\n", 712 | "ax.set_ylabel('Predicted value')\n", 713 | "fig.savefig('REGRESSION_FIG_05.png')" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": null, 719 | "metadata": {}, 720 | "outputs": [], 721 | "source": [] 722 | } 723 | ], 724 | "metadata": { 725 | "anaconda-cloud": {}, 726 | "kernelspec": { 727 | "display_name": "Python 3", 728 | "language": "python", 729 | "name": "python3" 730 | }, 731 | "language_info": { 732 | "codemirror_mode": { 733 | "name": "ipython", 734 | "version": 3 735 | }, 736 | "file_extension": ".py", 737 | "mimetype": 
"text/x-python", 738 | "name": "python", 739 | "nbconvert_exporter": "python", 740 | "pygments_lexer": "ipython3", 741 | "version": "3.6.5" 742 | } 743 | }, 744 | "nbformat": 4, 745 | "nbformat_minor": 2 746 | } 747 | -------------------------------------------------------------------------------- /Chapter03/data/.gitignore: -------------------------------------------------------------------------------- 1 | E2006.train 2 | -------------------------------------------------------------------------------- /Chapter03/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | curl -O https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.train.bz2 3 | bunzip2 E2006.train.bz2 4 | 5 | -------------------------------------------------------------------------------- /Chapter04/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -O https://ia800107.us.archive.org/27/items/stackexchange/stackoverflow.com-Posts.7z 4 | 5 | p7zip -d stackoverflow.com-Posts.7z 6 | -------------------------------------------------------------------------------- /Chapter06/data/toy/01.txt: -------------------------------------------------------------------------------- 1 | This is a toy post about machine learning. Actually, it contains not much interesting stuff. -------------------------------------------------------------------------------- /Chapter06/data/toy/02.txt: -------------------------------------------------------------------------------- 1 | Imaging databases provide storage capabilities. -------------------------------------------------------------------------------- /Chapter06/data/toy/03.txt: -------------------------------------------------------------------------------- 1 | Most imaging databases save images permanently. 
2 | -------------------------------------------------------------------------------- /Chapter06/data/toy/04.txt: -------------------------------------------------------------------------------- 1 | Imaging databases store data. -------------------------------------------------------------------------------- /Chapter06/data/toy/05.txt: -------------------------------------------------------------------------------- 1 | Imaging databases store data. Imaging databases store data. Imaging databases store data. -------------------------------------------------------------------------------- /Chapter07/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 8 3 | ========= 4 | 5 | Support code for *Chapter 8: Recommendations*. 6 | 7 | The code refers to the second edition of the book and this code has been 8 | significantly refactored when compared to the first one. 9 | 10 | Ratings Prediction 11 | ------------------ 12 | 13 | Note that since the partition of the data into training and testing is random, 14 | everytime you run the code, the results will be different. 15 | 16 | 17 | load_ml100k.py 18 | Load data & partition into test/train 19 | norm.py 20 | Normalize the data 21 | corrneighbours.py 22 | Neighbour models based on ncrroaltoin 23 | regression.py 24 | Regression models 25 | stacked.py 26 | Stacked predictions 27 | averaged.py 28 | Averaging of predictions (mentioned in book, but code is not shown there). 
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from collections import defaultdict, namedtuple


def apriori(dataset, minsupport, maxsize):
    '''
    freqsets, support = apriori(dataset, minsupport, maxsize)

    Find frequent itemsets with the Apriori algorithm.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    minsupport : int
        Minimal support for frequent items
    maxsize : int
        Maximal size of frequent items to return

    Returns
    -------
    freqsets : sequence of frozensets
        All frequent itemsets of size 1 up to and including ``maxsize``
    support : dictionary
        This associates each itemset (represented as a frozenset) with a float
        (the support of that itemset)
    '''
    # For each element, record which transactions contain it; seed the
    # per-itemset transaction index with all singletons.
    baskets = defaultdict(list)
    pointers = defaultdict(list)

    for i, ds in enumerate(dataset):
        for ell in ds:
            pointers[ell].append(i)
            baskets[frozenset([ell])].append(i)

    # Convert pointer items to frozensets to speed up the set intersections
    # performed later; drop elements that can never reach minsupport.
    new_pointers = dict()
    for k in pointers:
        if len(pointers[k]) >= minsupport:
            new_pointers[k] = frozenset(pointers[k])
    pointers = new_pointers
    for k in baskets:
        baskets[k] = frozenset(baskets[k])

    # Valid are all elements whose support is >= minsupport
    valid = set()
    for el, c in baskets.items():
        if len(c) >= minsupport:
            valid.update(el)

    # Itemsets at first iteration are simply all singletons with valid elements:
    itemsets = [frozenset([v]) for v in valid]
    freqsets = []
    for i in range(maxsize - 1):
        print("At iteration {}, number of frequent baskets: {}".format(
            i, len(itemsets)))
        newsets = []
        for it in itemsets:
            ccounts = baskets[it]

            for v, pv in pointers.items():
                if v not in it:
                    csup = (ccounts & pv)
                    if len(csup) >= minsupport:
                        new = frozenset(it | frozenset([v]))
                        if new not in baskets:
                            newsets.append(new)
                            baskets[new] = csup
        freqsets.extend(itemsets)
        itemsets = newsets
        if not len(itemsets):
            break
    # BUGFIX: the original dropped the last generation of candidates, so
    # itemsets of size ``maxsize`` were never returned even though the
    # docstring promises them (with maxsize=2 only singletons came back).
    # Appending here fixes that; it is a no-op when the loop broke early
    # with an empty candidate list.
    freqsets.extend(itemsets)
    support = {}
    for k in baskets:
        support[k] = float(len(baskets[k]))
    return freqsets, support


# A namedtuple to collect all values that may be interesting for a rule
AssociationRule = namedtuple(
    'AssociationRule', ['antecendent', 'consequent', 'base', 'py_x', 'lift'])


def association_rules(dataset, freqsets, support, minlift):
    '''
    for assoc_rule in association_rules(dataset, freqsets, support, minlift):
        ...

    Yield association rules built from the output of ``apriori``.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    freqsets : sequence of frozensets
    support : dictionary
    minlift : float
        minimal lift of yielded rules

    Yields
    ------
    assoc_rule : AssociationRule
    '''
    nr_transactions = float(len(dataset))
    # Rules need a non-empty antecedent, so only itemsets of size > 1 apply.
    freqsets = [f for f in freqsets if len(f) > 1]
    for fset in freqsets:
        for f in fset:
            consequent = frozenset([f])
            antecendent = fset - consequent
            # Estimated P(consequent | antecedent):
            py_x = support[fset] / support[antecendent]
            # Baseline probability of the consequent:
            base = support[consequent] / nr_transactions
            lift = py_x / base
            if lift > minlift:
                yield AssociationRule(
                    antecendent, consequent, base, py_x, lift)
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from collections import defaultdict
from itertools import chain
from gzip import GzipFile

# Minimal number of transactions an itemset must appear in to count as frequent
minsupport = 80


def rules_from_itemset(itemset, dataset, minlift=1.):
    '''Print every association rule derivable from ``itemset`` whose lift is
    strictly greater than ``minlift``.

    Parameters
    ----------
    itemset : frozenset
        A frequent itemset (should have at least two elements)
    dataset : sequence of frozensets
        The transactions
    minlift : float, optional
        Only rules with lift > minlift are printed
    '''
    nr_transactions = float(len(dataset))
    for item in itemset:
        consequent = frozenset([item])
        antecedent = itemset - consequent

        # base : fraction of transactions containing the consequent
        base = 0.0
        # acount : number of transactions containing the antecedent
        acount = 0.0
        # ccount : number of transactions containing the whole itemset
        ccount = 0.0
        for d in dataset:
            if item in d:
                base += 1
            if d.issuperset(itemset):
                ccount += 1
            if d.issuperset(antecedent):
                acount += 1
        base /= nr_transactions
        # P(consequent | antecedent), estimated from counts
        p_y_given_x = ccount / acount
        lift = p_y_given_x / base
        if lift > minlift:
            print('Rule {0} -> {1} has lift {2}'
                  .format(antecedent, consequent, lift))


def main():
    '''Run the naive Apriori demonstration on the retail dataset.

    Requires ``retail.dat.gz`` in the working directory (see ``download.sh``).
    '''
    dataset = [[int(tok) for tok in line.strip().split()]
               for line in GzipFile('retail.dat.gz')]

    counts = defaultdict(int)
    for elem in chain(*dataset):
        counts[elem] += 1

    # Only elements that have at least minsupport should be considered.
    valid = set(el for el, c in counts.items() if (c >= minsupport))

    # Filter the dataset to contain only valid elements
    # (This step is not strictly necessary, but will make the rest of the code
    # faster as the itemsets will be smaller):
    dataset = [[el for el in ds if (el in valid)] for ds in dataset]

    # Convert to frozenset for fast processing
    dataset = [frozenset(ds) for ds in dataset]

    itemsets = [frozenset([v]) for v in valid]
    freqsets = itemsets[:]
    for i in range(16):
        print("At iteration {}, number of frequent baskets: {}".format(
            i, len(itemsets)))
        nextsets = []

        tested = set()
        for it in itemsets:
            for v in valid:
                if v not in it:
                    # Create a new candidate set by adding v to it
                    c = (it | frozenset([v]))

                    # Check if we have tested it already:
                    if c in tested:
                        continue
                    tested.add(c)

                    # Count support by looping over dataset
                    # This step is slow.
                    # Check `apriori.py` for a better implementation.
                    support_c = sum(1 for d in dataset if d.issuperset(c))
                    # BUGFIX: use >= so that "at least minsupport" matches the
                    # singleton filter above and apriori.py (the original used
                    # a strict >, which dropped candidates at exactly
                    # minsupport).
                    if support_c >= minsupport:
                        nextsets.append(c)
        freqsets.extend(nextsets)
        itemsets = nextsets
        if not len(itemsets):
            break
    print("Finished!")

    for itemset in freqsets:
        if len(itemset) > 1:
            rules_from_itemset(itemset, dataset, minlift=4.)


if __name__ == '__main__':
    main()
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

def load():
    '''Load ML-100k data

    Returns the review matrix as a numpy array'''
    import numpy as np
    from scipy import sparse
    from os import path

    if not path.exists('data/ml-100k/u.data'):
        raise IOError("Data has not been downloaded.\nTry the following:\n\n\tcd data\n\t./download.sh")

    # The input is in the form of a CSC sparse matrix, so it's a natural fit to
    # load the data, but we then convert to a more traditional array before
    # returning
    data = np.loadtxt('data/ml-100k/u.data')
    # BUGFIX: np.loadtxt returns floats, but sparse index arrays must be
    # integers (scipy rejects/deprecates float indices).
    ij = data[:, :2].astype(int)
    ij -= 1  # original data is in 1-based system
    values = data[:, 2]
    reviews = sparse.csc_matrix((values, ij.T)).astype(float)
    return reviews.toarray()


def get_train_test(reviews=None, random_state=None):
    '''Split data into training & testing

    Holds out 10% of the non-zero entries for testing.

    Parameters
    ----------
    reviews : ndarray, optional
        Input data; when not given, it is loaded with ``load()``
    random_state : int or None, optional
        Seed for the random number generator, for reproducible splits

    Returns
    -------
    train : ndarray
        training data (the input with the held-out entries zeroed)
    test : ndarray
        testing data (zero everywhere except the held-out entries)
    '''
    import numpy as np
    import random
    r = random.Random(random_state)

    if reviews is None:
        reviews = load()
    # Positions of all observed (non-zero) ratings
    U, M = np.where(reviews)
    # Sample 10% of them as the test set
    test_idxs = np.array(r.sample(range(len(U)), len(U) // 10))
    train = reviews.copy()
    train[U[test_idxs], M[test_idxs]] = 0

    test = np.zeros_like(reviews)
    test[U[test_idxs], M[test_idxs]] = reviews[U[test_idxs], M[test_idxs]]

    return train, test
tr_predicted1.ravel(), 31 | tr_predicted2.ravel(), 32 | tr_predicted3.ravel(), 33 | tr_predicted4.ravel(), 34 | tr_predicted5.ravel(), 35 | ]).T 36 | 37 | return lr.predict(stack_te).reshape(train.shape) 38 | 39 | 40 | def main(): 41 | train,test = load_ml100k.get_train_test(random_state=12) 42 | predicted = predict(train) 43 | r2 = metrics.r2_score(test[test > 0], predicted[test > 0]) 44 | print('R2 stacked: {:.2%}'.format(r2)) 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /Chapter09/data/not_authorized.tsv: -------------------------------------------------------------------------------- 1 | 126213333123743744 2 | 126079414986485761 3 | 126076743613284354 4 | 126213333123743744 5 | 126079414986485761 6 | 126076743613284354 7 | 126049183865114624 8 | 125633065757310976 9 | 126213333123743744 10 | 126079414986485761 11 | 126076743613284354 12 | 126049183865114624 13 | 125633065757310976 14 | 126213333123743744 15 | 126079414986485761 16 | 126076743613284354 17 | 126049183865114624 18 | 125633065757310976 19 | 126213333123743744 20 | 126079414986485761 21 | 126076743613284354 22 | 126049183865114624 23 | 125633065757310976 24 | 126213333123743744 25 | 126079414986485761 26 | 126076743613284354 27 | 126049183865114624 28 | 125633065757310976 29 | 126213333123743744 30 | 126079414986485761 31 | 126076743613284354 32 | 126049183865114624 33 | 125633065757310976 34 | 125264731035537409 35 | 126153311521996800 36 | 126121175926571009 37 | 125988395787882497 38 | 125954651152592896 39 | 125799384976863232 40 | 125681375058735104 41 | 125675806977556480 42 | 125673358418391041 43 | 125659125886623744 44 | 126213333123743744 45 | 126079414986485761 46 | 126076743613284354 47 | 126049183865114624 48 | 125633065757310976 49 | 125264731035537409 50 | 126153311521996800 51 | 126121175926571009 52 | 125988395787882497 53 | 125954651152592896 54 | 125799384976863232 55 | 125681375058735104 56 
| 125675806977556480 57 | 125673358418391041 58 | 125659125886623744 59 | 125561930416013312 60 | 125475953509015552 61 | 125371779039502336 62 | 125368089159286784 63 | 125334519254482944 64 | 125309427422203904 65 | 126213333123743744 66 | 126079414986485761 67 | 126076743613284354 68 | 126049183865114624 69 | 125633065757310976 70 | 125264731035537409 71 | 126153311521996800 72 | 126121175926571009 73 | 125988395787882497 74 | 125954651152592896 75 | 125799384976863232 76 | 125681375058735104 77 | 125675806977556480 78 | 125673358418391041 79 | 125659125886623744 80 | 125561930416013312 81 | 125475953509015552 82 | 125371779039502336 83 | 125368089159286784 84 | 125334519254482944 85 | 125309427422203904 86 | 125204228967903232 87 | 126213333123743744 88 | 126079414986485761 89 | 126076743613284354 90 | 126049183865114624 91 | 125633065757310976 92 | 125264731035537409 93 | 126153311521996800 94 | 126121175926571009 95 | 125988395787882497 96 | 125954651152592896 97 | 125799384976863232 98 | 125681375058735104 99 | 125675806977556480 100 | 125673358418391041 101 | 125659125886623744 102 | 125561930416013312 103 | 125475953509015552 104 | 125371779039502336 105 | 125368089159286784 106 | 125334519254482944 107 | 125309427422203904 108 | 125204228967903232 109 | 126394795802370049 110 | 126386085164101634 111 | 126382776072146944 112 | 126380323733909504 113 | 126317201962700800 114 | 126229089651654656 115 | 126186795808456704 116 | 126110770864979968 117 | 126039090578735104 118 | 126029114850295809 119 | 126213333123743744 120 | 126079414986485761 121 | 126076743613284354 122 | 126049183865114624 123 | 125633065757310976 124 | 125264731035537409 125 | 126153311521996800 126 | 126121175926571009 127 | 125988395787882497 128 | 125954651152592896 129 | 125799384976863232 130 | 125681375058735104 131 | 125675806977556480 132 | 125673358418391041 133 | 125659125886623744 134 | 125561930416013312 135 | 125475953509015552 136 | 125371779039502336 137 | 
125368089159286784 138 | 125334519254482944 139 | 125309427422203904 140 | 125204228967903232 141 | 126394795802370049 142 | 126386085164101634 143 | 126382776072146944 144 | 126380323733909504 145 | 126317201962700800 146 | 126229089651654656 147 | 126186795808456704 148 | 126110770864979968 149 | 126039090578735104 150 | 126029114850295809 151 | 125994997609803776 152 | 125992594395250688 153 | 125988651426512899 154 | 125981074114359297 155 | 125980615664336896 156 | 125958702455988225 157 | 125932876721168384 158 | 125918906215968771 159 | 125725274317914112 160 | 125708240225959936 161 | 125641351848136704 162 | 125630016485732352 163 | 125629788563050496 164 | 125538769632886784 165 | 125347618862792705 166 | 125305567148388352 167 | 125196751387889665 168 | 126213333123743744 169 | 126079414986485761 170 | 126076743613284354 171 | 126049183865114624 172 | 125633065757310976 173 | 125264731035537409 174 | 126153311521996800 175 | 126121175926571009 176 | 125988395787882497 177 | 125954651152592896 178 | 125930962545672192 179 | 125910538550124545 180 | 125797001337122817 181 | 125232405517844481 182 | 126534770095169536 183 | 126520518609350656 184 | 126516914678808578 185 | 126494834449063936 186 | 126494280318582784 187 | 126494100252925954 188 | 126492852615262208 189 | 126488447098695680 190 | 126488384410619906 191 | 126487332865056768 192 | 126532210210783232 193 | 126520550876127232 194 | 126505594290057216 195 | 126497514168922112 196 | 126494895501348864 197 | 126491509527805952 198 | 126528316978102272 199 | 126528078057963520 200 | 126523549493112832 201 | 126520920352358401 202 | 126510284536942592 203 | 126504105530236928 204 | 126499521344712704 205 | 126497100866387969 206 | 126496853742198784 207 | 126494691016441857 208 | 126494569184505856 209 | 126493312650719232 210 | 126487788433584129 211 | 126534127435530240 212 | 126529490582118400 213 | 126528938326495232 214 | 126526465280970752 215 | 126526113131413504 216 | 126519715085549568 217 | 
126511257170886656 218 | 126504285436514304 219 | 126497446955188224 220 | 126495762568851456 221 | 126495208505479168 222 | 126494166145437696 223 | 126493860804308992 224 | 126492542610051072 225 | 126490549367738368 226 | 126484213737340928 227 | 126784810755690496 228 | 126700014385897472 229 | 126635317108289536 230 | 126795256225210368 231 | 126789710705213440 232 | 126728277896347649 233 | 126674460131606529 234 | 126671006302617600 235 | 126593636627513344 236 | 126519595682119681 237 | 126796467213058048 238 | 126734290850557952 239 | 126726063484178432 240 | 126679463839801344 241 | 126673062258147328 242 | 126637471676104704 243 | 126622818220785664 244 | 126622165595459584 245 | 126612152579657728 246 | 126506057613848576 247 | 126505970317787136 248 | 126495306681548800 249 | 126879662851887104 250 | 126877171926040576 251 | 126876654118240257 252 | 126876107881455616 253 | 126867350476697601 254 | 126863084433326080 255 | 126857095088840706 256 | 126883243726344193 257 | 126881376074076161 258 | 126858607789740032 259 | 126883335875203072 260 | 126883013236752384 261 | 126882832319651840 262 | 126878130353876992 263 | 126877869547855872 264 | 126875416760815616 265 | 126875059477426176 266 | 126870550546096128 267 | 126868828457144321 268 | 126868429796933632 269 | 126868271625539585 270 | 126867067776405506 271 | 126866413053939712 272 | 126865888724004864 273 | 126865837800951808 274 | 126865038085591041 275 | 126864886402777088 276 | 126864861576704000 277 | 126863772877996034 278 | 126863571912114177 279 | 126862618836221954 280 | 126860955605934081 281 | 126859710740701185 282 | 126853913591808002 283 | 126882080050262016 284 | 126881227729928193 285 | 126879417220874240 286 | 126875034135433216 287 | 126874145408561152 288 | 126873260385239040 289 | 126872615380987905 290 | 126872361462005760 291 | 126872241693667328 292 | 126872199620591617 293 | 126869762763522049 294 | 126868924590600192 295 | 126868586882007041 296 | 126868349396324352 297 | 
126867170742374400 298 | 126866474806673408 299 | 126866312130609152 300 | 126865987365634048 301 | 126864954140803072 302 | 126864673416032256 303 | 126863938339094531 304 | 126862853822099456 305 | 126862343148802048 306 | 126860270181171201 307 | 126860114610241536 308 | 126859857604247552 309 | 126506057613848576 310 | 126505970317787136 311 | 126495306681548800 312 | 126879662851887104 313 | 126877171926040576 314 | 126876654118240257 315 | 126876107881455616 316 | 126867350476697601 317 | 126863084433326080 318 | 126857095088840706 319 | 126883243726344193 320 | 126881376074076161 321 | 126858607789740032 322 | 126883335875203072 323 | 126883013236752384 324 | 126882832319651840 325 | 126878130353876992 326 | 126877869547855872 327 | 126875416760815616 328 | 126875059477426176 329 | 126870550546096128 330 | 126868828457144321 331 | 126868429796933632 332 | 126868271625539585 333 | 126867067776405506 334 | 126866413053939712 335 | 126865888724004864 336 | 126865837800951808 337 | 126865038085591041 338 | 126864886402777088 339 | 126864861576704000 340 | 126863772877996034 341 | 126863571912114177 342 | 126862618836221954 343 | 126860955605934081 344 | 126859710740701185 345 | 126853913591808002 346 | 126882080050262016 347 | 126881227729928193 348 | 126879417220874240 349 | 126875034135433216 350 | 126874145408561152 351 | 126873260385239040 352 | 126872615380987905 353 | 126872361462005760 354 | 126872241693667328 355 | 126872199620591617 356 | 126869762763522049 357 | 126868924590600192 358 | 126868586882007041 359 | 126868349396324352 360 | 126867170742374400 361 | 126866474806673408 362 | 126866312130609152 363 | 126865987365634048 364 | 126864954140803072 365 | 126864673416032256 366 | 126863938339094531 367 | 126862853822099456 368 | 126862343148802048 369 | 126860270181171201 370 | 126860114610241536 371 | 126859857604247552 372 | 126404574230740992 373 | 126350302113824769 374 | 126148685737361408 375 | 126040352237961217 376 | 125995158679461888 377 | 
125960325437722624 378 | 125643107260829697 379 | 125608381431025664 380 | 125523414298533888 381 | 125374540107886593 382 | 126405405667627008 383 | 126391082308206593 384 | 125945821240885248 385 | 125943204943114240 386 | 125476730067615744 387 | 125369698840887297 388 | 125202037293064192 389 | 126405821482532864 390 | 126405160934178816 391 | 126379730827083776 392 | 126370776013213697 393 | 126243528832593920 394 | 126225922159427584 395 | 126219340214304768 396 | 126113944891949056 397 | 126061182720278528 398 | 126042506717704192 399 | 126041773356232704 400 | 126016405085757440 401 | 126012833128390656 402 | 126009386022879232 403 | 125943078837161984 404 | 125887065861787648 405 | 125866627337162752 406 | 125866368758333440 407 | 125859792802693120 408 | 125250078108684288 409 | 126385587740610563 410 | 126360606042374144 411 | 126346705292640257 412 | 126260304819662849 413 | 126236984644612096 414 | 125973789526863872 415 | 125967413299773440 416 | 125957826500771840 417 | 125862601677737985 418 | 125699684693065728 419 | 125346522618535937 420 | 126525172969766912 421 | 126514474378203136 422 | 126511000907288576 423 | 126499965869625345 424 | 126497655785402368 425 | 126493192110612480 426 | 126489713782685696 427 | 126489263025033216 428 | 126496987192373248 429 | 126491870900666368 430 | 126491480087986176 431 | 126532019999096832 432 | 126531893649874945 433 | 126520914413236224 434 | 126520531934654465 435 | 126512842194161664 436 | 126509135842914304 437 | 126506232432439296 438 | 126492945057718272 439 | 126486051530354689 440 | 126497618258964480 441 | 126496237879959553 442 | 126492339559608320 443 | 126491356481859585 444 | 126487422249861120 445 | 126487385461633024 446 | 126779217911349248 447 | 126670032951443456 448 | 126583473929588736 449 | 126574432159408129 450 | 126803763603312640 451 | 126794825998663680 452 | 126732384602296320 453 | 126642779064504320 454 | 126611604925194240 455 | 126591976408748032 456 | 126798811262763009 457 | 
126761498885361664 458 | 126759986780057600 459 | 126752126880858112 460 | 126749587133308928 461 | 126745438136176640 462 | 126701862383661056 463 | 126689077230698496 464 | 126680181359378432 465 | 126679552310251521 466 | 126611107266834433 467 | 126610365852303361 468 | 126601340242767872 469 | 126880912754475008 470 | 126877362632667136 471 | 126862735953768448 472 | 126858393909608448 473 | 126870358816067584 474 | 126869855621218304 475 | 126866003094290434 476 | 126864575508381696 477 | 126881380503273472 478 | 126881167541665792 479 | 126880571233280000 480 | 126880429256093696 481 | 126880253145657344 482 | 126879867731062784 483 | 126879122298372097 484 | 126877998115852288 485 | 126877965064740864 486 | 126876452762296321 487 | 126876009797656576 488 | 126875887093293056 489 | 126874662268452864 490 | 126872221292576768 491 | 126871857277308930 492 | 126871511326924800 493 | 126871066760065024 494 | 126870943489466368 495 | 126868570226425856 496 | 126867320005066752 497 | 126866353561927680 498 | 126865005009309696 499 | 126861011813810176 500 | 126860964992794624 501 | 126859978941276161 502 | 126882743819833345 503 | 126881169169063937 504 | 126880644105121792 505 | 126880556775522304 506 | 126879958529343488 507 | 126879219484606464 508 | 126878250541645824 509 | 126877540928331777 510 | 126876463965278208 511 | 126874165105008640 512 | 126873756437200896 513 | 126873447912587264 514 | 126873004494954496 515 | 126872365211725824 516 | 126871907302785024 517 | 126871831583002625 518 | 126867611546943490 519 | 126867000030007296 520 | 126866827715420160 521 | 126866759792852992 522 | 126865704380153856 523 | 126865416671862785 524 | 126862832489861121 525 | 126862595117424641 526 | 126862150265352193 527 | 126871857277308930 528 | 126871511326924800 529 | 126871066760065024 530 | 126870943489466368 531 | 126868570226425856 532 | 126867320005066752 533 | 126866353561927680 534 | 126865005009309696 535 | 126861011813810176 536 | 126860964992794624 537 | 
126859978941276161 538 | 126882743819833345 539 | 126881169169063937 540 | 126880644105121792 541 | 126880556775522304 542 | 126879958529343488 543 | 126879219484606464 544 | 126878250541645824 545 | 126877540928331777 546 | 126876463965278208 547 | 126874165105008640 548 | 126873756437200896 549 | 126873447912587264 550 | 126873004494954496 551 | 126872365211725824 552 | 126877263311536128 553 | 126870792960086018 554 | 126877263311536128 555 | 126870792960086018 556 | -------------------------------------------------------------------------------- /Chapter09/twitterauth.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import sys 9 | 10 | CONSUMER_KEY = None 11 | CONSUMER_SECRET = None 12 | 13 | ACCESS_TOKEN_KEY = None 14 | ACCESS_TOKEN_SECRET = None 15 | 16 | if CONSUMER_KEY is None or CONSUMER_SECRET is None or ACCESS_TOKEN_KEY is None or ACCESS_TOKEN_SECRET is None: 17 | print("""\ 18 | When doing last code sanity checks for the book, Twitter 19 | was using the API 1.0, which did not require authentication. 20 | With its switch to version 1.1, this has now changed. 21 | 22 | It seems that you don't have already created your personal Twitter 23 | access keys and tokens. Please do so at https://dev.twitter.com 24 | and paste the keys/secrets into twitterauth.py. 
25 | 26 | Sorry for the inconvenience, 27 | The authors.""") 28 | 29 | sys.exit(1) 30 | -------------------------------------------------------------------------------- /Chapter10/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 4 3 | ========= 4 | 5 | Support code for *Chapter 4: Topic Modeling* 6 | 7 | 8 | AP Data 9 | ------- 10 | 11 | To download the AP data, use the ``download_ap.sh`` script inside the ``data`` 12 | directory:: 13 | 14 | cd data 15 | ./download_ap.sh 16 | 17 | Word cloud creation 18 | ------------------- 19 | 20 | Word cloud creation requires that ``pytagcloud`` be installed (in turn, this 21 | requires ``pygame``). Since this is not an essential part of the chapter, the 22 | code will work even if you have not installed it (naturally, the cloud image 23 | will not be generated and a warning will be printed). 24 | 25 | 26 | Wikipedia processing 27 | -------------------- 28 | 29 | You will need **a lot of disk space**. The download of the Wikipedia text is 30 | 11GB and preprocessing it takes another 24GB to save it in the intermediate 31 | format that gensim uses for a total of 34GB! 32 | 33 | Run the following two commands inside the ``data/`` directory:: 34 | 35 | ./download_wp.sh 36 | ./preprocess-wikidata.sh 37 | 38 | As the filenames indicate, the first step will download the data and the second 39 | one will preprocess it. Preprocessing can take several hours, but it is 40 | feasible to run it on a modern laptop. Once the second step is finished, you 41 | may remove the input file if you want to save disk space 42 | (``data/enwiki-latest-pages-articles.xml.bz2``). 43 | 44 | To generate the model, you can run the ``wikitopics_create.py`` script, while 45 | the ``wikitopics_plot.py`` script will plot the most heavily discussed topic as 46 | well as the least heavily discussed one. The code is split into steps as the 47 | first one can take a very long time. 
Then it saves the results so that you can 48 | later explore them at leisure. 49 | 50 | You should not expect that your results will exactly match the results in the 51 | book, for two reasons: 52 | 53 | 1. The LDA algorithm is a probabilistic algorithm and can give different 54 | results every time it is run. 55 | 2. Wikipedia keeps changing. Thus, even your input data will be different. 56 | 57 | Scripts 58 | ------- 59 | 60 | blei_lda.py 61 | Computes LDA using the AP Corpus. 62 | wikitopics_create.py 63 | Create the topic model for Wikipedia using LDA (must download wikipedia database first) 64 | wikitopics_create_hdp.py 65 | Create the topic model for Wikipedia using HDP (must download wikipedia database first) 66 | -------------------------------------------------------------------------------- /Chapter10/Topic modeling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Topic Modeling\n", 8 | "\n", 9 | "We start with importing `gensim`" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "**IMPORTANT**: You cannot run this example only from within the notebook. You must first download the data on the command line." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import gensim\n", 28 | "from gensim import corpora, models, matutils" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Now the usual imports:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import matplotlib.pyplot as plt\n", 47 | "import numpy as np\n", 48 | "from os import path\n", 49 | "\n", 50 | "\n", 51 | "# Check that data exists\n", 52 | "if not path.exists('./data/ap/ap.dat'):\n", 53 | " print('Error: Expected data to be present at data/ap/')\n", 54 | " print('Please cd into ./data & run ./download_ap.sh')\n", 55 | "\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "We will generate 100 topics as in the book, but you can changes this setting here:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "NUM_TOPICS = 100" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Load the data" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "corpus = corpora.BleiCorpus('./data/ap/ap.dat', './data/ap/vocab.txt')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Build the LDA model" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "scrolled": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "model = models.ldamodel.LdaModel(\n", 110 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, 
alpha=None)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "num_topics_used = [len(model[doc]) for doc in corpus]\n", 120 | "fig,ax = plt.subplots()\n", 121 | "ax.hist(num_topics_used, np.arange(42))\n", 122 | "ax.set_ylabel('Nr of documents')\n", 123 | "ax.set_xlabel('Nr of topics')\n", 124 | "fig.tight_layout()\n", 125 | "fig.savefig('Figure_04_01.png')\n", 126 | "fig" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "source": [ 135 | "We can do the same after changing the $\\alpha$ value: " 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "ALPHA = 1.0\n", 145 | "\n", 146 | "model1 = models.ldamodel.LdaModel(\n", 147 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=ALPHA)\n", 148 | "num_topics_used1 = [len(model1[doc]) for doc in corpus]\n", 149 | "\n", 150 | "fig,ax = plt.subplots()\n", 151 | "ax.hist([num_topics_used, num_topics_used1], np.arange(42))\n", 152 | "ax.set_ylabel('Nr of documents')\n", 153 | "ax.set_xlabel('Nr of topics')\n", 154 | "\n", 155 | "# The coordinates below were fit by trial and error to look good\n", 156 | "ax.text(9, 223, r'default alpha')\n", 157 | "ax.text(26, 156, 'alpha=1.0')\n", 158 | "fig.tight_layout()\n", 159 | "fig.savefig('Figure_04_02.png')\n", 160 | "fig" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Exploring the topic model\n", 168 | "\n", 169 | "We can explore the mathematical structure of the topics:\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "doc = corpus.docbyoffset(0)\n", 179 | "topics = model[doc]\n", 180 | "print(topics)" 181 | ] 182 | }, 183 | { 184 | "cell_type": 
"markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "This is not very informative, however. Another way to explore is to identify the most discussed topic, i.e., the one with the highest total weight:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)\n", 199 | "weight = topics.sum(1)\n", 200 | "max_topic = weight.argmax()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Get the top 64 words for this topic.\n", 208 | "Without the argument, show_topic would return only 10 words" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "words = model.show_topic(max_topic, 64)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "One way to visualize the results is to build a _word cloud_. 
For this we use the `wordcloud` module:" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "from wordcloud import WordCloud\n", 236 | "\n", 237 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 238 | "wc = wc.generate_from_frequencies(dict(words))\n", 239 | "\n", 240 | "\n", 241 | "fig,ax = plt.subplots()\n", 242 | "\n", 243 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 244 | "fig" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "# NEWS DATA" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Now, repeat the same exercise using alpha=1.0.\n", 259 | "\n", 260 | "You can edit the constant below to play around with this parameter" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "import nltk.stem\n", 270 | "\n", 271 | "nltk.download('stopwords')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "english_stemmer = nltk.stem.SnowballStemmer('english')\n", 283 | "stopwords = set(nltk.corpus.stopwords.words('english'))\n", 284 | "stopwords.update(['from:', 'subject:', 'writes:', 'writes'])" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "We need to add a little adaptor class:" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "class DirectText(corpora.textcorpus.TextCorpus):\n", 303 | "\n", 304 | " def get_texts(self):\n", 305 | " return self.input\n", 306 | "\n", 307 | " def __len__(self):\n", 308 | " return 
len(self.input)\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Load the data" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "import sklearn.datasets\n", 325 | "dataset = sklearn.datasets.load_mlcomp(\"20news-18828\", \"train\",\n", 326 | " mlcomp_root='./data')\n" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "We preprocess the data to split the data into words and remove stopwords:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "collapsed": true 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "otexts = dataset.data\n", 345 | "texts = dataset.data\n", 346 | "\n", 347 | "texts = [t.decode('utf-8', 'ignore') for t in texts]\n", 348 | "texts = [t.split() for t in texts]\n", 349 | "texts = [map(lambda w: w.lower(), t) for t in texts]\n", 350 | "texts = [filter(lambda s: not len(set(\"+-.?!()>@012345689\") & set(s)), t)\n", 351 | " for t in texts]\n", 352 | "texts = [filter(lambda s: (len(s) > 3) and (s not in stopwords), t)\n", 353 | " for t in texts]\n", 354 | "texts = [[english_stemmer.stem(w) for w in t] for t in texts]" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "We also remove words that are _too common_:" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "from collections import defaultdict\n", 373 | "usage = defaultdict(int)\n", 374 | "for t in texts:\n", 375 | " for w in set(t):\n", 376 | " usage[w] += 1\n", 377 | "limit = len(texts) / 10\n", 378 | "too_common = [w for w in usage if usage[w] > limit]\n", 379 | "too_common = set(too_common)\n", 380 | "texts = [[w for w in t if w not in 
too_common] for t in texts]" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": { 387 | "scrolled": true 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "corpus = DirectText(texts)\n", 392 | "dictionary = corpus.dictionary\n", 393 | "try:\n", 394 | " dictionary['computer']\n", 395 | "except:\n", 396 | " pass\n", 397 | "\n", 398 | "model = models.ldamodel.LdaModel(\n", 399 | " corpus, num_topics=100, id2word=dictionary.id2token)\n", 400 | "\n", 401 | "thetas = np.zeros((len(texts), 100))\n", 402 | "for i, c in enumerate(corpus):\n", 403 | " for ti, v in model[c]:\n", 404 | " thetas[i, ti] += v" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "We compare all documents to each other **by the topics they contain**:" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "from scipy.spatial import distance\n", 421 | "distances = distance.squareform(distance.pdist(thetas))\n", 422 | "large = distances.max() + 1\n", 423 | "for i in range(len(distances)):\n", 424 | " distances[i, i] = large\n", 425 | "\n", 426 | "print(otexts[1])\n", 427 | "print()\n", 428 | "print()\n", 429 | "print()\n", 430 | "print(otexts[distances[1].argmin()])" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": { 436 | "collapsed": true 437 | }, 438 | "source": [ 439 | "# Modeling Wikipedia" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "Load the data\n", 447 | "\n", 448 | "Note that you **must have run the `wikitopics_create.py` script**. 
This will take a few hours" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "import gensim\n", 458 | "if not path.exists('wiki_lda.pkl'):\n", 459 | " import sys\n", 460 | " sys.stderr.write('''\\\n", 461 | "This script must be run after wikitopics_create.py!\n", 462 | "\n", 463 | "That script creates and saves the LDA model (this must onlly be done once).\n", 464 | "This script is responsible for the analysis.''')\n", 465 | " \n", 466 | "# Load the preprocessed Wikipedia corpus (id2word and mm)\n", 467 | "id2word = gensim.corpora.Dictionary.load_from_text(\n", 468 | " 'data/wiki_en_output_wordids.txt.bz2')\n", 469 | "mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')\n", 470 | "\n", 471 | "# Load the precomputed model\n", 472 | "model = gensim.models.ldamodel.LdaModel.load('wiki_lda.pkl')\n", 473 | "\n", 474 | "topics = np.load('topics.npy', mmap_mode='r')" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "Compute the number of topics mentioned in each document\n" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "lens = (topics > 0).sum(axis=1)\n", 491 | "print('Mean number of topics mentioned: {0:.3}'.format(np.mean(lens)))\n", 492 | "print('Percentage of articles mentioning less than 10 topics: {0:.1%}'.format(np.mean(lens <= 10)))\n", 493 | "\n", 494 | "# Weights will be the total weight of each topic\n", 495 | "weights = topics.sum(0)\n", 496 | "\n" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "Retrieve the most heavily used topic and plot it as a word cloud:\n" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": null, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "words = 
model.show_topic(weights.argmax(), 64)\n", 513 | "\n", 514 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 515 | "wc = wc.generate_from_frequencies(dict(words))\n", 516 | "\n", 517 | "fig,ax = plt.subplots()\n", 518 | "\n", 519 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 520 | "fig" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "fraction_mention = np.mean(topics[:,weights.argmax()] > 0)\n", 530 | "print(\"The most mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n", 531 | "total_weight = np.mean(topics[:,weights.argmax()])\n", 532 | "print(\"It represents {:.1%} of the total number of words.\".format(total_weight))\n" 533 | ] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "Retrieve the **least** heavily used topic and plot it as a word cloud:" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "words = model.show_topic(weights.argmin(), 64)\n", 549 | "\n", 550 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 551 | "wc = wc.generate_from_frequencies(dict(words))\n", 552 | "fig,ax = plt.subplots()\n", 553 | "\n", 554 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 555 | "fig" 556 | ] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "metadata": {}, 561 | "source": [ 562 | "Again, we can measure how often this topic used:" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "fraction_mention = np.mean(topics[:,weights.argmin()] > 0)\n", 572 | "print(\"The least mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n", 573 | "total_weight = np.mean(topics[:,weights.argmin()])\n", 574 | 
"print(\"It represents {:.1%} of the total number of words.\".format(total_weight))" 575 | ] 576 | } 577 | ], 578 | "metadata": { 579 | "kernelspec": { 580 | "display_name": "Python 3", 581 | "language": "python", 582 | "name": "python3" 583 | }, 584 | "language_info": { 585 | "codemirror_mode": { 586 | "name": "ipython", 587 | "version": 3 588 | }, 589 | "file_extension": ".py", 590 | "mimetype": "text/x-python", 591 | "name": "python", 592 | "nbconvert_exporter": "python", 593 | "pygments_lexer": "ipython3", 594 | "version": "3.6.2" 595 | } 596 | }, 597 | "nbformat": 4, 598 | "nbformat_minor": 2 599 | } 600 | -------------------------------------------------------------------------------- /Chapter10/data/.gitignore: -------------------------------------------------------------------------------- 1 | ap.tgz 2 | ap/ 3 | dataset-379-20news-18828_HJRZF.zip 4 | 379/ 5 | enwiki-latest-pages-articles.xml.bz2 6 | wiki_en_output_bow.mm 7 | wiki_en_output_bow.mm.gz 8 | wiki_en_output_bow.mm.index 9 | wiki_en_output_tfidf.mm 10 | wiki_en_output_tfidf.mm.gz 11 | wiki_en_output_tfidf.mm.index 12 | wiki_en_output_wordids.txt.bz2 13 | -------------------------------------------------------------------------------- /Chapter10/data/download_ap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget http://www.cs.columbia.edu/~blei/lda-c/ap.tgz 3 | tar xzf ap.tgz 4 | -------------------------------------------------------------------------------- /Chapter10/data/download_wp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 3 | -------------------------------------------------------------------------------- /Chapter10/data/preprocess-wikidata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python -m gensim.scripts.make_wiki 
enwiki-latest-pages-articles.xml.bz2 wiki_en_output 4 | -------------------------------------------------------------------------------- /Chapter10/wikitopics_create.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from __future__ import print_function 9 | import logging 10 | import gensim 11 | import numpy as np 12 | 13 | NR_OF_TOPICS = 100 14 | 15 | # Set up logging in order to get progress information as the model is being built: 16 | logging.basicConfig( 17 | format='%(asctime)s : %(levelname)s : %(message)s', 18 | level=logging.INFO) 19 | 20 | # Load the preprocessed corpus (id2word & mm): 21 | id2word = gensim.corpora.Dictionary.load_from_text( 22 | 'data/wiki_en_output_wordids.txt.bz2') 23 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm') 24 | 25 | # Calling the constructor is enough to build the model 26 | # This call will take a few hours! 27 | model = gensim.models.ldamodel.LdaModel( 28 | corpus=mm, 29 | id2word=id2word, 30 | num_topics=NR_OF_TOPICS, 31 | update_every=1, 32 | chunksize=10000, 33 | passes=1) 34 | 35 | # Save the model so we do not need to learn it again. 36 | model.save('wiki_lda.pkl') 37 | 38 | # Compute the document/topic matrix 39 | topics = np.zeros((len(mm), model.num_topics)) 40 | for di,doc in enumerate(mm): 41 | doc_top = model[doc] 42 | for ti,tv in doc_top: 43 | topics[di,ti] += tv 44 | np.save('topics.npy', topics) 45 | 46 | # Alternatively, we create a sparse matrix and save that. 
This alternative 47 | # saves disk space, at the cost of slightly more complex code: 48 | 49 | ## from scipy import sparse, io 50 | ## sp = sparse.csr_matrix(topics) 51 | ## io.savemat('topics.mat', {'topics': sp}) 52 | -------------------------------------------------------------------------------- /Chapter10/wikitopics_create_hdp.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from __future__ import print_function 9 | import logging 10 | import gensim 11 | import numpy as np 12 | 13 | # Set up logging in order to get progress information as the model is being built: 14 | logging.basicConfig( 15 | format='%(asctime)s : %(levelname)s : %(message)s', 16 | level=logging.INFO) 17 | 18 | # Load the preprocessed corpus (id2word & mm): 19 | id2word = gensim.corpora.Dictionary.load_from_text( 20 | 'data/wiki_en_output_wordids.txt.bz2') 21 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm') 22 | 23 | # Calling the constructor is enough to build the model 24 | # This call will take a few hours! 25 | model = gensim.models.hdpmodel.HdpModel( 26 | corpus=mm, 27 | id2word=id2word, 28 | chunksize=10000) 29 | 30 | # Save the model so we do not need to learn it again. 
31 | model.save('wiki_hdp.pkl') 32 | 33 | # Compute the document/topic matrix 34 | topics = np.zeros((len(mm), model.num_topics)) 35 | for di,doc in enumerate(mm): 36 | doc_top = model[doc] 37 | for ti,tv in doc_top: 38 | topics[di,ti] += tv 39 | np.save('topics_hdp.npy', topics) 40 | -------------------------------------------------------------------------------- /Chapter12/README.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Chapter 10 3 | ========== 4 | 5 | Support code for *Chapter 10: Pattern Recognition & Computer Vision* 6 | 7 | Data 8 | ---- 9 | 10 | This chapter relies on a publicly available dataset (which can be downloaded 11 | using the ``download.sh`` script inside the ``data/`` directory) as well the 12 | dataset that is packaged with the repository at ``../SimpleImageDataset/``. 13 | 14 | Running ``download.sh`` will retrieve the other dataset into a directory 15 | ``AnimTransDistr/``. 16 | 17 | Scripts 18 | ------- 19 | 20 | chapter.py 21 | Code as written in the book. 22 | thresholded_figure.py 23 | Computes the thresholded figures, including after Gaussian blurring 24 | lena-ring.py 25 | Lena image with center in focus and blurred edges 26 | figure10.py 27 | Just paste two images next to each others 28 | features.py 29 | Contains the color histogram function from the book as well as a simple 30 | wrapper around ``mahotas.texture.haralick`` 31 | simple_classification.py 32 | Classify SimpleImageDataset with texture features + color histogram features 33 | large_classification.py 34 | Classify ``AnimTransDistr`` with both texture and SURF features. 35 | neighbors.py 36 | Computes image neighbors as well as the neighbor figure from the book. 
37 | 38 | -------------------------------------------------------------------------------- /Chapter12/ch12_3rd/chapter_12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computer Vision" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing. It is made available under the MIT License." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Generative Adversarial Networks" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Let's create a class for our GAN based on convolution networks." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import tensorflow as tf\n", 38 | "\n", 39 | "def match(logits, labels):\n", 40 | " logits = tf.clip_by_value(logits, 1e-7, 1. 
- 1e-7)\n", 41 | " return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))\n", 42 | "\n", 43 | "def batchnormalize(X, eps=1e-8, g=None, b=None):\n", 44 | " if X.get_shape().ndims == 4:\n", 45 | " mean = tf.reduce_mean(X, [0,1,2])\n", 46 | " std = tf.reduce_mean( tf.square(X-mean), [0,1,2] )\n", 47 | " X = (X-mean) / tf.sqrt(std+eps)\n", 48 | "\n", 49 | " if g is not None and b is not None:\n", 50 | " g = tf.reshape(g, [1,1,1,-1])\n", 51 | " b = tf.reshape(b, [1,1,1,-1])\n", 52 | " X = X*g + b\n", 53 | "\n", 54 | " elif X.get_shape().ndims == 2:\n", 55 | " mean = tf.reduce_mean(X, 0)\n", 56 | " std = tf.reduce_mean(tf.square(X-mean), 0)\n", 57 | " X = (X-mean) / tf.sqrt(std+eps)\n", 58 | "\n", 59 | " if g is not None and b is not None:\n", 60 | " g = tf.reshape(g, [1,-1])\n", 61 | " b = tf.reshape(b, [1,-1])\n", 62 | " X = X*g + b\n", 63 | "\n", 64 | " else:\n", 65 | " raise NotImplementedError\n", 66 | "\n", 67 | " return X\n", 68 | "\n", 69 | "class DCGAN():\n", 70 | " def __init__(\n", 71 | " self,\n", 72 | " image_shape=[28,28,1],\n", 73 | " dim_z=100,\n", 74 | " dim_y=10,\n", 75 | " dim_W1=1024,\n", 76 | " dim_W2=128,\n", 77 | " dim_W3=64,\n", 78 | " dim_channel=1,\n", 79 | " ):\n", 80 | "\n", 81 | " self.image_shape = image_shape\n", 82 | " self.dim_z = dim_z\n", 83 | " self.dim_y = dim_y\n", 84 | "\n", 85 | " self.dim_W1 = dim_W1\n", 86 | " self.dim_W2 = dim_W2\n", 87 | " self.dim_W3 = dim_W3\n", 88 | " self.dim_channel = dim_channel\n", 89 | "\n", 90 | " def build_model(self):\n", 91 | "\n", 92 | " Z = tf.placeholder(tf.float32, [None, self.dim_z])\n", 93 | " Y = tf.placeholder(tf.float32, [None, self.dim_y])\n", 94 | "\n", 95 | " image_real = tf.placeholder(tf.float32, [None]+self.image_shape)\n", 96 | " image_gen = self.generate(Z, Y)\n", 97 | "\n", 98 | " raw_real = self.discriminate(image_real, Y, False)\n", 99 | " raw_gen = self.discriminate(image_gen, Y, True)\n", 100 | "\n", 101 | " discrim_cost_real = 
match(raw_real, tf.ones_like(raw_real))\n", 102 | " discrim_cost_gen = match(raw_gen, tf.zeros_like(raw_gen))\n", 103 | " discrim_cost = discrim_cost_real + discrim_cost_gen\n", 104 | "\n", 105 | " gen_cost = match( raw_gen, tf.ones_like(raw_gen) )\n", 106 | "\n", 107 | " return Z, Y, image_real, image_gen, discrim_cost, gen_cost\n", 108 | "\n", 109 | " def create_conv2d(self, input, filters, kernel_size, name):\n", 110 | " layer = tf.layers.conv2d(\n", 111 | " inputs=input,\n", 112 | " filters=filters,\n", 113 | " kernel_size=kernel_size,\n", 114 | " strides=[2,2],\n", 115 | " name=\"Conv2d_\" + name,\n", 116 | " padding=\"SAME\")\n", 117 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n", 118 | " return layer\n", 119 | "\n", 120 | " def create_conv2d_transpose(self, input, filters, kernel_size, name, with_batch_norm):\n", 121 | " layer = tf.layers.conv2d_transpose(\n", 122 | " inputs=input,\n", 123 | " filters=filters,\n", 124 | " kernel_size=kernel_size,\n", 125 | " strides=[2,2],\n", 126 | " name=\"Conv2d_\" + name,\n", 127 | " padding=\"SAME\")\n", 128 | " if with_batch_norm:\n", 129 | " layer = batchnormalize(layer)\n", 130 | " layer = tf.nn.relu(layer)\n", 131 | " return layer\n", 132 | "\n", 133 | " def create_dense(self, input, units, name, leaky):\n", 134 | " layer = tf.layers.dense(\n", 135 | " inputs=input,\n", 136 | " units=units,\n", 137 | " name=\"Dense\" + name,\n", 138 | " )\n", 139 | " layer = batchnormalize(layer)\n", 140 | " if leaky:\n", 141 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n", 142 | " else:\n", 143 | " layer = tf.nn.relu(layer, name=\"RELU_\" + name)\n", 144 | " return layer\n", 145 | "\n", 146 | " def discriminate(self, image, Y, reuse=False):\n", 147 | " with tf.variable_scope('discriminate', reuse=reuse):\n", 148 | " \n", 149 | " batch_size = Y.get_shape()[0]\n", 150 | " \n", 151 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n", 152 | " X = tf.concat(axis=3, values=[image, 
yb*tf.ones([1, 28, 28, self.dim_y])])\n", 153 | " \n", 154 | " h1 = self.create_conv2d(X, self.dim_W3, 5, \"Layer1\")\n", 155 | " h1 = tf.concat(axis=3, values=[h1, yb*tf.ones([1, 14, 14, self.dim_y])])\n", 156 | " \n", 157 | " h2 = self.create_conv2d(h1, self.dim_W2, 5, \"Layer2\")\n", 158 | " h2 = tf.reshape(h2, tf.stack([-1, 7*7*128]))\n", 159 | " h2 = tf.concat(axis=1, values=[h2, Y])\n", 160 | " \n", 161 | " h3 = self.create_dense(h2, self.dim_W1, \"Layer3\", True)\n", 162 | " h3 = tf.concat(axis=1, values=[h3, Y])\n", 163 | " \n", 164 | " h4 = self.create_dense(h3, 1, \"Layer4\", True)\n", 165 | " return h4\n", 166 | "\n", 167 | " def generate(self, Z, Y, reuse=False):\n", 168 | " with tf.variable_scope('generate', reuse=reuse):\n", 169 | "\n", 170 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n", 171 | " Z = tf.concat(axis=1, values=[Z,Y])\n", 172 | " h1 = self.create_dense(Z, self.dim_W1, \"Layer1\", False)\n", 173 | " h1 = tf.concat(axis=1, values=[h1, Y])\n", 174 | " h2 = self.create_dense(h1, self.dim_W2*7*7, \"Layer2\", False)\n", 175 | " h2 = tf.reshape(h2, tf.stack([-1,7,7,self.dim_W2]))\n", 176 | " h2 = tf.concat(axis=3, values=[h2, yb*tf.ones([1, 7, 7, self.dim_y])])\n", 177 | "\n", 178 | " h3 = self.create_conv2d_transpose(h2, self.dim_W3, 5, \"Layer3\", True)\n", 179 | " h3 = tf.concat(axis=3, values=[h3, yb*tf.ones([1, 14,14,self.dim_y])] )\n", 180 | "\n", 181 | " h4 = self.create_conv2d_transpose(h3, self.dim_channel, 7, \"Layer4\", False)\n", 182 | " x = tf.nn.sigmoid(h4)\n", 183 | " return x" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "We add 2 helper functions, one for transforming our data to one-hot encoding (without using Tensorflow, we could use it instead) and one to plot and save our sampled images." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "import imageio\n", 200 | "import numpy as np\n", 201 | "from matplotlib import pyplot as plt\n", 202 | "%matplotlib inline\n", 203 | "\n", 204 | "def one_hot(X, n):\n", 205 | " X = np.asarray(X).flatten()\n", 206 | " Xoh = np.zeros((len(X), n))\n", 207 | " Xoh[np.arange(len(X)), X] = 1.\n", 208 | " return Xoh\n", 209 | "\n", 210 | "def save_visualization(X, nh_nw, save_path='./sample.jpg'):\n", 211 | " h,w = X.shape[1], X.shape[2]\n", 212 | " img = np.zeros((h * nh_nw[0], w * nh_nw[1], 3))\n", 213 | "\n", 214 | " for n,x in enumerate(X):\n", 215 | " j = n // nh_nw[1]\n", 216 | " i = n % nh_nw[1]\n", 217 | " img[j*h:j*h+h, i*w:i*w+w, :] = x / 255\n", 218 | "\n", 219 | " imageio.imwrite(save_path, img)\n", 220 | " plt.imshow(img)\n", 221 | " plt.show()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "Our hyperparameters and our data" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "import os\n", 238 | "import numpy as np\n", 239 | "\n", 240 | "n_epochs = 10\n", 241 | "learning_rate = 0.0002\n", 242 | "batch_size = 128\n", 243 | "image_shape = [28,28,1]\n", 244 | "dim_z = 10\n", 245 | "dim_y = 10\n", 246 | "dim_W1 = 1024\n", 247 | "dim_W2 = 128\n", 248 | "dim_W3 = 64\n", 249 | "dim_channel = 1\n", 250 | "\n", 251 | "visualize_dim=196\n", 252 | "\n", 253 | "from sklearn.datasets import fetch_mldata\n", 254 | "mnist = fetch_mldata('MNIST original')\n", 255 | "mnist.data.shape = (-1, 28, 28)\n", 256 | "mnist.data = mnist.data.astype(np.float32).reshape( [-1, 28, 28, 1]) / 255.\n", 257 | "mnist.num_examples = len(mnist.data)\n", 258 | "mnist.target = one_hot(mnist.target.astype(np.int8), dim_y)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": 
{}, 264 | "source": [ 265 | "Let's generate some images!" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "tf.reset_default_graph()\n", 275 | "dcgan_model = DCGAN(\n", 276 | " image_shape=image_shape,\n", 277 | " dim_z=dim_z,\n", 278 | " dim_W1=dim_W1,\n", 279 | " dim_W2=dim_W2,\n", 280 | " dim_W3=dim_W3,\n", 281 | " )\n", 282 | "Z_tf, Y_tf, image_tf, image_tf_sample, d_cost_tf, g_cost_tf, = dcgan_model.build_model()\n", 283 | "\n", 284 | "discrim_vars = list(filter(lambda x: x.name.startswith('discr'), tf.trainable_variables()))\n", 285 | "gen_vars = list(filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()))\n", 286 | "\n", 287 | "train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)\n", 288 | "train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)\n", 289 | "\n", 290 | "Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim,dim_z))\n", 291 | "Y_np_sample = one_hot( np.random.randint(10, size=[visualize_dim]), dim_y)\n", 292 | "\n", 293 | "step = 1000\n", 294 | "\n", 295 | "with tf.Session() as sess:\n", 296 | " sess.run(tf.global_variables_initializer())\n", 297 | " for epoch in range(n_epochs):\n", 298 | " permut = np.random.permutation(mnist.num_examples)\n", 299 | " trX = mnist.data[permut]\n", 300 | " trY = mnist.target[permut]\n", 301 | " Z = np.random.uniform(-1, 1, size=[mnist.num_examples, dim_z]).astype(np.float32)\n", 302 | "\n", 303 | " print(\"epoch: %i\" % epoch)\n", 304 | " for j in range(0, mnist.num_examples, batch_size):\n", 305 | " if j % step == 0:\n", 306 | " print(\" batch: %i\" % j)\n", 307 | "\n", 308 | " batch = permut[j:j+batch_size]\n", 309 | "\n", 310 | " Xs = trX[batch]\n", 311 | " Ys = trY[batch]\n", 312 | " Zs = Z[batch]\n", 313 | "\n", 314 | " if (j / batch_size) % 2 == 0:\n", 315 | " 
sess.run(train_op_discrim,\n", 316 | " feed_dict={\n", 317 | " Z_tf:Zs,\n", 318 | " Y_tf:Ys,\n", 319 | " image_tf:Xs\n", 320 | " })\n", 321 | " else:\n", 322 | " sess.run(train_op_gen,\n", 323 | " feed_dict={\n", 324 | " Z_tf:Zs,\n", 325 | " Y_tf:Ys\n", 326 | " })\n", 327 | "\n", 328 | " if j % step == 0:\n", 329 | " generated_samples = sess.run(\n", 330 | " image_tf_sample,\n", 331 | " feed_dict={\n", 332 | " Z_tf:Z_np_sample,\n", 333 | " Y_tf:Y_np_sample\n", 334 | " })\n", 335 | " generated_samples = generated_samples * 255\n", 336 | " save_visualization(generated_samples, (7,28), save_path='./B09124_11_sample_%03d_%04d.jpg' % (epoch, j / step))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.6.5" 364 | } 365 | }, 366 | "nbformat": 4, 367 | "nbformat_minor": 2 368 | } 369 | -------------------------------------------------------------------------------- /Chapter12/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p AnimTransDistr 4 | cd AnimTransDistr 5 | curl -O http://vision.stanford.edu/Datasets/AnimTransDistr.rar 6 | unrar x AnimTransDistr.rar 7 | # The following file is a weird file: 8 | rm Anims/104034.jpg 9 | -------------------------------------------------------------------------------- /Chapter12/forest.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/forest.jpeg -------------------------------------------------------------------------------- /Chapter12/scene00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/scene00.jpg -------------------------------------------------------------------------------- /Chapter13/chapter_13.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reinforcement learning with Tensorflow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing.\n", 15 | "\n", 16 | "It is made available under the MIT License.\n", 17 | "\n", 18 | "All code examples use Python in version..." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import sys\n", 28 | "sys.version" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Utility functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import os\n", 45 | "\n", 46 | "CHART_DIR = \"charts\"\n", 47 | "if not os.path.exists(CHART_DIR):\n", 48 | " os.mkdir(CHART_DIR)\n", 49 | "\n", 50 | "def save_png(name):\n", 51 | " fn = 'B09124_13_%s.png'%name # please ignore, it just helps our publisher :-)\n", 52 | " plt.savefig(os.path.join(CHART_DIR, fn), bbox_inches=\"tight\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Simple text games" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import gym\n", 69 | "import numpy as np\n", 70 | "\n", 71 | "env = gym.make('FrozenLake-v0')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Estimating the Q function the old fashion way" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "Let's make a table with some Q values for this environment" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# Start with an empty table\n", 95 | "Q = np.zeros((env.observation_space.n, env.action_space.n))\n", 96 | "# Set learning hyperparameters\n", 97 | "lr = .8\n", 98 | "y = .95\n", 99 | "num_episodes = 2000\n", 100 | "\n", 101 | "# Let's run!\n", 102 | "for i in range(num_episodes):\n", 103 | " # Reset environment and get first new observation (top left)\n", 104 | " s = env.reset()\n", 105 | " # Do 100 iterations to update the 
table\n", 106 | " for i in range(100):\n", 107 | " # Choose an action by picking the max of the table + additional random noise ponderated by the episode\n", 108 | " a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)//(i+1))\n", 109 | " # Get new state and reward from environment after chosen step \n", 110 | " s1, r, d,_ = env.step(a)\n", 111 | " # Update Q-Table with new knowledge\n", 112 | " Q[s,a] = Q[s,a] + lr*(r + y*np.max(Q[s1,:]) - Q[s,a])\n", 113 | " s = s1\n", 114 | " if d == True:\n", 115 | " break" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "print(\"Final Q-Table Values\")\n", 125 | "print(Q)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Test games with TF" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "import random\n", 142 | "import tensorflow as tf\n", 143 | "import matplotlib.pyplot as plt\n", 144 | "%matplotlib inline" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Let's create a new network." 
152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "y = 0.99\n", 161 | "e = 0.1 # 1 in 10 samples, we choose a new action for the network\n", 162 | "num_episodes = 2000\n", 163 | "learning_rate = 0.1" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "tf.reset_default_graph()\n", 173 | "\n", 174 | "# A simple one layer network\n", 175 | "inputs = tf.placeholder(shape=[None, 16], dtype=tf.float32, name=\"input\")\n", 176 | "Qout = tf.layers.dense(\n", 177 | " inputs=inputs,\n", 178 | " units=4,\n", 179 | " use_bias=False,\n", 180 | " name=\"dense\",\n", 181 | " kernel_initializer=tf.random_uniform_initializer(minval=0, maxval=.0125)\n", 182 | ")\n", 183 | "predict = tf.argmax(Qout, 1)\n", 184 | "\n", 185 | "# Our optimizer will try to optimize \n", 186 | "nextQ = tf.placeholder(shape=[None, 4], dtype=tf.float32, name=\"target\")\n", 187 | "loss = tf.reduce_sum(tf.square(nextQ - Qout))\n", 188 | "\n", 189 | "trainer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 190 | "updateModel = trainer.minimize(loss)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "We can now train the network, and check that it will get more and more successes as the training progresses." 
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# To keep track of our games and our results\n", 207 | "jList = []\n", 208 | "rList = []\n", 209 | "with tf.Session() as sess:\n", 210 | " sess.run(tf.global_variables_initializer())\n", 211 | "\n", 212 | " for i in range(num_episodes):\n", 213 | " s = env.reset()\n", 214 | " rAll = 0\n", 215 | " \n", 216 | " for j in range(100):\n", 217 | " a, targetQ = sess.run([predict, Qout], feed_dict={inputs:np.identity(16)[s:s+1]})\n", 218 | " # We randomly choose a new state that we may have not encountered before\n", 219 | " if np.random.rand(1) < e:\n", 220 | " a[0] = env.action_space.sample()\n", 221 | "\n", 222 | " s1, r, d, _ = env.step(a[0])\n", 223 | " \n", 224 | " # Obtain the Q' values by feeding the new state through our network\n", 225 | " Q1 = sess.run(Qout, feed_dict={inputs:np.identity(16)[s1:s1+1]})\n", 226 | " # Obtain maxQ' and set our target value for chosen action.\n", 227 | " targetQ[0, a[0]] = r + y*np.max(Q1)\n", 228 | " \n", 229 | " # Train our network using target and predicted Q values\n", 230 | " sess.run(updateModel, feed_dict={inputs:np.identity(16)[s:s+1], nextQ:targetQ})\n", 231 | " rAll += r\n", 232 | " s = s1\n", 233 | " if d == True:\n", 234 | " # Reduce chance of random action as we train the model.\n", 235 | " e = 1 / ((i // 50) + 10)\n", 236 | " break\n", 237 | " jList.append(j)\n", 238 | " rList.append(rAll)\n", 239 | "print(\"Percent of succesful episodes: %f%%\" % (sum(rList) / num_episodes))" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "We now display the evolution of the reward with each episode" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "from scipy.signal import lfilter\n", 256 | "\n", 257 | 
"plt.plot(lfilter(np.ones(20)/20, [1], rList))\n", 258 | "save_png(\"reward\")" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "We can also see that the survival increases, even if we take suboptimal paths:" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "plt.plot(jList)\n", 275 | "save_png(\"length\")" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## Atari games" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "The code here was inspired by several tutorials and courses online:\n", 290 | "* https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26\n", 291 | "* https://github.com/tokb23/dqn\n", 292 | "* https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "We can now design a network that can tackle more or less any of the Atari games available on the gym platform." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "import gym\n", 309 | "\n", 310 | "import os\n", 311 | "import six\n", 312 | "import numpy as np\n", 313 | "import tensorflow as tf\n", 314 | "import random\n", 315 | "import itertools\n", 316 | "from collections import deque, namedtuple\n", 317 | "\n", 318 | "CHART_DIR = \"charts\"\n", 319 | "if not os.path.exists(CHART_DIR):\n", 320 | " os.mkdir(CHART_DIR)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "We need a few helper functions, one to preprocess our images and shrink them and two others that will transpose the data. 
The reason is that we use the past images as additional channels, so the axis order is wrong." 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "def to_grayscale(img):\n", 337 | " return np.mean(img, axis=2).astype(np.uint8)\n", 338 | "\n", 339 | "def downsample(img):\n", 340 | " return img[::2, ::2]\n", 341 | "\n", 342 | "def preprocess(img):\n", 343 | " return to_grayscale(downsample(img))[None,:,:]\n", 344 | "\n", 345 | "def adapt_state(state):\n", 346 | " return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)]\n", 347 | "\n", 348 | "def adapt_batch_state(state):\n", 349 | " return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0\n", 350 | "\n", 351 | "def get_initial_state(frame):\n", 352 | " processed_frame = preprocess(frame)\n", 353 | " state = [processed_frame for _ in range(state_length)]\n", 354 | " return np.concatenate(state)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "We add a bunch of hyperparameters and constants" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "\n", 371 | "env_name = \"Breakout-v4\"\n", 372 | "\n", 373 | "width = 80 # Resized frame width\n", 374 | "height = 105 # Resized frame height\n", 375 | "\n", 376 | "n_episodes = 12000 # Number of runs for the agent\n", 377 | "state_length = 4 # Number of most frames we input to the network\n", 378 | "\n", 379 | "gamma = 0.99 # Discount factor\n", 380 | "\n", 381 | "exploration_steps = 1000000 # During all these steps, we progressively lower epsilon\n", 382 | "initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy\n", 383 | "final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy\n", 384 | "\n", 385 | "initial_random_search = 20000 # Number of steps to populate the replay memory before training starts\n", 386 
| "replay_memory_size = 400000 # Number of states we keep for training\n", 387 | "batch_size = 32 # Batch size\n", 388 | "network_update_interval = 10000 # The frequency with which the target network is updated\n", 389 | "train_skips = 4 # The agent selects 4 actions between successive updates\n", 390 | "\n", 391 | "learning_rate = 0.00025 # Learning rate used by RMSProp\n", 392 | "momentum = 0.95 # momentum used by RMSProp\n", 393 | "min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update\n", 394 | "\n", 395 | "network_path = 'saved_networks/' + env_name\n", 396 | "tensorboard_path = 'summary/' + env_name\n", 397 | "save_interval = 300000 # The frequency with which the network is saved" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "We use a class to train, save and restore our network. We will use one instance for the Q network and another one for the target network.\n", 405 | "get_trained_action() will be the method used to get a new action from the network." 
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "class Estimator():\n", 415 | " \"\"\"Q-Value Estimator neural network.\n", 416 | " This network is used for both the Q-Network and the Target Network.\n", 417 | " \"\"\"\n", 418 | "\n", 419 | " def __init__(self, env, scope=\"estimator\", summaries_dir=None):\n", 420 | " self.scope = scope\n", 421 | " self.num_actions = env.action_space.n\n", 422 | " self.epsilon = initial_epsilon\n", 423 | " self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps\n", 424 | " \n", 425 | " # Writes Tensorboard summaries to disk\n", 426 | " self.summary_writer = None\n", 427 | " with tf.variable_scope(scope):\n", 428 | " # Build the graph\n", 429 | " self.build_model()\n", 430 | " if summaries_dir:\n", 431 | " summary_dir = os.path.join(summaries_dir, \"summaries_%s\" % scope)\n", 432 | " if not os.path.exists(summary_dir):\n", 433 | " os.makedirs(summary_dir)\n", 434 | " self.summary_writer = tf.summary.FileWriter(summary_dir)\n", 435 | "\n", 436 | " def build_model(self):\n", 437 | " \"\"\"\n", 438 | " Builds the Tensorflow graph.\n", 439 | " \"\"\"\n", 440 | " self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name=\"X\")\n", 441 | " # The TD target value\n", 442 | " self.y = tf.placeholder(shape=[None], dtype=tf.float32, name=\"y\")\n", 443 | " # Integer id of which action was selected\n", 444 | " self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name=\"actions\")\n", 445 | "\n", 446 | " model = tf.keras.Sequential(self.scope)\n", 447 | " model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name=\"Layer1\"))\n", 448 | " model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name=\"Layer2\"))\n", 449 | " 
model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name=\"Layer3\"))\n", 450 | " model.add(tf.keras.layers.Flatten(name=\"Flatten\"))\n", 451 | " model.add(tf.keras.layers.Dense(512, activation='relu', name=\"Layer4\"))\n", 452 | " model.add(tf.keras.layers.Dense(self.num_actions, name=\"Output\"))\n", 453 | "\n", 454 | " self.predictions = model(self.X)\n", 455 | "\n", 456 | " a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0)\n", 457 | " q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1)\n", 458 | " \n", 459 | " # Calculate the loss\n", 460 | " self.losses = tf.squared_difference(self.y, q_value)\n", 461 | " self.loss = tf.reduce_mean(self.losses)\n", 462 | "\n", 463 | " # Optimizer Parameters from original paper\n", 464 | " self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient)\n", 465 | " self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step())\n", 466 | "\n", 467 | " # Summaries for Tensorboard\n", 468 | " self.summaries = tf.summary.merge([\n", 469 | " tf.summary.scalar(\"loss\", self.loss),\n", 470 | " tf.summary.histogram(\"loss_hist\", self.losses),\n", 471 | " tf.summary.histogram(\"q_values_hist\", self.predictions),\n", 472 | " tf.summary.scalar(\"max_q_value\", tf.reduce_max(self.predictions))\n", 473 | " ])\n", 474 | "\n", 475 | "\n", 476 | " def predict(self, sess, s):\n", 477 | " return sess.run(self.predictions, { self.X: s })\n", 478 | "\n", 479 | " def update(self, sess, s, a, y):\n", 480 | " feed_dict = { self.X: s, self.y: y, self.actions: a }\n", 481 | " summaries, global_step, _, loss = sess.run(\n", 482 | " [self.summaries, tf.train.get_global_step(), self.train_op, self.loss],\n", 483 | " feed_dict)\n", 484 | " if self.summary_writer:\n", 485 | " self.summary_writer.add_summary(summaries, global_step)\n", 486 | " return loss\n", 487 | "\n", 488 | " def 
get_action(self, sess, state):\n", 489 | " if self.epsilon >= random.random():\n", 490 | " action = random.randrange(self.num_actions)\n", 491 | " else:\n", 492 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n", 493 | "\n", 494 | " # Decay epsilon over time\n", 495 | " if self.epsilon > final_epsilon:\n", 496 | " self.epsilon -= self.epsilon_step\n", 497 | "\n", 498 | " return action\n", 499 | "\n", 500 | " def get_trained_action(self, state):\n", 501 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n", 502 | " return action" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "We also create a function to copy parameters from one network to the other, a function to create an initial clean state as well as a function to create the per-episode scalar summary reports." 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "def copy_model_parameters(estimator1, estimator2):\n", 519 | " \"\"\"\n", 520 | " Copies the model parameters of one estimator to another.\n", 521 | " Args:\n", 522 | " estimator1: Estimator to copy the parameters from\n", 523 | " estimator2: Estimator to copy the parameters to\n", 524 | " \"\"\"\n", 525 | " e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n", 526 | " e1_params = sorted(e1_params, key=lambda v: v.name)\n", 527 | " e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n", 528 | " e2_params = sorted(e2_params, key=lambda v: v.name)\n", 529 | "\n", 530 | " update_ops = []\n", 531 | " for e1_v, e2_v in zip(e1_params, e2_params):\n", 532 | " op = e2_v.assign(e1_v)\n", 533 | " update_ops.append(op)\n", 534 | "\n", 535 | " return update_ops\n", 536 | "\n", 537 | "def create_memory(env):\n", 538 | " # Populate the replay memory with initial experience \n", 539 | " replay_memory = 
[]\n", 540 | " \n", 541 | " frame = env.reset()\n", 542 | " state = get_initial_state(frame)\n", 543 | "\n", 544 | " for i in range(replay_memory_init_size):\n", 545 | " action = np.random.choice(np.arange(env.action_space.n))\n", 546 | " frame, reward, done, _ = env.step(action)\n", 547 | " \n", 548 | " next_state = np.append(state[1:, :, :], preprocess(frame), axis=0)\n", 549 | " replay_memory.append(Transition(state, action, reward, next_state, done))\n", 550 | " if done:\n", 551 | " frame = env.reset()\n", 552 | " state = get_initial_state(frame)\n", 553 | " else:\n", 554 | " state = next_state\n", 555 | " \n", 556 | " return replay_memory\n", 557 | "\n", 558 | "\n", 559 | "def setup_summary():\n", 560 | " with tf.variable_scope(\"episode\"):\n", 561 | " episode_total_reward = tf.Variable(0., name=\"EpisodeTotalReward\")\n", 562 | " tf.summary.scalar('Total Reward', episode_total_reward)\n", 563 | " episode_avg_max_q = tf.Variable(0., name=\"EpisodeAvgMaxQ\")\n", 564 | " tf.summary.scalar('Average Max Q', episode_avg_max_q)\n", 565 | " episode_duration = tf.Variable(0., name=\"EpisodeDuration\")\n", 566 | " tf.summary.scalar('Duration', episode_duration)\n", 567 | " episode_avg_loss = tf.Variable(0., name=\"EpisodeAverageLoss\")\n", 568 | " tf.summary.scalar('Average Loss', episode_avg_loss)\n", 569 | " summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]\n", 570 | " summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]\n", 571 | " update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]\n", 572 | " summary_op = tf.summary.merge_all(scope=\"episode\")\n", 573 | " return summary_placeholders, update_ops, summary_op" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "We can now train our network (and save some final images from the trained network)" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 
585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "from tqdm import tqdm\n", 590 | "\n", 591 | "env = gym.make(env_name)\n", 592 | "tf.reset_default_graph()\n", 593 | "\n", 594 | "# Create a glboal step variable\n", 595 | "global_step = tf.Variable(0, name='global_step', trainable=False)\n", 596 | "\n", 597 | "# Create estimators\n", 598 | "q_estimator = Estimator(env, scope=\"q\", summaries_dir=tensorboard_path)\n", 599 | "target_estimator = Estimator(env, scope=\"target_q\")\n", 600 | "\n", 601 | "copy_model = copy_model_parameters(q_estimator, target_estimator)\n", 602 | "\n", 603 | "summary_placeholders, update_ops, summary_op = setup_summary()\n", 604 | "\n", 605 | "# The replay memory\n", 606 | "replay_memory = create_memory(env)\n", 607 | "\n", 608 | "with tf.Session() as sess:\n", 609 | " sess.run(tf.global_variables_initializer())\n", 610 | "\n", 611 | " q_estimator.summary_writer.add_graph(sess.graph)\n", 612 | "\n", 613 | " saver = tf.train.Saver()\n", 614 | " # Load a previous checkpoint if we find one\n", 615 | " latest_checkpoint = tf.train.latest_checkpoint(network_path)\n", 616 | " if latest_checkpoint:\n", 617 | " print(\"Loading model checkpoint %s...\\n\" % latest_checkpoint)\n", 618 | " saver.restore(sess, latest_checkpoint)\n", 619 | "\n", 620 | " total_t = sess.run(tf.train.get_global_step())\n", 621 | "\n", 622 | " for episode in tqdm(range(n_episodes)):\n", 623 | " if total_t % save_interval == 0:\n", 624 | " # Save the current checkpoint\n", 625 | " saver.save(tf.get_default_session(), network_path)\n", 626 | "\n", 627 | " frame = env.reset()\n", 628 | " state = get_initial_state(frame)\n", 629 | "\n", 630 | " total_reward = 0\n", 631 | " total_loss = 0\n", 632 | " total_q_max = 0\n", 633 | "\n", 634 | " for duration in itertools.count(): \n", 635 | " # Maybe update the target estimator\n", 636 | " if total_t % network_update_interval == 0:\n", 637 | " sess.run(copy_model)\n", 638 | "\n", 639 
| " action = q_estimator.get_action(sess, state)\n", 640 | " frame, reward, terminal, _ = env.step(action)\n", 641 | "\n", 642 | " processed_frame = preprocess(frame)\n", 643 | " next_state = np.append(state[1:, :, :], processed_frame, axis=0)\n", 644 | "\n", 645 | " reward = np.clip(reward, -1, 1)\n", 646 | " replay_memory.append(Transition(state, action, reward, next_state, terminal))\n", 647 | " if len(replay_memory) > replay_memory_size:\n", 648 | " replay_memory.popleft()\n", 649 | "\n", 650 | " samples = random.sample(replay_memory, batch_size)\n", 651 | " states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))\n", 652 | "\n", 653 | " # Calculate q values and targets (Double DQN)\n", 654 | " adapted_state = adapt_batch_state(next_states_batch)\n", 655 | "\n", 656 | " q_values_next = q_estimator.predict(sess, adapted_state)\n", 657 | " best_actions = np.argmax(q_values_next, axis=1)\n", 658 | " q_values_next_target = target_estimator.predict(sess, adapted_state)\n", 659 | " targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions]\n", 660 | "\n", 661 | " # Perform gradient descent update\n", 662 | " states_batch = adapt_batch_state(states_batch)\n", 663 | " loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)\n", 664 | "\n", 665 | " total_q_max += np.max(q_values_next)\n", 666 | " total_loss += loss\n", 667 | " total_t += 1\n", 668 | " total_reward += reward\n", 669 | " if terminal:\n", 670 | " break\n", 671 | "\n", 672 | " stats = [total_reward, total_q_max / duration, duration, total_loss / duration]\n", 673 | " for i in range(len(stats)):\n", 674 | " sess.run(update_ops[i], feed_dict={\n", 675 | " summary_placeholders[i]: float(stats[i])\n", 676 | " })\n", 677 | " summary_str = sess.run(summary_op, )\n", 678 | " q_estimator.summary_writer.add_summary(summary_str, episode)\n", 679 | "\n", 680 | " 
env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode)))\n", 681 | "\n", 682 | " # Save the last checkpoint\n", 683 | " saver.save(tf.get_default_session(), network_path)" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [] 692 | } 693 | ], 694 | "metadata": { 695 | "kernelspec": { 696 | "display_name": "Python 3", 697 | "language": "python", 698 | "name": "python3" 699 | }, 700 | "language_info": { 701 | "codemirror_mode": { 702 | "name": "ipython", 703 | "version": 3 704 | }, 705 | "file_extension": ".py", 706 | "mimetype": "text/x-python", 707 | "name": "python", 708 | "nbconvert_exporter": "python", 709 | "pygments_lexer": "ipython3", 710 | "version": "3.6.5" 711 | } 712 | }, 713 | "nbformat": 4, 714 | "nbformat_minor": 2 715 | } 716 | -------------------------------------------------------------------------------- /Chapter13/simple_breakout.py: -------------------------------------------------------------------------------- 1 | # Import the gym module 2 | import gym 3 | 4 | # Create a breakout environment 5 | env = gym.make('BreakoutDeterministic-v4') 6 | # Reset it, returns the starting frame 7 | frame = env.reset() 8 | # Render 9 | env.render() 10 | 11 | is_done = False 12 | while not is_done: 13 | # Perform a random action, returns the new frame, reward and whether the game is over 14 | frame, reward, is_done, _ = env.step(env.action_space.sample()) 15 | # Render 16 | env.render() 17 | -------------------------------------------------------------------------------- /Chapter13/tf_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Built by merging different Q examples available online 4 | 5 | import gym 6 | 7 | import os 8 | import six 9 | import numpy as np 10 | import tensorflow as tf 11 | import random 12 | import itertools 13 | from collections import deque, 
namedtuple 14 | 15 | CHART_DIR = "charts" 16 | if not os.path.exists(CHART_DIR): 17 | os.mkdir(CHART_DIR) 18 | 19 | env_name = "Breakout-v4" 20 | 21 | width = 80 # Resized frame width 22 | height = 105 # Resized frame height 23 | 24 | n_episodes = 12000 # Number of runs for the agent 25 | state_length = 4 # Number of most frames we input to the network 26 | 27 | gamma = 0.99 # Discount factor 28 | 29 | exploration_steps = 1000000 # During all these steps, we progressively lower epsilon 30 | initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy 31 | final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy 32 | 33 | replay_memory_init_size = 1000 # Number of steps to populate the replay memory before training starts 34 | replay_memory_size = 400000 # Number of states we keep for training 35 | batch_size = 32 # Batch size 36 | network_update_interval = 10000 # The frequency with which the target network is updated 37 | train_skips = 4 # The agent selects 4 actions between successive updates 38 | 39 | learning_rate = 0.00025 # Learning rate used by RMSProp 40 | momentum = 0.95 # momentum used by RMSProp 41 | min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update 42 | 43 | network_path = 'saved_networks/' + env_name 44 | tensorboard_path = 'summary/' + env_name 45 | save_interval = 300000 # The frequency with which the network is saved 46 | 47 | Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"]) 48 | 49 | def to_grayscale(img): 50 | return np.mean(img, axis=2).astype(np.uint8) 51 | 52 | def downsample(img): 53 | return img[::2, ::2] 54 | 55 | def preprocess(img): 56 | return to_grayscale(downsample(img))[None,:,:] 57 | 58 | def adapt_state(state): 59 | return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)] 60 | 61 | def adapt_batch_state(state): 62 | return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0 63 | 64 | def get_initial_state(frame): 65 | 
processed_frame = preprocess(frame) 66 | state = [processed_frame for _ in range(state_length)] 67 | return np.concatenate(state) 68 | 69 | class Estimator(): 70 | """Q-Value Estimator neural network. 71 | This network is used for both the Q-Network and the Target Network. 72 | """ 73 | 74 | def __init__(self, env, scope="estimator", summaries_dir=None): 75 | self.scope = scope 76 | self.num_actions = env.action_space.n 77 | self.epsilon = initial_epsilon 78 | self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps 79 | 80 | # Writes Tensorboard summaries to disk 81 | self.summary_writer = None 82 | with tf.variable_scope(scope): 83 | # Build the graph 84 | self.build_model() 85 | if summaries_dir: 86 | summary_dir = os.path.join(summaries_dir, "summaries_%s" % scope) 87 | if not os.path.exists(summary_dir): 88 | os.makedirs(summary_dir) 89 | self.summary_writer = tf.summary.FileWriter(summary_dir) 90 | 91 | def build_model(self): 92 | """ 93 | Builds the Tensorflow graph. 
94 | """ 95 | self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name="X") 96 | # The TD target value 97 | self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y") 98 | # Integer id of which action was selected 99 | self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name="actions") 100 | 101 | model = tf.keras.Sequential(name=self.scope) 102 | model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name="Layer1")) 103 | model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name="Layer2")) 104 | model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name="Layer3")) 105 | model.add(tf.keras.layers.Flatten(name="Flatten")) 106 | model.add(tf.keras.layers.Dense(512, activation='relu', name="Layer4")) 107 | model.add(tf.keras.layers.Dense(self.num_actions, name="Output")) 108 | 109 | self.predictions = model(self.X) 110 | 111 | a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0) 112 | q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1) 113 | 114 | # Calculate the loss 115 | self.losses = tf.squared_difference(self.y, q_value) 116 | self.loss = tf.reduce_mean(self.losses) 117 | 118 | # Optimizer Parameters from original paper 119 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient) 120 | self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step()) 121 | 122 | # Summaries for Tensorboard 123 | self.summaries = tf.summary.merge([ 124 | tf.summary.scalar("loss", self.loss), 125 | tf.summary.histogram("loss_hist", self.losses), 126 | tf.summary.histogram("q_values_hist", self.predictions), 127 | tf.summary.scalar("max_q_value", tf.reduce_max(self.predictions)) 128 | ]) 129 | 130 | 131 | def predict(self, sess, s): 132 | 
return sess.run(self.predictions, { self.X: s }) 133 | 134 | def update(self, sess, s, a, y): 135 | feed_dict = { self.X: s, self.y: y, self.actions: a } 136 | summaries, global_step, _, loss = sess.run( 137 | [self.summaries, tf.train.get_global_step(), self.train_op, self.loss], 138 | feed_dict) 139 | if self.summary_writer: 140 | self.summary_writer.add_summary(summaries, global_step) 141 | return loss 142 | 143 | def get_action(self, sess, state): 144 | if self.epsilon >= random.random(): 145 | action = random.randrange(self.num_actions) 146 | else: 147 | action = np.argmax(self.predict(sess, adapt_state(state))) 148 | 149 | # Decay epsilon over time 150 | if self.epsilon > final_epsilon: 151 | self.epsilon -= self.epsilon_step 152 | 153 | return action 154 | 155 | def get_trained_action(self, state): 156 | action = np.argmax(self.predict(sess, adapt_state(state))) 157 | return action 158 | 159 | def copy_model_parameters(estimator1, estimator2): 160 | """ 161 | Copies the model parameters of one estimator to another. 
162 | Args: 163 | estimator1: Estimator to copy the paramters from 164 | estimator2: Estimator to copy the parameters to 165 | """ 166 | e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)] 167 | e1_params = sorted(e1_params, key=lambda v: v.name) 168 | e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)] 169 | e2_params = sorted(e2_params, key=lambda v: v.name) 170 | 171 | update_ops = [] 172 | for e1_v, e2_v in zip(e1_params, e2_params): 173 | op = e2_v.assign(e1_v) 174 | update_ops.append(op) 175 | 176 | return update_ops 177 | 178 | def create_memory(env): 179 | # Populate the replay memory with initial experience 180 | replay_memory = [] 181 | 182 | frame = env.reset() 183 | state = get_initial_state(frame) 184 | 185 | for i in range(replay_memory_init_size): 186 | action = np.random.choice(np.arange(env.action_space.n)) 187 | frame, reward, done, _ = env.step(action) 188 | 189 | next_state = np.append(state[1:, :, :], preprocess(frame), axis=0) 190 | replay_memory.append(Transition(state, action, reward, next_state, done)) 191 | if done: 192 | frame = env.reset() 193 | state = get_initial_state(frame) 194 | else: 195 | state = next_state 196 | 197 | return replay_memory 198 | 199 | 200 | def setup_summary(): 201 | with tf.variable_scope("episode"): 202 | episode_total_reward = tf.Variable(0., name="EpisodeTotalReward") 203 | tf.summary.scalar('Total Reward', episode_total_reward) 204 | episode_avg_max_q = tf.Variable(0., name="EpisodeAvgMaxQ") 205 | tf.summary.scalar('Average Max Q', episode_avg_max_q) 206 | episode_duration = tf.Variable(0., name="EpisodeDuration") 207 | tf.summary.scalar('Duration', episode_duration) 208 | episode_avg_loss = tf.Variable(0., name="EpisodeAverageLoss") 209 | tf.summary.scalar('Average Loss', episode_avg_loss) 210 | summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss] 211 | summary_placeholders = 
[tf.placeholder(tf.float32) for _ in range(len(summary_vars))] 212 | update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))] 213 | summary_op = tf.summary.merge_all(scope="episode") 214 | return summary_placeholders, update_ops, summary_op 215 | 216 | 217 | if __name__ == "__main__": 218 | from tqdm import tqdm 219 | 220 | env = gym.make(env_name) 221 | tf.reset_default_graph() 222 | 223 | # Create a glboal step variable 224 | global_step = tf.Variable(0, name='global_step', trainable=False) 225 | 226 | # Create estimators 227 | q_estimator = Estimator(env, scope="q", summaries_dir=tensorboard_path) 228 | target_estimator = Estimator(env, scope="target_q") 229 | 230 | copy_model = copy_model_parameters(q_estimator, target_estimator) 231 | 232 | summary_placeholders, update_ops, summary_op = setup_summary() 233 | 234 | # The replay memory 235 | replay_memory = create_memory(env) 236 | 237 | with tf.Session() as sess: 238 | sess.run(tf.global_variables_initializer()) 239 | 240 | q_estimator.summary_writer.add_graph(sess.graph) 241 | 242 | saver = tf.train.Saver() 243 | # Load a previous checkpoint if we find one 244 | latest_checkpoint = tf.train.latest_checkpoint(network_path) 245 | if latest_checkpoint: 246 | print("Loading model checkpoint %s...\n" % latest_checkpoint) 247 | saver.restore(sess, latest_checkpoint) 248 | 249 | total_t = sess.run(tf.train.get_global_step()) 250 | 251 | for episode in tqdm(range(n_episodes)): 252 | if total_t % save_interval == 0: 253 | # Save the current checkpoint 254 | saver.save(tf.get_default_session(), network_path) 255 | 256 | frame = env.reset() 257 | state = get_initial_state(frame) 258 | 259 | total_reward = 0 260 | total_loss = 0 261 | total_q_max = 0 262 | 263 | for duration in itertools.count(): 264 | # Maybe update the target estimator 265 | if total_t % network_update_interval == 0: 266 | sess.run(copy_model) 267 | 268 | action = q_estimator.get_action(sess, state) 269 | frame, 
reward, terminal, _ = env.step(action) 270 | 271 | processed_frame = preprocess(frame) 272 | next_state = np.append(state[1:, :, :], processed_frame, axis=0) 273 | 274 | reward = np.clip(reward, -1, 1) 275 | replay_memory.append(Transition(state, action, reward, next_state, terminal)) 276 | if len(replay_memory) > replay_memory_size: 277 | replay_memory.popleft() 278 | 279 | samples = random.sample(replay_memory, batch_size) 280 | states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples)) 281 | 282 | # Calculate q values and targets (Double DQN) 283 | adapted_state = adapt_batch_state(next_states_batch) 284 | 285 | q_values_next = q_estimator.predict(sess, adapted_state) 286 | best_actions = np.argmax(q_values_next, axis=1) 287 | q_values_next_target = target_estimator.predict(sess, adapted_state) 288 | targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions] 289 | 290 | # Perform gradient descent update 291 | states_batch = adapt_batch_state(states_batch) 292 | loss = q_estimator.update(sess, states_batch, action_batch, targets_batch) 293 | 294 | total_q_max += np.max(q_values_next) 295 | total_loss += loss 296 | total_t += 1 297 | total_reward += reward 298 | if terminal: 299 | break 300 | 301 | stats = [total_reward, total_q_max / duration, duration, total_loss / duration] 302 | for i in range(len(stats)): 303 | sess.run(update_ops[i], feed_dict={ 304 | summary_placeholders[i]: float(stats[i]) 305 | }) 306 | summary_str = sess.run(summary_op, ) 307 | q_estimator.summary_writer.add_summary(summary_str, episode) 308 | 309 | env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode))) 310 | 311 | # Save the last checkpoint 312 | saver.save(tf.get_default_session(), network_path) 313 | -------------------------------------------------------------------------------- /Chapter14/README.rst: 
-------------------------------------------------------------------------------- 1 | ========== 2 | Chapter 14 3 | ========== 4 | 5 | Support code for *Chapter 14: Big(ger) Data* 6 | 7 | Data 8 | ---- 9 | 10 | This chapter relies only on the image dataset that is packaged with the 11 | repository at ``../SimpleImageDataset/``. 12 | 13 | Scripts 14 | ------- 15 | 16 | chapter.py 17 | Code as written in the book 18 | jugfile.py 19 | Example jugfile 20 | image-classification.py 21 | Jugfile implementation of image classification from Chapter 10 22 | 23 | setup-aws.txt 24 | Commands to setup Amazon WebServices machine 25 | run-jugfile.sh 26 | Wrapper script to run jug file on jugfile.py 27 | run-image-classification.sh 28 | Wrapper script to run jug file on image-classification.py 29 | -------------------------------------------------------------------------------- /Chapter14/chapter.py: -------------------------------------------------------------------------------- 1 | from jug import TaskGenerator 2 | from glob import glob 3 | import mahotas as mh 4 | @TaskGenerator 5 | def compute_texture(im): 6 | from features import texture 7 | imc = mh.imread(im) 8 | return texture(mh.colors.rgb2gray(imc)) 9 | 10 | @TaskGenerator 11 | def chist_file(fname): 12 | from features import chist 13 | im = mh.imread(fname) 14 | return chist(im) 15 | 16 | import numpy as np 17 | to_array = TaskGenerator(np.array) 18 | hstack = TaskGenerator(np.hstack) 19 | 20 | haralicks = [] 21 | chists = [] 22 | labels = [] 23 | 24 | # Change this variable to point to 25 | # the location of the dataset is on disk 26 | basedir = '../SimpleImageDataset/' 27 | # Use glob to get all the images 28 | images = glob('{}/*.jpg'.format(basedir)) 29 | 30 | for fname in sorted(images): 31 | haralicks.append(compute_texture(fname)) 32 | chists.append(chist_file(fname)) 33 | # The class is encoded in the filename as xxxx00.jpg 34 | labels.append(fname[:-len('00.jpg')]) 35 | 36 | haralicks = to_array(haralicks) 37 | 
chists = to_array(chists) 38 | labels = to_array(labels) 39 | 40 | @TaskGenerator 41 | def accuracy(features, labels): 42 | from sklearn.linear_model import LogisticRegression 43 | from sklearn.pipeline import Pipeline 44 | from sklearn.preprocessing import StandardScaler 45 | from sklearn import cross_validation 46 | 47 | clf = Pipeline([('preproc', StandardScaler()), 48 | ('classifier', LogisticRegression())]) 49 | cv = cross_validation.LeaveOneOut(len(features)) 50 | scores = cross_validation.cross_val_score( 51 | clf, features, labels, cv=cv) 52 | return scores.mean() 53 | scores_base = accuracy(haralicks, labels) 54 | scores_chist = accuracy(chists, labels) 55 | 56 | combined = hstack([chists, haralicks]) 57 | scores_combined = accuracy(combined, labels) 58 | 59 | @TaskGenerator 60 | def print_results(scores): 61 | with open('results.image.txt', 'w') as output: 62 | for k,v in scores: 63 | output.write('Accuracy [{}]: {:.1%}\n'.format( 64 | k, v.mean())) 65 | 66 | print_results([ 67 | ('base', scores_base), 68 | ('chists', scores_chist), 69 | ('combined' , scores_combined), 70 | ]) 71 | 72 | @TaskGenerator 73 | def compute_lbp(fname): 74 | from mahotas.features import lbp 75 | imc = mh.imread(fname) 76 | im = mh.colors.rgb2grey(imc) 77 | return lbp(im, radius=8, points=6) 78 | 79 | lbps = [] 80 | for fname in sorted(images): 81 | # the rest of the loop as before 82 | lbps.append(compute_lbp(fname)) 83 | lbps = to_array(lbps) 84 | 85 | scores_lbps = accuracy(lbps, labels) 86 | combined_all = hstack([chists, haralicks, lbps]) 87 | scores_combined_all = accuracy(combined_all, labels) 88 | 89 | print_results([ 90 | ('base', scores_base), 91 | ('chists', scores_chist), 92 | ('lbps', scores_lbps), 93 | ('combined' , scores_combined), 94 | ('combined_all' , scores_combined_all), 95 | ]) 96 | -------------------------------------------------------------------------------- /Chapter14/features.py: 
-------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import numpy as np 9 | import mahotas as mh 10 | 11 | 12 | def edginess_sobel(image): 13 | '''Measure the "edginess" of an image 14 | 15 | image should be a 2d numpy array (an image) 16 | 17 | Returns a floating point value which is higher the "edgier" the image is. 18 | 19 | ''' 20 | edges = mh.sobel(image, just_filter=True) 21 | edges = edges.ravel() 22 | return np.sqrt(np.dot(edges, edges)) 23 | 24 | def texture(im): 25 | '''Compute features for an image 26 | 27 | Parameters 28 | ---------- 29 | im : ndarray 30 | 31 | Returns 32 | ------- 33 | fs : ndarray 34 | 1-D array of features 35 | ''' 36 | im = im.astype(np.uint8) 37 | return mh.features.haralick(im).ravel() 38 | 39 | 40 | def color_histogram(im): 41 | '''Compute color histogram of input image 42 | 43 | Parameters 44 | ---------- 45 | im : ndarray 46 | should be an RGB image 47 | 48 | Returns 49 | ------- 50 | c : ndarray 51 | 1-D array of histogram values 52 | ''' 53 | 54 | # Downsample pixel values: 55 | im = im // 64 56 | 57 | # We can also implement the following by using np.histogramdd 58 | # im = im.reshape((-1,3)) 59 | # bins = [np.arange(5), np.arange(5), np.arange(5)] 60 | # hist = np.histogramdd(im, bins=bins)[0] 61 | # hist = hist.ravel() 62 | 63 | # Separate RGB channels: 64 | r,g,b = im.transpose((2,0,1)) 65 | 66 | pixels = 1 * r + 4 * g + 16 * b 67 | hist = np.bincount(pixels.ravel(), minlength=64) 68 | hist = hist.astype(float) 69 | return np.log1p(hist) 70 | 71 | -------------------------------------------------------------------------------- /Chapter14/image-classification.py: -------------------------------------------------------------------------------- 1 | # 
This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import mahotas as mh 9 | import numpy as np 10 | from glob import glob 11 | from jug import TaskGenerator 12 | 13 | # We need to use the `features` module from chapter 10. 14 | from sys import path 15 | path.append('../ch10') 16 | 17 | 18 | # This is the jug-enabled version of the script ``figure18.py`` in Chapter 10 19 | 20 | basedir = '../SimpleImageDataset/' 21 | 22 | @TaskGenerator 23 | def compute_texture(im): 24 | '''Compute features for an image 25 | 26 | Parameters 27 | ---------- 28 | im : str 29 | filepath for image to process 30 | 31 | Returns 32 | ------- 33 | fs : ndarray 34 | 1-D array of features 35 | ''' 36 | from features import texture 37 | imc = mh.imread(im) 38 | return texture(mh.colors.rgb2grey(imc)) 39 | 40 | @TaskGenerator 41 | def chist(fname): 42 | from features import color_histogram 43 | im = mh.imread(fname) 44 | return color_histogram(im) 45 | 46 | @TaskGenerator 47 | def compute_lbp(fname): 48 | from mahotas.features import lbp 49 | imc = mh.imread(fname) 50 | im = mh.colors.rgb2grey(imc) 51 | return lbp(im, radius=8, points=6) 52 | 53 | 54 | @TaskGenerator 55 | def accuracy(features, labels): 56 | from sklearn.linear_model import LogisticRegression 57 | from sklearn.pipeline import Pipeline 58 | from sklearn.preprocessing import StandardScaler 59 | from sklearn import cross_validation 60 | # We use logistic regression because it is very fast. 
61 | # Feel free to experiment with other classifiers 62 | clf = Pipeline([('preproc', StandardScaler()), 63 | ('classifier', LogisticRegression())]) 64 | cv = cross_validation.LeaveOneOut(len(features)) 65 | scores = cross_validation.cross_val_score( 66 | clf, features, labels, cv=cv) 67 | return scores.mean() 68 | 69 | 70 | @TaskGenerator 71 | def print_results(scores): 72 | with open('results.image.txt', 'w') as output: 73 | for k,v in scores: 74 | output.write('Accuracy (LOO x-val) with Logistic Regression [{0}]: {1:.1%}\n'.format( 75 | k, v.mean())) 76 | 77 | 78 | to_array = TaskGenerator(np.array) 79 | hstack = TaskGenerator(np.hstack) 80 | 81 | haralicks = [] 82 | chists = [] 83 | lbps = [] 84 | labels = [] 85 | 86 | # Use glob to get all the images 87 | images = glob('{0}/*.jpg'.format(basedir)) 88 | for fname in sorted(images): 89 | haralicks.append(compute_texture(fname)) 90 | chists.append(chist(fname)) 91 | lbps.append(compute_lbp(fname)) 92 | labels.append(fname[:-len('00.jpg')]) # The class is encoded in the filename as xxxx00.jpg 93 | 94 | haralicks = to_array(haralicks) 95 | chists = to_array(chists) 96 | lbps = to_array(lbps) 97 | labels = to_array(labels) 98 | 99 | scores_base = accuracy(haralicks, labels) 100 | scores_chist = accuracy(chists, labels) 101 | scores_lbps = accuracy(lbps, labels) 102 | 103 | combined = hstack([chists, haralicks]) 104 | scores_combined = accuracy(combined, labels) 105 | 106 | combined_all = hstack([chists, haralicks, lbps]) 107 | scores_combined_all = accuracy(combined_all, labels) 108 | 109 | print_results([ 110 | ('base', scores_base), 111 | ('chists', scores_chist), 112 | ('lbps', scores_lbps), 113 | ('combined' , scores_combined), 114 | ('combined_all' , scores_combined_all), 115 | ]) 116 | 117 | -------------------------------------------------------------------------------- /Chapter14/jugfile.py: -------------------------------------------------------------------------------- 1 | # This code is supporting 
material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from jug import TaskGenerator 9 | from time import sleep 10 | 11 | 12 | @TaskGenerator 13 | def double(x): 14 | sleep(4) 15 | return 2 * x 16 | 17 | 18 | @TaskGenerator 19 | def add(a, b): 20 | return a + b 21 | 22 | 23 | @TaskGenerator 24 | def print_final_result(oname, value): 25 | with open(oname, 'w') as output: 26 | output.write("Final result: {0}\n".format(value)) 27 | 28 | input = 2 29 | y = double(input) 30 | z = double(y) 31 | 32 | y2 = double(7) 33 | z2 = double(y2) 34 | print_final_result('output.txt', add(z, z2)) 35 | -------------------------------------------------------------------------------- /Chapter14/run-image-classification.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | jug execute image-classification.py 4 | -------------------------------------------------------------------------------- /Chapter14/run-jugfile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | jug execute 4 | 5 | -------------------------------------------------------------------------------- /Chapter14/setup-aws.txt: -------------------------------------------------------------------------------- 1 | sudo yum update 2 | sudo yum -y install python-devel python-pip numpy scipy python-matplotlib 3 | sudo yum -y install gcc-c++ 4 | sudo yum -y install git 5 | sudo pip-python install -U pip 6 | sudo pip install scikit-learn jug mahotas 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Building Machine Learning Systems with Python - Third edition 2 | 3 | Building Machine Learning Systems with Python - Third edition 4 | 5 | This is the code repository for [Building Machine Learning Systems with Python - Third edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-third-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788623223), published by Packt. 6 | 7 | **Explore machine learning and deep learning techniques for building intelligent systems using scikit-learn and TensorFlow** 8 | 9 | ## What is this book about? 10 | Machine learning allows systems to learn without being explicitly programmed. 
Python is one of the most popular languages used to develop machine learning applications which take advantage of its extensive library support. This third edition of Building Machine Learning Systems with Python addresses recent developments in the field, by covering the most used datasets and libraries to help you build practical machine learning systems. 11 | 12 | This book covers the following exciting features: 13 | * Build a classification system that can be applied to text, image, and sound 14 | * Employ Amazon Web Services (AWS) to run analysis on the cloud 15 | * Solve problems related to regression using TensorFlow 16 | * Recommend products to users based on their past purchases 17 | * Explore the steps required to add collaborative filtering using TensorFlow 18 | 19 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1788623223) today! 20 | 21 | https://www.packtpub.com/ 23 | 24 | ## Instructions and Navigations 25 | All of the code is organized into folders. For example, Chapter01. 26 | 27 | The code will look like the following: 28 | ``` 29 | def fetch_posts(fn): 30 | for line in open(fn, "r"): 31 | post_id, text = line.split("\t") 32 | yield int(post_id), text.strip() 33 | 34 | ``` 35 | 36 | **Following is what you need for this book:** 37 | Building Machine Learning Systems with Python is for data scientists, machine learning developers, and Python developers who want to learn how to build increasingly complex machine learning systems. You will use Python's machine learning capabilities to develop effective solutions. Prior knowledge of Python programming is expected. 38 | 39 | With the following software and hardware list you can run all code files present in the book (Chapter 1-14). 
40 | 41 | ### Software and Hardware List 42 | 43 | | Chapter | Software required | OS required | 44 | | -------- | ------------------------------------------------------| -----------------------------------| 45 | | 1-14 | Python 3, NumPy, SciPy, scikit-learn (latest version) | Ubuntu/Linux, macOS or Windows | 46 | 47 | 48 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://www.packtpub.com/sites/default/files/downloads/BuildingMachineLearningSystemswithPythonThirdedition_ColorImages.pdf). 49 | 50 | ### Related products 51 | * Mastering Machine Learning Algorithms [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-algorithms?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788621113) [[Amazon]](https://www.amazon.com/dp/1788621115) 52 | 53 | * Machine Learning Solutions [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-solutions?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788390040) [[Amazon]](https://www.amazon.com/dp/1788390040) 54 | 55 | ## Get to Know the Authors 56 | **Luis Pedro Coelho** 57 | is a computational biologist who analyzes DNA from microbial communities to characterize their behavior. He has also worked extensively in bioimage informatics―the application of machine learning techniques for the analysis of images of biological specimens. His main focus is on the processing and integration of large-scale datasets. He has a PhD from Carnegie Mellon University and has authored several scientific publications. In 2004, he began developing in Python and has contributed to several open source libraries. He is currently a faculty member at Fudan University in Shanghai. 
58 | 59 | **Willi Richert** 60 | has a PhD in machine learning/robotics, where he has used reinforcement learning, hidden Markov models, and Bayesian networks to let heterogeneous robots learn by imitation. Now at Microsoft, he is involved in various machine learning areas, such as deep learning, active learning, or statistical machine translation. Willi started as a child with BASIC on his Commodore 128. Later, he discovered Turbo Pascal, then Java, then C++—only to finally arrive at his true love: Python. 61 | 62 | **Matthieu Brucher** 63 | is a computer scientist who specializes in high-performance computing and computational modeling and currently works for JPMorgan in their quantitative research branch. He is also the lead developer of Audio ToolKit, a library for real-time audio signal processing. He has a PhD in machine learning and signals processing from the University of Strasbourg, two Master of Science degrees—one in digital electronics and signal processing and another in automation – from the University of Paris XI and Supelec, as well as a Master of Music degree from Bath Spa University. 64 | 65 | ## Other books by the authors 66 | * [Building Machine Learning Systems with Python](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781782161400) 67 | * [Building Machine Learning Systems with Python - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-second-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781784392772) 68 | 69 | 70 | ### Suggestions and Feedback 71 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 
72 | 73 | -------------------------------------------------------------------------------- /SimpleImageDataset/building00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building04.jpg 
-------------------------------------------------------------------------------- /SimpleImageDataset/building05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building09.jpg -------------------------------------------------------------------------------- 
/SimpleImageDataset/building10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building15.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building29.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene00.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene04.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene05.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene09.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene15.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene29.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text00.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text04.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text05.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text09.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text15.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text29.jpg -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: BMLS3 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - bottle=0.12.9=py36_0 7 | - jug=1.6.4=py_0 8 | - 
pyyaml=3.12=py36_1 9 | - yaml=0.1.6=0 10 | - bleach=1.5.0=py36_0 11 | - cairo=1.14.8=0 12 | - certifi=2016.2.28=py36_0 13 | - cycler=0.10.0=py36_0 14 | - dbus=1.10.20=0 15 | - decorator=4.1.2=py36_0 16 | - entrypoints=0.2.3=py36_0 17 | - expat=2.1.0=0 18 | - fontconfig=2.12.1=3 19 | - freetype=2.5.5=2 20 | - glib=2.50.2=1 21 | - graphviz=2.38.0=5 22 | - gst-plugins-base=1.8.0=0 23 | - gstreamer=1.8.0=0 24 | - harfbuzz=0.9.39=2 25 | - html5lib=0.9999999=py36_0 26 | - icu=54.1=0 27 | - ipykernel=4.6.1=py36_0 28 | - ipython=6.1.0=py36_0 29 | - ipython_genutils=0.2.0=py36_0 30 | - ipywidgets=6.0.0=py36_0 31 | - jbig=2.1=0 32 | - jedi=0.10.2=py36_2 33 | - jinja2=2.9.6=py36_0 34 | - jpeg=9b=0 35 | - jsonschema=2.6.0=py36_0 36 | - jupyter=1.0.0=py36_3 37 | - jupyter_client=5.1.0=py36_0 38 | - jupyter_console=5.2.0=py36_0 39 | - jupyter_core=4.3.0=py36_0 40 | - libffi=3.2.1=1 41 | - libgcc=5.2.0=0 42 | - libgfortran=3.0.0=1 43 | - libiconv=1.14=0 44 | - libpng=1.6.30=1 45 | - libsodium=1.0.10=0 46 | - libtiff=4.0.6=3 47 | - libtool=2.4.2=0 48 | - libxcb=1.12=1 49 | - libxml2=2.9.4=0 50 | - markupsafe=1.0=py36_0 51 | - matplotlib=2.0.2=np113py36_0 52 | - mistune=0.7.4=py36_0 53 | - mkl=2017.0.3=0 54 | - nbconvert=5.2.1=py36_0 55 | - nbformat=4.4.0=py36_0 56 | - notebook=5.0.0=py36_0 57 | - numpy=1.13.1=py36_0 58 | - openssl=1.0.2l=0 59 | - pandocfilters=1.4.2=py36_0 60 | - pango=1.40.3=1 61 | - path.py=10.3.1=py36_0 62 | - pcre=8.39=1 63 | - pexpect=4.2.1=py36_0 64 | - pickleshare=0.7.4=py36_0 65 | - pip=9.0.1=py36_1 66 | - pixman=0.34.0=0 67 | - prompt_toolkit=1.0.15=py36_0 68 | - ptyprocess=0.5.2=py36_0 69 | - pygments=2.2.0=py36_0 70 | - pyparsing=2.2.0=py36_0 71 | - pyqt=5.6.0=py36_2 72 | - python=3.6.2=0 73 | - python-dateutil=2.6.1=py36_0 74 | - python-graphviz=0.5.2=py36_0 75 | - pytz=2017.2=py36_0 76 | - pyzmq=16.0.2=py36_0 77 | - qt=5.6.2=5 78 | - qtconsole=4.3.1=py36_0 79 | - readline=6.2=2 80 | - scikit-learn=0.19.0=np113py36_0 81 | - scipy=0.19.1=np113py36_0 82 
| - setuptools=36.4.0=py36_1 83 | - simplegeneric=0.8.1=py36_1 84 | - sip=4.18=py36_0 85 | - six=1.10.0=py36_0 86 | - sqlite=3.13.0=0 87 | - terminado=0.6=py36_0 88 | - testpath=0.3.1=py36_0 89 | - tk=8.5.18=0 90 | - tornado=4.5.2=py36_0 91 | - traitlets=4.3.2=py36_0 92 | - wcwidth=0.1.7=py36_0 93 | - wheel=0.29.0=py36_0 94 | - widgetsnbextension=3.0.2=py36_0 95 | - xz=5.2.3=0 96 | - zeromq=4.1.5=0 97 | - zlib=1.2.11=0 98 | - pip: 99 | - ipython-genutils==0.2.0 100 | - jupyter-client==5.1.0 101 | - jupyter-console==5.2.0 102 | - jupyter-core==4.3.0 103 | - prompt-toolkit==1.0.15 104 | prefix: /home/luispedro/.conda/envs/BMLS3 105 | 106 | --------------------------------------------------------------------------------