├── Chapter01
├── chapter_01.ipynb
└── data
│ └── web_traffic.tsv
├── Chapter02
├── README.rst
├── chapter_02.ipynb
├── data
│ └── seeds.tsv
├── load.py
└── tests
│ └── test_load.py
├── Chapter03
├── chapter_03.ipynb
└── data
│ ├── .gitignore
│ └── download.sh
├── Chapter04
├── chapter_04.ipynb
└── data
│ └── download.sh
├── Chapter05
└── chapter_05.ipynb
├── Chapter06
├── chapter_06.ipynb
└── data
│ └── toy
│ ├── 01.txt
│ ├── 02.txt
│ ├── 03.txt
│ ├── 04.txt
│ └── 05.txt
├── Chapter07
├── README.rst
├── Recommendations.ipynb
├── apriori
│ ├── .gitignore
│ ├── apriori.py
│ ├── apriori_example.py
│ ├── apriori_naive.py
│ └── download.sh
├── data
│ ├── .gitignore
│ └── download.sh
├── load_ml100k.py
└── stacked.py
├── Chapter08
└── chapter_08.ipynb
├── Chapter09
├── chapter_09.ipynb
├── data
│ ├── corpus.csv
│ ├── missing.tsv
│ └── not_authorized.tsv
└── twitterauth.py
├── Chapter10
├── README.rst
├── Topic modeling.ipynb
├── data
│ ├── .gitignore
│ ├── download_ap.sh
│ ├── download_wp.sh
│ └── preprocess-wikidata.sh
├── wikitopics_create.py
└── wikitopics_create_hdp.py
├── Chapter11
└── chapter_11.ipynb
├── Chapter12
├── Computer Vision.ipynb
├── README.rst
├── ch12_3rd
│ └── chapter_12.ipynb
├── download.sh
├── forest.jpeg
└── scene00.jpg
├── Chapter13
├── chapter_13.ipynb
├── simple_breakout.py
└── tf_breakout.py
├── Chapter14
├── README.rst
├── chapter.py
├── features.py
├── image-classification.py
├── jugfile.py
├── run-image-classification.sh
├── run-jugfile.sh
└── setup-aws.txt
├── LICENSE
├── README.md
├── SimpleImageDataset
├── building00.jpg
├── building01.jpg
├── building02.jpg
├── building03.jpg
├── building04.jpg
├── building05.jpg
├── building06.jpg
├── building07.jpg
├── building08.jpg
├── building09.jpg
├── building10.jpg
├── building11.jpg
├── building12.jpg
├── building13.jpg
├── building14.jpg
├── building15.jpg
├── building16.jpg
├── building17.jpg
├── building18.jpg
├── building19.jpg
├── building20.jpg
├── building21.jpg
├── building22.jpg
├── building23.jpg
├── building24.jpg
├── building25.jpg
├── building26.jpg
├── building27.jpg
├── building28.jpg
├── building29.jpg
├── scene00.jpg
├── scene01.jpg
├── scene02.jpg
├── scene03.jpg
├── scene04.jpg
├── scene05.jpg
├── scene06.jpg
├── scene07.jpg
├── scene08.jpg
├── scene09.jpg
├── scene10.jpg
├── scene11.jpg
├── scene12.jpg
├── scene13.jpg
├── scene14.jpg
├── scene15.jpg
├── scene16.jpg
├── scene17.jpg
├── scene18.jpg
├── scene19.jpg
├── scene20.jpg
├── scene21.jpg
├── scene22.jpg
├── scene23.jpg
├── scene24.jpg
├── scene25.jpg
├── scene26.jpg
├── scene27.jpg
├── scene28.jpg
├── scene29.jpg
├── text00.jpg
├── text01.jpg
├── text02.jpg
├── text03.jpg
├── text04.jpg
├── text05.jpg
├── text06.jpg
├── text07.jpg
├── text08.jpg
├── text09.jpg
├── text10.jpg
├── text11.jpg
├── text12.jpg
├── text13.jpg
├── text14.jpg
├── text15.jpg
├── text16.jpg
├── text17.jpg
├── text18.jpg
├── text19.jpg
├── text20.jpg
├── text21.jpg
├── text22.jpg
├── text23.jpg
├── text24.jpg
├── text25.jpg
├── text26.jpg
├── text27.jpg
├── text28.jpg
└── text29.jpg
└── environment.yml
/Chapter01/data/web_traffic.tsv:
--------------------------------------------------------------------------------
1 | 1.000000 2273.331055
2 | 2.000000 1657.255493
3 | 3.000000 nan
4 | 4.000000 1366.846436
5 | 5.000000 1489.234375
6 | 6.000000 1338.020020
7 | 7.000000 1884.647339
8 | 8.000000 2284.754150
9 | 9.000000 1335.810913
10 | 10.000000 1025.832397
11 | 11.000000 1140.241089
12 | 12.000000 1478.341797
13 | 13.000000 1204.218384
14 | 14.000000 1312.506348
15 | 15.000000 1300.218872
16 | 16.000000 1495.334717
17 | 17.000000 1161.070801
18 | 18.000000 1366.701904
19 | 19.000000 1273.441162
20 | 20.000000 1246.935425
21 | 21.000000 1072.582886
22 | 22.000000 1877.628296
23 | 23.000000 1403.939697
24 | 24.000000 nan
25 | 25.000000 926.635559
26 | 26.000000 1534.334595
27 | 27.000000 2105.287109
28 | 28.000000 2114.336182
29 | 29.000000 1994.525146
30 | 30.000000 1046.091919
31 | 31.000000 2091.849854
32 | 32.000000 2227.968018
33 | 33.000000 1414.702515
34 | 34.000000 1719.032471
35 | 35.000000 1722.046875
36 | 36.000000 1293.547974
37 | 37.000000 1840.270752
38 | 38.000000 2542.300781
39 | 39.000000 1609.983643
40 | 40.000000 2456.552246
41 | 41.000000 1929.899170
42 | 42.000000 1767.186646
43 | 43.000000 1204.809082
44 | 44.000000 1762.485840
45 | 45.000000 1724.805054
46 | 46.000000 2161.871338
47 | 47.000000 809.148987
48 | 48.000000 1323.292603
49 | 49.000000 nan
50 | 50.000000 1810.368774
51 | 51.000000 1934.512695
52 | 52.000000 1352.385010
53 | 53.000000 2014.328369
54 | 54.000000 1208.587036
55 | 55.000000 2171.869629
56 | 56.000000 1701.173584
57 | 57.000000 1900.717651
58 | 58.000000 1758.676025
59 | 59.000000 1477.506836
60 | 60.000000 1922.072266
61 | 61.000000 1972.815430
62 | 62.000000 1811.206665
63 | 63.000000 1367.138306
64 | 64.000000 1775.942993
65 | 65.000000 1689.120850
66 | 66.000000 1707.929565
67 | 67.000000 1354.767578
68 | 68.000000 1318.591553
69 | 69.000000 1513.624146
70 | 70.000000 2430.133789
71 | 71.000000 1788.733276
72 | 72.000000 1381.874512
73 | 73.000000 1358.690796
74 | 74.000000 991.249329
75 | 75.000000 1586.527954
76 | 76.000000 2058.635498
77 | 77.000000 1692.005859
78 | 78.000000 1459.202759
79 | 79.000000 1202.182495
80 | 80.000000 1950.823730
81 | 81.000000 1494.491699
82 | 82.000000 1654.861328
83 | 83.000000 1218.084351
84 | 84.000000 1457.957764
85 | 85.000000 1179.684082
86 | 86.000000 1484.483154
87 | 87.000000 2731.174561
88 | 88.000000 1414.573853
89 | 89.000000 1061.369995
90 | 90.000000 1573.748169
91 | 91.000000 1260.964722
92 | 92.000000 1215.403687
93 | 93.000000 981.535828
94 | 94.000000 1345.459351
95 | 95.000000 2158.874512
96 | 96.000000 nan
97 | 97.000000 730.229004
98 | 98.000000 1033.958618
99 | 99.000000 1627.994995
100 | 100.000000 1155.129639
101 | 101.000000 1305.006836
102 | 102.000000 1444.623901
103 | 103.000000 2242.751709
104 | 104.000000 1843.219116
105 | 105.000000 1211.218140
106 | 106.000000 1384.472168
107 | 107.000000 1313.780762
108 | 108.000000 1509.269897
109 | 109.000000 1796.398926
110 | 110.000000 1265.616333
111 | 111.000000 1089.800781
112 | 112.000000 2159.838135
113 | 113.000000 1166.384277
114 | 114.000000 1391.697388
115 | 115.000000 1445.436523
116 | 116.000000 1196.357056
117 | 117.000000 1049.317017
118 | 118.000000 1999.745605
119 | 119.000000 473.342102
120 | 120.000000 1285.387329
121 | 121.000000 1737.291260
122 | 122.000000 1534.551758
123 | 123.000000 2636.690674
124 | 124.000000 1372.776123
125 | 125.000000 1325.509033
126 | 126.000000 833.302063
127 | 127.000000 1199.291992
128 | 128.000000 2431.282959
129 | 129.000000 1739.882080
130 | 130.000000 2121.373779
131 | 131.000000 1726.600342
132 | 132.000000 1343.868774
133 | 133.000000 1072.934570
134 | 134.000000 1387.351807
135 | 135.000000 1054.316284
136 | 136.000000 1051.666626
137 | 137.000000 1270.661377
138 | 138.000000 1857.948853
139 | 139.000000 1436.369629
140 | 140.000000 2016.855469
141 | 141.000000 1352.831787
142 | 142.000000 909.600891
143 | 143.000000 1761.136353
144 | 144.000000 1009.373230
145 | 145.000000 2035.223267
146 | 146.000000 1534.073975
147 | 147.000000 1708.339966
148 | 148.000000 734.669800
149 | 149.000000 1456.019043
150 | 150.000000 1332.946411
151 | 151.000000 1605.986450
152 | 152.000000 1065.177856
153 | 153.000000 1291.167480
154 | 154.000000 1370.269043
155 | 155.000000 nan
156 | 156.000000 1928.732788
157 | 157.000000 2249.301270
158 | 158.000000 988.290894
159 | 159.000000 1024.199097
160 | 160.000000 875.135132
161 | 161.000000 1568.285400
162 | 162.000000 1031.664551
163 | 163.000000 1079.630859
164 | 164.000000 1086.948853
165 | 165.000000 1152.780884
166 | 166.000000 961.387634
167 | 167.000000 1232.227417
168 | 168.000000 2189.118408
169 | 169.000000 1181.132080
170 | 170.000000 1477.397705
171 | 171.000000 1613.063110
172 | 172.000000 922.071716
173 | 173.000000 2432.531006
174 | 174.000000 1651.096313
175 | 175.000000 1078.927734
176 | 176.000000 825.445740
177 | 177.000000 1579.604736
178 | 178.000000 1873.424316
179 | 179.000000 1671.580200
180 | 180.000000 2454.900146
181 | 181.000000 nan
182 | 182.000000 nan
183 | 183.000000 1620.557739
184 | 184.000000 896.071289
185 | 185.000000 1950.104126
186 | 186.000000 2299.738281
187 | 187.000000 2165.413818
188 | 188.000000 1108.689819
189 | 189.000000 1732.473877
190 | 190.000000 1602.138550
191 | 191.000000 1685.260254
192 | 192.000000 2026.701294
193 | 193.000000 1690.662964
194 | 194.000000 1737.694214
195 | 195.000000 1475.258423
196 | 196.000000 1770.715698
197 | 197.000000 1349.187500
198 | 198.000000 1571.474609
199 | 199.000000 1862.707397
200 | 200.000000 1459.782349
201 | 201.000000 2284.336426
202 | 202.000000 1553.837158
203 | 203.000000 2323.653320
204 | 204.000000 1204.110352
205 | 205.000000 1769.132324
206 | 206.000000 2186.001709
207 | 207.000000 1331.175537
208 | 208.000000 1781.712402
209 | 209.000000 1243.196533
210 | 210.000000 1287.143433
211 | 211.000000 nan
212 | 212.000000 1502.286255
213 | 213.000000 877.458313
214 | 214.000000 1522.805054
215 | 215.000000 2611.905029
216 | 216.000000 1949.547485
217 | 217.000000 1707.867432
218 | 218.000000 1336.154785
219 | 219.000000 2212.902832
220 | 220.000000 1358.864380
221 | 221.000000 2502.499023
222 | 222.000000 1765.352539
223 | 223.000000 1529.414673
224 | 224.000000 1422.890625
225 | 225.000000 1950.468262
226 | 226.000000 2156.668945
227 | 227.000000 1504.507324
228 | 228.000000 1659.369995
229 | 229.000000 1033.489746
230 | 230.000000 1538.519165
231 | 231.000000 1345.894897
232 | 232.000000 2022.561157
233 | 233.000000 2036.099121
234 | 234.000000 2111.207275
235 | 235.000000 1589.440796
236 | 236.000000 1667.526733
237 | 237.000000 1064.860840
238 | 238.000000 1458.587402
239 | 239.000000 2401.041992
240 | 240.000000 1449.993530
241 | 241.000000 2407.700684
242 | 242.000000 1832.315430
243 | 243.000000 1424.621704
244 | 244.000000 1756.471436
245 | 245.000000 1642.072632
246 | 246.000000 1429.027832
247 | 247.000000 1928.955200
248 | 248.000000 1620.687744
249 | 249.000000 1362.290161
250 | 250.000000 1275.254883
251 | 251.000000 1301.666138
252 | 252.000000 998.833984
253 | 253.000000 1163.223877
254 | 254.000000 1480.306641
255 | 255.000000 2131.771240
256 | 256.000000 1833.486206
257 | 257.000000 1161.478271
258 | 258.000000 1168.261841
259 | 259.000000 1569.966431
260 | 260.000000 1675.275146
261 | 261.000000 966.771240
262 | 262.000000 1395.518433
263 | 263.000000 1638.024780
264 | 264.000000 1712.951782
265 | 265.000000 1799.802979
266 | 266.000000 1916.816895
267 | 267.000000 1895.225952
268 | 268.000000 1008.570923
269 | 269.000000 1002.869019
270 | 270.000000 1962.243896
271 | 271.000000 1729.660400
272 | 272.000000 732.257080
273 | 273.000000 2166.750244
274 | 274.000000 1060.113159
275 | 275.000000 1519.845337
276 | 276.000000 1708.907227
277 | 277.000000 1227.915405
278 | 278.000000 1085.683716
279 | 279.000000 1045.782104
280 | 280.000000 1720.696899
281 | 281.000000 1494.705444
282 | 282.000000 961.153259
283 | 283.000000 1420.741089
284 | 284.000000 1318.101196
285 | 285.000000 740.344238
286 | 286.000000 879.328247
287 | 287.000000 1358.047974
288 | 288.000000 2318.087402
289 | 289.000000 1545.019775
290 | 290.000000 1582.846069
291 | 291.000000 1693.926636
292 | 292.000000 1152.875244
293 | 293.000000 1469.117554
294 | 294.000000 2005.669189
295 | 295.000000 1113.713867
296 | 296.000000 1281.609741
297 | 297.000000 1500.906860
298 | 298.000000 1409.276733
299 | 299.000000 943.180420
300 | 300.000000 791.694214
301 | 301.000000 704.541565
302 | 302.000000 1585.458862
303 | 303.000000 1004.198181
304 | 304.000000 796.337952
305 | 305.000000 1000.802917
306 | 306.000000 2156.751465
307 | 307.000000 638.728699
308 | 308.000000 1391.960815
309 | 309.000000 1644.898071
310 | 310.000000 1398.569580
311 | 311.000000 967.325500
312 | 312.000000 1578.804077
313 | 313.000000 1068.719360
314 | 314.000000 1418.943726
315 | 315.000000 1784.473877
316 | 316.000000 1952.727905
317 | 317.000000 997.095337
318 | 318.000000 1485.097778
319 | 319.000000 1419.496948
320 | 320.000000 1534.019897
321 | 321.000000 1633.627075
322 | 322.000000 1012.951843
323 | 323.000000 2085.274414
324 | 324.000000 3101.601562
325 | 325.000000 1858.955200
326 | 326.000000 983.584900
327 | 327.000000 2169.784180
328 | 328.000000 2086.046875
329 | 329.000000 2204.625488
330 | 330.000000 1578.105591
331 | 331.000000 1526.881104
332 | 332.000000 1725.510986
333 | 333.000000 937.253723
334 | 334.000000 1678.458130
335 | 335.000000 1572.530029
336 | 336.000000 1188.498413
337 | 337.000000 1535.775879
338 | 338.000000 1335.063721
339 | 339.000000 1702.118652
340 | 340.000000 1927.334839
341 | 341.000000 1652.505371
342 | 342.000000 1492.118774
343 | 343.000000 1801.889038
344 | 344.000000 1977.426025
345 | 345.000000 1246.210693
346 | 346.000000 2142.636719
347 | 347.000000 1352.310547
348 | 348.000000 1507.071777
349 | 349.000000 1378.349976
350 | 350.000000 2387.540283
351 | 351.000000 1306.161377
352 | 352.000000 1425.368164
353 | 353.000000 1882.434814
354 | 354.000000 2395.280762
355 | 355.000000 1600.453857
356 | 356.000000 1445.337036
357 | 357.000000 1985.960449
358 | 358.000000 1160.152100
359 | 359.000000 2099.111816
360 | 360.000000 1541.235962
361 | 361.000000 1412.315308
362 | 362.000000 2116.764404
363 | 363.000000 1279.255859
364 | 364.000000 2040.119995
365 | 365.000000 2022.776611
366 | 366.000000 1902.603638
367 | 367.000000 1140.585327
368 | 368.000000 1904.104980
369 | 369.000000 2075.255127
370 | 370.000000 3662.633301
371 | 371.000000 1800.689453
372 | 372.000000 2432.671631
373 | 373.000000 1499.937500
374 | 374.000000 1041.650879
375 | 375.000000 1826.106323
376 | 376.000000 1734.499390
377 | 377.000000 1729.217041
378 | 378.000000 1077.025391
379 | 379.000000 1599.761108
380 | 380.000000 1147.693237
381 | 381.000000 1535.584473
382 | 382.000000 1515.563477
383 | 383.000000 1541.500366
384 | 384.000000 1446.428467
385 | 385.000000 1249.276855
386 | 386.000000 1711.814209
387 | 387.000000 2115.800293
388 | 388.000000 1817.904053
389 | 389.000000 1761.030518
390 | 390.000000 2174.820312
391 | 391.000000 1793.098755
392 | 392.000000 1711.772339
393 | 393.000000 1931.489136
394 | 394.000000 1804.897095
395 | 395.000000 1881.685181
396 | 396.000000 2290.734131
397 | 397.000000 1840.967407
398 | 398.000000 1642.179443
399 | 399.000000 1375.341309
400 | 400.000000 1524.707642
401 | 401.000000 1361.021362
402 | 402.000000 1304.565796
403 | 403.000000 1655.716919
404 | 404.000000 1930.118652
405 | 405.000000 1559.966187
406 | 406.000000 1737.071411
407 | 407.000000 1753.080200
408 | 408.000000 1043.204834
409 | 409.000000 1202.575317
410 | 410.000000 1499.095825
411 | 411.000000 2102.189453
412 | 412.000000 2390.331543
413 | 413.000000 1327.265259
414 | 414.000000 1286.826416
415 | 415.000000 1414.089966
416 | 416.000000 1971.299805
417 | 417.000000 1243.213623
418 | 418.000000 1922.367920
419 | 419.000000 1163.862671
420 | 420.000000 1651.475464
421 | 421.000000 1301.186523
422 | 422.000000 1849.299316
423 | 423.000000 1799.256348
424 | 424.000000 1703.327393
425 | 425.000000 1627.862061
426 | 426.000000 1522.336914
427 | 427.000000 1408.989502
428 | 428.000000 2630.947754
429 | 429.000000 1648.483032
430 | 430.000000 1536.905884
431 | 431.000000 1433.750366
432 | 432.000000 1748.919678
433 | 433.000000 1274.653442
434 | 434.000000 1658.341675
435 | 435.000000 1580.411011
436 | 436.000000 1607.185913
437 | 437.000000 1381.490356
438 | 438.000000 1322.875366
439 | 439.000000 1168.433716
440 | 440.000000 1067.946533
441 | 441.000000 1890.483154
442 | 442.000000 1658.906250
443 | 443.000000 1064.380005
444 | 444.000000 868.906921
445 | 445.000000 1287.892456
446 | 446.000000 2167.587646
447 | 447.000000 1383.131226
448 | 448.000000 1417.915161
449 | 449.000000 2017.528442
450 | 450.000000 1777.718750
451 | 451.000000 1596.717407
452 | 452.000000 1421.328735
453 | 453.000000 1324.599243
454 | 454.000000 1899.612427
455 | 455.000000 1513.721191
456 | 456.000000 1683.056152
457 | 457.000000 1369.445557
458 | 458.000000 1265.907593
459 | 459.000000 1035.090088
460 | 460.000000 2046.150024
461 | 461.000000 1498.508667
462 | 462.000000 1608.036011
463 | 463.000000 1330.513794
464 | 464.000000 1132.405518
465 | 465.000000 1237.636108
466 | 466.000000 2298.409180
467 | 467.000000 1241.165283
468 | 468.000000 2039.370850
469 | 469.000000 1177.535522
470 | 470.000000 1221.716675
471 | 471.000000 1745.758301
472 | 472.000000 1917.593384
473 | 473.000000 1165.316650
474 | 474.000000 861.017334
475 | 475.000000 1830.155396
476 | 476.000000 1170.794067
477 | 477.000000 1230.492554
478 | 478.000000 1274.034912
479 | 479.000000 1899.829224
480 | 480.000000 1867.080078
481 | 481.000000 1609.885742
482 | 482.000000 1963.965942
483 | 483.000000 1669.859253
484 | 484.000000 1292.068359
485 | 485.000000 1751.724243
486 | 486.000000 1335.341431
487 | 487.000000 1323.624023
488 | 488.000000 1651.736572
489 | 489.000000 2087.386963
490 | 490.000000 1438.429565
491 | 491.000000 1731.568237
492 | 492.000000 1949.754028
493 | 493.000000 2203.080078
494 | 494.000000 2261.097168
495 | 495.000000 1580.708740
496 | 496.000000 1562.130615
497 | 497.000000 1859.436646
498 | 498.000000 1793.891113
499 | 499.000000 1001.056335
500 | 500.000000 1912.867676
501 | 501.000000 2475.812744
502 | 502.000000 2105.730469
503 | 503.000000 1732.766724
504 | 504.000000 2310.781738
505 | 505.000000 1875.141357
506 | 506.000000 1817.766724
507 | 507.000000 1097.887329
508 | 508.000000 2017.046753
509 | 509.000000 2242.245361
510 | 510.000000 2773.306641
511 | 511.000000 1321.350464
512 | 512.000000 2739.834229
513 | 513.000000 1389.539062
514 | 514.000000 2251.552490
515 | 515.000000 2169.031006
516 | 516.000000 2029.887329
517 | 517.000000 1591.404053
518 | 518.000000 2343.211182
519 | 519.000000 2012.653320
520 | 520.000000 1614.831421
521 | 521.000000 1672.772339
522 | 522.000000 2000.651978
523 | 523.000000 2896.021973
524 | 524.000000 2637.968750
525 | 525.000000 1884.990601
526 | 526.000000 2405.921143
527 | 527.000000 2257.248779
528 | 528.000000 1961.182495
529 | 529.000000 1849.048218
530 | 530.000000 1559.181519
531 | 531.000000 1560.701660
532 | 532.000000 2041.094482
533 | 533.000000 1998.698853
534 | 534.000000 2052.123291
535 | 535.000000 1803.678223
536 | 536.000000 1970.451904
537 | 537.000000 1939.131104
538 | 538.000000 2082.247803
539 | 539.000000 1409.396606
540 | 540.000000 2733.470947
541 | 541.000000 2221.219238
542 | 542.000000 2331.755371
543 | 543.000000 2438.380615
544 | 544.000000 1917.306030
545 | 545.000000 1988.092041
546 | 546.000000 2145.496094
547 | 547.000000 2278.642578
548 | 548.000000 2159.122803
549 | 549.000000 2627.566895
550 | 550.000000 1537.308228
551 | 551.000000 1559.624634
552 | 552.000000 3045.290527
553 | 553.000000 2246.550781
554 | 554.000000 2384.003906
555 | 555.000000 2010.736084
556 | 556.000000 1972.834229
557 | 557.000000 2146.448242
558 | 558.000000 2102.908203
559 | 559.000000 2329.290527
560 | 560.000000 1733.708252
561 | 561.000000 2641.247070
562 | 562.000000 1993.119873
563 | 563.000000 2200.874268
564 | 564.000000 2394.948975
565 | 565.000000 2191.825684
566 | 566.000000 2496.806396
567 | 567.000000 2391.000732
568 | 568.000000 2436.711182
569 | 569.000000 1738.463013
570 | 570.000000 2054.031982
571 | 571.000000 2036.267822
572 | 572.000000 1836.029175
573 | 573.000000 3007.133545
574 | 574.000000 1429.928833
575 | 575.000000 2216.402588
576 | 576.000000 1904.106812
577 | 577.000000 2285.255371
578 | 578.000000 1994.338013
579 | 579.000000 2059.176758
580 | 580.000000 2171.187012
581 | 581.000000 1982.419312
582 | 582.000000 2099.515381
583 | 583.000000 2507.017334
584 | 584.000000 1913.215332
585 | 585.000000 2561.822021
586 | 586.000000 1302.399536
587 | 587.000000 1860.632202
588 | 588.000000 2287.544434
589 | 589.000000 1734.690063
590 | 590.000000 2156.122559
591 | 591.000000 2402.931885
592 | 592.000000 2404.802734
593 | 593.000000 3244.411377
594 | 594.000000 1978.216064
595 | 595.000000 2411.874023
596 | 596.000000 2007.088379
597 | 597.000000 2014.276733
598 | 598.000000 1565.664917
599 | 599.000000 2022.515991
600 | 600.000000 1772.145020
601 | 601.000000 2583.096436
602 | 602.000000 1844.953979
603 | 603.000000 1621.984863
604 | 604.000000 1770.774658
605 | 605.000000 2020.567627
606 | 606.000000 2355.657471
607 | 607.000000 1996.695801
608 | 608.000000 2127.384277
609 | 609.000000 2114.290771
610 | 610.000000 1935.230835
611 | 611.000000 2125.324707
612 | 612.000000 1787.222656
613 | 613.000000 2276.241211
614 | 614.000000 2978.175049
615 | 615.000000 2542.808594
616 | 616.000000 2113.446289
617 | 617.000000 1968.088379
618 | 618.000000 2368.984619
619 | 619.000000 2241.410400
620 | 620.000000 2073.782227
621 | 621.000000 2121.806152
622 | 622.000000 2167.166504
623 | 623.000000 2575.725342
624 | 624.000000 2500.377930
625 | 625.000000 2181.297363
626 | 626.000000 1967.151733
627 | 627.000000 2072.927246
628 | 628.000000 2027.206543
629 | 629.000000 2345.307617
630 | 630.000000 2024.258789
631 | 631.000000 2248.471924
632 | 632.000000 2455.304688
633 | 633.000000 2265.372070
634 | 634.000000 2424.891113
635 | 635.000000 2852.068115
636 | 636.000000 1997.479370
637 | 637.000000 3298.773438
638 | 638.000000 2367.100342
639 | 639.000000 1853.859985
640 | 640.000000 2896.925537
641 | 641.000000 2537.741943
642 | 642.000000 2300.602051
643 | 643.000000 2849.175781
644 | 644.000000 2975.004150
645 | 645.000000 1931.866577
646 | 646.000000 3009.414307
647 | 647.000000 2538.314941
648 | 648.000000 2783.420410
649 | 649.000000 2490.887939
650 | 650.000000 2407.929199
651 | 651.000000 2003.909668
652 | 652.000000 2752.269531
653 | 653.000000 2576.723145
654 | 654.000000 2817.946289
655 | 655.000000 2683.553467
656 | 656.000000 2628.284424
657 | 657.000000 2995.034912
658 | 658.000000 2303.661621
659 | 659.000000 2772.099609
660 | 660.000000 2606.699463
661 | 661.000000 2703.995361
662 | 662.000000 2840.417725
663 | 663.000000 3256.972412
664 | 664.000000 3024.288574
665 | 665.000000 2684.777588
666 | 666.000000 3006.578857
667 | 667.000000 3310.774902
668 | 668.000000 3183.121826
669 | 669.000000 2523.464600
670 | 670.000000 3401.200928
671 | 671.000000 2839.752686
672 | 672.000000 3193.740479
673 | 673.000000 2970.214355
674 | 674.000000 3338.654541
675 | 675.000000 3464.786621
676 | 676.000000 3265.989502
677 | 677.000000 3536.363037
678 | 678.000000 3090.552734
679 | 679.000000 2936.692627
680 | 680.000000 3009.171387
681 | 681.000000 4000.848389
682 | 682.000000 3490.038086
683 | 683.000000 2815.461914
684 | 684.000000 3383.949463
685 | 685.000000 2902.276611
686 | 686.000000 4261.487793
687 | 687.000000 3787.093262
688 | 688.000000 4140.599121
689 | 689.000000 3589.932617
690 | 690.000000 3345.195801
691 | 691.000000 3119.029297
692 | 692.000000 3456.691406
693 | 693.000000 4152.050293
694 | 694.000000 3828.693115
695 | 695.000000 3993.070557
696 | 696.000000 4668.495117
697 | 697.000000 3303.204834
698 | 698.000000 3932.197998
699 | 699.000000 4497.727539
700 | 700.000000 3402.736572
701 | 701.000000 3674.006592
702 | 702.000000 3551.005127
703 | 703.000000 4231.209961
704 | 704.000000 3806.804443
705 | 705.000000 3354.349121
706 | 706.000000 3603.937988
707 | 707.000000 4015.734131
708 | 708.000000 3550.420166
709 | 709.000000 3318.288818
710 | 710.000000 3933.817627
711 | 711.000000 3597.578125
712 | 712.000000 5290.647949
713 | 713.000000 3563.370850
714 | 714.000000 3991.379395
715 | 715.000000 3890.932861
716 | 716.000000 3637.689453
717 | 717.000000 3800.525146
718 | 718.000000 4190.283203
719 | 719.000000 5249.075195
720 | 720.000000 4178.081543
721 | 721.000000 4830.328125
722 | 722.000000 4347.202637
723 | 723.000000 4226.013672
724 | 724.000000 4813.762695
725 | 725.000000 3998.185547
726 | 726.000000 4358.066406
727 | 727.000000 4323.617188
728 | 728.000000 4157.835938
729 | 729.000000 4630.654297
730 | 730.000000 4415.905273
731 | 731.000000 4411.992188
732 | 732.000000 4725.586426
733 | 733.000000 4364.381348
734 | 734.000000 4800.028809
735 | 735.000000 4749.926758
736 | 736.000000 5144.264160
737 | 737.000000 4907.322754
738 | 738.000000 4310.609375
739 | 739.000000 4971.517578
740 | 740.000000 4815.629395
741 | 741.000000 5393.541992
742 | 742.000000 5906.814941
743 | 743.000000 4883.022461
744 |
--------------------------------------------------------------------------------
/Chapter02/README.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Chapter 2
3 | =========
4 |
5 | Support code for *Chapter 2: Learning How to Classify with Real-world
6 | Examples*. The ``data`` directory contains the seeds dataset, originally
7 | downloaded from https://archive.ics.uci.edu/ml/datasets/seeds
8 |
9 | chapter_02.ipynb
10 |     The notebook with the code from the book (with a few extras)
11 | load.py
12 | Code to load the seeds data
13 |
14 |
--------------------------------------------------------------------------------
/Chapter02/data/seeds.tsv:
--------------------------------------------------------------------------------
1 | 15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama
2 | 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 Kama
3 | 14.29 14.09 0.905 5.291 3.337 2.699 4.825 Kama
4 | 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 Kama
5 | 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 Kama
6 | 14.38 14.21 0.8951 5.386 3.312 2.462 4.956 Kama
7 | 14.69 14.49 0.8799 5.563 3.259 3.586 5.219 Kama
8 | 14.11 14.1 0.8911 5.42 3.302 2.7 5.0 Kama
9 | 16.63 15.46 0.8747 6.053 3.465 2.04 5.877 Kama
10 | 16.44 15.25 0.888 5.884 3.505 1.969 5.533 Kama
11 | 15.26 14.85 0.8696 5.714 3.242 4.543 5.314 Kama
12 | 14.03 14.16 0.8796 5.438 3.201 1.717 5.001 Kama
13 | 13.89 14.02 0.888 5.439 3.199 3.986 4.738 Kama
14 | 13.78 14.06 0.8759 5.479 3.156 3.136 4.872 Kama
15 | 13.74 14.05 0.8744 5.482 3.114 2.932 4.825 Kama
16 | 14.59 14.28 0.8993 5.351 3.333 4.185 4.781 Kama
17 | 13.99 13.83 0.9183 5.119 3.383 5.234 4.781 Kama
18 | 15.69 14.75 0.9058 5.527 3.514 1.599 5.046 Kama
19 | 14.7 14.21 0.9153 5.205 3.466 1.767 4.649 Kama
20 | 12.72 13.57 0.8686 5.226 3.049 4.102 4.914 Kama
21 | 14.16 14.4 0.8584 5.658 3.129 3.072 5.176 Kama
22 | 14.11 14.26 0.8722 5.52 3.168 2.688 5.219 Kama
23 | 15.88 14.9 0.8988 5.618 3.507 0.7651 5.091 Kama
24 | 12.08 13.23 0.8664 5.099 2.936 1.415 4.961 Kama
25 | 15.01 14.76 0.8657 5.789 3.245 1.791 5.001 Kama
26 | 16.19 15.16 0.8849 5.833 3.421 0.903 5.307 Kama
27 | 13.02 13.76 0.8641 5.395 3.026 3.373 4.825 Kama
28 | 12.74 13.67 0.8564 5.395 2.956 2.504 4.869 Kama
29 | 14.11 14.18 0.882 5.541 3.221 2.754 5.038 Kama
30 | 13.45 14.02 0.8604 5.516 3.065 3.531 5.097 Kama
31 | 13.16 13.82 0.8662 5.454 2.975 0.8551 5.056 Kama
32 | 15.49 14.94 0.8724 5.757 3.371 3.412 5.228 Kama
33 | 14.09 14.41 0.8529 5.717 3.186 3.92 5.299 Kama
34 | 13.94 14.17 0.8728 5.585 3.15 2.124 5.012 Kama
35 | 15.05 14.68 0.8779 5.712 3.328 2.129 5.36 Kama
36 | 16.12 15.0 0.9 5.709 3.485 2.27 5.443 Kama
37 | 16.2 15.27 0.8734 5.826 3.464 2.823 5.527 Kama
38 | 17.08 15.38 0.9079 5.832 3.683 2.956 5.484 Kama
39 | 14.8 14.52 0.8823 5.656 3.288 3.112 5.309 Kama
40 | 14.28 14.17 0.8944 5.397 3.298 6.685 5.001 Kama
41 | 13.54 13.85 0.8871 5.348 3.156 2.587 5.178 Kama
42 | 13.5 13.85 0.8852 5.351 3.158 2.249 5.176 Kama
43 | 13.16 13.55 0.9009 5.138 3.201 2.461 4.783 Kama
44 | 15.5 14.86 0.882 5.877 3.396 4.711 5.528 Kama
45 | 15.11 14.54 0.8986 5.579 3.462 3.128 5.18 Kama
46 | 13.8 14.04 0.8794 5.376 3.155 1.56 4.961 Kama
47 | 15.36 14.76 0.8861 5.701 3.393 1.367 5.132 Kama
48 | 14.99 14.56 0.8883 5.57 3.377 2.958 5.175 Kama
49 | 14.79 14.52 0.8819 5.545 3.291 2.704 5.111 Kama
50 | 14.86 14.67 0.8676 5.678 3.258 2.129 5.351 Kama
51 | 14.43 14.4 0.8751 5.585 3.272 3.975 5.144 Kama
52 | 15.78 14.91 0.8923 5.674 3.434 5.593 5.136 Kama
53 | 14.49 14.61 0.8538 5.715 3.113 4.116 5.396 Kama
54 | 14.33 14.28 0.8831 5.504 3.199 3.328 5.224 Kama
55 | 14.52 14.6 0.8557 5.741 3.113 1.481 5.487 Kama
56 | 15.03 14.77 0.8658 5.702 3.212 1.933 5.439 Kama
57 | 14.46 14.35 0.8818 5.388 3.377 2.802 5.044 Kama
58 | 14.92 14.43 0.9006 5.384 3.412 1.142 5.088 Kama
59 | 15.38 14.77 0.8857 5.662 3.419 1.999 5.222 Kama
60 | 12.11 13.47 0.8392 5.159 3.032 1.502 4.519 Kama
61 | 11.42 12.86 0.8683 5.008 2.85 2.7 4.607 Kama
62 | 11.23 12.63 0.884 4.902 2.879 2.269 4.703 Kama
63 | 12.36 13.19 0.8923 5.076 3.042 3.22 4.605 Kama
64 | 13.22 13.84 0.868 5.395 3.07 4.157 5.088 Kama
65 | 12.78 13.57 0.8716 5.262 3.026 1.176 4.782 Kama
66 | 12.88 13.5 0.8879 5.139 3.119 2.352 4.607 Kama
67 | 14.34 14.37 0.8726 5.63 3.19 1.313 5.15 Kama
68 | 14.01 14.29 0.8625 5.609 3.158 2.217 5.132 Kama
69 | 14.37 14.39 0.8726 5.569 3.153 1.464 5.3 Kama
70 | 12.73 13.75 0.8458 5.412 2.882 3.533 5.067 Kama
71 | 17.63 15.98 0.8673 6.191 3.561 4.076 6.06 Rosa
72 | 16.84 15.67 0.8623 5.998 3.484 4.675 5.877 Rosa
73 | 17.26 15.73 0.8763 5.978 3.594 4.539 5.791 Rosa
74 | 19.11 16.26 0.9081 6.154 3.93 2.936 6.079 Rosa
75 | 16.82 15.51 0.8786 6.017 3.486 4.004 5.841 Rosa
76 | 16.77 15.62 0.8638 5.927 3.438 4.92 5.795 Rosa
77 | 17.32 15.91 0.8599 6.064 3.403 3.824 5.922 Rosa
78 | 20.71 17.23 0.8763 6.579 3.814 4.451 6.451 Rosa
79 | 18.94 16.49 0.875 6.445 3.639 5.064 6.362 Rosa
80 | 17.12 15.55 0.8892 5.85 3.566 2.858 5.746 Rosa
81 | 16.53 15.34 0.8823 5.875 3.467 5.532 5.88 Rosa
82 | 18.72 16.19 0.8977 6.006 3.857 5.324 5.879 Rosa
83 | 20.2 16.89 0.8894 6.285 3.864 5.173 6.187 Rosa
84 | 19.57 16.74 0.8779 6.384 3.772 1.472 6.273 Rosa
85 | 19.51 16.71 0.878 6.366 3.801 2.962 6.185 Rosa
86 | 18.27 16.09 0.887 6.173 3.651 2.443 6.197 Rosa
87 | 18.88 16.26 0.8969 6.084 3.764 1.649 6.109 Rosa
88 | 18.98 16.66 0.859 6.549 3.67 3.691 6.498 Rosa
89 | 21.18 17.21 0.8989 6.573 4.033 5.78 6.231 Rosa
90 | 20.88 17.05 0.9031 6.45 4.032 5.016 6.321 Rosa
91 | 20.1 16.99 0.8746 6.581 3.785 1.955 6.449 Rosa
92 | 18.76 16.2 0.8984 6.172 3.796 3.12 6.053 Rosa
93 | 18.81 16.29 0.8906 6.272 3.693 3.237 6.053 Rosa
94 | 18.59 16.05 0.9066 6.037 3.86 6.001 5.877 Rosa
95 | 18.36 16.52 0.8452 6.666 3.485 4.933 6.448 Rosa
96 | 16.87 15.65 0.8648 6.139 3.463 3.696 5.967 Rosa
97 | 19.31 16.59 0.8815 6.341 3.81 3.477 6.238 Rosa
98 | 18.98 16.57 0.8687 6.449 3.552 2.144 6.453 Rosa
99 | 18.17 16.26 0.8637 6.271 3.512 2.853 6.273 Rosa
100 | 18.72 16.34 0.881 6.219 3.684 2.188 6.097 Rosa
101 | 16.41 15.25 0.8866 5.718 3.525 4.217 5.618 Rosa
102 | 17.99 15.86 0.8992 5.89 3.694 2.068 5.837 Rosa
103 | 19.46 16.5 0.8985 6.113 3.892 4.308 6.009 Rosa
104 | 19.18 16.63 0.8717 6.369 3.681 3.357 6.229 Rosa
105 | 18.95 16.42 0.8829 6.248 3.755 3.368 6.148 Rosa
106 | 18.83 16.29 0.8917 6.037 3.786 2.553 5.879 Rosa
107 | 18.85 16.17 0.9056 6.152 3.806 2.843 6.2 Rosa
108 | 17.63 15.86 0.88 6.033 3.573 3.747 5.929 Rosa
109 | 19.94 16.92 0.8752 6.675 3.763 3.252 6.55 Rosa
110 | 18.55 16.22 0.8865 6.153 3.674 1.738 5.894 Rosa
111 | 18.45 16.12 0.8921 6.107 3.769 2.235 5.794 Rosa
112 | 19.38 16.72 0.8716 6.303 3.791 3.678 5.965 Rosa
113 | 19.13 16.31 0.9035 6.183 3.902 2.109 5.924 Rosa
114 | 19.14 16.61 0.8722 6.259 3.737 6.682 6.053 Rosa
115 | 20.97 17.25 0.8859 6.563 3.991 4.677 6.316 Rosa
116 | 19.06 16.45 0.8854 6.416 3.719 2.248 6.163 Rosa
117 | 18.96 16.2 0.9077 6.051 3.897 4.334 5.75 Rosa
118 | 19.15 16.45 0.889 6.245 3.815 3.084 6.185 Rosa
119 | 18.89 16.23 0.9008 6.227 3.769 3.639 5.966 Rosa
120 | 20.03 16.9 0.8811 6.493 3.857 3.063 6.32 Rosa
121 | 20.24 16.91 0.8897 6.315 3.962 5.901 6.188 Rosa
122 | 18.14 16.12 0.8772 6.059 3.563 3.619 6.011 Rosa
123 | 16.17 15.38 0.8588 5.762 3.387 4.286 5.703 Rosa
124 | 18.43 15.97 0.9077 5.98 3.771 2.984 5.905 Rosa
125 | 15.99 14.89 0.9064 5.363 3.582 3.336 5.144 Rosa
126 | 18.75 16.18 0.8999 6.111 3.869 4.188 5.992 Rosa
127 | 18.65 16.41 0.8698 6.285 3.594 4.391 6.102 Rosa
128 | 17.98 15.85 0.8993 5.979 3.687 2.257 5.919 Rosa
129 | 20.16 17.03 0.8735 6.513 3.773 1.91 6.185 Rosa
130 | 17.55 15.66 0.8991 5.791 3.69 5.366 5.661 Rosa
131 | 18.3 15.89 0.9108 5.979 3.755 2.837 5.962 Rosa
132 | 18.94 16.32 0.8942 6.144 3.825 2.908 5.949 Rosa
133 | 15.38 14.9 0.8706 5.884 3.268 4.462 5.795 Rosa
134 | 16.16 15.33 0.8644 5.845 3.395 4.266 5.795 Rosa
135 | 15.56 14.89 0.8823 5.776 3.408 4.972 5.847 Rosa
136 | 15.38 14.66 0.899 5.477 3.465 3.6 5.439 Rosa
137 | 17.36 15.76 0.8785 6.145 3.574 3.526 5.971 Rosa
138 | 15.57 15.15 0.8527 5.92 3.231 2.64 5.879 Rosa
139 | 15.6 15.11 0.858 5.832 3.286 2.725 5.752 Rosa
140 | 16.23 15.18 0.885 5.872 3.472 3.769 5.922 Rosa
141 | 13.07 13.92 0.848 5.472 2.994 5.304 5.395 Canadian
142 | 13.32 13.94 0.8613 5.541 3.073 7.035 5.44 Canadian
143 | 13.34 13.95 0.862 5.389 3.074 5.995 5.307 Canadian
144 | 12.22 13.32 0.8652 5.224 2.967 5.469 5.221 Canadian
145 | 11.82 13.4 0.8274 5.314 2.777 4.471 5.178 Canadian
146 | 11.21 13.13 0.8167 5.279 2.687 6.169 5.275 Canadian
147 | 11.43 13.13 0.8335 5.176 2.719 2.221 5.132 Canadian
148 | 12.49 13.46 0.8658 5.267 2.967 4.421 5.002 Canadian
149 | 12.7 13.71 0.8491 5.386 2.911 3.26 5.316 Canadian
150 | 10.79 12.93 0.8107 5.317 2.648 5.462 5.194 Canadian
151 | 11.83 13.23 0.8496 5.263 2.84 5.195 5.307 Canadian
152 | 12.01 13.52 0.8249 5.405 2.776 6.992 5.27 Canadian
153 | 12.26 13.6 0.8333 5.408 2.833 4.756 5.36 Canadian
154 | 11.18 13.04 0.8266 5.22 2.693 3.332 5.001 Canadian
155 | 11.36 13.05 0.8382 5.175 2.755 4.048 5.263 Canadian
156 | 11.19 13.05 0.8253 5.25 2.675 5.813 5.219 Canadian
157 | 11.34 12.87 0.8596 5.053 2.849 3.347 5.003 Canadian
158 | 12.13 13.73 0.8081 5.394 2.745 4.825 5.22 Canadian
159 | 11.75 13.52 0.8082 5.444 2.678 4.378 5.31 Canadian
160 | 11.49 13.22 0.8263 5.304 2.695 5.388 5.31 Canadian
161 | 12.54 13.67 0.8425 5.451 2.879 3.082 5.491 Canadian
162 | 12.02 13.33 0.8503 5.35 2.81 4.271 5.308 Canadian
163 | 12.05 13.41 0.8416 5.267 2.847 4.988 5.046 Canadian
164 | 12.55 13.57 0.8558 5.333 2.968 4.419 5.176 Canadian
165 | 11.14 12.79 0.8558 5.011 2.794 6.388 5.049 Canadian
166 | 12.1 13.15 0.8793 5.105 2.941 2.201 5.056 Canadian
167 | 12.44 13.59 0.8462 5.319 2.897 4.924 5.27 Canadian
168 | 12.15 13.45 0.8443 5.417 2.837 3.638 5.338 Canadian
169 | 11.35 13.12 0.8291 5.176 2.668 4.337 5.132 Canadian
170 | 11.24 13.0 0.8359 5.09 2.715 3.521 5.088 Canadian
171 | 11.02 13.0 0.8189 5.325 2.701 6.735 5.163 Canadian
172 | 11.55 13.1 0.8455 5.167 2.845 6.715 4.956 Canadian
173 | 11.27 12.97 0.8419 5.088 2.763 4.309 5.0 Canadian
174 | 11.4 13.08 0.8375 5.136 2.763 5.588 5.089 Canadian
175 | 10.83 12.96 0.8099 5.278 2.641 5.182 5.185 Canadian
176 | 10.8 12.57 0.859 4.981 2.821 4.773 5.063 Canadian
177 | 11.26 13.01 0.8355 5.186 2.71 5.335 5.092 Canadian
178 | 10.74 12.73 0.8329 5.145 2.642 4.702 4.963 Canadian
179 | 11.48 13.05 0.8473 5.18 2.758 5.876 5.002 Canadian
180 | 12.21 13.47 0.8453 5.357 2.893 1.661 5.178 Canadian
181 | 11.41 12.95 0.856 5.09 2.775 4.957 4.825 Canadian
182 | 12.46 13.41 0.8706 5.236 3.017 4.987 5.147 Canadian
183 | 12.19 13.36 0.8579 5.24 2.909 4.857 5.158 Canadian
184 | 11.65 13.07 0.8575 5.108 2.85 5.209 5.135 Canadian
185 | 12.89 13.77 0.8541 5.495 3.026 6.185 5.316 Canadian
186 | 11.56 13.31 0.8198 5.363 2.683 4.062 5.182 Canadian
187 | 11.81 13.45 0.8198 5.413 2.716 4.898 5.352 Canadian
188 | 10.91 12.8 0.8372 5.088 2.675 4.179 4.956 Canadian
189 | 11.23 12.82 0.8594 5.089 2.821 7.524 4.957 Canadian
190 | 10.59 12.41 0.8648 4.899 2.787 4.975 4.794 Canadian
191 | 10.93 12.8 0.839 5.046 2.717 5.398 5.045 Canadian
192 | 11.27 12.86 0.8563 5.091 2.804 3.985 5.001 Canadian
193 | 11.87 13.02 0.8795 5.132 2.953 3.597 5.132 Canadian
194 | 10.82 12.83 0.8256 5.18 2.63 4.853 5.089 Canadian
195 | 12.11 13.27 0.8639 5.236 2.975 4.132 5.012 Canadian
196 | 12.8 13.47 0.886 5.16 3.126 4.873 4.914 Canadian
197 | 12.79 13.53 0.8786 5.224 3.054 5.483 4.958 Canadian
198 | 13.37 13.78 0.8849 5.32 3.128 4.67 5.091 Canadian
199 | 12.62 13.67 0.8481 5.41 2.911 3.306 5.231 Canadian
200 | 12.76 13.38 0.8964 5.073 3.155 2.828 4.83 Canadian
201 | 12.38 13.44 0.8609 5.219 2.989 5.472 5.045 Canadian
202 | 12.67 13.32 0.8977 4.984 3.135 2.3 4.745 Canadian
203 | 11.18 12.72 0.868 5.009 2.81 4.051 4.828 Canadian
204 | 12.7 13.41 0.8874 5.183 3.091 8.456 5.0 Canadian
205 | 12.37 13.47 0.8567 5.204 2.96 3.919 5.001 Canadian
206 | 12.19 13.2 0.8783 5.137 2.981 3.631 4.87 Canadian
207 | 11.23 12.88 0.8511 5.14 2.795 4.325 5.003 Canadian
208 | 13.2 13.66 0.8883 5.236 3.232 8.315 5.056 Canadian
209 | 11.84 13.21 0.8521 5.175 2.836 3.598 5.044 Canadian
210 | 12.3 13.34 0.8684 5.243 2.974 5.637 5.063 Canadian
211 |
--------------------------------------------------------------------------------
/Chapter02/load.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | import numpy as np
9 |
10 |
def load_dataset(dataset_name):
    '''
    data = load_dataset(dataset_name)

    Load the tab-separated dataset ``./data/<dataset_name>.tsv``.

    Each non-empty line must contain the numeric feature values followed by
    the class label as its last field. Blank lines (e.g. a trailing newline
    at the end of the file) are skipped; previously they produced an empty
    feature row and an empty-string label.

    Parameters
    ----------
    dataset_name : str
        Base name of the data file, without the ``.tsv`` extension.

    Returns
    -------
    data : dictionary
        'features' : 2-D float ndarray, one row per sample
        'target_names' : sorted list of the distinct label strings
        'target' : 1-D int ndarray of indices into ``target_names``
    '''
    features = []
    target = []
    with open('./data/{0}.tsv'.format(dataset_name)) as ifile:
        for line in ifile:
            tokens = line.strip().split('\t')
            # Skip blank lines: ''.split('\t') yields [''], which would
            # otherwise append an empty feature row and a '' label.
            if tokens == ['']:
                continue
            features.append([float(tk) for tk in tokens[:-1]])
            target.append(tokens[-1])

    # Map each label string to its index in the sorted unique-label list.
    # A dict lookup avoids the O(n * k) cost of calling list.index per sample.
    target_names = sorted(set(target))
    name_to_index = {name: i for i, name in enumerate(target_names)}
    target = np.array([name_to_index[t] for t in target])
    return {
        'features': np.array(features),
        'target_names': target_names,
        'target': target,
    }
40 |
--------------------------------------------------------------------------------
/Chapter02/tests/test_load.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | from load import load_dataset
9 |
10 |
def test_iris():
    # load_dataset returns a dictionary (features / target_names / target),
    # not a (features, labels) tuple; unpacking a 3-key dict into two names
    # raised ValueError. Index the dict explicitly instead.
    data = load_dataset('iris')
    features = data['features']
    labels = data['target']
    # The iris dataset has four features per sample
    assert len(features[0]) == 4
    # The dataset must not be empty
    assert len(features)
    # Exactly one label per sample
    assert len(features) == len(labels)
16 |
17 |
def test_seeds():
    # load_dataset returns a dictionary (features / target_names / target),
    # not a (features, labels) tuple; unpacking a 3-key dict into two names
    # raised ValueError. Index the dict explicitly instead.
    data = load_dataset('seeds')
    features = data['features']
    labels = data['target']
    # The seeds dataset has seven features per sample
    assert len(features[0]) == 7
    # The dataset must not be empty
    assert len(features)
    # Exactly one label per sample
    assert len(features) == len(labels)
23 |
--------------------------------------------------------------------------------
/Chapter03/chapter_03.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Regression"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# This code is supporting material for the book\n",
17 | "# Building Machine Learning Systems with Python\n",
18 | "# by Willi Richert, Luis Pedro Coelho and Matthieu Brucher\n",
19 | "# published by PACKT Publishing\n",
20 | "#\n",
21 | "# It is made available under the MIT License\n",
22 | "\n",
23 | "import numpy as np\n",
24 | "from matplotlib import pyplot as plt"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "Use the magic command `%matplotlib` to see the plots inline:"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "%matplotlib inline"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Boston dataset\n",
48 | "\n",
49 | "Load the data"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "from sklearn.datasets import load_boston\n",
59 | "boston = load_boston()"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "The first regression attempt:"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": null,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "from sklearn.linear_model import LinearRegression\n",
76 | "lr = LinearRegression(fit_intercept=True)\n",
77 | "\n",
78 | "# Index number five in the number of rooms\n",
79 | "x = boston.data[:, 5]\n",
80 | "y = boston.target\n",
81 | "\n",
82 | "# lr.fit takes a two-dimensional array as input. We use np.atleast_2d\n",
83 | "# to convert from one to two dimensional, then transpose to make sure that the\n",
84 | "# format matches:\n",
85 | "x = np.transpose(np.atleast_2d(x))\n",
86 | "lr.fit(x, y)\n",
87 | "\n",
88 | "fig,ax = plt.subplots()\n",
89 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n",
90 | "ax.set_ylabel(\"House Price\")\n",
91 | "xmin = x.min()\n",
92 | "xmax = x.max()\n",
93 | "ax.plot([xmin, xmax],\n",
94 | " [lr.predict(xmin), lr.predict(xmax)],\n",
95 | " '-', lw=2, color=\"#f9a602\")\n",
96 | "ax.scatter(x, y, s=2)\n",
97 | "fig.savefig('Regression_Fig_01.png')"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "from sklearn.metrics import mean_squared_error\n",
107 | "mse = mean_squared_error(y, lr.predict(x))\n",
108 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "scrolled": true
116 | },
117 | "outputs": [],
118 | "source": [
119 | "rmse = np.sqrt(mse)\n",
120 | "print('RMSE (on training data): {}'.format(rmse))"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "from sklearn.metrics import r2_score\n",
130 | "r2 = r2_score(y, lr.predict(x))\n",
131 | "print(\"R2 (on training data): {:.2}\".format(r2))"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "Repeat, but using all the input variables now"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "x = boston.data\n",
148 | "\n",
149 | "lr.fit(x,y)\n",
150 | "\n",
151 | "mse = mean_squared_error(y, lr.predict(x))\n",
152 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))\n",
153 | "rmse = np.sqrt(mse)\n",
154 | "print('RMSE (on training data): {}'.format(rmse))\n",
155 | "r2 = r2_score(y, lr.predict(x))\n",
156 | "print(\"R2 (on training data): {:.2}\".format(r2))"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "To see how well we do, we plot _prediction vs. gold reality_:"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "fig,ax = plt.subplots()\n",
173 | "ax.set_xlabel('Predicted price')\n",
174 | "ax.set_ylabel('Actual price')\n",
175 | "ax.plot([y.min(), y.max()], [y.min(), y.max()], ':', lw=2, color=\"#f9a602\")\n",
176 | "ax.scatter(lr.predict(x), y, s=2)\n",
177 | "fig.savefig(\"Regression_FIG_02.png\")"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "Now, we will use **cross-validation** for evaluating the regression quality:"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": null,
190 | "metadata": {
191 | "scrolled": true
192 | },
193 | "outputs": [],
194 | "source": [
195 | "from sklearn.model_selection import KFold, cross_val_predict\n",
196 | "kf = KFold(n_splits=5)\n",
197 | "p = cross_val_predict(lr, x, y, cv=kf)\n",
198 | "rmse_cv = np.sqrt(mean_squared_error(p, y))\n",
199 | "print('RMSE on 5-fold CV: {:.2}'.format(rmse_cv))"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "We now compare a few different regression models on _both training data and using cross-validation_:"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge \n",
216 | "\n",
217 | "for name, met in [\n",
218 | " ('linear regression', LinearRegression()),\n",
219 | " ('elastic-net(.5)', ElasticNet(alpha=0.5)),\n",
220 | " ('lasso(.5)', Lasso(alpha=0.5)),\n",
221 | " ('ridge(.5)', Ridge(alpha=0.5)),\n",
222 | "]:\n",
223 | " # Fit on the whole data:\n",
224 | " met.fit(x, y)\n",
225 | "\n",
226 | " # Predict on the whole data:\n",
227 | " p = met.predict(x)\n",
228 | " r2_train = r2_score(y, p)\n",
229 | "\n",
230 | " kf = KFold(n_splits=5)\n",
231 | " p = np.zeros_like(y)\n",
232 | " for train, test in kf.split(x):\n",
233 | " met.fit(x[train], y[train])\n",
234 | " p[test] = met.predict(x[test])\n",
235 | "\n",
236 | " r2_cv = r2_score(y, p)\n",
237 | " print('Method: {}'.format(name))\n",
238 | " print('R2 on training: {:.2}'.format(r2_train))\n",
239 | " print('R2 on 5-fold CV: {:.2}'.format(r2_cv))\n",
240 | " print('\\n')"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "metadata": {},
247 | "outputs": [],
248 | "source": [
249 | "las = Lasso(normalize=True) \n",
250 | "alphas = np.logspace(-5, 2, 1000) \n",
251 | "alphas, coefs, _= las.path(x, y, alphas=alphas) \n",
252 | "\n",
253 | "fig,ax = plt.subplots() \n",
254 | "ax.plot(alphas, coefs.T) \n",
255 | "ax.set_xscale('log') \n",
256 | "ax.set_xlim(alphas.max(), alphas.min()) \n",
257 | "\n",
258 | "\n",
259 | "ax.set_xlabel('Lasso coefficient path as a function of alpha') \n",
260 | "ax.set_xlabel('Alpha') \n",
261 | "ax.set_ylabel('Coefficient weight') \n",
262 | "fig.savefig('REGRESSION_FIG_03.png')"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "## Linear regression with Tensorflow"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "Let's try and do the same with Tensorflow."
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {},
283 | "outputs": [],
284 | "source": [
285 | "# Batch size, epochs\n",
286 | "batch_size = 100\n",
287 | "n_epochs = 50000\n",
288 | "steps = 1000"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": [
297 | "# Creation of the scaffolding\n",
298 | "\n",
299 | "import tensorflow as tf\n",
300 | "tf.reset_default_graph()\n",
301 | "\n",
302 | "x = boston.data[:,5][:,None]\n",
303 | "y = np.reshape(boston.target, (-1, 1))\n",
304 | "\n",
305 | "nb_features = x.shape[1]\n",
306 | "\n",
307 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n",
308 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n",
309 | "\n",
310 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n",
311 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {},
318 | "outputs": [],
319 | "source": [
320 | "# Creation of the graph\n",
321 | "model_output = tf.matmul(X, A) + b\n",
322 | "\n",
323 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n",
324 | "\n",
325 | "# Uncomment to get Ridge or Lasso\n",
326 | "\"\"\"\n",
327 | "beta = 0.005\n",
328 | "regularizer = tf.nn.l2_loss(A)\n",
329 | "loss = loss + beta * regularizer\n",
330 | "\"\"\"\n",
331 | "\"\"\"\n",
332 | "beta = 0.5\n",
333 | "regularizer = tf.reduce_mean(tf.abs(A))\n",
334 | "loss = loss + beta * regularizer\n",
335 | "\"\"\"\n",
336 | "\n",
337 | "grad_speed = 1e-3\n",
338 | "my_opt = tf.train.GradientDescentOptimizer(grad_speed)\n",
339 | "train_step = my_opt.minimize(loss)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {},
346 | "outputs": [],
347 | "source": [
348 | "# Run the optimization\n",
349 | "loss_vec = []\n",
350 | "with tf.Session() as sess:\n",
351 | " sess.run(tf.global_variables_initializer())\n",
352 | " for epoch in range(n_epochs):\n",
353 | " permut = np.random.permutation(len(x))\n",
354 | " for j in range(0, len(x), batch_size):\n",
355 | " batch = permut[j:j+batch_size]\n",
356 | " Xs = x[batch]\n",
357 | " Ys = y[batch]\n",
358 | "\n",
359 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n",
360 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n",
361 | " \n",
362 | " if epoch % steps == steps - 1:\n",
363 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n",
364 | " loss_vec.append(temp_loss)\n",
365 | "\n",
366 | " (A_, b_) = sess.run([A, b])\n",
367 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n",
368 | " print('Loss = %.8f' % temp_loss)\n",
369 | " print(\"\")\n",
370 | "\n",
371 | "\n",
372 | " [slope, y_intercept] = sess.run([A, b])\n",
373 | " prediction = sess.run(model_output, feed_dict={X: x})\n",
374 | " mse = mean_squared_error(y, prediction)\n",
375 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n",
376 | " rmse = np.sqrt(mse)\n",
377 | " print('RMSE (on training data): {}'.format(rmse))\n",
378 | " r2 = r2_score(y, prediction)\n",
379 | " print(\"R2 (on training data): {:.2}\".format(r2))\n",
380 | "\n",
381 | "best_fit = []\n",
382 | "for i in x:\n",
383 | " best_fit.append(slope[0]*i+y_intercept[0])"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": null,
389 | "metadata": {},
390 | "outputs": [],
391 | "source": [
392 | "# Plot 1D best fit\n",
393 | "\n",
394 | "fig,ax = plt.subplots()\n",
395 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n",
396 | "ax.set_ylabel(\"House Price\")\n",
397 | "\n",
398 | "ax.scatter(x, y, s=2, label='Data Points')\n",
399 | "ax.plot(x, np.array(best_fit), '-', lw=2, color=\"#f9a602\", label='Best fit line')\n",
400 | "ax.legend(loc='upper left')\n",
401 | "\n",
402 | "fig.savefig('REGRESSION_FIG_06.png')\n",
403 | "\n",
404 | "# Plot loss over time\n",
405 | "plt.figure()\n",
406 | "fig,ax = plt.subplots()\n",
407 | "ax.set_title('Loss per Epoch')\n",
408 | "ax.set_xlabel('Epoch')\n",
409 | "ax.set_ylabel('Loss')\n",
410 | "\n",
411 | "ax.plot(loss_vec, 'k-')\n",
412 | "\n",
413 | "fig.savefig('REGRESSION_FIG_07.png')"
414 | ]
415 | },
416 | {
417 | "cell_type": "markdown",
418 | "metadata": {},
419 | "source": [
420 | "What happens if we move to use all the features?"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": null,
426 | "metadata": {},
427 | "outputs": [],
428 | "source": [
429 | "# Creation of the scaffolding\n",
430 | "\n",
431 | "import tensorflow as tf\n",
432 | "tf.reset_default_graph()\n",
433 | "\n",
434 | "x = boston.data\n",
435 | "y = np.reshape(boston.target, (-1, 1))\n",
436 | "\n",
437 | "nb_features = x.shape[1]\n",
438 | "\n",
439 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n",
440 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n",
441 | "\n",
442 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n",
443 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": null,
449 | "metadata": {},
450 | "outputs": [],
451 | "source": [
452 | "# Creation of the graph\n",
453 | "model_output = tf.matmul(X, A) + b\n",
454 | "\n",
455 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n",
456 | "\n",
457 | "# Uncomment to get Ridge or Lasso\n",
458 | "\"\"\"\n",
459 | "beta = 0.005\n",
460 | "regularizer = tf.nn.l2_loss(A)\n",
461 | "loss = loss + beta * regularizer\n",
462 | "\"\"\"\n",
463 | "\"\"\"\n",
464 | "beta = 0.5\n",
465 | "regularizer = tf.reduce_mean(tf.abs(A))\n",
466 | "loss = loss + beta * regularizer\n",
467 | "\"\"\"\n",
468 | "\n",
469 | "grad_speed = 5e-7\n",
470 | "my_opt = tf.train.GradientDescentOptimizer(grad_speed)\n",
471 | "train_step = my_opt.minimize(loss)"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": null,
477 | "metadata": {},
478 | "outputs": [],
479 | "source": [
480 | "# Run the optimization\n",
481 | "loss_vec = []\n",
482 | "with tf.Session() as sess:\n",
483 | " sess.run(tf.global_variables_initializer())\n",
484 | " for epoch in range(n_epochs):\n",
485 | " permut = np.random.permutation(len(x))\n",
486 | " for j in range(0, len(x), batch_size):\n",
487 | " batch = permut[j:j+batch_size]\n",
488 | " Xs = x[batch]\n",
489 | " Ys = y[batch]\n",
490 | "\n",
491 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n",
492 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n",
493 | " \n",
494 | " if epoch % steps == steps - 1:\n",
495 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n",
496 | " loss_vec.append(temp_loss)\n",
497 | "\n",
498 | " (A_, b_) = sess.run([A, b])\n",
499 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n",
500 | " print('Loss = %.8f' % temp_loss)\n",
501 | " print(\"\")\n",
502 | "\n",
503 | "\n",
504 | " [slope, y_intercept] = sess.run([A, b])\n",
505 | " prediction = sess.run(model_output, feed_dict={X: x})\n",
506 | " mse = mean_squared_error(y, prediction)\n",
507 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n",
508 | " rmse = np.sqrt(mse)\n",
509 | " print('RMSE (on training data): {}'.format(rmse))\n",
510 | " r2 = r2_score(y, prediction)\n",
511 | " print(\"R2 (on training data): {:.2}\".format(r2))\n",
512 | "\n",
513 | "best_fit = []\n",
514 | "for i in x:\n",
515 | " best_fit.append(slope[0]*i+y_intercept[0])"
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "execution_count": null,
521 | "metadata": {},
522 | "outputs": [],
523 | "source": [
524 | "# Plot loss over time\n",
525 | "plt.figure()\n",
526 | "fig,ax = plt.subplots()\n",
527 | "ax.set_title('Loss per Epoch')\n",
528 | "ax.set_xlabel('Epoch')\n",
529 | "ax.set_ylabel('Loss')\n",
530 | "\n",
531 | "ax.plot(loss_vec, 'k-')\n",
532 | "\n",
533 | "fig.savefig('REGRESSION_FIG_08.png')"
534 | ]
535 | },
536 | {
537 | "cell_type": "markdown",
538 | "metadata": {
539 | "collapsed": true
540 | },
541 | "source": [
542 | "## E2006 Dataset"
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {},
548 | "source": [
549 | "Load data:\n"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": null,
555 | "metadata": {},
556 | "outputs": [],
557 | "source": [
558 | "from sklearn.datasets import load_svmlight_file\n",
559 | "data, target = load_svmlight_file('data/E2006.train')"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {},
565 | "source": [
566 | "Compute error on training data to demonstrate that we can obtain near perfect scores:"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": null,
572 | "metadata": {},
573 | "outputs": [],
574 | "source": [
575 | "lr = LinearRegression()\n",
576 | "lr.fit(data, target)\n",
577 | "pred = lr.predict(data) \n",
578 | "\n",
579 | "print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
580 | "print('R2 on training, {:.2}'.format(r2_score(target, pred)))"
581 | ]
582 | },
583 | {
584 | "cell_type": "markdown",
585 | "metadata": {},
586 | "source": [
587 | "However, we do not do so well on cross-validation:"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "execution_count": null,
593 | "metadata": {},
594 | "outputs": [],
595 | "source": [
596 | "kf = KFold(n_splits=5)\n",
597 | "pred = cross_val_predict(lr, data, target, cv=kf)\n",
598 | "\n",
599 | "print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
600 | "print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))"
601 | ]
602 | },
603 | {
604 | "cell_type": "markdown",
605 | "metadata": {},
606 | "source": [
607 | "Now, we try _an Elastic net_:"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": null,
613 | "metadata": {},
614 | "outputs": [],
615 | "source": [
616 | "# Edit the lines below if you want to switch method: \n",
617 | "met = ElasticNet(alpha=0.1)\n",
618 | "met.fit(data, target)\n",
619 | "pred = met.predict(data)\n",
620 | "\n",
621 | "print('[EN 0.1] RMSE on training: {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
622 | "print('[EN 0.1] R2 on training: {:.2}'.format(r2_score(target, pred)))"
623 | ]
624 | },
625 | {
626 | "cell_type": "markdown",
627 | "metadata": {},
628 | "source": [
629 | "Not a perfect prediction on the training data anymore, but let us check the value on cross-validation:"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": null,
635 | "metadata": {},
636 | "outputs": [],
637 | "source": [
638 | "pred = cross_val_predict(met, data, target, cv=kf)\n",
639 | "\n",
640 | "print('[EN 0.1] RMSE on testing (5 fold): {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
641 | "print('[EN 0.1] R2 on testing (5 fold): {:.2}'.format(r2_score(target, pred)))"
642 | ]
643 | },
644 | {
645 | "cell_type": "markdown",
646 | "metadata": {},
647 | "source": [
648 | "We now use `ElasticNetCV` to set parameters automatically:"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": [
657 | "from sklearn.linear_model import ElasticNetCV\n",
658 | "# Construct an ElasticNetCV object (use all available CPUs)\n",
659 | "met = ElasticNetCV(n_jobs=-1)\n",
660 | "\n",
661 | "met.fit(data, target)\n",
662 | "pred = met.predict(data)\n",
663 | "print('[EN CV] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
664 | "print('[EN CV] R2 on training, {:.2}'.format(r2_score(target, pred)))\n",
665 | "\n",
666 | "pred = cross_val_predict(met, data, target, cv=kf)\n",
667 | "print('[EN CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
668 | "print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))"
669 | ]
670 | },
671 | {
672 | "cell_type": "markdown",
673 | "metadata": {},
674 | "source": [
675 | "This is a pretty good general-purpose regression object:"
676 | ]
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": null,
681 | "metadata": {
682 | "scrolled": true
683 | },
684 | "outputs": [],
685 | "source": [
686 | "# Construct an ElasticNetCV object (use all available CPUs)\n",
687 | "met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])\n",
688 | "\n",
689 | "pred = cross_val_predict(met, data, target, cv=kf)\n",
690 | "\n",
691 | "print('[EN CV l1_ratio] RMSE on testing(5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n",
692 | "print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))"
693 | ]
694 | },
695 | {
696 | "cell_type": "markdown",
697 | "metadata": {},
698 | "source": [
699 | "Now the final result:"
700 | ]
701 | },
702 | {
703 | "cell_type": "code",
704 | "execution_count": null,
705 | "metadata": {},
706 | "outputs": [],
707 | "source": [
708 | "fig, ax = plt.subplots()\n",
709 | "ax.scatter(target, pred, c='k', s=1)\n",
710 | "ax.plot([-5,-1], [-5,-1], 'r-', lw=2)\n",
711 | "ax.set_xlabel('Actual value')\n",
712 | "ax.set_ylabel('Predicted value')\n",
713 | "fig.savefig('REGRESSION_FIG_05.png')"
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "execution_count": null,
719 | "metadata": {},
720 | "outputs": [],
721 | "source": []
722 | }
723 | ],
724 | "metadata": {
725 | "anaconda-cloud": {},
726 | "kernelspec": {
727 | "display_name": "Python 3",
728 | "language": "python",
729 | "name": "python3"
730 | },
731 | "language_info": {
732 | "codemirror_mode": {
733 | "name": "ipython",
734 | "version": 3
735 | },
736 | "file_extension": ".py",
737 | "mimetype": "text/x-python",
738 | "name": "python",
739 | "nbconvert_exporter": "python",
740 | "pygments_lexer": "ipython3",
741 | "version": "3.6.5"
742 | }
743 | },
744 | "nbformat": 4,
745 | "nbformat_minor": 2
746 | }
747 |
--------------------------------------------------------------------------------
/Chapter03/data/.gitignore:
--------------------------------------------------------------------------------
1 | E2006.train
2 |
--------------------------------------------------------------------------------
/Chapter03/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | curl -O https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.train.bz2
3 | bunzip2 E2006.train.bz2
4 |
5 |
--------------------------------------------------------------------------------
/Chapter04/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curl -O https://ia800107.us.archive.org/27/items/stackexchange/stackoverflow.com-Posts.7z
4 |
5 | p7zip -d stackoverflow.com-Posts.7z
6 |
--------------------------------------------------------------------------------
/Chapter06/data/toy/01.txt:
--------------------------------------------------------------------------------
1 | This is a toy post about machine learning. Actually, it contains not much interesting stuff.
--------------------------------------------------------------------------------
/Chapter06/data/toy/02.txt:
--------------------------------------------------------------------------------
1 | Imaging databases provide storage capabilities.
--------------------------------------------------------------------------------
/Chapter06/data/toy/03.txt:
--------------------------------------------------------------------------------
1 | Most imaging databases save images permanently.
2 |
--------------------------------------------------------------------------------
/Chapter06/data/toy/04.txt:
--------------------------------------------------------------------------------
1 | Imaging databases store data.
--------------------------------------------------------------------------------
/Chapter06/data/toy/05.txt:
--------------------------------------------------------------------------------
1 | Imaging databases store data. Imaging databases store data. Imaging databases store data.
--------------------------------------------------------------------------------
/Chapter07/README.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Chapter 8
3 | =========
4 |
5 | Support code for *Chapter 8: Recommendations*.
6 |
7 | The code refers to the second edition of the book and this code has been
8 | significantly refactored when compared to the first one.
9 |
10 | Ratings Prediction
11 | ------------------
12 |
13 | Note that since the partition of the data into training and testing is random,
14 | every time you run the code, the results will be different.
15 |
16 |
17 | load_ml100k.py
18 | Load data & partition into test/train
19 | norm.py
20 | Normalize the data
21 | corrneighbours.py
22 | Neighbour models based on correlation
23 | regression.py
24 | Regression models
25 | stacked.py
26 | Stacked predictions
27 | averaged.py
28 | Averaging of predictions (mentioned in book, but code is not shown there).
29 |
30 | Association Rule Mining
31 | -----------------------
32 |
33 | Check the folder ``apriori/``
34 |
35 | apriori/histogram.py
36 | Print a histogram of how many times each product was bought
37 | apriori/apriori.py
38 | Implementation of Apriori algorithm and association rule building
39 | apriori/apriori_example.py
40 | Example of Apriori algorithm in retail dataset
41 |
42 |
--------------------------------------------------------------------------------
/Chapter07/apriori/.gitignore:
--------------------------------------------------------------------------------
1 | retail.dat.gz
2 |
--------------------------------------------------------------------------------
/Chapter07/apriori/apriori.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | from collections import namedtuple
9 |
10 |
def apriori(dataset, minsupport, maxsize):
    '''
    freqsets, support = apriori(dataset, minsupport, maxsize)

    Compute frequent itemsets with the Apriori algorithm.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    minsupport : int
        Minimal support for frequent items
    maxsize : int
        Maximal size of frequent items to return

    Returns
    -------
    freqsets : sequence of sequences
    support : dictionary
        This associates each itemset (represented as a frozenset) with a float
        (the support of that itemset)
    '''
    from collections import defaultdict

    # For every element, collect the indices of the transactions containing
    # it; baskets does the same keyed by (singleton) itemset.
    baskets = defaultdict(list)
    pointers = defaultdict(list)
    for tid, transaction in enumerate(dataset):
        for item in transaction:
            pointers[item].append(tid)
            baskets[frozenset([item])].append(tid)

    # Keep only elements that can reach minsupport and switch to frozensets
    # so the set intersections below are fast
    pointers = {item: frozenset(tids)
                for item, tids in pointers.items()
                if len(tids) >= minsupport}
    for key in baskets:
        baskets[key] = frozenset(baskets[key])

    # Valid are all elements whose support is >= minsupport
    valid = set()
    for itemset, tids in baskets.items():
        if len(tids) >= minsupport:
            valid.update(itemset)

    # First generation of itemsets: singletons over the valid elements
    itemsets = [frozenset([v]) for v in valid]
    freqsets = []
    for i in range(maxsize - 1):
        print("At iteration {}, number of frequent baskets: {}".format(
            i, len(itemsets)))
        newsets = []
        for it in itemsets:
            ccounts = baskets[it]
            for v, pv in pointers.items():
                if v in it:
                    continue
                # Transactions containing both `it` and element `v`
                csup = ccounts & pv
                if len(csup) < minsupport:
                    continue
                candidate = frozenset(it | frozenset([v]))
                if candidate not in baskets:
                    newsets.append(candidate)
                    baskets[candidate] = csup
        freqsets.extend(itemsets)
        itemsets = newsets
        if not itemsets:
            break
    # Support is simply the number of transactions containing the itemset
    support = {itemset: float(len(tids)) for itemset, tids in baskets.items()}
    return freqsets, support
83 |
84 |
# A namedtuple to collect all values that may be interesting
AssociationRule = namedtuple('AssociationRule', ['antecendent', 'consequent', 'base', 'py_x', 'lift'])

def association_rules(dataset, freqsets, support, minlift):
    '''
    for assoc_rule in association_rules(dataset, freqsets, support, minlift):
        ...

    This function consumes the output of ``apriori``.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    freqsets : sequence of sequences
    support : dictionary
    minlift : int
        minimal lift of yielded rules

    Returns
    -------
    assoc_rule : sequence of AssociationRule objects
    '''
    nr_transactions = float(len(dataset))
    for itemset in freqsets:
        # Only itemsets with at least two elements can be split into a rule
        if len(itemset) <= 1:
            continue
        for element in itemset:
            consequent = frozenset([element])
            antecendent = itemset - consequent
            # Estimated P(consequent | antecendent)
            py_x = support[itemset] / support[antecendent]
            # Baseline probability of the consequent
            base = support[consequent] / nr_transactions
            lift = py_x / base
            if lift > minlift:
                yield AssociationRule(antecendent, consequent, base, py_x, lift)
119 |
120 |
--------------------------------------------------------------------------------
/Chapter07/apriori/apriori_example.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
from apriori import apriori, association_rules
from gzip import GzipFile

# Load the dataset: each line of the gzipped file is one transaction,
# encoded as whitespace-separated integer item ids
dataset = []
for line in GzipFile('retail.dat.gz'):
    dataset.append([int(tok) for tok in line.split()])

freqsets, support = apriori(dataset, 80, maxsize=16)
rules = list(association_rules(dataset, freqsets, support, minlift=30.0))

# Report the discovered rules, highest lift first
rules.sort(key=lambda ar: ar.lift, reverse=True)
for ar in rules:
    print('{} -> {} (lift = {:.4})'.format(
        set(ar.antecendent), set(ar.consequent), ar.lift))
24 |
--------------------------------------------------------------------------------
/Chapter07/apriori/apriori_naive.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
from collections import defaultdict
from itertools import chain
from gzip import GzipFile

# Minimal number of transactions an itemset must appear in to be frequent
minsupport = 80

# Each line of the gzipped file is one transaction: integer item ids
dataset = [[int(tok) for tok in line.strip().split()]
            for line in GzipFile('retail.dat.gz')]

# Count how often each individual element occurs
counts = defaultdict(int)
for elem in chain(*dataset):
    counts[elem] += 1

# Only elements that have at least minsupport should be considered.
valid = set(el for el, c in counts.items() if (c >= minsupport))

# Filter the dataset to contain only valid elements
# (This step is not strictly necessary, but will make the rest of the code
# faster as the itemsets will be smaller):
dataset = [[el for el in ds if (el in valid)] for ds in dataset]

# Convert to frozenset for fast processing
dataset = [frozenset(ds) for ds in dataset]

itemsets = [frozenset([v]) for v in valid]
freqsets = itemsets[:]
for i in range(16):
    print("At iteration {}, number of frequent baskets: {}".format(
        i, len(itemsets)))
    nextsets = []

    tested = set()
    for it in itemsets:
        for v in valid:
            if v not in it:
                # Create a new candidate set by adding v to it
                c = (it | frozenset([v]))

                # Check if we have tested it already:
                if c in tested:
                    continue
                tested.add(c)

                # Count support by looping over dataset
                # This step is slow.
                # Check `apriori.py` for a better implementation.
                support_c = sum(1 for d in dataset if d.issuperset(c))
                # Use an inclusive >= threshold to match the element filter
                # above and the implementation in apriori.py (the original
                # strict > silently dropped itemsets whose support is
                # exactly minsupport)
                if support_c >= minsupport:
                    nextsets.append(c)
    freqsets.extend(nextsets)
    itemsets = nextsets
    if not len(itemsets):
        break
print("Finished!")


def rules_from_itemset(itemset, dataset, minlift=1.):
    '''Print all association rules derivable from `itemset` whose lift
    exceeds `minlift`.

    Parameters
    ----------
    itemset : frozenset
        Frequent itemset with at least two elements
    dataset : sequence of frozensets
        The transactions
    minlift : float, optional
        Minimal lift for a rule to be printed
    '''
    nr_transactions = float(len(dataset))
    for item in itemset:
        consequent = frozenset([item])
        antecedent = itemset-consequent
        # base: how many transactions contain the consequent item
        base = 0.0
        # acount: how many transactions contain the antecedent
        acount = 0.0

        # ccount: how many transactions contain the full itemset
        ccount = 0.0
        for d in dataset:
            if item in d: base += 1
            if d.issuperset(itemset): ccount += 1
            if d.issuperset(antecedent): acount += 1
        base /= nr_transactions
        # P(consequent | antecedent) and its lift over the baseline
        p_y_given_x = ccount/acount
        lift = p_y_given_x / base
        if lift > minlift:
            print('Rule {0} -> {1} has lift {2}'
                  .format(antecedent, consequent, lift))


for itemset in freqsets:
    if len(itemset) > 1:
        rules_from_itemset(itemset, dataset, minlift=4.)
88 |
--------------------------------------------------------------------------------
/Chapter07/apriori/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | wget http://fimi.ua.ac.be/data/retail.dat.gz
3 |
--------------------------------------------------------------------------------
/Chapter07/data/.gitignore:
--------------------------------------------------------------------------------
1 | retail.dat.gz
2 | ml-100k.zip
3 | /ml-100k/
4 |
--------------------------------------------------------------------------------
/Chapter07/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | curl -L -O http://files.grouplens.org/papers/ml-100k.zip
3 | unzip ml-100k.zip
4 | curl -L -O http://fimi.ua.ac.be/data/retail.dat.gz
5 |
--------------------------------------------------------------------------------
/Chapter07/load_ml100k.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
def load():
    '''Load ML-100k data

    Returns the review matrix as a numpy array'''
    import numpy as np
    from scipy import sparse
    from os import path

    if not path.exists('data/ml-100k/u.data'):
        raise IOError("Data has not been downloaded.\nTry the following:\n\n\tcd data\n\t./download.sh")

    # The input is in the form of a CSC sparse matrix, so it's a natural fit to
    # load the data, but we then convert to a more traditional array before
    # returning
    # Columns of u.data are: user id, item id, rating, timestamp
    data = np.loadtxt('data/ml-100k/u.data')
    # Sparse index arrays must be integers; np.loadtxt returns floats and
    # recent scipy versions reject float indices
    ij = data[:, :2].astype(int)
    ij -= 1  # original data is in 1-based system
    values = data[:, 2]
    reviews = sparse.csc_matrix((values, ij.T)).astype(float)
    return reviews.toarray()
28 |
def get_train_test(reviews=None, random_state=None):
    '''Split data into training & testing

    Holds out 10% of the known ratings as the test set; the split is
    deterministic for a fixed `random_state`.

    Parameters
    ----------
    reviews : ndarray, optional
        Input data (loaded from disk when omitted)
    random_state : optional
        Seed for the random number generator

    Returns
    -------
    train : ndarray
        training data
    test : ndarray
        testing data
    '''
    import numpy as np
    import random

    rng = random.Random(random_state)
    if reviews is None:
        reviews = load()

    # Coordinates of every known (non-zero) rating
    U, M = np.where(reviews)
    n_test = len(U) // 10
    chosen = np.array(rng.sample(range(len(U)), n_test))
    rows = U[chosen]
    cols = M[chosen]

    # Training data is everything except the held-out entries
    train = reviews.copy()
    train[rows, cols] = 0

    # Test data contains only the held-out entries
    test = np.zeros_like(reviews)
    test[rows, cols] = reviews[rows, cols]

    return train, test
59 |
60 |
--------------------------------------------------------------------------------
/Chapter07/stacked.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import load_ml100k
3 | import regression
4 | import corrneighbours
5 | from sklearn import linear_model, metrics
6 | import norm
7 |
def predict(train):
    # Hold out part of the training data so the stacking weights can be
    # fit on ratings the base learners did not see
    tr_train, tr_test = load_ml100k.get_train_test(train, random_state=34)

    # Base learners: each model is applied to users (rows) and, via the
    # transpose, to movies (columns)
    tr_predicted0 = regression.predict(tr_train)
    tr_predicted1 = regression.predict(tr_train.T).T
    tr_predicted2 = corrneighbours.predict(tr_train)
    tr_predicted3 = corrneighbours.predict(tr_train.T).T
    tr_predicted4 = norm.predict(tr_train)
    tr_predicted5 = norm.predict(tr_train.T).T
    predictions = [tr_predicted0, tr_predicted1, tr_predicted2,
                   tr_predicted3, tr_predicted4, tr_predicted5]

    # Learn a linear combination of the base predictions on the held-out
    # entries only
    mask = tr_test > 0
    stack_tr = np.array([p[mask] for p in predictions]).T
    lr = linear_model.LinearRegression()
    lr.fit(stack_tr, tr_test[mask])

    # Apply the learned combination to every entry of the matrix
    stack_te = np.array([p.ravel() for p in predictions]).T
    return lr.predict(stack_te).reshape(train.shape)
38 |
39 |
def main():
    # Evaluate the stacked predictor with R^2 over the held-out ratings
    train, test = load_ml100k.get_train_test(random_state=12)
    predicted = predict(train)
    mask = test > 0
    r2 = metrics.r2_score(test[mask], predicted[mask])
    print('R2 stacked: {:.2%}'.format(r2))


if __name__ == '__main__':
    main()
48 |
--------------------------------------------------------------------------------
/Chapter09/data/not_authorized.tsv:
--------------------------------------------------------------------------------
1 | 126213333123743744
2 | 126079414986485761
3 | 126076743613284354
4 | 126213333123743744
5 | 126079414986485761
6 | 126076743613284354
7 | 126049183865114624
8 | 125633065757310976
9 | 126213333123743744
10 | 126079414986485761
11 | 126076743613284354
12 | 126049183865114624
13 | 125633065757310976
14 | 126213333123743744
15 | 126079414986485761
16 | 126076743613284354
17 | 126049183865114624
18 | 125633065757310976
19 | 126213333123743744
20 | 126079414986485761
21 | 126076743613284354
22 | 126049183865114624
23 | 125633065757310976
24 | 126213333123743744
25 | 126079414986485761
26 | 126076743613284354
27 | 126049183865114624
28 | 125633065757310976
29 | 126213333123743744
30 | 126079414986485761
31 | 126076743613284354
32 | 126049183865114624
33 | 125633065757310976
34 | 125264731035537409
35 | 126153311521996800
36 | 126121175926571009
37 | 125988395787882497
38 | 125954651152592896
39 | 125799384976863232
40 | 125681375058735104
41 | 125675806977556480
42 | 125673358418391041
43 | 125659125886623744
44 | 126213333123743744
45 | 126079414986485761
46 | 126076743613284354
47 | 126049183865114624
48 | 125633065757310976
49 | 125264731035537409
50 | 126153311521996800
51 | 126121175926571009
52 | 125988395787882497
53 | 125954651152592896
54 | 125799384976863232
55 | 125681375058735104
56 | 125675806977556480
57 | 125673358418391041
58 | 125659125886623744
59 | 125561930416013312
60 | 125475953509015552
61 | 125371779039502336
62 | 125368089159286784
63 | 125334519254482944
64 | 125309427422203904
65 | 126213333123743744
66 | 126079414986485761
67 | 126076743613284354
68 | 126049183865114624
69 | 125633065757310976
70 | 125264731035537409
71 | 126153311521996800
72 | 126121175926571009
73 | 125988395787882497
74 | 125954651152592896
75 | 125799384976863232
76 | 125681375058735104
77 | 125675806977556480
78 | 125673358418391041
79 | 125659125886623744
80 | 125561930416013312
81 | 125475953509015552
82 | 125371779039502336
83 | 125368089159286784
84 | 125334519254482944
85 | 125309427422203904
86 | 125204228967903232
87 | 126213333123743744
88 | 126079414986485761
89 | 126076743613284354
90 | 126049183865114624
91 | 125633065757310976
92 | 125264731035537409
93 | 126153311521996800
94 | 126121175926571009
95 | 125988395787882497
96 | 125954651152592896
97 | 125799384976863232
98 | 125681375058735104
99 | 125675806977556480
100 | 125673358418391041
101 | 125659125886623744
102 | 125561930416013312
103 | 125475953509015552
104 | 125371779039502336
105 | 125368089159286784
106 | 125334519254482944
107 | 125309427422203904
108 | 125204228967903232
109 | 126394795802370049
110 | 126386085164101634
111 | 126382776072146944
112 | 126380323733909504
113 | 126317201962700800
114 | 126229089651654656
115 | 126186795808456704
116 | 126110770864979968
117 | 126039090578735104
118 | 126029114850295809
119 | 126213333123743744
120 | 126079414986485761
121 | 126076743613284354
122 | 126049183865114624
123 | 125633065757310976
124 | 125264731035537409
125 | 126153311521996800
126 | 126121175926571009
127 | 125988395787882497
128 | 125954651152592896
129 | 125799384976863232
130 | 125681375058735104
131 | 125675806977556480
132 | 125673358418391041
133 | 125659125886623744
134 | 125561930416013312
135 | 125475953509015552
136 | 125371779039502336
137 | 125368089159286784
138 | 125334519254482944
139 | 125309427422203904
140 | 125204228967903232
141 | 126394795802370049
142 | 126386085164101634
143 | 126382776072146944
144 | 126380323733909504
145 | 126317201962700800
146 | 126229089651654656
147 | 126186795808456704
148 | 126110770864979968
149 | 126039090578735104
150 | 126029114850295809
151 | 125994997609803776
152 | 125992594395250688
153 | 125988651426512899
154 | 125981074114359297
155 | 125980615664336896
156 | 125958702455988225
157 | 125932876721168384
158 | 125918906215968771
159 | 125725274317914112
160 | 125708240225959936
161 | 125641351848136704
162 | 125630016485732352
163 | 125629788563050496
164 | 125538769632886784
165 | 125347618862792705
166 | 125305567148388352
167 | 125196751387889665
168 | 126213333123743744
169 | 126079414986485761
170 | 126076743613284354
171 | 126049183865114624
172 | 125633065757310976
173 | 125264731035537409
174 | 126153311521996800
175 | 126121175926571009
176 | 125988395787882497
177 | 125954651152592896
178 | 125930962545672192
179 | 125910538550124545
180 | 125797001337122817
181 | 125232405517844481
182 | 126534770095169536
183 | 126520518609350656
184 | 126516914678808578
185 | 126494834449063936
186 | 126494280318582784
187 | 126494100252925954
188 | 126492852615262208
189 | 126488447098695680
190 | 126488384410619906
191 | 126487332865056768
192 | 126532210210783232
193 | 126520550876127232
194 | 126505594290057216
195 | 126497514168922112
196 | 126494895501348864
197 | 126491509527805952
198 | 126528316978102272
199 | 126528078057963520
200 | 126523549493112832
201 | 126520920352358401
202 | 126510284536942592
203 | 126504105530236928
204 | 126499521344712704
205 | 126497100866387969
206 | 126496853742198784
207 | 126494691016441857
208 | 126494569184505856
209 | 126493312650719232
210 | 126487788433584129
211 | 126534127435530240
212 | 126529490582118400
213 | 126528938326495232
214 | 126526465280970752
215 | 126526113131413504
216 | 126519715085549568
217 | 126511257170886656
218 | 126504285436514304
219 | 126497446955188224
220 | 126495762568851456
221 | 126495208505479168
222 | 126494166145437696
223 | 126493860804308992
224 | 126492542610051072
225 | 126490549367738368
226 | 126484213737340928
227 | 126784810755690496
228 | 126700014385897472
229 | 126635317108289536
230 | 126795256225210368
231 | 126789710705213440
232 | 126728277896347649
233 | 126674460131606529
234 | 126671006302617600
235 | 126593636627513344
236 | 126519595682119681
237 | 126796467213058048
238 | 126734290850557952
239 | 126726063484178432
240 | 126679463839801344
241 | 126673062258147328
242 | 126637471676104704
243 | 126622818220785664
244 | 126622165595459584
245 | 126612152579657728
246 | 126506057613848576
247 | 126505970317787136
248 | 126495306681548800
249 | 126879662851887104
250 | 126877171926040576
251 | 126876654118240257
252 | 126876107881455616
253 | 126867350476697601
254 | 126863084433326080
255 | 126857095088840706
256 | 126883243726344193
257 | 126881376074076161
258 | 126858607789740032
259 | 126883335875203072
260 | 126883013236752384
261 | 126882832319651840
262 | 126878130353876992
263 | 126877869547855872
264 | 126875416760815616
265 | 126875059477426176
266 | 126870550546096128
267 | 126868828457144321
268 | 126868429796933632
269 | 126868271625539585
270 | 126867067776405506
271 | 126866413053939712
272 | 126865888724004864
273 | 126865837800951808
274 | 126865038085591041
275 | 126864886402777088
276 | 126864861576704000
277 | 126863772877996034
278 | 126863571912114177
279 | 126862618836221954
280 | 126860955605934081
281 | 126859710740701185
282 | 126853913591808002
283 | 126882080050262016
284 | 126881227729928193
285 | 126879417220874240
286 | 126875034135433216
287 | 126874145408561152
288 | 126873260385239040
289 | 126872615380987905
290 | 126872361462005760
291 | 126872241693667328
292 | 126872199620591617
293 | 126869762763522049
294 | 126868924590600192
295 | 126868586882007041
296 | 126868349396324352
297 | 126867170742374400
298 | 126866474806673408
299 | 126866312130609152
300 | 126865987365634048
301 | 126864954140803072
302 | 126864673416032256
303 | 126863938339094531
304 | 126862853822099456
305 | 126862343148802048
306 | 126860270181171201
307 | 126860114610241536
308 | 126859857604247552
309 | 126506057613848576
310 | 126505970317787136
311 | 126495306681548800
312 | 126879662851887104
313 | 126877171926040576
314 | 126876654118240257
315 | 126876107881455616
316 | 126867350476697601
317 | 126863084433326080
318 | 126857095088840706
319 | 126883243726344193
320 | 126881376074076161
321 | 126858607789740032
322 | 126883335875203072
323 | 126883013236752384
324 | 126882832319651840
325 | 126878130353876992
326 | 126877869547855872
327 | 126875416760815616
328 | 126875059477426176
329 | 126870550546096128
330 | 126868828457144321
331 | 126868429796933632
332 | 126868271625539585
333 | 126867067776405506
334 | 126866413053939712
335 | 126865888724004864
336 | 126865837800951808
337 | 126865038085591041
338 | 126864886402777088
339 | 126864861576704000
340 | 126863772877996034
341 | 126863571912114177
342 | 126862618836221954
343 | 126860955605934081
344 | 126859710740701185
345 | 126853913591808002
346 | 126882080050262016
347 | 126881227729928193
348 | 126879417220874240
349 | 126875034135433216
350 | 126874145408561152
351 | 126873260385239040
352 | 126872615380987905
353 | 126872361462005760
354 | 126872241693667328
355 | 126872199620591617
356 | 126869762763522049
357 | 126868924590600192
358 | 126868586882007041
359 | 126868349396324352
360 | 126867170742374400
361 | 126866474806673408
362 | 126866312130609152
363 | 126865987365634048
364 | 126864954140803072
365 | 126864673416032256
366 | 126863938339094531
367 | 126862853822099456
368 | 126862343148802048
369 | 126860270181171201
370 | 126860114610241536
371 | 126859857604247552
372 | 126404574230740992
373 | 126350302113824769
374 | 126148685737361408
375 | 126040352237961217
376 | 125995158679461888
377 | 125960325437722624
378 | 125643107260829697
379 | 125608381431025664
380 | 125523414298533888
381 | 125374540107886593
382 | 126405405667627008
383 | 126391082308206593
384 | 125945821240885248
385 | 125943204943114240
386 | 125476730067615744
387 | 125369698840887297
388 | 125202037293064192
389 | 126405821482532864
390 | 126405160934178816
391 | 126379730827083776
392 | 126370776013213697
393 | 126243528832593920
394 | 126225922159427584
395 | 126219340214304768
396 | 126113944891949056
397 | 126061182720278528
398 | 126042506717704192
399 | 126041773356232704
400 | 126016405085757440
401 | 126012833128390656
402 | 126009386022879232
403 | 125943078837161984
404 | 125887065861787648
405 | 125866627337162752
406 | 125866368758333440
407 | 125859792802693120
408 | 125250078108684288
409 | 126385587740610563
410 | 126360606042374144
411 | 126346705292640257
412 | 126260304819662849
413 | 126236984644612096
414 | 125973789526863872
415 | 125967413299773440
416 | 125957826500771840
417 | 125862601677737985
418 | 125699684693065728
419 | 125346522618535937
420 | 126525172969766912
421 | 126514474378203136
422 | 126511000907288576
423 | 126499965869625345
424 | 126497655785402368
425 | 126493192110612480
426 | 126489713782685696
427 | 126489263025033216
428 | 126496987192373248
429 | 126491870900666368
430 | 126491480087986176
431 | 126532019999096832
432 | 126531893649874945
433 | 126520914413236224
434 | 126520531934654465
435 | 126512842194161664
436 | 126509135842914304
437 | 126506232432439296
438 | 126492945057718272
439 | 126486051530354689
440 | 126497618258964480
441 | 126496237879959553
442 | 126492339559608320
443 | 126491356481859585
444 | 126487422249861120
445 | 126487385461633024
446 | 126779217911349248
447 | 126670032951443456
448 | 126583473929588736
449 | 126574432159408129
450 | 126803763603312640
451 | 126794825998663680
452 | 126732384602296320
453 | 126642779064504320
454 | 126611604925194240
455 | 126591976408748032
456 | 126798811262763009
457 | 126761498885361664
458 | 126759986780057600
459 | 126752126880858112
460 | 126749587133308928
461 | 126745438136176640
462 | 126701862383661056
463 | 126689077230698496
464 | 126680181359378432
465 | 126679552310251521
466 | 126611107266834433
467 | 126610365852303361
468 | 126601340242767872
469 | 126880912754475008
470 | 126877362632667136
471 | 126862735953768448
472 | 126858393909608448
473 | 126870358816067584
474 | 126869855621218304
475 | 126866003094290434
476 | 126864575508381696
477 | 126881380503273472
478 | 126881167541665792
479 | 126880571233280000
480 | 126880429256093696
481 | 126880253145657344
482 | 126879867731062784
483 | 126879122298372097
484 | 126877998115852288
485 | 126877965064740864
486 | 126876452762296321
487 | 126876009797656576
488 | 126875887093293056
489 | 126874662268452864
490 | 126872221292576768
491 | 126871857277308930
492 | 126871511326924800
493 | 126871066760065024
494 | 126870943489466368
495 | 126868570226425856
496 | 126867320005066752
497 | 126866353561927680
498 | 126865005009309696
499 | 126861011813810176
500 | 126860964992794624
501 | 126859978941276161
502 | 126882743819833345
503 | 126881169169063937
504 | 126880644105121792
505 | 126880556775522304
506 | 126879958529343488
507 | 126879219484606464
508 | 126878250541645824
509 | 126877540928331777
510 | 126876463965278208
511 | 126874165105008640
512 | 126873756437200896
513 | 126873447912587264
514 | 126873004494954496
515 | 126872365211725824
516 | 126871907302785024
517 | 126871831583002625
518 | 126867611546943490
519 | 126867000030007296
520 | 126866827715420160
521 | 126866759792852992
522 | 126865704380153856
523 | 126865416671862785
524 | 126862832489861121
525 | 126862595117424641
526 | 126862150265352193
527 | 126871857277308930
528 | 126871511326924800
529 | 126871066760065024
530 | 126870943489466368
531 | 126868570226425856
532 | 126867320005066752
533 | 126866353561927680
534 | 126865005009309696
535 | 126861011813810176
536 | 126860964992794624
537 | 126859978941276161
538 | 126882743819833345
539 | 126881169169063937
540 | 126880644105121792
541 | 126880556775522304
542 | 126879958529343488
543 | 126879219484606464
544 | 126878250541645824
545 | 126877540928331777
546 | 126876463965278208
547 | 126874165105008640
548 | 126873756437200896
549 | 126873447912587264
550 | 126873004494954496
551 | 126872365211725824
552 | 126877263311536128
553 | 126870792960086018
554 | 126877263311536128
555 | 126870792960086018
556 |
--------------------------------------------------------------------------------
/Chapter09/twitterauth.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | import sys
9 |
# Paste your personal Twitter API credentials here:
CONSUMER_KEY = None
CONSUMER_SECRET = None

ACCESS_TOKEN_KEY = None
ACCESS_TOKEN_SECRET = None

# Abort with an explanation if any credential has not been filled in
_credentials = (CONSUMER_KEY, CONSUMER_SECRET,
                ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
if any(c is None for c in _credentials):
    print("""\
When doing last code sanity checks for the book, Twitter
was using the API 1.0, which did not require authentication.
With its switch to version 1.1, this has now changed.

It seems that you don't have already created your personal Twitter
access keys and tokens. Please do so at https://dev.twitter.com
and paste the keys/secrets into twitterauth.py.

Sorry for the inconvenience,
The authors.""")

    sys.exit(1)
30 |
--------------------------------------------------------------------------------
/Chapter10/README.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Chapter 4
3 | =========
4 |
5 | Support code for *Chapter 4: Topic Modeling*
6 |
7 |
8 | AP Data
9 | -------
10 |
11 | To download the AP data, use the ``download_ap.sh`` script inside the ``data``
12 | directory::
13 |
14 | cd data
15 | ./download_ap.sh
16 |
17 | Word cloud creation
18 | -------------------
19 |
20 | Word cloud creation requires that ``pytagcloud`` be installed (in turn, this
21 | requires ``pygame``). Since this is not an essential part of the chapter, the
22 | code will work even if you have not installed it (naturally, the cloud image
23 | will not be generated and a warning will be printed).
24 |
25 |
26 | Wikipedia processing
27 | --------------------
28 |
29 | You will need **a lot of disk space**. The download of the Wikipedia text is
30 | 11GB and preprocessing it takes another 24GB to save it in the intermediate
31 | format that gensim uses for a total of 34GB!
32 |
33 | Run the following two commands inside the ``data/`` directory::
34 |
35 | ./download_wp.sh
36 | ./preprocess-wikidata.sh
37 |
38 | As the filenames indicate, the first step will download the data and the second
39 | one will preprocess it. Preprocessing can take several hours, but it is
40 | feasible to run it on a modern laptop. Once the second step is finished, you
41 | may remove the input file if you want to save disk space
42 | (``data/enwiki-latest-pages-articles.xml.bz2``).
43 |
44 | To generate the model, you can run the ``wikitopics_create.py`` script, while
45 | the ``wikitopics_plot.py`` script will plot the most heavily discussed topic as
46 | well as the least heavily discussed one. The code is split into steps as the
47 | first one can take a very long time. Then it saves the results so that you can
48 | later explore them at leisure.
49 |
50 | You should not expect that your results will exactly match the results in the
51 | book, for two reasons:
52 |
53 | 1. The LDA algorithm is a probabilistic algorithm and can give different
54 | results every time it is run.
55 | 2. Wikipedia keeps changing. Thus, even your input data will be different.
56 |
57 | Scripts
58 | -------
59 |
60 | blei_lda.py
61 | Computes LDA using the AP Corpus.
62 | wikitopics_create.py
63 | Create the topic model for Wikipedia using LDA (must download wikipedia database first)
64 | wikitopics_create_hdp.py
65 | Create the topic model for Wikipedia using HDP (must download wikipedia database first)
66 |
--------------------------------------------------------------------------------
/Chapter10/Topic modeling.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Topic Modeling\n",
8 | "\n",
9 | "We start with importing `gensim`"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "**IMPORTANT**: You cannot run this example only from within the notebook. You must first download the data on the command line."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "import gensim\n",
28 | "from gensim import corpora, models, matutils"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "Now the usual imports:"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "collapsed": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "import matplotlib.pyplot as plt\n",
47 | "import numpy as np\n",
48 | "from os import path\n",
49 | "\n",
50 | "\n",
51 | "# Check that data exists\n",
52 | "if not path.exists('./data/ap/ap.dat'):\n",
53 | " print('Error: Expected data to be present at data/ap/')\n",
54 | " print('Please cd into ./data & run ./download_ap.sh')\n",
55 | "\n"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 |       "We will generate 100 topics as in the book, but you can change this setting here:"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "collapsed": true
70 | },
71 | "outputs": [],
72 | "source": [
73 | "NUM_TOPICS = 100"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "Load the data"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": null,
86 | "metadata": {
87 | "collapsed": true
88 | },
89 | "outputs": [],
90 | "source": [
91 | "corpus = corpora.BleiCorpus('./data/ap/ap.dat', './data/ap/vocab.txt')"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "Build the LDA model"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "metadata": {
105 | "scrolled": true
106 | },
107 | "outputs": [],
108 | "source": [
109 | "model = models.ldamodel.LdaModel(\n",
110 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=None)"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {},
117 | "outputs": [],
118 | "source": [
119 | "num_topics_used = [len(model[doc]) for doc in corpus]\n",
120 | "fig,ax = plt.subplots()\n",
121 | "ax.hist(num_topics_used, np.arange(42))\n",
122 | "ax.set_ylabel('Nr of documents')\n",
123 | "ax.set_xlabel('Nr of topics')\n",
124 | "fig.tight_layout()\n",
125 | "fig.savefig('Figure_04_01.png')\n",
126 | "fig"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {
132 | "collapsed": true
133 | },
134 | "source": [
135 | "We can do the same after changing the $\\alpha$ value: "
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "ALPHA = 1.0\n",
145 | "\n",
146 | "model1 = models.ldamodel.LdaModel(\n",
147 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=ALPHA)\n",
148 | "num_topics_used1 = [len(model1[doc]) for doc in corpus]\n",
149 | "\n",
150 | "fig,ax = plt.subplots()\n",
151 | "ax.hist([num_topics_used, num_topics_used1], np.arange(42))\n",
152 | "ax.set_ylabel('Nr of documents')\n",
153 | "ax.set_xlabel('Nr of topics')\n",
154 | "\n",
155 | "# The coordinates below were fit by trial and error to look good\n",
156 | "ax.text(9, 223, r'default alpha')\n",
157 | "ax.text(26, 156, 'alpha=1.0')\n",
158 | "fig.tight_layout()\n",
159 | "fig.savefig('Figure_04_02.png')\n",
160 | "fig"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "### Exploring the topic model\n",
168 | "\n",
169 | "We can explore the mathematical structure of the topics:\n"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "doc = corpus.docbyoffset(0)\n",
179 | "topics = model[doc]\n",
180 | "print(topics)"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {},
186 | "source": [
187 | "This is not very informative, however. Another way to explore is to identify the most discussed topic, i.e., the one with the highest total weight:"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {
194 | "collapsed": true
195 | },
196 | "outputs": [],
197 | "source": [
198 | "topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)\n",
199 | "weight = topics.sum(1)\n",
200 | "max_topic = weight.argmax()"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "Get the top 64 words for this topic.\n",
208 | "Without the argument, show_topic would return only 10 words"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": true
216 | },
217 | "outputs": [],
218 | "source": [
219 | "words = model.show_topic(max_topic, 64)"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "metadata": {},
225 | "source": [
226 | "One way to visualize the results is to build a _word cloud_. For this we use the `wordcloud` module:"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": null,
232 | "metadata": {},
233 | "outputs": [],
234 | "source": [
235 | "from wordcloud import WordCloud\n",
236 | "\n",
237 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n",
238 | "wc = wc.generate_from_frequencies(dict(words))\n",
239 | "\n",
240 | "\n",
241 | "fig,ax = plt.subplots()\n",
242 | "\n",
243 | "ax.imshow(wc, interpolation=\"bilinear\")\n",
244 | "fig"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "# NEWS DATA"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 |     "Now, we repeat the analysis with a corpus of news data (the 20 Newsgroups dataset).\n",
259 |     "\n",
260 |     "First, we download the NLTK stopword list that is used in preprocessing"
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": null,
266 | "metadata": {},
267 | "outputs": [],
268 | "source": [
269 | "import nltk.stem\n",
270 | "\n",
271 | "nltk.download('stopwords')"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {
278 | "collapsed": true
279 | },
280 | "outputs": [],
281 | "source": [
282 | "english_stemmer = nltk.stem.SnowballStemmer('english')\n",
283 | "stopwords = set(nltk.corpus.stopwords.words('english'))\n",
284 | "stopwords.update(['from:', 'subject:', 'writes:', 'writes'])"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "We need to add a little adaptor class:"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {
298 | "collapsed": true
299 | },
300 | "outputs": [],
301 | "source": [
302 | "class DirectText(corpora.textcorpus.TextCorpus):\n",
303 | "\n",
304 | " def get_texts(self):\n",
305 | " return self.input\n",
306 | "\n",
307 | " def __len__(self):\n",
308 | " return len(self.input)\n"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "Load the data"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {},
322 | "outputs": [],
323 | "source": [
324 | "import sklearn.datasets\n",
325 | "dataset = sklearn.datasets.load_mlcomp(\"20news-18828\", \"train\",\n",
326 | " mlcomp_root='./data')\n"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "We preprocess the data to split the data into words and remove stopwords:"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": null,
339 | "metadata": {
340 | "collapsed": true
341 | },
342 | "outputs": [],
343 | "source": [
344 | "otexts = dataset.data\n",
345 | "texts = dataset.data\n",
346 | "\n",
347 | "texts = [t.decode('utf-8', 'ignore') for t in texts]\n",
348 | "texts = [t.split() for t in texts]\n",
349 | "texts = [map(lambda w: w.lower(), t) for t in texts]\n",
350 | "texts = [filter(lambda s: not len(set(\"+-.?!()>@012345689\") & set(s)), t)\n",
351 | " for t in texts]\n",
352 | "texts = [filter(lambda s: (len(s) > 3) and (s not in stopwords), t)\n",
353 | " for t in texts]\n",
354 | "texts = [[english_stemmer.stem(w) for w in t] for t in texts]"
355 | ]
356 | },
357 | {
358 | "cell_type": "markdown",
359 | "metadata": {},
360 | "source": [
361 | "We also remove words that are _too common_:"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {
368 | "collapsed": true
369 | },
370 | "outputs": [],
371 | "source": [
372 | "from collections import defaultdict\n",
373 | "usage = defaultdict(int)\n",
374 | "for t in texts:\n",
375 | " for w in set(t):\n",
376 | " usage[w] += 1\n",
377 | "limit = len(texts) / 10\n",
378 | "too_common = [w for w in usage if usage[w] > limit]\n",
379 | "too_common = set(too_common)\n",
380 | "texts = [[w for w in t if w not in too_common] for t in texts]"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": null,
386 | "metadata": {
387 | "scrolled": true
388 | },
389 | "outputs": [],
390 | "source": [
391 | "corpus = DirectText(texts)\n",
392 | "dictionary = corpus.dictionary\n",
393 | "try:\n",
394 | " dictionary['computer']\n",
395 | "except:\n",
396 | " pass\n",
397 | "\n",
398 | "model = models.ldamodel.LdaModel(\n",
399 | " corpus, num_topics=100, id2word=dictionary.id2token)\n",
400 | "\n",
401 | "thetas = np.zeros((len(texts), 100))\n",
402 | "for i, c in enumerate(corpus):\n",
403 | " for ti, v in model[c]:\n",
404 | " thetas[i, ti] += v"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 |     "We compare all documents to each other **by the topics they contain**:"
412 | ]
413 | },
414 | {
415 | "cell_type": "code",
416 | "execution_count": null,
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "from scipy.spatial import distance\n",
421 | "distances = distance.squareform(distance.pdist(thetas))\n",
422 | "large = distances.max() + 1\n",
423 | "for i in range(len(distances)):\n",
424 | " distances[i, i] = large\n",
425 | "\n",
426 | "print(otexts[1])\n",
427 | "print()\n",
428 | "print()\n",
429 | "print()\n",
430 | "print(otexts[distances[1].argmin()])"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {
436 | "collapsed": true
437 | },
438 | "source": [
439 | "# Modeling Wikipedia"
440 | ]
441 | },
442 | {
443 | "cell_type": "markdown",
444 | "metadata": {},
445 | "source": [
446 | "Load the data\n",
447 | "\n",
448 | "Note that you **must have run the `wikitopics_create.py` script**. This will take a few hours"
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": null,
454 | "metadata": {},
455 | "outputs": [],
456 | "source": [
457 | "import gensim\n",
458 | "if not path.exists('wiki_lda.pkl'):\n",
459 | " import sys\n",
460 | " sys.stderr.write('''\\\n",
461 | "This script must be run after wikitopics_create.py!\n",
462 | "\n",
463 | "That script creates and saves the LDA model (this must onlly be done once).\n",
464 | "This script is responsible for the analysis.''')\n",
465 | " \n",
466 | "# Load the preprocessed Wikipedia corpus (id2word and mm)\n",
467 | "id2word = gensim.corpora.Dictionary.load_from_text(\n",
468 | " 'data/wiki_en_output_wordids.txt.bz2')\n",
469 | "mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')\n",
470 | "\n",
471 | "# Load the precomputed model\n",
472 | "model = gensim.models.ldamodel.LdaModel.load('wiki_lda.pkl')\n",
473 | "\n",
474 | "topics = np.load('topics.npy', mmap_mode='r')"
475 | ]
476 | },
477 | {
478 | "cell_type": "markdown",
479 | "metadata": {},
480 | "source": [
481 | "Compute the number of topics mentioned in each document\n"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": null,
487 | "metadata": {},
488 | "outputs": [],
489 | "source": [
490 | "lens = (topics > 0).sum(axis=1)\n",
491 | "print('Mean number of topics mentioned: {0:.3}'.format(np.mean(lens)))\n",
492 | "print('Percentage of articles mentioning less than 10 topics: {0:.1%}'.format(np.mean(lens <= 10)))\n",
493 | "\n",
494 | "# Weights will be the total weight of each topic\n",
495 | "weights = topics.sum(0)\n",
496 | "\n"
497 | ]
498 | },
499 | {
500 | "cell_type": "markdown",
501 | "metadata": {},
502 | "source": [
503 | "Retrieve the most heavily used topic and plot it as a word cloud:\n"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": null,
509 | "metadata": {},
510 | "outputs": [],
511 | "source": [
512 | "words = model.show_topic(weights.argmax(), 64)\n",
513 | "\n",
514 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n",
515 | "wc = wc.generate_from_frequencies(dict(words))\n",
516 | "\n",
517 | "fig,ax = plt.subplots()\n",
518 | "\n",
519 | "ax.imshow(wc, interpolation=\"bilinear\")\n",
520 | "fig"
521 | ]
522 | },
523 | {
524 | "cell_type": "code",
525 | "execution_count": null,
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "fraction_mention = np.mean(topics[:,weights.argmax()] > 0)\n",
530 | "print(\"The most mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n",
531 | "total_weight = np.mean(topics[:,weights.argmax()])\n",
532 | "print(\"It represents {:.1%} of the total number of words.\".format(total_weight))\n"
533 | ]
534 | },
535 | {
536 | "cell_type": "markdown",
537 | "metadata": {},
538 | "source": [
539 | "Retrieve the **least** heavily used topic and plot it as a word cloud:"
540 | ]
541 | },
542 | {
543 | "cell_type": "code",
544 | "execution_count": null,
545 | "metadata": {},
546 | "outputs": [],
547 | "source": [
548 | "words = model.show_topic(weights.argmin(), 64)\n",
549 | "\n",
550 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n",
551 | "wc = wc.generate_from_frequencies(dict(words))\n",
552 | "fig,ax = plt.subplots()\n",
553 | "\n",
554 | "ax.imshow(wc, interpolation=\"bilinear\")\n",
555 | "fig"
556 | ]
557 | },
558 | {
559 | "cell_type": "markdown",
560 | "metadata": {},
561 | "source": [
562 |     "Again, we can measure how often this topic is used:"
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "execution_count": null,
568 | "metadata": {},
569 | "outputs": [],
570 | "source": [
571 | "fraction_mention = np.mean(topics[:,weights.argmin()] > 0)\n",
572 | "print(\"The least mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n",
573 | "total_weight = np.mean(topics[:,weights.argmin()])\n",
574 | "print(\"It represents {:.1%} of the total number of words.\".format(total_weight))"
575 | ]
576 | }
577 | ],
578 | "metadata": {
579 | "kernelspec": {
580 | "display_name": "Python 3",
581 | "language": "python",
582 | "name": "python3"
583 | },
584 | "language_info": {
585 | "codemirror_mode": {
586 | "name": "ipython",
587 | "version": 3
588 | },
589 | "file_extension": ".py",
590 | "mimetype": "text/x-python",
591 | "name": "python",
592 | "nbconvert_exporter": "python",
593 | "pygments_lexer": "ipython3",
594 | "version": "3.6.2"
595 | }
596 | },
597 | "nbformat": 4,
598 | "nbformat_minor": 2
599 | }
600 |
--------------------------------------------------------------------------------
/Chapter10/data/.gitignore:
--------------------------------------------------------------------------------
1 | ap.tgz
2 | ap/
3 | dataset-379-20news-18828_HJRZF.zip
4 | 379/
5 | enwiki-latest-pages-articles.xml.bz2
6 | wiki_en_output_bow.mm
7 | wiki_en_output_bow.mm.gz
8 | wiki_en_output_bow.mm.index
9 | wiki_en_output_tfidf.mm
10 | wiki_en_output_tfidf.mm.gz
11 | wiki_en_output_tfidf.mm.index
12 | wiki_en_output_wordids.txt.bz2
13 |
--------------------------------------------------------------------------------
/Chapter10/data/download_ap.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget http://www.cs.columbia.edu/~blei/lda-c/ap.tgz
3 | tar xzf ap.tgz
4 |
--------------------------------------------------------------------------------
/Chapter10/data/download_wp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
3 |
--------------------------------------------------------------------------------
/Chapter10/data/preprocess-wikidata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | python -m gensim.scripts.make_wiki enwiki-latest-pages-articles.xml.bz2 wiki_en_output
4 |
--------------------------------------------------------------------------------
/Chapter10/wikitopics_create.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | from __future__ import print_function
9 | import logging
10 | import gensim
11 | import numpy as np
12 |
13 | NR_OF_TOPICS = 100
14 |
15 | # Set up logging in order to get progress information as the model is being built:
16 | logging.basicConfig(
17 | format='%(asctime)s : %(levelname)s : %(message)s',
18 | level=logging.INFO)
19 |
20 | # Load the preprocessed corpus (id2word & mm):
21 | id2word = gensim.corpora.Dictionary.load_from_text(
22 | 'data/wiki_en_output_wordids.txt.bz2')
23 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
24 |
25 | # Calling the constructor is enough to build the model
26 | # This call will take a few hours!
27 | model = gensim.models.ldamodel.LdaModel(
28 | corpus=mm,
29 | id2word=id2word,
30 | num_topics=NR_OF_TOPICS,
31 | update_every=1,
32 | chunksize=10000,
33 | passes=1)
34 |
35 | # Save the model so we do not need to learn it again.
36 | model.save('wiki_lda.pkl')
37 |
38 | # Compute the document/topic matrix
39 | topics = np.zeros((len(mm), model.num_topics))
40 | for di,doc in enumerate(mm):
41 | doc_top = model[doc]
42 | for ti,tv in doc_top:
43 | topics[di,ti] += tv
44 | np.save('topics.npy', topics)
45 |
46 | # Alternatively, we create a sparse matrix and save that. This alternative
47 | # saves disk space, at the cost of slightly more complex code:
48 |
49 | ## from scipy import sparse, io
50 | ## sp = sparse.csr_matrix(topics)
51 | ## io.savemat('topics.mat', {'topics': sp})
52 |
--------------------------------------------------------------------------------
/Chapter10/wikitopics_create_hdp.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | from __future__ import print_function
9 | import logging
10 | import gensim
11 | import numpy as np
12 |
13 | # Set up logging in order to get progress information as the model is being built:
14 | logging.basicConfig(
15 | format='%(asctime)s : %(levelname)s : %(message)s',
16 | level=logging.INFO)
17 |
18 | # Load the preprocessed corpus (id2word & mm):
19 | id2word = gensim.corpora.Dictionary.load_from_text(
20 | 'data/wiki_en_output_wordids.txt.bz2')
21 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
22 |
23 | # Calling the constructor is enough to build the model
24 | # This call will take a few hours!
25 | model = gensim.models.hdpmodel.HdpModel(
26 | corpus=mm,
27 | id2word=id2word,
28 | chunksize=10000)
29 |
30 | # Save the model so we do not need to learn it again.
31 | model.save('wiki_hdp.pkl')
32 |
33 | # Compute the document/topic matrix
34 | topics = np.zeros((len(mm), model.num_topics))
35 | for di,doc in enumerate(mm):
36 | doc_top = model[doc]
37 | for ti,tv in doc_top:
38 | topics[di,ti] += tv
39 | np.save('topics_hdp.npy', topics)
40 |
--------------------------------------------------------------------------------
/Chapter12/README.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | Chapter 12
 3 | ==========
 4 | 
 5 | Support code for *Chapter 12: Pattern Recognition & Computer Vision*
6 |
7 | Data
8 | ----
9 |
10 | This chapter relies on a publicly available dataset (which can be downloaded
11 | using the ``download.sh`` script inside the ``data/`` directory) as well the
12 | dataset that is packaged with the repository at ``../SimpleImageDataset/``.
13 |
14 | Running ``download.sh`` will retrieve the other dataset into a directory
15 | ``AnimTransDistr/``.
16 |
17 | Scripts
18 | -------
19 |
20 | chapter.py
21 | Code as written in the book.
22 | thresholded_figure.py
23 | Computes the thresholded figures, including after Gaussian blurring
24 | lena-ring.py
25 | Lena image with center in focus and blurred edges
26 | figure10.py
27 | Just paste two images next to each others
28 | features.py
29 | Contains the color histogram function from the book as well as a simple
30 | wrapper around ``mahotas.texture.haralick``
31 | simple_classification.py
32 | Classify SimpleImageDataset with texture features + color histogram features
33 | large_classification.py
34 | Classify ``AnimTransDistr`` with both texture and SURF features.
35 | neighbors.py
36 | Computes image neighbors as well as the neighbor figure from the book.
37 |
38 |
--------------------------------------------------------------------------------
/Chapter12/ch12_3rd/chapter_12.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Computer Vision"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing. It is made available under the MIT License."
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Generative Adversarial Networks"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "Let's create a class for our GAN based on convolution networks."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "import tensorflow as tf\n",
38 | "\n",
39 | "def match(logits, labels):\n",
40 | " logits = tf.clip_by_value(logits, 1e-7, 1. - 1e-7)\n",
41 | " return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))\n",
42 | "\n",
43 | "def batchnormalize(X, eps=1e-8, g=None, b=None):\n",
44 | " if X.get_shape().ndims == 4:\n",
45 | " mean = tf.reduce_mean(X, [0,1,2])\n",
46 | " std = tf.reduce_mean( tf.square(X-mean), [0,1,2] )\n",
47 | " X = (X-mean) / tf.sqrt(std+eps)\n",
48 | "\n",
49 | " if g is not None and b is not None:\n",
50 | " g = tf.reshape(g, [1,1,1,-1])\n",
51 | " b = tf.reshape(b, [1,1,1,-1])\n",
52 | " X = X*g + b\n",
53 | "\n",
54 | " elif X.get_shape().ndims == 2:\n",
55 | " mean = tf.reduce_mean(X, 0)\n",
56 | " std = tf.reduce_mean(tf.square(X-mean), 0)\n",
57 | " X = (X-mean) / tf.sqrt(std+eps)\n",
58 | "\n",
59 | " if g is not None and b is not None:\n",
60 | " g = tf.reshape(g, [1,-1])\n",
61 | " b = tf.reshape(b, [1,-1])\n",
62 | " X = X*g + b\n",
63 | "\n",
64 | " else:\n",
65 | " raise NotImplementedError\n",
66 | "\n",
67 | " return X\n",
68 | "\n",
69 | "class DCGAN():\n",
70 | " def __init__(\n",
71 | " self,\n",
72 | " image_shape=[28,28,1],\n",
73 | " dim_z=100,\n",
74 | " dim_y=10,\n",
75 | " dim_W1=1024,\n",
76 | " dim_W2=128,\n",
77 | " dim_W3=64,\n",
78 | " dim_channel=1,\n",
79 | " ):\n",
80 | "\n",
81 | " self.image_shape = image_shape\n",
82 | " self.dim_z = dim_z\n",
83 | " self.dim_y = dim_y\n",
84 | "\n",
85 | " self.dim_W1 = dim_W1\n",
86 | " self.dim_W2 = dim_W2\n",
87 | " self.dim_W3 = dim_W3\n",
88 | " self.dim_channel = dim_channel\n",
89 | "\n",
90 | " def build_model(self):\n",
91 | "\n",
92 | " Z = tf.placeholder(tf.float32, [None, self.dim_z])\n",
93 | " Y = tf.placeholder(tf.float32, [None, self.dim_y])\n",
94 | "\n",
95 | " image_real = tf.placeholder(tf.float32, [None]+self.image_shape)\n",
96 | " image_gen = self.generate(Z, Y)\n",
97 | "\n",
98 | " raw_real = self.discriminate(image_real, Y, False)\n",
99 | " raw_gen = self.discriminate(image_gen, Y, True)\n",
100 | "\n",
101 | " discrim_cost_real = match(raw_real, tf.ones_like(raw_real))\n",
102 | " discrim_cost_gen = match(raw_gen, tf.zeros_like(raw_gen))\n",
103 | " discrim_cost = discrim_cost_real + discrim_cost_gen\n",
104 | "\n",
105 | " gen_cost = match( raw_gen, tf.ones_like(raw_gen) )\n",
106 | "\n",
107 | " return Z, Y, image_real, image_gen, discrim_cost, gen_cost\n",
108 | "\n",
109 | " def create_conv2d(self, input, filters, kernel_size, name):\n",
110 | " layer = tf.layers.conv2d(\n",
111 | " inputs=input,\n",
112 | " filters=filters,\n",
113 | " kernel_size=kernel_size,\n",
114 | " strides=[2,2],\n",
115 | " name=\"Conv2d_\" + name,\n",
116 | " padding=\"SAME\")\n",
117 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n",
118 | " return layer\n",
119 | "\n",
120 | " def create_conv2d_transpose(self, input, filters, kernel_size, name, with_batch_norm):\n",
121 | " layer = tf.layers.conv2d_transpose(\n",
122 | " inputs=input,\n",
123 | " filters=filters,\n",
124 | " kernel_size=kernel_size,\n",
125 | " strides=[2,2],\n",
126 | " name=\"Conv2d_\" + name,\n",
127 | " padding=\"SAME\")\n",
128 | " if with_batch_norm:\n",
129 | " layer = batchnormalize(layer)\n",
130 | " layer = tf.nn.relu(layer)\n",
131 | " return layer\n",
132 | "\n",
133 | " def create_dense(self, input, units, name, leaky):\n",
134 | " layer = tf.layers.dense(\n",
135 | " inputs=input,\n",
136 | " units=units,\n",
137 | " name=\"Dense\" + name,\n",
138 | " )\n",
139 | " layer = batchnormalize(layer)\n",
140 | " if leaky:\n",
141 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n",
142 | " else:\n",
143 | " layer = tf.nn.relu(layer, name=\"RELU_\" + name)\n",
144 | " return layer\n",
145 | "\n",
146 | " def discriminate(self, image, Y, reuse=False):\n",
147 | " with tf.variable_scope('discriminate', reuse=reuse):\n",
148 | " \n",
149 | " batch_size = Y.get_shape()[0]\n",
150 | " \n",
151 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n",
152 | " X = tf.concat(axis=3, values=[image, yb*tf.ones([1, 28, 28, self.dim_y])])\n",
153 | " \n",
154 | " h1 = self.create_conv2d(X, self.dim_W3, 5, \"Layer1\")\n",
155 | " h1 = tf.concat(axis=3, values=[h1, yb*tf.ones([1, 14, 14, self.dim_y])])\n",
156 | " \n",
157 | " h2 = self.create_conv2d(h1, self.dim_W2, 5, \"Layer2\")\n",
158 | " h2 = tf.reshape(h2, tf.stack([-1, 7*7*128]))\n",
159 | " h2 = tf.concat(axis=1, values=[h2, Y])\n",
160 | " \n",
161 | " h3 = self.create_dense(h2, self.dim_W1, \"Layer3\", True)\n",
162 | " h3 = tf.concat(axis=1, values=[h3, Y])\n",
163 | " \n",
164 | " h4 = self.create_dense(h3, 1, \"Layer4\", True)\n",
165 | " return h4\n",
166 | "\n",
167 | " def generate(self, Z, Y, reuse=False):\n",
168 | " with tf.variable_scope('generate', reuse=reuse):\n",
169 | "\n",
170 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n",
171 | " Z = tf.concat(axis=1, values=[Z,Y])\n",
172 | " h1 = self.create_dense(Z, self.dim_W1, \"Layer1\", False)\n",
173 | " h1 = tf.concat(axis=1, values=[h1, Y])\n",
174 | " h2 = self.create_dense(h1, self.dim_W2*7*7, \"Layer2\", False)\n",
175 | " h2 = tf.reshape(h2, tf.stack([-1,7,7,self.dim_W2]))\n",
176 | " h2 = tf.concat(axis=3, values=[h2, yb*tf.ones([1, 7, 7, self.dim_y])])\n",
177 | "\n",
178 | " h3 = self.create_conv2d_transpose(h2, self.dim_W3, 5, \"Layer3\", True)\n",
179 | " h3 = tf.concat(axis=3, values=[h3, yb*tf.ones([1, 14,14,self.dim_y])] )\n",
180 | "\n",
181 | " h4 = self.create_conv2d_transpose(h3, self.dim_channel, 7, \"Layer4\", False)\n",
182 | " x = tf.nn.sigmoid(h4)\n",
183 | " return x"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 |     "We add two helper functions: one to transform our data to one-hot encoding (done here without Tensorflow, though we could use it instead) and one to plot and save our sampled images."
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": null,
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "import imageio\n",
200 | "import numpy as np\n",
201 | "from matplotlib import pyplot as plt\n",
202 | "%matplotlib inline\n",
203 | "\n",
204 | "def one_hot(X, n):\n",
205 | " X = np.asarray(X).flatten()\n",
206 | " Xoh = np.zeros((len(X), n))\n",
207 | " Xoh[np.arange(len(X)), X] = 1.\n",
208 | " return Xoh\n",
209 | "\n",
210 | "def save_visualization(X, nh_nw, save_path='./sample.jpg'):\n",
211 | " h,w = X.shape[1], X.shape[2]\n",
212 | " img = np.zeros((h * nh_nw[0], w * nh_nw[1], 3))\n",
213 | "\n",
214 | " for n,x in enumerate(X):\n",
215 | " j = n // nh_nw[1]\n",
216 | " i = n % nh_nw[1]\n",
217 | " img[j*h:j*h+h, i*w:i*w+w, :] = x / 255\n",
218 | "\n",
219 | " imageio.imwrite(save_path, img)\n",
220 | " plt.imshow(img)\n",
221 | " plt.show()"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "Our hyperparameters and our data"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "import os\n",
238 | "import numpy as np\n",
239 | "\n",
240 | "n_epochs = 10\n",
241 | "learning_rate = 0.0002\n",
242 | "batch_size = 128\n",
243 | "image_shape = [28,28,1]\n",
244 | "dim_z = 10\n",
245 | "dim_y = 10\n",
246 | "dim_W1 = 1024\n",
247 | "dim_W2 = 128\n",
248 | "dim_W3 = 64\n",
249 | "dim_channel = 1\n",
250 | "\n",
251 | "visualize_dim=196\n",
252 | "\n",
253 | "from sklearn.datasets import fetch_mldata\n",
254 | "mnist = fetch_mldata('MNIST original')\n",
255 | "mnist.data.shape = (-1, 28, 28)\n",
256 | "mnist.data = mnist.data.astype(np.float32).reshape( [-1, 28, 28, 1]) / 255.\n",
257 | "mnist.num_examples = len(mnist.data)\n",
258 | "mnist.target = one_hot(mnist.target.astype(np.int8), dim_y)"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "Let's generate some images!"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "tf.reset_default_graph()\n",
275 | "dcgan_model = DCGAN(\n",
276 | " image_shape=image_shape,\n",
277 | " dim_z=dim_z,\n",
278 | " dim_W1=dim_W1,\n",
279 | " dim_W2=dim_W2,\n",
280 | " dim_W3=dim_W3,\n",
281 | " )\n",
282 | "Z_tf, Y_tf, image_tf, image_tf_sample, d_cost_tf, g_cost_tf, = dcgan_model.build_model()\n",
283 | "\n",
284 | "discrim_vars = list(filter(lambda x: x.name.startswith('discr'), tf.trainable_variables()))\n",
285 | "gen_vars = list(filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()))\n",
286 | "\n",
287 | "train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)\n",
288 | "train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)\n",
289 | "\n",
290 | "Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim,dim_z))\n",
291 | "Y_np_sample = one_hot( np.random.randint(10, size=[visualize_dim]), dim_y)\n",
292 | "\n",
293 | "step = 1000\n",
294 | "\n",
295 | "with tf.Session() as sess:\n",
296 | " sess.run(tf.global_variables_initializer())\n",
297 | " for epoch in range(n_epochs):\n",
298 | " permut = np.random.permutation(mnist.num_examples)\n",
299 | " trX = mnist.data[permut]\n",
300 | " trY = mnist.target[permut]\n",
301 | " Z = np.random.uniform(-1, 1, size=[mnist.num_examples, dim_z]).astype(np.float32)\n",
302 | "\n",
303 | " print(\"epoch: %i\" % epoch)\n",
304 | " for j in range(0, mnist.num_examples, batch_size):\n",
305 | " if j % step == 0:\n",
306 | " print(\" batch: %i\" % j)\n",
307 | "\n",
308 | " batch = permut[j:j+batch_size]\n",
309 | "\n",
310 | " Xs = trX[batch]\n",
311 | " Ys = trY[batch]\n",
312 | " Zs = Z[batch]\n",
313 | "\n",
314 | " if (j / batch_size) % 2 == 0:\n",
315 | " sess.run(train_op_discrim,\n",
316 | " feed_dict={\n",
317 | " Z_tf:Zs,\n",
318 | " Y_tf:Ys,\n",
319 | " image_tf:Xs\n",
320 | " })\n",
321 | " else:\n",
322 | " sess.run(train_op_gen,\n",
323 | " feed_dict={\n",
324 | " Z_tf:Zs,\n",
325 | " Y_tf:Ys\n",
326 | " })\n",
327 | "\n",
328 | " if j % step == 0:\n",
329 | " generated_samples = sess.run(\n",
330 | " image_tf_sample,\n",
331 | " feed_dict={\n",
332 | " Z_tf:Z_np_sample,\n",
333 | " Y_tf:Y_np_sample\n",
334 | " })\n",
335 | " generated_samples = generated_samples * 255\n",
336 | " save_visualization(generated_samples, (7,28), save_path='./B09124_11_sample_%03d_%04d.jpg' % (epoch, j / step))"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": null,
342 | "metadata": {},
343 | "outputs": [],
344 | "source": []
345 | }
346 | ],
347 | "metadata": {
348 | "kernelspec": {
349 | "display_name": "Python 3",
350 | "language": "python",
351 | "name": "python3"
352 | },
353 | "language_info": {
354 | "codemirror_mode": {
355 | "name": "ipython",
356 | "version": 3
357 | },
358 | "file_extension": ".py",
359 | "mimetype": "text/x-python",
360 | "name": "python",
361 | "nbconvert_exporter": "python",
362 | "pygments_lexer": "ipython3",
363 | "version": "3.6.5"
364 | }
365 | },
366 | "nbformat": 4,
367 | "nbformat_minor": 2
368 | }
369 |
--------------------------------------------------------------------------------
/Chapter12/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Fetch and unpack the AnimTransDistr dataset used by Chapter 12.
4 | # Abort on any failure so we never try to unrar a partial/missing archive.
5 | set -euo pipefail
6 | 
7 | mkdir -p AnimTransDistr
8 | cd AnimTransDistr
9 | # -f: fail on HTTP errors instead of saving the error page; -L: follow redirects.
10 | curl -f -L -O http://vision.stanford.edu/Datasets/AnimTransDistr.rar
11 | unrar x AnimTransDistr.rar
12 | # The following file is a weird file:
13 | rm Anims/104034.jpg
14 | 
--------------------------------------------------------------------------------
/Chapter12/forest.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/forest.jpeg
--------------------------------------------------------------------------------
/Chapter12/scene00.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/scene00.jpg
--------------------------------------------------------------------------------
/Chapter13/chapter_13.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Reinforcement learning with Tensorflow"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing.\n",
15 | "\n",
16 | "It is made available under the MIT License.\n",
17 | "\n",
18 | "All code examples use Python in version..."
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "import sys\n",
28 | "sys.version"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Utility functions"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "import os\n",
45 | "\n",
46 | "CHART_DIR = \"charts\"\n",
47 | "if not os.path.exists(CHART_DIR):\n",
48 | " os.mkdir(CHART_DIR)\n",
49 | "\n",
50 | "def save_png(name):\n",
51 | " fn = 'B09124_13_%s.png'%name # please ignore, it just helps our publisher :-)\n",
52 | " plt.savefig(os.path.join(CHART_DIR, fn), bbox_inches=\"tight\")"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "## Simple text games"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "import gym\n",
69 | "import numpy as np\n",
70 | "\n",
71 | "env = gym.make('FrozenLake-v0')"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
 78 |     "### Estimating the Q function the old-fashioned way"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "Let's make a table with some Q values for this environment"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "# Start with an empty table\n",
95 | "Q = np.zeros((env.observation_space.n, env.action_space.n))\n",
96 | "# Set learning hyperparameters\n",
97 | "lr = .8\n",
98 | "y = .95\n",
99 | "num_episodes = 2000\n",
100 | "\n",
101 | "# Let's run!\n",
102 | "for i in range(num_episodes):\n",
103 | " # Reset environment and get first new observation (top left)\n",
104 | " s = env.reset()\n",
105 | " # Do 100 iterations to update the table\n",
106 | " for i in range(100):\n",
107 | " # Choose an action by picking the max of the table + additional random noise ponderated by the episode\n",
108 |     "        a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)/(i+1))\n",
109 | " # Get new state and reward from environment after chosen step \n",
110 | " s1, r, d,_ = env.step(a)\n",
111 | " # Update Q-Table with new knowledge\n",
112 | " Q[s,a] = Q[s,a] + lr*(r + y*np.max(Q[s1,:]) - Q[s,a])\n",
113 | " s = s1\n",
114 | " if d == True:\n",
115 | " break"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": null,
121 | "metadata": {},
122 | "outputs": [],
123 | "source": [
124 | "print(\"Final Q-Table Values\")\n",
125 | "print(Q)"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "### Test games with TF"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "import random\n",
142 | "import tensorflow as tf\n",
143 | "import matplotlib.pyplot as plt\n",
144 | "%matplotlib inline"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "Let's create a new network."
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": null,
157 | "metadata": {},
158 | "outputs": [],
159 | "source": [
160 | "y = 0.99\n",
161 | "e = 0.1 # 1 in 10 samples, we chose a new action for the network\n",
162 | "num_episodes = 2000\n",
163 | "learning_rate = 0.1"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "tf.reset_default_graph()\n",
173 | "\n",
174 | "# A simple one layer network\n",
175 | "inputs = tf.placeholder(shape=[None, 16], dtype=tf.float32, name=\"input\")\n",
176 | "Qout = tf.layers.dense(\n",
177 | " inputs=inputs,\n",
178 | " units=4,\n",
179 | " use_bias=False,\n",
180 | " name=\"dense\",\n",
181 | " kernel_initializer=tf.random_uniform_initializer(minval=0, maxval=.0125)\n",
182 | ")\n",
183 | "predict = tf.argmax(Qout, 1)\n",
184 | "\n",
185 | "# Our optimizer will try to optimize \n",
186 | "nextQ = tf.placeholder(shape=[None, 4], dtype=tf.float32, name=\"target\")\n",
187 | "loss = tf.reduce_sum(tf.square(nextQ - Qout))\n",
188 | "\n",
189 | "trainer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
190 | "updateModel = trainer.minimize(loss)"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 |     "We can now train the network, and check that it will get more and more successes as the training progresses."
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "# To keep track of our games and our results\n",
207 | "jList = []\n",
208 | "rList = []\n",
209 | "with tf.Session() as sess:\n",
210 | " sess.run(tf.global_variables_initializer())\n",
211 | "\n",
212 | " for i in range(num_episodes):\n",
213 | " s = env.reset()\n",
214 | " rAll = 0\n",
215 | " \n",
216 | " for j in range(100):\n",
217 | " a, targetQ = sess.run([predict, Qout], feed_dict={inputs:np.identity(16)[s:s+1]})\n",
218 | " # We randomly choose a new state that we may have not encountered before\n",
219 | " if np.random.rand(1) < e:\n",
220 | " a[0] = env.action_space.sample()\n",
221 | "\n",
222 | " s1, r, d, _ = env.step(a[0])\n",
223 | " \n",
224 | " # Obtain the Q' values by feeding the new state through our network\n",
225 | " Q1 = sess.run(Qout, feed_dict={inputs:np.identity(16)[s1:s1+1]})\n",
226 | " # Obtain maxQ' and set our target value for chosen action.\n",
227 | " targetQ[0, a[0]] = r + y*np.max(Q1)\n",
228 | " \n",
229 | " # Train our network using target and predicted Q values\n",
230 | " sess.run(updateModel, feed_dict={inputs:np.identity(16)[s:s+1], nextQ:targetQ})\n",
231 | " rAll += r\n",
232 | " s = s1\n",
233 | " if d == True:\n",
234 | " # Reduce chance of random action as we train the model.\n",
235 | " e = 1 / ((i // 50) + 10)\n",
236 | " break\n",
237 | " jList.append(j)\n",
238 | " rList.append(rAll)\n",
239 |     "print(\"Percent of successful episodes: %f%%\" % (sum(rList) / num_episodes))"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "We now display the evolution of the reward with each episode"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": null,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "from scipy.signal import lfilter\n",
256 | "\n",
257 | "plt.plot(lfilter(np.ones(20)/20, [1], rList))\n",
258 | "save_png(\"reward\")"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 |     "We can also see that the survival increases, even if we take suboptimal paths:"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "plt.plot(jList)\n",
275 | "save_png(\"length\")"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {},
281 | "source": [
282 | "## Atari games"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "The code here was inspired by several tutorials and courses online:\n",
290 | "* https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26\n",
291 | "* https://github.com/tokb23/dqn\n",
292 | "* https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 |     "We can now design a network that can tackle more or less any of the Atari games available on the gym platform."
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": null,
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "import gym\n",
309 | "\n",
310 | "import os\n",
311 | "import six\n",
312 | "import numpy as np\n",
313 | "import tensorflow as tf\n",
314 | "import random\n",
315 | "import itertools\n",
316 | "from collections import deque, namedtuple\n",
317 | "\n",
318 | "CHART_DIR = \"charts\"\n",
319 | "if not os.path.exists(CHART_DIR):\n",
320 | " os.mkdir(CHART_DIR)"
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {},
326 | "source": [
327 |     "We need a few helper functions, one to preprocess our images and shrink them and two others that will transpose the data. The reason is that we use the past images as additional channels, so the axis order is wrong."
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {},
334 | "outputs": [],
335 | "source": [
336 | "def to_grayscale(img):\n",
337 | " return np.mean(img, axis=2).astype(np.uint8)\n",
338 | "\n",
339 | "def downsample(img):\n",
340 | " return img[::2, ::2]\n",
341 | "\n",
342 | "def preprocess(img):\n",
343 | " return to_grayscale(downsample(img))[None,:,:]\n",
344 | "\n",
345 | "def adapt_state(state):\n",
346 | " return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)]\n",
347 | "\n",
348 | "def adapt_batch_state(state):\n",
349 | " return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0\n",
350 | "\n",
351 | "def get_initial_state(frame):\n",
352 | " processed_frame = preprocess(frame)\n",
353 | " state = [processed_frame for _ in range(state_length)]\n",
354 | " return np.concatenate(state)"
355 | ]
356 | },
357 | {
358 | "cell_type": "markdown",
359 | "metadata": {},
360 | "source": [
361 | "We add a bunch of hyperparameters and constants"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {},
368 | "outputs": [],
369 | "source": [
370 | "\n",
371 | "env_name = \"Breakout-v4\"\n",
372 | "\n",
373 | "width = 80 # Resized frame width\n",
374 | "height = 105 # Resized frame height\n",
375 | "\n",
376 | "n_episodes = 12000 # Number of runs for the agent\n",
377 |     "state_length = 4  # Number of most recent frames we input to the network\n",
378 | "\n",
379 | "gamma = 0.99 # Discount factor\n",
380 | "\n",
381 | "exploration_steps = 1000000 # During all these steps, we progressively lower epsilon\n",
382 | "initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy\n",
383 | "final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy\n",
384 | "\n",
385 |     "replay_memory_init_size = 20000  # Number of steps to populate the replay memory before training starts\n",
386 | "replay_memory_size = 400000 # Number of states we keep for training\n",
387 | "batch_size = 32 # Batch size\n",
388 | "network_update_interval = 10000 # The frequency with which the target network is updated\n",
389 | "train_skips = 4 # The agent selects 4 actions between successive updates\n",
390 | "\n",
391 | "learning_rate = 0.00025 # Learning rate used by RMSProp\n",
392 | "momentum = 0.95 # momentum used by RMSProp\n",
393 | "min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update\n",
394 | "\n",
395 | "network_path = 'saved_networks/' + env_name\n",
396 | "tensorboard_path = 'summary/' + env_name\n",
397 | "save_interval = 300000 # The frequency with which the network is saved"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "We use a class to train, save and restore our network. We will use one instance for the Q network and another one for the target network.\n",
405 | "get_trained_action() will be the method used to get a new action from the network."
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": null,
411 | "metadata": {},
412 | "outputs": [],
413 | "source": [
414 | "class Estimator():\n",
415 | " \"\"\"Q-Value Estimator neural network.\n",
416 | " This network is used for both the Q-Network and the Target Network.\n",
417 | " \"\"\"\n",
418 | "\n",
419 | " def __init__(self, env, scope=\"estimator\", summaries_dir=None):\n",
420 | " self.scope = scope\n",
421 | " self.num_actions = env.action_space.n\n",
422 | " self.epsilon = initial_epsilon\n",
423 | " self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps\n",
424 | " \n",
425 | " # Writes Tensorboard summaries to disk\n",
426 | " self.summary_writer = None\n",
427 | " with tf.variable_scope(scope):\n",
428 | " # Build the graph\n",
429 | " self.build_model()\n",
430 | " if summaries_dir:\n",
431 | " summary_dir = os.path.join(summaries_dir, \"summaries_%s\" % scope)\n",
432 | " if not os.path.exists(summary_dir):\n",
433 | " os.makedirs(summary_dir)\n",
434 | " self.summary_writer = tf.summary.FileWriter(summary_dir)\n",
435 | "\n",
436 | " def build_model(self):\n",
437 | " \"\"\"\n",
438 | " Builds the Tensorflow graph.\n",
439 | " \"\"\"\n",
440 | " self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name=\"X\")\n",
441 | " # The TD target value\n",
442 | " self.y = tf.placeholder(shape=[None], dtype=tf.float32, name=\"y\")\n",
443 | " # Integer id of which action was selected\n",
444 | " self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name=\"actions\")\n",
445 | "\n",
446 |     "        model = tf.keras.Sequential(name=self.scope)\n",
447 | " model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name=\"Layer1\"))\n",
448 | " model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name=\"Layer2\"))\n",
449 | " model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name=\"Layer3\"))\n",
450 | " model.add(tf.keras.layers.Flatten(name=\"Flatten\"))\n",
451 | " model.add(tf.keras.layers.Dense(512, activation='relu', name=\"Layer4\"))\n",
452 | " model.add(tf.keras.layers.Dense(self.num_actions, name=\"Output\"))\n",
453 | "\n",
454 | " self.predictions = model(self.X)\n",
455 | "\n",
456 | " a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0)\n",
457 | " q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1)\n",
458 | " \n",
459 | " # Calculate the loss\n",
460 | " self.losses = tf.squared_difference(self.y, q_value)\n",
461 | " self.loss = tf.reduce_mean(self.losses)\n",
462 | "\n",
463 | " # Optimizer Parameters from original paper\n",
464 | " self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient)\n",
465 | " self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step())\n",
466 | "\n",
467 | " # Summaries for Tensorboard\n",
468 | " self.summaries = tf.summary.merge([\n",
469 | " tf.summary.scalar(\"loss\", self.loss),\n",
470 | " tf.summary.histogram(\"loss_hist\", self.losses),\n",
471 | " tf.summary.histogram(\"q_values_hist\", self.predictions),\n",
472 | " tf.summary.scalar(\"max_q_value\", tf.reduce_max(self.predictions))\n",
473 | " ])\n",
474 | "\n",
475 | "\n",
476 | " def predict(self, sess, s):\n",
477 | " return sess.run(self.predictions, { self.X: s })\n",
478 | "\n",
479 | " def update(self, sess, s, a, y):\n",
480 | " feed_dict = { self.X: s, self.y: y, self.actions: a }\n",
481 | " summaries, global_step, _, loss = sess.run(\n",
482 | " [self.summaries, tf.train.get_global_step(), self.train_op, self.loss],\n",
483 | " feed_dict)\n",
484 | " if self.summary_writer:\n",
485 | " self.summary_writer.add_summary(summaries, global_step)\n",
486 | " return loss\n",
487 | "\n",
488 | " def get_action(self, sess, state):\n",
489 | " if self.epsilon >= random.random():\n",
490 | " action = random.randrange(self.num_actions)\n",
491 | " else:\n",
492 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n",
493 | "\n",
494 | " # Decay epsilon over time\n",
495 | " if self.epsilon > final_epsilon:\n",
496 | " self.epsilon -= self.epsilon_step\n",
497 | "\n",
498 | " return action\n",
499 | "\n",
500 | " def get_trained_action(self, state):\n",
501 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n",
502 | " return action"
503 | ]
504 | },
505 | {
506 | "cell_type": "markdown",
507 | "metadata": {},
508 | "source": [
509 | "We create also a function to copy parameters from one network to the other, a function to create an initial clean state as well as a function to create the summary reports for scalar by episode outputs."
510 | ]
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": [
518 | "def copy_model_parameters(estimator1, estimator2):\n",
519 | " \"\"\"\n",
520 | " Copies the model parameters of one estimator to another.\n",
521 | " Args:\n",
522 |     "      estimator1: Estimator to copy the parameters from\n",
523 | " estimator2: Estimator to copy the parameters to\n",
524 | " \"\"\"\n",
525 | " e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n",
526 | " e1_params = sorted(e1_params, key=lambda v: v.name)\n",
527 | " e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n",
528 | " e2_params = sorted(e2_params, key=lambda v: v.name)\n",
529 | "\n",
530 | " update_ops = []\n",
531 | " for e1_v, e2_v in zip(e1_params, e2_params):\n",
532 | " op = e2_v.assign(e1_v)\n",
533 | " update_ops.append(op)\n",
534 | "\n",
535 | " return update_ops\n",
536 | "\n",
537 | "def create_memory(env):\n",
538 | " # Populate the replay memory with initial experience \n",
539 | " replay_memory = []\n",
540 | " \n",
541 | " frame = env.reset()\n",
542 | " state = get_initial_state(frame)\n",
543 | "\n",
544 | " for i in range(replay_memory_init_size):\n",
545 | " action = np.random.choice(np.arange(env.action_space.n))\n",
546 | " frame, reward, done, _ = env.step(action)\n",
547 | " \n",
548 | " next_state = np.append(state[1:, :, :], preprocess(frame), axis=0)\n",
549 | " replay_memory.append(Transition(state, action, reward, next_state, done))\n",
550 | " if done:\n",
551 | " frame = env.reset()\n",
552 | " state = get_initial_state(frame)\n",
553 | " else:\n",
554 | " state = next_state\n",
555 | " \n",
556 | " return replay_memory\n",
557 | "\n",
558 | "\n",
559 | "def setup_summary():\n",
560 | " with tf.variable_scope(\"episode\"):\n",
561 | " episode_total_reward = tf.Variable(0., name=\"EpisodeTotalReward\")\n",
562 | " tf.summary.scalar('Total Reward', episode_total_reward)\n",
563 | " episode_avg_max_q = tf.Variable(0., name=\"EpisodeAvgMaxQ\")\n",
564 | " tf.summary.scalar('Average Max Q', episode_avg_max_q)\n",
565 | " episode_duration = tf.Variable(0., name=\"EpisodeDuration\")\n",
566 | " tf.summary.scalar('Duration', episode_duration)\n",
567 | " episode_avg_loss = tf.Variable(0., name=\"EpisodeAverageLoss\")\n",
568 | " tf.summary.scalar('Average Loss', episode_avg_loss)\n",
569 | " summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]\n",
570 | " summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]\n",
571 | " update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]\n",
572 | " summary_op = tf.summary.merge_all(scope=\"episode\")\n",
573 | " return summary_placeholders, update_ops, summary_op"
574 | ]
575 | },
576 | {
577 | "cell_type": "markdown",
578 | "metadata": {},
579 | "source": [
580 | "We can now train our network (and save some final images from the trained network)"
581 | ]
582 | },
583 | {
584 | "cell_type": "code",
585 | "execution_count": null,
586 | "metadata": {},
587 | "outputs": [],
588 | "source": [
589 | "from tqdm import tqdm\n",
590 | "\n",
591 | "env = gym.make(env_name)\n",
592 | "tf.reset_default_graph()\n",
593 | "\n",
594 |     "# Create a global step variable\n",
595 | "global_step = tf.Variable(0, name='global_step', trainable=False)\n",
596 | "\n",
597 | "# Create estimators\n",
598 | "q_estimator = Estimator(env, scope=\"q\", summaries_dir=tensorboard_path)\n",
599 | "target_estimator = Estimator(env, scope=\"target_q\")\n",
600 | "\n",
601 | "copy_model = copy_model_parameters(q_estimator, target_estimator)\n",
602 | "\n",
603 | "summary_placeholders, update_ops, summary_op = setup_summary()\n",
604 | "\n",
605 | "# The replay memory\n",
606 | "replay_memory = create_memory(env)\n",
607 | "\n",
608 | "with tf.Session() as sess:\n",
609 | " sess.run(tf.global_variables_initializer())\n",
610 | "\n",
611 | " q_estimator.summary_writer.add_graph(sess.graph)\n",
612 | "\n",
613 | " saver = tf.train.Saver()\n",
614 | " # Load a previous checkpoint if we find one\n",
615 | " latest_checkpoint = tf.train.latest_checkpoint(network_path)\n",
616 | " if latest_checkpoint:\n",
617 | " print(\"Loading model checkpoint %s...\\n\" % latest_checkpoint)\n",
618 | " saver.restore(sess, latest_checkpoint)\n",
619 | "\n",
620 | " total_t = sess.run(tf.train.get_global_step())\n",
621 | "\n",
622 | " for episode in tqdm(range(n_episodes)):\n",
623 | " if total_t % save_interval == 0:\n",
624 | " # Save the current checkpoint\n",
625 | " saver.save(tf.get_default_session(), network_path)\n",
626 | "\n",
627 | " frame = env.reset()\n",
628 | " state = get_initial_state(frame)\n",
629 | "\n",
630 | " total_reward = 0\n",
631 | " total_loss = 0\n",
632 | " total_q_max = 0\n",
633 | "\n",
634 | " for duration in itertools.count(): \n",
635 | " # Maybe update the target estimator\n",
636 | " if total_t % network_update_interval == 0:\n",
637 | " sess.run(copy_model)\n",
638 | "\n",
639 | " action = q_estimator.get_action(sess, state)\n",
640 | " frame, reward, terminal, _ = env.step(action)\n",
641 | "\n",
642 | " processed_frame = preprocess(frame)\n",
643 | " next_state = np.append(state[1:, :, :], processed_frame, axis=0)\n",
644 | "\n",
645 | " reward = np.clip(reward, -1, 1)\n",
646 | " replay_memory.append(Transition(state, action, reward, next_state, terminal))\n",
647 | " if len(replay_memory) > replay_memory_size:\n",
648 | " replay_memory.popleft()\n",
649 | "\n",
650 | " samples = random.sample(replay_memory, batch_size)\n",
651 | " states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))\n",
652 | "\n",
653 | " # Calculate q values and targets (Double DQN)\n",
654 | " adapted_state = adapt_batch_state(next_states_batch)\n",
655 | "\n",
656 | " q_values_next = q_estimator.predict(sess, adapted_state)\n",
657 | " best_actions = np.argmax(q_values_next, axis=1)\n",
658 | " q_values_next_target = target_estimator.predict(sess, adapted_state)\n",
659 | " targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions]\n",
660 | "\n",
661 | " # Perform gradient descent update\n",
662 | " states_batch = adapt_batch_state(states_batch)\n",
663 | " loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)\n",
664 | "\n",
665 | " total_q_max += np.max(q_values_next)\n",
666 | " total_loss += loss\n",
667 | " total_t += 1\n",
668 | " total_reward += reward\n",
669 | " if terminal:\n",
670 | " break\n",
671 | "\n",
672 | " stats = [total_reward, total_q_max / duration, duration, total_loss / duration]\n",
673 | " for i in range(len(stats)):\n",
674 | " sess.run(update_ops[i], feed_dict={\n",
675 | " summary_placeholders[i]: float(stats[i])\n",
676 | " })\n",
677 | " summary_str = sess.run(summary_op, )\n",
678 | " q_estimator.summary_writer.add_summary(summary_str, episode)\n",
679 | "\n",
680 | " env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode)))\n",
681 | "\n",
682 | " # Save the last checkpoint\n",
683 | " saver.save(tf.get_default_session(), network_path)"
684 | ]
685 | },
686 | {
687 | "cell_type": "code",
688 | "execution_count": null,
689 | "metadata": {},
690 | "outputs": [],
691 | "source": []
692 | }
693 | ],
694 | "metadata": {
695 | "kernelspec": {
696 | "display_name": "Python 3",
697 | "language": "python",
698 | "name": "python3"
699 | },
700 | "language_info": {
701 | "codemirror_mode": {
702 | "name": "ipython",
703 | "version": 3
704 | },
705 | "file_extension": ".py",
706 | "mimetype": "text/x-python",
707 | "name": "python",
708 | "nbconvert_exporter": "python",
709 | "pygments_lexer": "ipython3",
710 | "version": "3.6.5"
711 | }
712 | },
713 | "nbformat": 4,
714 | "nbformat_minor": 2
715 | }
716 |
--------------------------------------------------------------------------------
/Chapter13/simple_breakout.py:
--------------------------------------------------------------------------------
1 | # Import the gym module
2 | import gym
3 |
4 | # Create a breakout environment
5 | env = gym.make('BreakoutDeterministic-v4')
6 | # Reset it, returns the starting frame
7 | frame = env.reset()
8 | # Render
9 | env.render()
10 |
11 | is_done = False
12 | while not is_done:
13 | # Perform a random action, returns the new frame, reward and whether the game is over
14 | frame, reward, is_done, _ = env.step(env.action_space.sample())
15 | # Render
16 | env.render()
17 |
--------------------------------------------------------------------------------
/Chapter13/tf_breakout.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Built by merging different Q examples available online
4 |
5 | import gym
6 |
7 | import os
8 | import six
9 | import numpy as np
10 | import tensorflow as tf
11 | import random
12 | import itertools
13 | from collections import deque, namedtuple
14 |
15 | CHART_DIR = "charts"
16 | if not os.path.exists(CHART_DIR):
17 | os.mkdir(CHART_DIR)
18 |
19 | env_name = "Breakout-v4"
20 |
21 | width = 80 # Resized frame width
22 | height = 105 # Resized frame height
23 |
24 | n_episodes = 12000 # Number of runs for the agent
25 | state_length = 4  # Number of most recent frames we input to the network
26 |
27 | gamma = 0.99 # Discount factor
28 |
29 | exploration_steps = 1000000 # During all these steps, we progressively lower epsilon
30 | initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy
31 | final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy
32 |
33 | replay_memory_init_size = 1000 # Number of steps to populate the replay memory before training starts
34 | replay_memory_size = 400000 # Number of states we keep for training
35 | batch_size = 32 # Batch size
36 | network_update_interval = 10000 # The frequency with which the target network is updated
37 | train_skips = 4 # The agent selects 4 actions between successive updates
38 |
39 | learning_rate = 0.00025 # Learning rate used by RMSProp
40 | momentum = 0.95 # momentum used by RMSProp
41 | min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update
42 |
43 | network_path = 'saved_networks/' + env_name
44 | tensorboard_path = 'summary/' + env_name
45 | save_interval = 300000 # The frequency with which the network is saved
46 |
47 | Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])
48 |
49 | def to_grayscale(img):  # collapse RGB to one channel by averaging, kept as uint8
50 |     return np.mean(img, axis=2).astype(np.uint8)
51 | 
52 | def downsample(img):  # keep every other row and column: halves both spatial dimensions
53 |     return img[::2, ::2]
54 | 
55 | def preprocess(img):  # grayscale + downsample, with a leading frame axis of size 1
56 |     return to_grayscale(downsample(img))[None,:,:]
57 | 
58 | def adapt_state(state):  # (frames, height, width) stack -> 1-element batch of (width, height, frames), scaled to [0, 1] for the network's X placeholder
59 |     return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)]
60 | 
61 | def adapt_batch_state(state):  # batched variant: (batch, frames, height, width) -> (batch, width, height, frames), scaled to [0, 1]
62 |     return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0
63 | 
64 | def get_initial_state(frame):  # initial state: the first frame preprocessed and repeated state_length times
65 |     processed_frame = preprocess(frame)
66 |     state = [processed_frame for _ in range(state_length)]
67 |     return np.concatenate(state)
68 |
class Estimator():
    """Q-Value Estimator neural network.

    This network is used for both the Q-Network and the Target Network.
    """

    def __init__(self, env, scope="estimator", summaries_dir=None):
        """Build the estimator graph inside its own variable scope.

        Args:
            env: gym environment; only its action space size is used.
            scope: variable-scope prefix, also used to match trainable variables.
            summaries_dir: if given, Tensorboard summaries are written below it.
        """
        self.scope = scope
        self.num_actions = env.action_space.n
        # Linearly annealed epsilon-greedy exploration parameters
        self.epsilon = initial_epsilon
        self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps

        # Writes Tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            # Build the graph
            self.build_model()
            if summaries_dir:
                summary_dir = os.path.join(summaries_dir, "summaries_%s" % scope)
                if not os.path.exists(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)

    def build_model(self):
        """
        Builds the Tensorflow graph: three conv layers, two dense layers,
        a squared-error TD loss, an RMSProp train op, and summaries.
        """
        # Input frames; shape comes from module-level width/height/state_length
        self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name="X")
        # The TD target value
        self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
        # Integer id of which action was selected
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")

        model = tf.keras.Sequential(name=self.scope)
        model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name="Layer1"))
        model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name="Layer2"))
        model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name="Layer3"))
        model.add(tf.keras.layers.Flatten(name="Flatten"))
        model.add(tf.keras.layers.Dense(512, activation='relu', name="Layer4"))
        model.add(tf.keras.layers.Dense(self.num_actions, name="Output"))

        # Per-action Q-value predictions
        self.predictions = model(self.X)

        # Select the Q-value of the action that was actually taken
        a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0)
        q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1)

        # Calculate the loss
        self.losses = tf.squared_difference(self.y, q_value)
        self.loss = tf.reduce_mean(self.losses)

        # Optimizer Parameters from original paper
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient)
        self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step())

        # Summaries for Tensorboard
        self.summaries = tf.summary.merge([
            tf.summary.scalar("loss", self.loss),
            tf.summary.histogram("loss_hist", self.losses),
            tf.summary.histogram("q_values_hist", self.predictions),
            tf.summary.scalar("max_q_value", tf.reduce_max(self.predictions))
        ])

    def predict(self, sess, s):
        """Return Q-value predictions for a batch of states `s`."""
        return sess.run(self.predictions, { self.X: s })

    def update(self, sess, s, a, y):
        """Run one gradient step on the batch (s, a, y) and return the loss.

        Also writes the step's summaries to Tensorboard when a writer is set.
        """
        feed_dict = { self.X: s, self.y: y, self.actions: a }
        summaries, global_step, _, loss = sess.run(
            [self.summaries, tf.train.get_global_step(), self.train_op, self.loss],
            feed_dict)
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss

    def get_action(self, sess, state):
        """Epsilon-greedy action selection; epsilon decays by one step per call."""
        if self.epsilon >= random.random():
            action = random.randrange(self.num_actions)
        else:
            action = np.argmax(self.predict(sess, adapt_state(state)))

        # Decay epsilon over time
        if self.epsilon > final_epsilon:
            self.epsilon -= self.epsilon_step

        return action

    def get_trained_action(self, state, sess=None):
        """Greedy (no-exploration) action for `state`.

        BUG FIX: the original body referenced a global `sess` that only exists
        when running the training script's __main__ block; calling it from any
        other context raised NameError. Accept an optional session and fall
        back to the default TF session.
        """
        if sess is None:
            sess = tf.get_default_session()
        action = np.argmax(self.predict(sess, adapt_state(state)))
        return action
158 |
def copy_model_parameters(estimator1, estimator2):
    """
    Copies the model parameters of one estimator to another.
    Args:
        estimator1: Estimator to copy the parameters from
        estimator2: Estimator to copy the parameters to
    Returns:
        A list of assign ops; run them in a session to perform the copy.
    """
    def scoped_params(estimator):
        # Trainable variables under the estimator's scope, in stable name order
        params = [v for v in tf.trainable_variables()
                  if v.name.startswith(estimator.scope)]
        return sorted(params, key=lambda v: v.name)

    return [dst.assign(src)
            for src, dst in zip(scoped_params(estimator1),
                                scoped_params(estimator2))]
177 |
def create_memory(env):
    """Pre-fill a replay memory with `replay_memory_init_size` transitions
    collected by a uniformly random policy."""
    memory = []

    state = get_initial_state(env.reset())

    for _ in range(replay_memory_init_size):
        action = np.random.choice(np.arange(env.action_space.n))
        frame, reward, done, _ = env.step(action)

        # Drop the oldest frame and append the newest one
        next_state = np.append(state[1:, :, :], preprocess(frame), axis=0)
        memory.append(Transition(state, action, reward, next_state, done))
        if done:
            state = get_initial_state(env.reset())
        else:
            state = next_state

    return memory
198 |
199 |
def setup_summary():
    """Create per-episode Tensorboard summary variables.

    Returns (placeholders, update_ops, summary_op): feed a placeholder and run
    its update op to set a stat, then run summary_op to serialize them all.
    """
    with tf.variable_scope("episode"):
        specs = [
            ("EpisodeTotalReward", 'Total Reward'),
            ("EpisodeAvgMaxQ", 'Average Max Q'),
            ("EpisodeDuration", 'Duration'),
            ("EpisodeAverageLoss", 'Average Loss'),
        ]
        summary_vars = []
        for var_name, tag in specs:
            var = tf.Variable(0., name=var_name)
            tf.summary.scalar(tag, var)
            summary_vars.append(var)
    summary_placeholders = [tf.placeholder(tf.float32) for _ in summary_vars]
    update_ops = [var.assign(ph)
                  for var, ph in zip(summary_vars, summary_placeholders)]
    summary_op = tf.summary.merge_all(scope="episode")
    return summary_placeholders, update_ops, summary_op
215 |
216 |
if __name__ == "__main__":
    from tqdm import tqdm

    env = gym.make(env_name)
    tf.reset_default_graph()

    # Create a global step variable
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create estimators: the online Q network and the target network (Double DQN)
    q_estimator = Estimator(env, scope="q", summaries_dir=tensorboard_path)
    target_estimator = Estimator(env, scope="target_q")

    copy_model = copy_model_parameters(q_estimator, target_estimator)

    summary_placeholders, update_ops, summary_op = setup_summary()

    # The replay memory, pre-filled with random-policy transitions
    replay_memory = create_memory(env)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        q_estimator.summary_writer.add_graph(sess.graph)

        saver = tf.train.Saver()
        # Load a previous checkpoint if we find one
        latest_checkpoint = tf.train.latest_checkpoint(network_path)
        if latest_checkpoint:
            print("Loading model checkpoint %s...\n" % latest_checkpoint)
            saver.restore(sess, latest_checkpoint)

        total_t = sess.run(tf.train.get_global_step())

        for episode in tqdm(range(n_episodes)):
            if total_t % save_interval == 0:
                # Save the current checkpoint
                saver.save(tf.get_default_session(), network_path)

            frame = env.reset()
            state = get_initial_state(frame)

            total_reward = 0
            total_loss = 0
            total_q_max = 0

            for duration in itertools.count():
                # Periodically sync the target network with the online network
                if total_t % network_update_interval == 0:
                    sess.run(copy_model)

                action = q_estimator.get_action(sess, state)
                frame, reward, terminal, _ = env.step(action)

                processed_frame = preprocess(frame)
                next_state = np.append(state[1:, :, :], processed_frame, axis=0)

                # Clip rewards to [-1, 1] as in the original DQN paper
                reward = np.clip(reward, -1, 1)
                replay_memory.append(Transition(state, action, reward, next_state, terminal))
                if len(replay_memory) > replay_memory_size:
                    # BUG FIX: replay_memory is a list, which has no popleft();
                    # drop the oldest transition with pop(0) instead.
                    replay_memory.pop(0)

                samples = random.sample(replay_memory, batch_size)
                states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

                # Calculate q values and targets (Double DQN): the online network
                # selects the best action, the target network evaluates it.
                adapted_state = adapt_batch_state(next_states_batch)

                q_values_next = q_estimator.predict(sess, adapted_state)
                best_actions = np.argmax(q_values_next, axis=1)
                q_values_next_target = target_estimator.predict(sess, adapted_state)
                targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions]

                # Perform gradient descent update
                states_batch = adapt_batch_state(states_batch)
                loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

                total_q_max += np.max(q_values_next)
                total_loss += loss
                total_t += 1
                total_reward += reward
                # BUG FIX: the original never advanced the state, so action
                # selection and stored transitions always used the episode's
                # initial state.
                state = next_state
                if terminal:
                    break

            # BUG FIX: guard against division by zero when the episode ends on
            # its very first step (duration == 0).
            steps = max(duration, 1)
            stats = [total_reward, total_q_max / steps, duration, total_loss / steps]
            for i in range(len(stats)):
                sess.run(update_ops[i], feed_dict={
                    summary_placeholders[i]: float(stats[i])
                })
            summary_str = sess.run(summary_op)
            q_estimator.summary_writer.add_summary(summary_str, episode)

            env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode)))

        # Save the last checkpoint
        saver.save(tf.get_default_session(), network_path)
313 |
--------------------------------------------------------------------------------
/Chapter14/README.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | Chapter 14
3 | ==========
4 |
5 | Support code for *Chapter 14: Big(ger) Data*
6 |
7 | Data
8 | ----
9 |
10 | This chapter relies only on the image dataset that is packaged with the
11 | repository at ``../SimpleImageDataset/``.
12 |
13 | Scripts
14 | -------
15 |
16 | chapter.py
17 | Code as written in the book
18 | jugfile.py
19 | Example jugfile
20 | image-classification.py
21 | Jugfile implementation of image classification from Chapter 10
22 |
23 | setup-aws.txt
24 | Commands to setup Amazon WebServices machine
25 | run-jugfile.sh
26 | Wrapper script to run jug file on jugfile.py
27 | run-image-classification.sh
28 | Wrapper script to run jug file on image-classification.py
29 |
--------------------------------------------------------------------------------
/Chapter14/chapter.py:
--------------------------------------------------------------------------------
1 | from jug import TaskGenerator
2 | from glob import glob
3 | import mahotas as mh
@TaskGenerator
def compute_texture(im):
    """Haralick texture features for the image at path `im` (as a jug task)."""
    from features import texture
    grey = mh.colors.rgb2gray(mh.imread(im))
    return texture(grey)
9 |
@TaskGenerator
def chist_file(fname):
    """Color histogram features for the image at path `fname` (as a jug task).

    BUG FIX: the local ``features`` module (Chapter14/features.py) defines
    ``color_histogram``, not ``chist``; importing ``chist`` raised ImportError.
    """
    from features import color_histogram
    im = mh.imread(fname)
    return color_histogram(im)
15 |
import numpy as np
# Wrap the numpy helpers as jug tasks so arrays are built lazily
to_array = TaskGenerator(np.array)
hstack = TaskGenerator(np.hstack)

haralicks = []
chists = []
labels = []

# Change this variable to point to
# the location of the dataset on disk
basedir = '../SimpleImageDataset/'
# Use glob to get all the images
images = glob('{}/*.jpg'.format(basedir))

for fname in sorted(images):
    haralicks.append(compute_texture(fname))
    chists.append(chist_file(fname))
    # The class is encoded in the filename as xxxx00.jpg
    labels.append(fname[:-len('00.jpg')])

haralicks = to_array(haralicks)
chists = to_array(chists)
labels = to_array(labels)
39 |
@TaskGenerator
def accuracy(features, labels):
    """Leave-one-out cross-validated accuracy of logistic regression.

    Returns the mean accuracy over all leave-one-out folds.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    # BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection (available since 0.18) provides the same functionality.
    from sklearn.model_selection import LeaveOneOut, cross_val_score

    clf = Pipeline([('preproc', StandardScaler()),
                    ('classifier', LogisticRegression())])
    cv = LeaveOneOut()
    scores = cross_val_score(
        clf, features, labels, cv=cv)
    return scores.mean()
# Accuracy with each feature set alone, then texture + color combined
scores_base = accuracy(haralicks, labels)
scores_chist = accuracy(chists, labels)

combined = hstack([chists, haralicks])
scores_combined = accuracy(combined, labels)
58 |
@TaskGenerator
def print_results(scores):
    """Write one 'Accuracy [name]: value' line per (name, score) pair."""
    with open('results.image.txt', 'w') as output:
        for name, score in scores:
            line = 'Accuracy [{}]: {:.1%}\n'.format(name, score.mean())
            output.write(line)
65 |
# Report the baseline results (texture, color histogram, combined)
print_results([
    ('base', scores_base),
    ('chists', scores_chist),
    ('combined' , scores_combined),
])
71 |
@TaskGenerator
def compute_lbp(fname):
    """Local binary pattern (LBP) features for the image at path `fname`."""
    from mahotas.features import lbp
    grey = mh.colors.rgb2grey(mh.imread(fname))
    return lbp(grey, radius=8, points=6)
78 |
lbps = []
for fname in sorted(images):
    # the rest of the loop as before
    lbps.append(compute_lbp(fname))
lbps = to_array(lbps)

# Accuracy with LBP alone and with all three feature sets stacked
scores_lbps = accuracy(lbps, labels)
combined_all = hstack([chists, haralicks, lbps])
scores_combined_all = accuracy(combined_all, labels)

# Final report including the LBP-based results
print_results([
    ('base', scores_base),
    ('chists', scores_chist),
    ('lbps', scores_lbps),
    ('combined' , scores_combined),
    ('combined_all' , scores_combined_all),
])
96 |
--------------------------------------------------------------------------------
/Chapter14/features.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | import numpy as np
9 | import mahotas as mh
10 |
11 |
def edginess_sobel(image):
    '''Measure the "edginess" of an image

    image should be a 2d numpy array (an image)

    Returns a floating point value which is higher the "edgier" the image is.

    '''
    # L2 norm of the raw Sobel filter response over all pixels
    edge_response = mh.sobel(image, just_filter=True).ravel()
    return np.sqrt(np.dot(edge_response, edge_response))
23 |
def texture(im):
    '''Compute Haralick texture features for an image

    Parameters
    ----------
    im : ndarray

    Returns
    -------
    fs : ndarray
        1-D array of features
    '''
    # haralick returns a 2-D array (one row per direction); flatten it
    return mh.features.haralick(im.astype(np.uint8)).ravel()
38 |
39 |
def color_histogram(im):
    '''Compute color histogram of input image

    Parameters
    ----------
    im : ndarray
        should be an RGB image

    Returns
    -------
    c : ndarray
        1-D array of 64 log-scaled histogram values
    '''
    # Quantize each channel down to 4 levels (values 0..3)
    quantized = im // 64

    # Combine the three channel levels into one code in [0, 64)
    r, g, b = quantized.transpose((2, 0, 1))
    codes = r + 4 * g + 16 * b

    counts = np.bincount(codes.ravel(), minlength=64)
    # Log-scale so no single bin dominates the feature vector
    return np.log1p(counts.astype(float))
70 |
71 |
--------------------------------------------------------------------------------
/Chapter14/image-classification.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
import mahotas as mh
import numpy as np
from glob import glob
from jug import TaskGenerator

# We need to use the `features` module from chapter 10.
from sys import path
path.append('../ch10')
# NOTE(review): there is no ../ch10 directory in this repository; the
# `features` import actually resolves to the local Chapter14/features.py.
# Confirm whether this path entry is still needed.


# This is the jug-enabled version of the script ``figure18.py`` in Chapter 10

basedir = '../SimpleImageDataset/'
21 |
@TaskGenerator
def compute_texture(im):
    '''Compute features for an image

    Parameters
    ----------
    im : str
        filepath for image to process

    Returns
    -------
    fs : ndarray
        1-D array of features
    '''
    from features import texture
    grey = mh.colors.rgb2grey(mh.imread(im))
    return texture(grey)
39 |
@TaskGenerator
def chist(fname):
    '''Color histogram features for the image at path `fname` (as a jug task).'''
    from features import color_histogram
    return color_histogram(mh.imread(fname))
45 |
@TaskGenerator
def compute_lbp(fname):
    '''Local binary pattern (LBP) features for the image at path `fname`.'''
    from mahotas.features import lbp
    grey = mh.colors.rgb2grey(mh.imread(fname))
    return lbp(grey, radius=8, points=6)
52 |
53 |
@TaskGenerator
def accuracy(features, labels):
    '''Leave-one-out cross-validated accuracy of logistic regression.

    Returns the mean accuracy over all leave-one-out folds.
    '''
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    # BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection (available since 0.18) provides the same functionality.
    from sklearn.model_selection import LeaveOneOut, cross_val_score
    # We use logistic regression because it is very fast.
    # Feel free to experiment with other classifiers
    clf = Pipeline([('preproc', StandardScaler()),
                    ('classifier', LogisticRegression())])
    cv = LeaveOneOut()
    scores = cross_val_score(
        clf, features, labels, cv=cv)
    return scores.mean()
68 |
69 |
@TaskGenerator
def print_results(scores):
    '''Write per-feature-set cross-validation accuracies to results.image.txt.'''
    with open('results.image.txt', 'w') as output:
        for name, score in scores:
            output.write('Accuracy (LOO x-val) with Logistic Regression [{0}]: {1:.1%}\n'.format(
                name, score.mean()))
76 |
77 |
78 | to_array = TaskGenerator(np.array)
79 | hstack = TaskGenerator(np.hstack)
80 |
81 | haralicks = []
82 | chists = []
83 | lbps = []
84 | labels = []
85 |
86 | # Use glob to get all the images
87 | images = glob('{0}/*.jpg'.format(basedir))
88 | for fname in sorted(images):
89 | haralicks.append(compute_texture(fname))
90 | chists.append(chist(fname))
91 | lbps.append(compute_lbp(fname))
92 | labels.append(fname[:-len('00.jpg')]) # The class is encoded in the filename as xxxx00.jpg
93 |
94 | haralicks = to_array(haralicks)
95 | chists = to_array(chists)
96 | lbps = to_array(lbps)
97 | labels = to_array(labels)
98 |
99 | scores_base = accuracy(haralicks, labels)
100 | scores_chist = accuracy(chists, labels)
101 | scores_lbps = accuracy(lbps, labels)
102 |
103 | combined = hstack([chists, haralicks])
104 | scores_combined = accuracy(combined, labels)
105 |
106 | combined_all = hstack([chists, haralicks, lbps])
107 | scores_combined_all = accuracy(combined_all, labels)
108 |
109 | print_results([
110 | ('base', scores_base),
111 | ('chists', scores_chist),
112 | ('lbps', scores_lbps),
113 | ('combined' , scores_combined),
114 | ('combined_all' , scores_combined_all),
115 | ])
116 |
117 |
--------------------------------------------------------------------------------
/Chapter14/jugfile.py:
--------------------------------------------------------------------------------
1 | # This code is supporting material for the book
2 | # Building Machine Learning Systems with Python
3 | # by Willi Richert and Luis Pedro Coelho
4 | # published by PACKT Publishing
5 | #
6 | # It is made available under the MIT License
7 |
8 | from jug import TaskGenerator
9 | from time import sleep
10 |
11 |
@TaskGenerator
def double(x):
    """Return twice `x` after a 4 second pause (simulates a slow task)."""
    sleep(4)
    return x * 2
16 |
17 |
@TaskGenerator
def add(a, b):
    """Return the sum of two task results."""
    return a + b
21 |
22 |
@TaskGenerator
def print_final_result(oname, value):
    """Write the final computed value to the file named `oname`."""
    with open(oname, 'w') as output:
        output.write("Final result: {0}\n".format(value))
27 |
# Build the task graph: 2 -> 4 -> 8 and 7 -> 14 -> 28, then write 8 + 28
input = 2
y = double(input)
z = double(y)

y2 = double(7)
z2 = double(y2)
print_final_result('output.txt', add(z, z2))
35 |
--------------------------------------------------------------------------------
/Chapter14/run-image-classification.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Run the jug-enabled image classification pipeline
jug execute image-classification.py
4 |
--------------------------------------------------------------------------------
/Chapter14/run-jugfile.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Run jug against the default jugfile (jugfile.py) in this directory
jug execute
4 |
5 |
--------------------------------------------------------------------------------
/Chapter14/setup-aws.txt:
--------------------------------------------------------------------------------
1 | sudo yum update
2 | sudo yum -y install python-devel python-pip numpy scipy python-matplotlib
3 | sudo yum -y install gcc-c++
4 | sudo yum -y install git
5 | sudo pip-python install -U pip
6 | sudo pip install scikit-learn jug mahotas
7 |
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Building Machine Learning Systems with Python - Third edition
2 |
3 |
4 |
5 | This is the code repository for [Building Machine Learning Systems with Python - Third edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-third-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788623223), published by Packt.
6 |
7 | **Explore machine learning and deep learning techniques for building intelligent systems using scikit-learn and TensorFlow**
8 |
9 | ## What is this book about?
10 | Machine learning allows systems to learn without being explicitly programmed. Python is one of the most popular languages used to develop machine learning applications which take advantage of its extensive library support. This third edition of Building Machine Learning Systems with Python addresses recent developments in the field, by covering the most used datasets and libraries to help you build practical machine learning systems.
11 |
12 | This book covers the following exciting features:
13 | * Build a classification system that can be applied to text, image, and sound
14 | * Employ Amazon Web Services (AWS) to run analysis on the cloud
15 | * Solve problems related to regression using TensorFlow
16 | * Recommend products to users based on their past purchases
17 | * Explore the steps required to add collaborative filtering using TensorFlow
18 |
19 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1788623223) today!
20 |
21 |
23 |
24 | ## Instructions and Navigations
25 | All of the code is organized into folders. For example, Chapter01.
26 |
27 | The code will look like the following:
28 | ```
29 | def fetch_posts(fn):
30 | for line in open(fn, "r"):
31 | post_id, text = line.split("\t")
32 | yield int(post_id), text.strip()
33 |
34 | ```
35 |
36 | **Following is what you need for this book:**
37 | Building Machine Learning Systems with Python is for data scientists, machine learning developers, and Python developers who want to learn how to build increasingly complex machine learning systems. You will use Python's machine learning capabilities to develop effective solutions. Prior knowledge of Python programming is expected.
38 |
39 | With the following software and hardware list you can run all code files present in the book (Chapter 1-14).
40 |
41 | ### Software and Hardware List
42 |
43 | | Chapter | Software required | OS required |
44 | | -------- | ------------------------------------------------------| -----------------------------------|
45 | | 1-14 | Python 3, NumPy, SciPy, scikit-learn (latest version) | Ubuntu/Linux, macOS or Windows |
46 |
47 |
48 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://www.packtpub.com/sites/default/files/downloads/BuildingMachineLearningSystemswithPythonThirdedition_ColorImages.pdf).
49 |
50 | ### Related products
51 | * Mastering Machine Learning Algorithms [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-algorithms?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788621113) [[Amazon]](https://www.amazon.com/dp/1788621115)
52 |
53 | * Machine Learning Solutions [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-solutions?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788390040) [[Amazon]](https://www.amazon.com/dp/1788390040)
54 |
55 | ## Get to Know the Authors
56 | **Luis Pedro Coelho**
57 | is a computational biologist who analyzes DNA from microbial communities to characterize their behavior. He has also worked extensively in bioimage informatics―the application of machine learning techniques for the analysis of images of biological specimens. His main focus is on the processing and integration of large-scale datasets. He has a PhD from Carnegie Mellon University and has authored several scientific publications. In 2004, he began developing in Python and has contributed to several open source libraries. He is currently a faculty member at Fudan University in Shanghai.
58 |
59 | **Willi Richert**
60 | has a PhD in machine learning/robotics, where he has used reinforcement learning, hidden Markov models, and Bayesian networks to let heterogeneous robots learn by imitation. Now at Microsoft, he is involved in various machine learning areas, such as deep learning, active learning, or statistical machine translation. Willi started as a child with BASIC on his Commodore 128. Later, he discovered Turbo Pascal, then Java, then C++—only to finally arrive at his true love: Python.
61 |
62 | **Matthieu Brucher**
63 | is a computer scientist who specializes in high-performance computing and computational modeling and currently works for JPMorgan in their quantitative research branch. He is also the lead developer of Audio ToolKit, a library for real-time audio signal processing. He has a PhD in machine learning and signals processing from the University of Strasbourg, two Master of Science degrees—one in digital electronics and signal processing and another in automation – from the University of Paris XI and Supelec, as well as a Master of Music degree from Bath Spa University.
64 |
65 | ## Other books by the authors
66 | * [Building Machine Learning Systems with Python](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781782161400)
67 | * [Building Machine Learning Systems with Python - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-second-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781784392772)
68 |
69 |
70 | ### Suggestions and Feedback
71 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions.
72 |
73 |
--------------------------------------------------------------------------------
/SimpleImageDataset/building00.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building00.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building01.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building02.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building03.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building04.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building05.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building06.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building07.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building08.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building09.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building10.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building11.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building12.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building13.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building14.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building15.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building16.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building17.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building18.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building19.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building20.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building21.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building22.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building22.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building23.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building23.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building24.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building24.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building25.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building25.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building26.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building26.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building27.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building27.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building28.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building28.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/building29.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building29.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene00.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene00.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene01.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene02.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene03.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene04.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene05.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene06.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene07.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene08.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene09.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene10.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene11.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene12.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene13.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene14.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene15.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene16.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene17.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene18.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene19.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene20.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene21.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene22.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene22.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene23.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene23.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene24.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene24.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene25.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene25.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene26.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene26.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene27.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene27.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene28.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene28.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/scene29.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene29.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text00.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text00.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text01.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text02.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text03.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text04.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text05.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text06.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text07.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text08.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text09.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text10.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text11.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text12.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text13.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text14.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text15.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text16.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text17.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text18.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text19.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text20.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text21.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text21.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text22.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text22.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text23.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text23.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text24.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text24.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text25.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text25.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text26.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text26.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text27.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text27.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text28.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text28.jpg
--------------------------------------------------------------------------------
/SimpleImageDataset/text29.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text29.jpg
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: BMLS3
2 | channels:
3 | - conda-forge
4 | - defaults
5 | dependencies:
6 | - bottle=0.12.9=py36_0
7 | - jug=1.6.4=py_0
8 | - pyyaml=3.12=py36_1
9 | - yaml=0.1.6=0
10 | - bleach=1.5.0=py36_0
11 | - cairo=1.14.8=0
12 | - certifi=2016.2.28=py36_0
13 | - cycler=0.10.0=py36_0
14 | - dbus=1.10.20=0
15 | - decorator=4.1.2=py36_0
16 | - entrypoints=0.2.3=py36_0
17 | - expat=2.1.0=0
18 | - fontconfig=2.12.1=3
19 | - freetype=2.5.5=2
20 | - glib=2.50.2=1
21 | - graphviz=2.38.0=5
22 | - gst-plugins-base=1.8.0=0
23 | - gstreamer=1.8.0=0
24 | - harfbuzz=0.9.39=2
25 | - html5lib=0.9999999=py36_0
26 | - icu=54.1=0
27 | - ipykernel=4.6.1=py36_0
28 | - ipython=6.1.0=py36_0
29 | - ipython_genutils=0.2.0=py36_0
30 | - ipywidgets=6.0.0=py36_0
31 | - jbig=2.1=0
32 | - jedi=0.10.2=py36_2
33 | - jinja2=2.9.6=py36_0
34 | - jpeg=9b=0
35 | - jsonschema=2.6.0=py36_0
36 | - jupyter=1.0.0=py36_3
37 | - jupyter_client=5.1.0=py36_0
38 | - jupyter_console=5.2.0=py36_0
39 | - jupyter_core=4.3.0=py36_0
40 | - libffi=3.2.1=1
41 | - libgcc=5.2.0=0
42 | - libgfortran=3.0.0=1
43 | - libiconv=1.14=0
44 | - libpng=1.6.30=1
45 | - libsodium=1.0.10=0
46 | - libtiff=4.0.6=3
47 | - libtool=2.4.2=0
48 | - libxcb=1.12=1
49 | - libxml2=2.9.4=0
50 | - markupsafe=1.0=py36_0
51 | - matplotlib=2.0.2=np113py36_0
52 | - mistune=0.7.4=py36_0
53 | - mkl=2017.0.3=0
54 | - nbconvert=5.2.1=py36_0
55 | - nbformat=4.4.0=py36_0
56 | - notebook=5.0.0=py36_0
57 | - numpy=1.13.1=py36_0
58 | - openssl=1.0.2l=0
59 | - pandocfilters=1.4.2=py36_0
60 | - pango=1.40.3=1
61 | - path.py=10.3.1=py36_0
62 | - pcre=8.39=1
63 | - pexpect=4.2.1=py36_0
64 | - pickleshare=0.7.4=py36_0
65 | - pip=9.0.1=py36_1
66 | - pixman=0.34.0=0
67 | - prompt_toolkit=1.0.15=py36_0
68 | - ptyprocess=0.5.2=py36_0
69 | - pygments=2.2.0=py36_0
70 | - pyparsing=2.2.0=py36_0
71 | - pyqt=5.6.0=py36_2
72 | - python=3.6.2=0
73 | - python-dateutil=2.6.1=py36_0
74 | - python-graphviz=0.5.2=py36_0
75 | - pytz=2017.2=py36_0
76 | - pyzmq=16.0.2=py36_0
77 | - qt=5.6.2=5
78 | - qtconsole=4.3.1=py36_0
79 | - readline=6.2=2
80 | - scikit-learn=0.19.0=np113py36_0
81 | - scipy=0.19.1=np113py36_0
82 | - setuptools=36.4.0=py36_1
83 | - simplegeneric=0.8.1=py36_1
84 | - sip=4.18=py36_0
85 | - six=1.10.0=py36_0
86 | - sqlite=3.13.0=0
87 | - terminado=0.6=py36_0
88 | - testpath=0.3.1=py36_0
89 | - tk=8.5.18=0
90 | - tornado=4.5.2=py36_0
91 | - traitlets=4.3.2=py36_0
92 | - wcwidth=0.1.7=py36_0
93 | - wheel=0.29.0=py36_0
94 | - widgetsnbextension=3.0.2=py36_0
95 | - xz=5.2.3=0
96 | - zeromq=4.1.5=0
97 | - zlib=1.2.11=0
98 | - pip:
99 | - ipython-genutils==0.2.0
100 | - jupyter-client==5.1.0
101 | - jupyter-console==5.2.0
102 | - jupyter-core==4.3.0
103 | - prompt-toolkit==1.0.15
104 | prefix: /home/luispedro/.conda/envs/BMLS3
105 |
106 |
--------------------------------------------------------------------------------