├── Chapter01 ├── chapter_01.ipynb └── data │ └── web_traffic.tsv ├── Chapter02 ├── README.rst ├── chapter_02.ipynb ├── data │ └── seeds.tsv ├── load.py └── tests │ └── test_load.py ├── Chapter03 ├── chapter_03.ipynb └── data │ ├── .gitignore │ └── download.sh ├── Chapter04 ├── chapter_04.ipynb └── data │ └── download.sh ├── Chapter05 └── chapter_05.ipynb ├── Chapter06 ├── chapter_06.ipynb └── data │ └── toy │ ├── 01.txt │ ├── 02.txt │ ├── 03.txt │ ├── 04.txt │ └── 05.txt ├── Chapter07 ├── README.rst ├── Recommendations.ipynb ├── apriori │ ├── .gitignore │ ├── apriori.py │ ├── apriori_example.py │ ├── apriori_naive.py │ └── download.sh ├── data │ ├── .gitignore │ └── download.sh ├── load_ml100k.py └── stacked.py ├── Chapter08 └── chapter_08.ipynb ├── Chapter09 ├── chapter_09.ipynb ├── data │ ├── corpus.csv │ ├── missing.tsv │ └── not_authorized.tsv └── twitterauth.py ├── Chapter10 ├── README.rst ├── Topic modeling.ipynb ├── data │ ├── .gitignore │ ├── download_ap.sh │ ├── download_wp.sh │ └── preprocess-wikidata.sh ├── wikitopics_create.py └── wikitopics_create_hdp.py ├── Chapter11 └── chapter_11.ipynb ├── Chapter12 ├── Computer Vision.ipynb ├── README.rst ├── ch12_3rd │ └── chapter_12.ipynb ├── download.sh ├── forest.jpeg └── scene00.jpg ├── Chapter13 ├── chapter_13.ipynb ├── simple_breakout.py └── tf_breakout.py ├── Chapter14 ├── README.rst ├── chapter.py ├── features.py ├── image-classification.py ├── jugfile.py ├── run-image-classification.sh ├── run-jugfile.sh └── setup-aws.txt ├── LICENSE ├── README.md ├── SimpleImageDataset ├── building00.jpg ├── building01.jpg ├── building02.jpg ├── building03.jpg ├── building04.jpg ├── building05.jpg ├── building06.jpg ├── building07.jpg ├── building08.jpg ├── building09.jpg ├── building10.jpg ├── building11.jpg ├── building12.jpg ├── building13.jpg ├── building14.jpg ├── building15.jpg ├── building16.jpg ├── building17.jpg ├── building18.jpg ├── building19.jpg ├── building20.jpg ├── building21.jpg ├── building22.jpg ├── 
building23.jpg ├── building24.jpg ├── building25.jpg ├── building26.jpg ├── building27.jpg ├── building28.jpg ├── building29.jpg ├── scene00.jpg ├── scene01.jpg ├── scene02.jpg ├── scene03.jpg ├── scene04.jpg ├── scene05.jpg ├── scene06.jpg ├── scene07.jpg ├── scene08.jpg ├── scene09.jpg ├── scene10.jpg ├── scene11.jpg ├── scene12.jpg ├── scene13.jpg ├── scene14.jpg ├── scene15.jpg ├── scene16.jpg ├── scene17.jpg ├── scene18.jpg ├── scene19.jpg ├── scene20.jpg ├── scene21.jpg ├── scene22.jpg ├── scene23.jpg ├── scene24.jpg ├── scene25.jpg ├── scene26.jpg ├── scene27.jpg ├── scene28.jpg ├── scene29.jpg ├── text00.jpg ├── text01.jpg ├── text02.jpg ├── text03.jpg ├── text04.jpg ├── text05.jpg ├── text06.jpg ├── text07.jpg ├── text08.jpg ├── text09.jpg ├── text10.jpg ├── text11.jpg ├── text12.jpg ├── text13.jpg ├── text14.jpg ├── text15.jpg ├── text16.jpg ├── text17.jpg ├── text18.jpg ├── text19.jpg ├── text20.jpg ├── text21.jpg ├── text22.jpg ├── text23.jpg ├── text24.jpg ├── text25.jpg ├── text26.jpg ├── text27.jpg ├── text28.jpg └── text29.jpg └── environment.yml /Chapter01/data/web_traffic.tsv: -------------------------------------------------------------------------------- 1 | 1.000000 2273.331055 2 | 2.000000 1657.255493 3 | 3.000000 nan 4 | 4.000000 1366.846436 5 | 5.000000 1489.234375 6 | 6.000000 1338.020020 7 | 7.000000 1884.647339 8 | 8.000000 2284.754150 9 | 9.000000 1335.810913 10 | 10.000000 1025.832397 11 | 11.000000 1140.241089 12 | 12.000000 1478.341797 13 | 13.000000 1204.218384 14 | 14.000000 1312.506348 15 | 15.000000 1300.218872 16 | 16.000000 1495.334717 17 | 17.000000 1161.070801 18 | 18.000000 1366.701904 19 | 19.000000 1273.441162 20 | 20.000000 1246.935425 21 | 21.000000 1072.582886 22 | 22.000000 1877.628296 23 | 23.000000 1403.939697 24 | 24.000000 nan 25 | 25.000000 926.635559 26 | 26.000000 1534.334595 27 | 27.000000 2105.287109 28 | 28.000000 2114.336182 29 | 29.000000 1994.525146 30 | 30.000000 1046.091919 31 | 31.000000 2091.849854 32 | 
32.000000 2227.968018 33 | 33.000000 1414.702515 34 | 34.000000 1719.032471 35 | 35.000000 1722.046875 36 | 36.000000 1293.547974 37 | 37.000000 1840.270752 38 | 38.000000 2542.300781 39 | 39.000000 1609.983643 40 | 40.000000 2456.552246 41 | 41.000000 1929.899170 42 | 42.000000 1767.186646 43 | 43.000000 1204.809082 44 | 44.000000 1762.485840 45 | 45.000000 1724.805054 46 | 46.000000 2161.871338 47 | 47.000000 809.148987 48 | 48.000000 1323.292603 49 | 49.000000 nan 50 | 50.000000 1810.368774 51 | 51.000000 1934.512695 52 | 52.000000 1352.385010 53 | 53.000000 2014.328369 54 | 54.000000 1208.587036 55 | 55.000000 2171.869629 56 | 56.000000 1701.173584 57 | 57.000000 1900.717651 58 | 58.000000 1758.676025 59 | 59.000000 1477.506836 60 | 60.000000 1922.072266 61 | 61.000000 1972.815430 62 | 62.000000 1811.206665 63 | 63.000000 1367.138306 64 | 64.000000 1775.942993 65 | 65.000000 1689.120850 66 | 66.000000 1707.929565 67 | 67.000000 1354.767578 68 | 68.000000 1318.591553 69 | 69.000000 1513.624146 70 | 70.000000 2430.133789 71 | 71.000000 1788.733276 72 | 72.000000 1381.874512 73 | 73.000000 1358.690796 74 | 74.000000 991.249329 75 | 75.000000 1586.527954 76 | 76.000000 2058.635498 77 | 77.000000 1692.005859 78 | 78.000000 1459.202759 79 | 79.000000 1202.182495 80 | 80.000000 1950.823730 81 | 81.000000 1494.491699 82 | 82.000000 1654.861328 83 | 83.000000 1218.084351 84 | 84.000000 1457.957764 85 | 85.000000 1179.684082 86 | 86.000000 1484.483154 87 | 87.000000 2731.174561 88 | 88.000000 1414.573853 89 | 89.000000 1061.369995 90 | 90.000000 1573.748169 91 | 91.000000 1260.964722 92 | 92.000000 1215.403687 93 | 93.000000 981.535828 94 | 94.000000 1345.459351 95 | 95.000000 2158.874512 96 | 96.000000 nan 97 | 97.000000 730.229004 98 | 98.000000 1033.958618 99 | 99.000000 1627.994995 100 | 100.000000 1155.129639 101 | 101.000000 1305.006836 102 | 102.000000 1444.623901 103 | 103.000000 2242.751709 104 | 104.000000 1843.219116 105 | 105.000000 1211.218140 106 | 
106.000000 1384.472168 107 | 107.000000 1313.780762 108 | 108.000000 1509.269897 109 | 109.000000 1796.398926 110 | 110.000000 1265.616333 111 | 111.000000 1089.800781 112 | 112.000000 2159.838135 113 | 113.000000 1166.384277 114 | 114.000000 1391.697388 115 | 115.000000 1445.436523 116 | 116.000000 1196.357056 117 | 117.000000 1049.317017 118 | 118.000000 1999.745605 119 | 119.000000 473.342102 120 | 120.000000 1285.387329 121 | 121.000000 1737.291260 122 | 122.000000 1534.551758 123 | 123.000000 2636.690674 124 | 124.000000 1372.776123 125 | 125.000000 1325.509033 126 | 126.000000 833.302063 127 | 127.000000 1199.291992 128 | 128.000000 2431.282959 129 | 129.000000 1739.882080 130 | 130.000000 2121.373779 131 | 131.000000 1726.600342 132 | 132.000000 1343.868774 133 | 133.000000 1072.934570 134 | 134.000000 1387.351807 135 | 135.000000 1054.316284 136 | 136.000000 1051.666626 137 | 137.000000 1270.661377 138 | 138.000000 1857.948853 139 | 139.000000 1436.369629 140 | 140.000000 2016.855469 141 | 141.000000 1352.831787 142 | 142.000000 909.600891 143 | 143.000000 1761.136353 144 | 144.000000 1009.373230 145 | 145.000000 2035.223267 146 | 146.000000 1534.073975 147 | 147.000000 1708.339966 148 | 148.000000 734.669800 149 | 149.000000 1456.019043 150 | 150.000000 1332.946411 151 | 151.000000 1605.986450 152 | 152.000000 1065.177856 153 | 153.000000 1291.167480 154 | 154.000000 1370.269043 155 | 155.000000 nan 156 | 156.000000 1928.732788 157 | 157.000000 2249.301270 158 | 158.000000 988.290894 159 | 159.000000 1024.199097 160 | 160.000000 875.135132 161 | 161.000000 1568.285400 162 | 162.000000 1031.664551 163 | 163.000000 1079.630859 164 | 164.000000 1086.948853 165 | 165.000000 1152.780884 166 | 166.000000 961.387634 167 | 167.000000 1232.227417 168 | 168.000000 2189.118408 169 | 169.000000 1181.132080 170 | 170.000000 1477.397705 171 | 171.000000 1613.063110 172 | 172.000000 922.071716 173 | 173.000000 2432.531006 174 | 174.000000 1651.096313 175 | 175.000000 
1078.927734 176 | 176.000000 825.445740 177 | 177.000000 1579.604736 178 | 178.000000 1873.424316 179 | 179.000000 1671.580200 180 | 180.000000 2454.900146 181 | 181.000000 nan 182 | 182.000000 nan 183 | 183.000000 1620.557739 184 | 184.000000 896.071289 185 | 185.000000 1950.104126 186 | 186.000000 2299.738281 187 | 187.000000 2165.413818 188 | 188.000000 1108.689819 189 | 189.000000 1732.473877 190 | 190.000000 1602.138550 191 | 191.000000 1685.260254 192 | 192.000000 2026.701294 193 | 193.000000 1690.662964 194 | 194.000000 1737.694214 195 | 195.000000 1475.258423 196 | 196.000000 1770.715698 197 | 197.000000 1349.187500 198 | 198.000000 1571.474609 199 | 199.000000 1862.707397 200 | 200.000000 1459.782349 201 | 201.000000 2284.336426 202 | 202.000000 1553.837158 203 | 203.000000 2323.653320 204 | 204.000000 1204.110352 205 | 205.000000 1769.132324 206 | 206.000000 2186.001709 207 | 207.000000 1331.175537 208 | 208.000000 1781.712402 209 | 209.000000 1243.196533 210 | 210.000000 1287.143433 211 | 211.000000 nan 212 | 212.000000 1502.286255 213 | 213.000000 877.458313 214 | 214.000000 1522.805054 215 | 215.000000 2611.905029 216 | 216.000000 1949.547485 217 | 217.000000 1707.867432 218 | 218.000000 1336.154785 219 | 219.000000 2212.902832 220 | 220.000000 1358.864380 221 | 221.000000 2502.499023 222 | 222.000000 1765.352539 223 | 223.000000 1529.414673 224 | 224.000000 1422.890625 225 | 225.000000 1950.468262 226 | 226.000000 2156.668945 227 | 227.000000 1504.507324 228 | 228.000000 1659.369995 229 | 229.000000 1033.489746 230 | 230.000000 1538.519165 231 | 231.000000 1345.894897 232 | 232.000000 2022.561157 233 | 233.000000 2036.099121 234 | 234.000000 2111.207275 235 | 235.000000 1589.440796 236 | 236.000000 1667.526733 237 | 237.000000 1064.860840 238 | 238.000000 1458.587402 239 | 239.000000 2401.041992 240 | 240.000000 1449.993530 241 | 241.000000 2407.700684 242 | 242.000000 1832.315430 243 | 243.000000 1424.621704 244 | 244.000000 1756.471436 245 | 
245.000000 1642.072632 246 | 246.000000 1429.027832 247 | 247.000000 1928.955200 248 | 248.000000 1620.687744 249 | 249.000000 1362.290161 250 | 250.000000 1275.254883 251 | 251.000000 1301.666138 252 | 252.000000 998.833984 253 | 253.000000 1163.223877 254 | 254.000000 1480.306641 255 | 255.000000 2131.771240 256 | 256.000000 1833.486206 257 | 257.000000 1161.478271 258 | 258.000000 1168.261841 259 | 259.000000 1569.966431 260 | 260.000000 1675.275146 261 | 261.000000 966.771240 262 | 262.000000 1395.518433 263 | 263.000000 1638.024780 264 | 264.000000 1712.951782 265 | 265.000000 1799.802979 266 | 266.000000 1916.816895 267 | 267.000000 1895.225952 268 | 268.000000 1008.570923 269 | 269.000000 1002.869019 270 | 270.000000 1962.243896 271 | 271.000000 1729.660400 272 | 272.000000 732.257080 273 | 273.000000 2166.750244 274 | 274.000000 1060.113159 275 | 275.000000 1519.845337 276 | 276.000000 1708.907227 277 | 277.000000 1227.915405 278 | 278.000000 1085.683716 279 | 279.000000 1045.782104 280 | 280.000000 1720.696899 281 | 281.000000 1494.705444 282 | 282.000000 961.153259 283 | 283.000000 1420.741089 284 | 284.000000 1318.101196 285 | 285.000000 740.344238 286 | 286.000000 879.328247 287 | 287.000000 1358.047974 288 | 288.000000 2318.087402 289 | 289.000000 1545.019775 290 | 290.000000 1582.846069 291 | 291.000000 1693.926636 292 | 292.000000 1152.875244 293 | 293.000000 1469.117554 294 | 294.000000 2005.669189 295 | 295.000000 1113.713867 296 | 296.000000 1281.609741 297 | 297.000000 1500.906860 298 | 298.000000 1409.276733 299 | 299.000000 943.180420 300 | 300.000000 791.694214 301 | 301.000000 704.541565 302 | 302.000000 1585.458862 303 | 303.000000 1004.198181 304 | 304.000000 796.337952 305 | 305.000000 1000.802917 306 | 306.000000 2156.751465 307 | 307.000000 638.728699 308 | 308.000000 1391.960815 309 | 309.000000 1644.898071 310 | 310.000000 1398.569580 311 | 311.000000 967.325500 312 | 312.000000 1578.804077 313 | 313.000000 1068.719360 314 | 314.000000 
1418.943726 315 | 315.000000 1784.473877 316 | 316.000000 1952.727905 317 | 317.000000 997.095337 318 | 318.000000 1485.097778 319 | 319.000000 1419.496948 320 | 320.000000 1534.019897 321 | 321.000000 1633.627075 322 | 322.000000 1012.951843 323 | 323.000000 2085.274414 324 | 324.000000 3101.601562 325 | 325.000000 1858.955200 326 | 326.000000 983.584900 327 | 327.000000 2169.784180 328 | 328.000000 2086.046875 329 | 329.000000 2204.625488 330 | 330.000000 1578.105591 331 | 331.000000 1526.881104 332 | 332.000000 1725.510986 333 | 333.000000 937.253723 334 | 334.000000 1678.458130 335 | 335.000000 1572.530029 336 | 336.000000 1188.498413 337 | 337.000000 1535.775879 338 | 338.000000 1335.063721 339 | 339.000000 1702.118652 340 | 340.000000 1927.334839 341 | 341.000000 1652.505371 342 | 342.000000 1492.118774 343 | 343.000000 1801.889038 344 | 344.000000 1977.426025 345 | 345.000000 1246.210693 346 | 346.000000 2142.636719 347 | 347.000000 1352.310547 348 | 348.000000 1507.071777 349 | 349.000000 1378.349976 350 | 350.000000 2387.540283 351 | 351.000000 1306.161377 352 | 352.000000 1425.368164 353 | 353.000000 1882.434814 354 | 354.000000 2395.280762 355 | 355.000000 1600.453857 356 | 356.000000 1445.337036 357 | 357.000000 1985.960449 358 | 358.000000 1160.152100 359 | 359.000000 2099.111816 360 | 360.000000 1541.235962 361 | 361.000000 1412.315308 362 | 362.000000 2116.764404 363 | 363.000000 1279.255859 364 | 364.000000 2040.119995 365 | 365.000000 2022.776611 366 | 366.000000 1902.603638 367 | 367.000000 1140.585327 368 | 368.000000 1904.104980 369 | 369.000000 2075.255127 370 | 370.000000 3662.633301 371 | 371.000000 1800.689453 372 | 372.000000 2432.671631 373 | 373.000000 1499.937500 374 | 374.000000 1041.650879 375 | 375.000000 1826.106323 376 | 376.000000 1734.499390 377 | 377.000000 1729.217041 378 | 378.000000 1077.025391 379 | 379.000000 1599.761108 380 | 380.000000 1147.693237 381 | 381.000000 1535.584473 382 | 382.000000 1515.563477 383 | 383.000000 
1541.500366 384 | 384.000000 1446.428467 385 | 385.000000 1249.276855 386 | 386.000000 1711.814209 387 | 387.000000 2115.800293 388 | 388.000000 1817.904053 389 | 389.000000 1761.030518 390 | 390.000000 2174.820312 391 | 391.000000 1793.098755 392 | 392.000000 1711.772339 393 | 393.000000 1931.489136 394 | 394.000000 1804.897095 395 | 395.000000 1881.685181 396 | 396.000000 2290.734131 397 | 397.000000 1840.967407 398 | 398.000000 1642.179443 399 | 399.000000 1375.341309 400 | 400.000000 1524.707642 401 | 401.000000 1361.021362 402 | 402.000000 1304.565796 403 | 403.000000 1655.716919 404 | 404.000000 1930.118652 405 | 405.000000 1559.966187 406 | 406.000000 1737.071411 407 | 407.000000 1753.080200 408 | 408.000000 1043.204834 409 | 409.000000 1202.575317 410 | 410.000000 1499.095825 411 | 411.000000 2102.189453 412 | 412.000000 2390.331543 413 | 413.000000 1327.265259 414 | 414.000000 1286.826416 415 | 415.000000 1414.089966 416 | 416.000000 1971.299805 417 | 417.000000 1243.213623 418 | 418.000000 1922.367920 419 | 419.000000 1163.862671 420 | 420.000000 1651.475464 421 | 421.000000 1301.186523 422 | 422.000000 1849.299316 423 | 423.000000 1799.256348 424 | 424.000000 1703.327393 425 | 425.000000 1627.862061 426 | 426.000000 1522.336914 427 | 427.000000 1408.989502 428 | 428.000000 2630.947754 429 | 429.000000 1648.483032 430 | 430.000000 1536.905884 431 | 431.000000 1433.750366 432 | 432.000000 1748.919678 433 | 433.000000 1274.653442 434 | 434.000000 1658.341675 435 | 435.000000 1580.411011 436 | 436.000000 1607.185913 437 | 437.000000 1381.490356 438 | 438.000000 1322.875366 439 | 439.000000 1168.433716 440 | 440.000000 1067.946533 441 | 441.000000 1890.483154 442 | 442.000000 1658.906250 443 | 443.000000 1064.380005 444 | 444.000000 868.906921 445 | 445.000000 1287.892456 446 | 446.000000 2167.587646 447 | 447.000000 1383.131226 448 | 448.000000 1417.915161 449 | 449.000000 2017.528442 450 | 450.000000 1777.718750 451 | 451.000000 1596.717407 452 | 452.000000 
1421.328735 453 | 453.000000 1324.599243 454 | 454.000000 1899.612427 455 | 455.000000 1513.721191 456 | 456.000000 1683.056152 457 | 457.000000 1369.445557 458 | 458.000000 1265.907593 459 | 459.000000 1035.090088 460 | 460.000000 2046.150024 461 | 461.000000 1498.508667 462 | 462.000000 1608.036011 463 | 463.000000 1330.513794 464 | 464.000000 1132.405518 465 | 465.000000 1237.636108 466 | 466.000000 2298.409180 467 | 467.000000 1241.165283 468 | 468.000000 2039.370850 469 | 469.000000 1177.535522 470 | 470.000000 1221.716675 471 | 471.000000 1745.758301 472 | 472.000000 1917.593384 473 | 473.000000 1165.316650 474 | 474.000000 861.017334 475 | 475.000000 1830.155396 476 | 476.000000 1170.794067 477 | 477.000000 1230.492554 478 | 478.000000 1274.034912 479 | 479.000000 1899.829224 480 | 480.000000 1867.080078 481 | 481.000000 1609.885742 482 | 482.000000 1963.965942 483 | 483.000000 1669.859253 484 | 484.000000 1292.068359 485 | 485.000000 1751.724243 486 | 486.000000 1335.341431 487 | 487.000000 1323.624023 488 | 488.000000 1651.736572 489 | 489.000000 2087.386963 490 | 490.000000 1438.429565 491 | 491.000000 1731.568237 492 | 492.000000 1949.754028 493 | 493.000000 2203.080078 494 | 494.000000 2261.097168 495 | 495.000000 1580.708740 496 | 496.000000 1562.130615 497 | 497.000000 1859.436646 498 | 498.000000 1793.891113 499 | 499.000000 1001.056335 500 | 500.000000 1912.867676 501 | 501.000000 2475.812744 502 | 502.000000 2105.730469 503 | 503.000000 1732.766724 504 | 504.000000 2310.781738 505 | 505.000000 1875.141357 506 | 506.000000 1817.766724 507 | 507.000000 1097.887329 508 | 508.000000 2017.046753 509 | 509.000000 2242.245361 510 | 510.000000 2773.306641 511 | 511.000000 1321.350464 512 | 512.000000 2739.834229 513 | 513.000000 1389.539062 514 | 514.000000 2251.552490 515 | 515.000000 2169.031006 516 | 516.000000 2029.887329 517 | 517.000000 1591.404053 518 | 518.000000 2343.211182 519 | 519.000000 2012.653320 520 | 520.000000 1614.831421 521 | 521.000000 
1672.772339 522 | 522.000000 2000.651978 523 | 523.000000 2896.021973 524 | 524.000000 2637.968750 525 | 525.000000 1884.990601 526 | 526.000000 2405.921143 527 | 527.000000 2257.248779 528 | 528.000000 1961.182495 529 | 529.000000 1849.048218 530 | 530.000000 1559.181519 531 | 531.000000 1560.701660 532 | 532.000000 2041.094482 533 | 533.000000 1998.698853 534 | 534.000000 2052.123291 535 | 535.000000 1803.678223 536 | 536.000000 1970.451904 537 | 537.000000 1939.131104 538 | 538.000000 2082.247803 539 | 539.000000 1409.396606 540 | 540.000000 2733.470947 541 | 541.000000 2221.219238 542 | 542.000000 2331.755371 543 | 543.000000 2438.380615 544 | 544.000000 1917.306030 545 | 545.000000 1988.092041 546 | 546.000000 2145.496094 547 | 547.000000 2278.642578 548 | 548.000000 2159.122803 549 | 549.000000 2627.566895 550 | 550.000000 1537.308228 551 | 551.000000 1559.624634 552 | 552.000000 3045.290527 553 | 553.000000 2246.550781 554 | 554.000000 2384.003906 555 | 555.000000 2010.736084 556 | 556.000000 1972.834229 557 | 557.000000 2146.448242 558 | 558.000000 2102.908203 559 | 559.000000 2329.290527 560 | 560.000000 1733.708252 561 | 561.000000 2641.247070 562 | 562.000000 1993.119873 563 | 563.000000 2200.874268 564 | 564.000000 2394.948975 565 | 565.000000 2191.825684 566 | 566.000000 2496.806396 567 | 567.000000 2391.000732 568 | 568.000000 2436.711182 569 | 569.000000 1738.463013 570 | 570.000000 2054.031982 571 | 571.000000 2036.267822 572 | 572.000000 1836.029175 573 | 573.000000 3007.133545 574 | 574.000000 1429.928833 575 | 575.000000 2216.402588 576 | 576.000000 1904.106812 577 | 577.000000 2285.255371 578 | 578.000000 1994.338013 579 | 579.000000 2059.176758 580 | 580.000000 2171.187012 581 | 581.000000 1982.419312 582 | 582.000000 2099.515381 583 | 583.000000 2507.017334 584 | 584.000000 1913.215332 585 | 585.000000 2561.822021 586 | 586.000000 1302.399536 587 | 587.000000 1860.632202 588 | 588.000000 2287.544434 589 | 589.000000 1734.690063 590 | 
590.000000 2156.122559 591 | 591.000000 2402.931885 592 | 592.000000 2404.802734 593 | 593.000000 3244.411377 594 | 594.000000 1978.216064 595 | 595.000000 2411.874023 596 | 596.000000 2007.088379 597 | 597.000000 2014.276733 598 | 598.000000 1565.664917 599 | 599.000000 2022.515991 600 | 600.000000 1772.145020 601 | 601.000000 2583.096436 602 | 602.000000 1844.953979 603 | 603.000000 1621.984863 604 | 604.000000 1770.774658 605 | 605.000000 2020.567627 606 | 606.000000 2355.657471 607 | 607.000000 1996.695801 608 | 608.000000 2127.384277 609 | 609.000000 2114.290771 610 | 610.000000 1935.230835 611 | 611.000000 2125.324707 612 | 612.000000 1787.222656 613 | 613.000000 2276.241211 614 | 614.000000 2978.175049 615 | 615.000000 2542.808594 616 | 616.000000 2113.446289 617 | 617.000000 1968.088379 618 | 618.000000 2368.984619 619 | 619.000000 2241.410400 620 | 620.000000 2073.782227 621 | 621.000000 2121.806152 622 | 622.000000 2167.166504 623 | 623.000000 2575.725342 624 | 624.000000 2500.377930 625 | 625.000000 2181.297363 626 | 626.000000 1967.151733 627 | 627.000000 2072.927246 628 | 628.000000 2027.206543 629 | 629.000000 2345.307617 630 | 630.000000 2024.258789 631 | 631.000000 2248.471924 632 | 632.000000 2455.304688 633 | 633.000000 2265.372070 634 | 634.000000 2424.891113 635 | 635.000000 2852.068115 636 | 636.000000 1997.479370 637 | 637.000000 3298.773438 638 | 638.000000 2367.100342 639 | 639.000000 1853.859985 640 | 640.000000 2896.925537 641 | 641.000000 2537.741943 642 | 642.000000 2300.602051 643 | 643.000000 2849.175781 644 | 644.000000 2975.004150 645 | 645.000000 1931.866577 646 | 646.000000 3009.414307 647 | 647.000000 2538.314941 648 | 648.000000 2783.420410 649 | 649.000000 2490.887939 650 | 650.000000 2407.929199 651 | 651.000000 2003.909668 652 | 652.000000 2752.269531 653 | 653.000000 2576.723145 654 | 654.000000 2817.946289 655 | 655.000000 2683.553467 656 | 656.000000 2628.284424 657 | 657.000000 2995.034912 658 | 658.000000 2303.661621 659 
| 659.000000 2772.099609 660 | 660.000000 2606.699463 661 | 661.000000 2703.995361 662 | 662.000000 2840.417725 663 | 663.000000 3256.972412 664 | 664.000000 3024.288574 665 | 665.000000 2684.777588 666 | 666.000000 3006.578857 667 | 667.000000 3310.774902 668 | 668.000000 3183.121826 669 | 669.000000 2523.464600 670 | 670.000000 3401.200928 671 | 671.000000 2839.752686 672 | 672.000000 3193.740479 673 | 673.000000 2970.214355 674 | 674.000000 3338.654541 675 | 675.000000 3464.786621 676 | 676.000000 3265.989502 677 | 677.000000 3536.363037 678 | 678.000000 3090.552734 679 | 679.000000 2936.692627 680 | 680.000000 3009.171387 681 | 681.000000 4000.848389 682 | 682.000000 3490.038086 683 | 683.000000 2815.461914 684 | 684.000000 3383.949463 685 | 685.000000 2902.276611 686 | 686.000000 4261.487793 687 | 687.000000 3787.093262 688 | 688.000000 4140.599121 689 | 689.000000 3589.932617 690 | 690.000000 3345.195801 691 | 691.000000 3119.029297 692 | 692.000000 3456.691406 693 | 693.000000 4152.050293 694 | 694.000000 3828.693115 695 | 695.000000 3993.070557 696 | 696.000000 4668.495117 697 | 697.000000 3303.204834 698 | 698.000000 3932.197998 699 | 699.000000 4497.727539 700 | 700.000000 3402.736572 701 | 701.000000 3674.006592 702 | 702.000000 3551.005127 703 | 703.000000 4231.209961 704 | 704.000000 3806.804443 705 | 705.000000 3354.349121 706 | 706.000000 3603.937988 707 | 707.000000 4015.734131 708 | 708.000000 3550.420166 709 | 709.000000 3318.288818 710 | 710.000000 3933.817627 711 | 711.000000 3597.578125 712 | 712.000000 5290.647949 713 | 713.000000 3563.370850 714 | 714.000000 3991.379395 715 | 715.000000 3890.932861 716 | 716.000000 3637.689453 717 | 717.000000 3800.525146 718 | 718.000000 4190.283203 719 | 719.000000 5249.075195 720 | 720.000000 4178.081543 721 | 721.000000 4830.328125 722 | 722.000000 4347.202637 723 | 723.000000 4226.013672 724 | 724.000000 4813.762695 725 | 725.000000 3998.185547 726 | 726.000000 4358.066406 727 | 727.000000 4323.617188 
728 | 728.000000 4157.835938 729 | 729.000000 4630.654297 730 | 730.000000 4415.905273 731 | 731.000000 4411.992188 732 | 732.000000 4725.586426 733 | 733.000000 4364.381348 734 | 734.000000 4800.028809 735 | 735.000000 4749.926758 736 | 736.000000 5144.264160 737 | 737.000000 4907.322754 738 | 738.000000 4310.609375 739 | 739.000000 4971.517578 740 | 740.000000 4815.629395 741 | 741.000000 5393.541992 742 | 742.000000 5906.814941 743 | 743.000000 4883.022461 744 | -------------------------------------------------------------------------------- /Chapter02/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 2 3 | ========= 4 | 5 | Support code for *Chapter 2: Learning How to Classify with Real-world 6 | Examples*. The directory data contains the seeds dataset, originally downloaded 7 | from https://archive.ics.uci.edu/ml/datasets/seeds 8 | 9 | chapter_02.py 10 | The code from the book (with a few extras) 11 | load.py 12 | Code to load the seeds data 13 | 14 | -------------------------------------------------------------------------------- /Chapter02/data/seeds.tsv: -------------------------------------------------------------------------------- 1 | 15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama 2 | 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 Kama 3 | 14.29 14.09 0.905 5.291 3.337 2.699 4.825 Kama 4 | 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 Kama 5 | 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 Kama 6 | 14.38 14.21 0.8951 5.386 3.312 2.462 4.956 Kama 7 | 14.69 14.49 0.8799 5.563 3.259 3.586 5.219 Kama 8 | 14.11 14.1 0.8911 5.42 3.302 2.7 5.0 Kama 9 | 16.63 15.46 0.8747 6.053 3.465 2.04 5.877 Kama 10 | 16.44 15.25 0.888 5.884 3.505 1.969 5.533 Kama 11 | 15.26 14.85 0.8696 5.714 3.242 4.543 5.314 Kama 12 | 14.03 14.16 0.8796 5.438 3.201 1.717 5.001 Kama 13 | 13.89 14.02 0.888 5.439 3.199 3.986 4.738 Kama 14 | 13.78 14.06 0.8759 5.479 3.156 3.136 4.872 Kama 15 | 13.74 14.05 0.8744 5.482 3.114 2.932 4.825 
Kama 16 | 14.59 14.28 0.8993 5.351 3.333 4.185 4.781 Kama 17 | 13.99 13.83 0.9183 5.119 3.383 5.234 4.781 Kama 18 | 15.69 14.75 0.9058 5.527 3.514 1.599 5.046 Kama 19 | 14.7 14.21 0.9153 5.205 3.466 1.767 4.649 Kama 20 | 12.72 13.57 0.8686 5.226 3.049 4.102 4.914 Kama 21 | 14.16 14.4 0.8584 5.658 3.129 3.072 5.176 Kama 22 | 14.11 14.26 0.8722 5.52 3.168 2.688 5.219 Kama 23 | 15.88 14.9 0.8988 5.618 3.507 0.7651 5.091 Kama 24 | 12.08 13.23 0.8664 5.099 2.936 1.415 4.961 Kama 25 | 15.01 14.76 0.8657 5.789 3.245 1.791 5.001 Kama 26 | 16.19 15.16 0.8849 5.833 3.421 0.903 5.307 Kama 27 | 13.02 13.76 0.8641 5.395 3.026 3.373 4.825 Kama 28 | 12.74 13.67 0.8564 5.395 2.956 2.504 4.869 Kama 29 | 14.11 14.18 0.882 5.541 3.221 2.754 5.038 Kama 30 | 13.45 14.02 0.8604 5.516 3.065 3.531 5.097 Kama 31 | 13.16 13.82 0.8662 5.454 2.975 0.8551 5.056 Kama 32 | 15.49 14.94 0.8724 5.757 3.371 3.412 5.228 Kama 33 | 14.09 14.41 0.8529 5.717 3.186 3.92 5.299 Kama 34 | 13.94 14.17 0.8728 5.585 3.15 2.124 5.012 Kama 35 | 15.05 14.68 0.8779 5.712 3.328 2.129 5.36 Kama 36 | 16.12 15.0 0.9 5.709 3.485 2.27 5.443 Kama 37 | 16.2 15.27 0.8734 5.826 3.464 2.823 5.527 Kama 38 | 17.08 15.38 0.9079 5.832 3.683 2.956 5.484 Kama 39 | 14.8 14.52 0.8823 5.656 3.288 3.112 5.309 Kama 40 | 14.28 14.17 0.8944 5.397 3.298 6.685 5.001 Kama 41 | 13.54 13.85 0.8871 5.348 3.156 2.587 5.178 Kama 42 | 13.5 13.85 0.8852 5.351 3.158 2.249 5.176 Kama 43 | 13.16 13.55 0.9009 5.138 3.201 2.461 4.783 Kama 44 | 15.5 14.86 0.882 5.877 3.396 4.711 5.528 Kama 45 | 15.11 14.54 0.8986 5.579 3.462 3.128 5.18 Kama 46 | 13.8 14.04 0.8794 5.376 3.155 1.56 4.961 Kama 47 | 15.36 14.76 0.8861 5.701 3.393 1.367 5.132 Kama 48 | 14.99 14.56 0.8883 5.57 3.377 2.958 5.175 Kama 49 | 14.79 14.52 0.8819 5.545 3.291 2.704 5.111 Kama 50 | 14.86 14.67 0.8676 5.678 3.258 2.129 5.351 Kama 51 | 14.43 14.4 0.8751 5.585 3.272 3.975 5.144 Kama 52 | 15.78 14.91 0.8923 5.674 3.434 5.593 5.136 Kama 53 | 14.49 14.61 0.8538 5.715 3.113 4.116 5.396 Kama 
54 | 14.33 14.28 0.8831 5.504 3.199 3.328 5.224 Kama 55 | 14.52 14.6 0.8557 5.741 3.113 1.481 5.487 Kama 56 | 15.03 14.77 0.8658 5.702 3.212 1.933 5.439 Kama 57 | 14.46 14.35 0.8818 5.388 3.377 2.802 5.044 Kama 58 | 14.92 14.43 0.9006 5.384 3.412 1.142 5.088 Kama 59 | 15.38 14.77 0.8857 5.662 3.419 1.999 5.222 Kama 60 | 12.11 13.47 0.8392 5.159 3.032 1.502 4.519 Kama 61 | 11.42 12.86 0.8683 5.008 2.85 2.7 4.607 Kama 62 | 11.23 12.63 0.884 4.902 2.879 2.269 4.703 Kama 63 | 12.36 13.19 0.8923 5.076 3.042 3.22 4.605 Kama 64 | 13.22 13.84 0.868 5.395 3.07 4.157 5.088 Kama 65 | 12.78 13.57 0.8716 5.262 3.026 1.176 4.782 Kama 66 | 12.88 13.5 0.8879 5.139 3.119 2.352 4.607 Kama 67 | 14.34 14.37 0.8726 5.63 3.19 1.313 5.15 Kama 68 | 14.01 14.29 0.8625 5.609 3.158 2.217 5.132 Kama 69 | 14.37 14.39 0.8726 5.569 3.153 1.464 5.3 Kama 70 | 12.73 13.75 0.8458 5.412 2.882 3.533 5.067 Kama 71 | 17.63 15.98 0.8673 6.191 3.561 4.076 6.06 Rosa 72 | 16.84 15.67 0.8623 5.998 3.484 4.675 5.877 Rosa 73 | 17.26 15.73 0.8763 5.978 3.594 4.539 5.791 Rosa 74 | 19.11 16.26 0.9081 6.154 3.93 2.936 6.079 Rosa 75 | 16.82 15.51 0.8786 6.017 3.486 4.004 5.841 Rosa 76 | 16.77 15.62 0.8638 5.927 3.438 4.92 5.795 Rosa 77 | 17.32 15.91 0.8599 6.064 3.403 3.824 5.922 Rosa 78 | 20.71 17.23 0.8763 6.579 3.814 4.451 6.451 Rosa 79 | 18.94 16.49 0.875 6.445 3.639 5.064 6.362 Rosa 80 | 17.12 15.55 0.8892 5.85 3.566 2.858 5.746 Rosa 81 | 16.53 15.34 0.8823 5.875 3.467 5.532 5.88 Rosa 82 | 18.72 16.19 0.8977 6.006 3.857 5.324 5.879 Rosa 83 | 20.2 16.89 0.8894 6.285 3.864 5.173 6.187 Rosa 84 | 19.57 16.74 0.8779 6.384 3.772 1.472 6.273 Rosa 85 | 19.51 16.71 0.878 6.366 3.801 2.962 6.185 Rosa 86 | 18.27 16.09 0.887 6.173 3.651 2.443 6.197 Rosa 87 | 18.88 16.26 0.8969 6.084 3.764 1.649 6.109 Rosa 88 | 18.98 16.66 0.859 6.549 3.67 3.691 6.498 Rosa 89 | 21.18 17.21 0.8989 6.573 4.033 5.78 6.231 Rosa 90 | 20.88 17.05 0.9031 6.45 4.032 5.016 6.321 Rosa 91 | 20.1 16.99 0.8746 6.581 3.785 1.955 6.449 Rosa 92 | 18.76 
16.2 0.8984 6.172 3.796 3.12 6.053 Rosa 93 | 18.81 16.29 0.8906 6.272 3.693 3.237 6.053 Rosa 94 | 18.59 16.05 0.9066 6.037 3.86 6.001 5.877 Rosa 95 | 18.36 16.52 0.8452 6.666 3.485 4.933 6.448 Rosa 96 | 16.87 15.65 0.8648 6.139 3.463 3.696 5.967 Rosa 97 | 19.31 16.59 0.8815 6.341 3.81 3.477 6.238 Rosa 98 | 18.98 16.57 0.8687 6.449 3.552 2.144 6.453 Rosa 99 | 18.17 16.26 0.8637 6.271 3.512 2.853 6.273 Rosa 100 | 18.72 16.34 0.881 6.219 3.684 2.188 6.097 Rosa 101 | 16.41 15.25 0.8866 5.718 3.525 4.217 5.618 Rosa 102 | 17.99 15.86 0.8992 5.89 3.694 2.068 5.837 Rosa 103 | 19.46 16.5 0.8985 6.113 3.892 4.308 6.009 Rosa 104 | 19.18 16.63 0.8717 6.369 3.681 3.357 6.229 Rosa 105 | 18.95 16.42 0.8829 6.248 3.755 3.368 6.148 Rosa 106 | 18.83 16.29 0.8917 6.037 3.786 2.553 5.879 Rosa 107 | 18.85 16.17 0.9056 6.152 3.806 2.843 6.2 Rosa 108 | 17.63 15.86 0.88 6.033 3.573 3.747 5.929 Rosa 109 | 19.94 16.92 0.8752 6.675 3.763 3.252 6.55 Rosa 110 | 18.55 16.22 0.8865 6.153 3.674 1.738 5.894 Rosa 111 | 18.45 16.12 0.8921 6.107 3.769 2.235 5.794 Rosa 112 | 19.38 16.72 0.8716 6.303 3.791 3.678 5.965 Rosa 113 | 19.13 16.31 0.9035 6.183 3.902 2.109 5.924 Rosa 114 | 19.14 16.61 0.8722 6.259 3.737 6.682 6.053 Rosa 115 | 20.97 17.25 0.8859 6.563 3.991 4.677 6.316 Rosa 116 | 19.06 16.45 0.8854 6.416 3.719 2.248 6.163 Rosa 117 | 18.96 16.2 0.9077 6.051 3.897 4.334 5.75 Rosa 118 | 19.15 16.45 0.889 6.245 3.815 3.084 6.185 Rosa 119 | 18.89 16.23 0.9008 6.227 3.769 3.639 5.966 Rosa 120 | 20.03 16.9 0.8811 6.493 3.857 3.063 6.32 Rosa 121 | 20.24 16.91 0.8897 6.315 3.962 5.901 6.188 Rosa 122 | 18.14 16.12 0.8772 6.059 3.563 3.619 6.011 Rosa 123 | 16.17 15.38 0.8588 5.762 3.387 4.286 5.703 Rosa 124 | 18.43 15.97 0.9077 5.98 3.771 2.984 5.905 Rosa 125 | 15.99 14.89 0.9064 5.363 3.582 3.336 5.144 Rosa 126 | 18.75 16.18 0.8999 6.111 3.869 4.188 5.992 Rosa 127 | 18.65 16.41 0.8698 6.285 3.594 4.391 6.102 Rosa 128 | 17.98 15.85 0.8993 5.979 3.687 2.257 5.919 Rosa 129 | 20.16 17.03 0.8735 6.513 3.773 
1.91 6.185 Rosa 130 | 17.55 15.66 0.8991 5.791 3.69 5.366 5.661 Rosa 131 | 18.3 15.89 0.9108 5.979 3.755 2.837 5.962 Rosa 132 | 18.94 16.32 0.8942 6.144 3.825 2.908 5.949 Rosa 133 | 15.38 14.9 0.8706 5.884 3.268 4.462 5.795 Rosa 134 | 16.16 15.33 0.8644 5.845 3.395 4.266 5.795 Rosa 135 | 15.56 14.89 0.8823 5.776 3.408 4.972 5.847 Rosa 136 | 15.38 14.66 0.899 5.477 3.465 3.6 5.439 Rosa 137 | 17.36 15.76 0.8785 6.145 3.574 3.526 5.971 Rosa 138 | 15.57 15.15 0.8527 5.92 3.231 2.64 5.879 Rosa 139 | 15.6 15.11 0.858 5.832 3.286 2.725 5.752 Rosa 140 | 16.23 15.18 0.885 5.872 3.472 3.769 5.922 Rosa 141 | 13.07 13.92 0.848 5.472 2.994 5.304 5.395 Canadian 142 | 13.32 13.94 0.8613 5.541 3.073 7.035 5.44 Canadian 143 | 13.34 13.95 0.862 5.389 3.074 5.995 5.307 Canadian 144 | 12.22 13.32 0.8652 5.224 2.967 5.469 5.221 Canadian 145 | 11.82 13.4 0.8274 5.314 2.777 4.471 5.178 Canadian 146 | 11.21 13.13 0.8167 5.279 2.687 6.169 5.275 Canadian 147 | 11.43 13.13 0.8335 5.176 2.719 2.221 5.132 Canadian 148 | 12.49 13.46 0.8658 5.267 2.967 4.421 5.002 Canadian 149 | 12.7 13.71 0.8491 5.386 2.911 3.26 5.316 Canadian 150 | 10.79 12.93 0.8107 5.317 2.648 5.462 5.194 Canadian 151 | 11.83 13.23 0.8496 5.263 2.84 5.195 5.307 Canadian 152 | 12.01 13.52 0.8249 5.405 2.776 6.992 5.27 Canadian 153 | 12.26 13.6 0.8333 5.408 2.833 4.756 5.36 Canadian 154 | 11.18 13.04 0.8266 5.22 2.693 3.332 5.001 Canadian 155 | 11.36 13.05 0.8382 5.175 2.755 4.048 5.263 Canadian 156 | 11.19 13.05 0.8253 5.25 2.675 5.813 5.219 Canadian 157 | 11.34 12.87 0.8596 5.053 2.849 3.347 5.003 Canadian 158 | 12.13 13.73 0.8081 5.394 2.745 4.825 5.22 Canadian 159 | 11.75 13.52 0.8082 5.444 2.678 4.378 5.31 Canadian 160 | 11.49 13.22 0.8263 5.304 2.695 5.388 5.31 Canadian 161 | 12.54 13.67 0.8425 5.451 2.879 3.082 5.491 Canadian 162 | 12.02 13.33 0.8503 5.35 2.81 4.271 5.308 Canadian 163 | 12.05 13.41 0.8416 5.267 2.847 4.988 5.046 Canadian 164 | 12.55 13.57 0.8558 5.333 2.968 4.419 5.176 Canadian 165 | 11.14 12.79 0.8558 
5.011 2.794 6.388 5.049 Canadian 166 | 12.1 13.15 0.8793 5.105 2.941 2.201 5.056 Canadian 167 | 12.44 13.59 0.8462 5.319 2.897 4.924 5.27 Canadian 168 | 12.15 13.45 0.8443 5.417 2.837 3.638 5.338 Canadian 169 | 11.35 13.12 0.8291 5.176 2.668 4.337 5.132 Canadian 170 | 11.24 13.0 0.8359 5.09 2.715 3.521 5.088 Canadian 171 | 11.02 13.0 0.8189 5.325 2.701 6.735 5.163 Canadian 172 | 11.55 13.1 0.8455 5.167 2.845 6.715 4.956 Canadian 173 | 11.27 12.97 0.8419 5.088 2.763 4.309 5.0 Canadian 174 | 11.4 13.08 0.8375 5.136 2.763 5.588 5.089 Canadian 175 | 10.83 12.96 0.8099 5.278 2.641 5.182 5.185 Canadian 176 | 10.8 12.57 0.859 4.981 2.821 4.773 5.063 Canadian 177 | 11.26 13.01 0.8355 5.186 2.71 5.335 5.092 Canadian 178 | 10.74 12.73 0.8329 5.145 2.642 4.702 4.963 Canadian 179 | 11.48 13.05 0.8473 5.18 2.758 5.876 5.002 Canadian 180 | 12.21 13.47 0.8453 5.357 2.893 1.661 5.178 Canadian 181 | 11.41 12.95 0.856 5.09 2.775 4.957 4.825 Canadian 182 | 12.46 13.41 0.8706 5.236 3.017 4.987 5.147 Canadian 183 | 12.19 13.36 0.8579 5.24 2.909 4.857 5.158 Canadian 184 | 11.65 13.07 0.8575 5.108 2.85 5.209 5.135 Canadian 185 | 12.89 13.77 0.8541 5.495 3.026 6.185 5.316 Canadian 186 | 11.56 13.31 0.8198 5.363 2.683 4.062 5.182 Canadian 187 | 11.81 13.45 0.8198 5.413 2.716 4.898 5.352 Canadian 188 | 10.91 12.8 0.8372 5.088 2.675 4.179 4.956 Canadian 189 | 11.23 12.82 0.8594 5.089 2.821 7.524 4.957 Canadian 190 | 10.59 12.41 0.8648 4.899 2.787 4.975 4.794 Canadian 191 | 10.93 12.8 0.839 5.046 2.717 5.398 5.045 Canadian 192 | 11.27 12.86 0.8563 5.091 2.804 3.985 5.001 Canadian 193 | 11.87 13.02 0.8795 5.132 2.953 3.597 5.132 Canadian 194 | 10.82 12.83 0.8256 5.18 2.63 4.853 5.089 Canadian 195 | 12.11 13.27 0.8639 5.236 2.975 4.132 5.012 Canadian 196 | 12.8 13.47 0.886 5.16 3.126 4.873 4.914 Canadian 197 | 12.79 13.53 0.8786 5.224 3.054 5.483 4.958 Canadian 198 | 13.37 13.78 0.8849 5.32 3.128 4.67 5.091 Canadian 199 | 12.62 13.67 0.8481 5.41 2.911 3.306 5.231 Canadian 200 | 12.76 13.38 
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

import numpy as np


def load_dataset(dataset_name):
    '''
    data = load_dataset(dataset_name)

    Load a tab-separated dataset from ./data/<dataset_name>.tsv.

    Each non-empty line holds the numeric feature values followed by the
    class label string as the last field.

    Parameters
    ----------
    dataset_name : str
        Base name of the file (without the .tsv extension) inside ./data/

    Returns
    -------
    data : dictionary
        'features' : ndarray of shape (n_samples, n_features)
        'target_names' : sorted list of the distinct label strings
        'target' : ndarray of integer indices into target_names
    '''
    features = []
    target = []
    with open('./data/{0}.tsv'.format(dataset_name)) as ifile:
        for line in ifile:
            line = line.strip()
            # Skip blank lines (e.g. a trailing newline at end of file);
            # they would otherwise produce a ragged feature row and an
            # empty-string label, and np.array would fail on the ragged
            # nested sequence.
            if not line:
                continue
            tokens = line.split('\t')
            features.append([float(tk) for tk in tokens[:-1]])
            target.append(tokens[-1])
    features = np.array(features)

    # Map each label string to a stable integer index; sorting makes the
    # encoding independent of the order labels appear in the file.
    target_names = sorted(set(target))
    target = np.array([target_names.index(t) for t in target])
    return {
        'features': features,
        'target_names': target_names,
        'target': target,
    }
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from load import load_dataset


def test_iris():
    # BUGFIX: load_dataset returns a dictionary with three keys
    # ('features', 'target_names', 'target'), not a (features, labels)
    # tuple -- unpacking it into two names raised ValueError.
    data = load_dataset('iris')
    features = data['features']
    labels = data['target']
    # Iris has four features per sample.
    assert len(features[0]) == 4
    assert len(features)
    assert len(features) == len(labels)


def test_seeds():
    data = load_dataset('seeds')
    features = data['features']
    labels = data['target']
    # Seeds has seven features per sample.
    assert len(features[0]) == 7
    assert len(features)
    assert len(features) == len(labels)
data" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.datasets import load_boston\n", 59 | "boston = load_boston()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "The first regression attempt:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from sklearn.linear_model import LinearRegression\n", 76 | "lr = LinearRegression(fit_intercept=True)\n", 77 | "\n", 78 | "# Index number five in the number of rooms\n", 79 | "x = boston.data[:, 5]\n", 80 | "y = boston.target\n", 81 | "\n", 82 | "# lr.fit takes a two-dimensional array as input. We use np.atleast_2d\n", 83 | "# to convert from one to two dimensional, then transpose to make sure that the\n", 84 | "# format matches:\n", 85 | "x = np.transpose(np.atleast_2d(x))\n", 86 | "lr.fit(x, y)\n", 87 | "\n", 88 | "fig,ax = plt.subplots()\n", 89 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n", 90 | "ax.set_ylabel(\"House Price\")\n", 91 | "xmin = x.min()\n", 92 | "xmax = x.max()\n", 93 | "ax.plot([xmin, xmax],\n", 94 | " [lr.predict(xmin), lr.predict(xmax)],\n", 95 | " '-', lw=2, color=\"#f9a602\")\n", 96 | "ax.scatter(x, y, s=2)\n", 97 | "fig.savefig('Regression_Fig_01.png')" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "from sklearn.metrics import mean_squared_error\n", 107 | "mse = mean_squared_error(y, lr.predict(x))\n", 108 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "scrolled": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "rmse = np.sqrt(mse)\n", 120 | "print('RMSE (on training data): {}'.format(rmse))" 121 | ] 122 | }, 
123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "from sklearn.metrics import r2_score\n", 130 | "r2 = r2_score(y, lr.predict(x))\n", 131 | "print(\"R2 (on training data): {:.2}\".format(r2))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Repeat, but using all the input variables now" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "x = boston.data\n", 148 | "\n", 149 | "lr.fit(x,y)\n", 150 | "\n", 151 | "mse = mean_squared_error(y, lr.predict(x))\n", 152 | "print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 153 | "rmse = np.sqrt(mse)\n", 154 | "print('RMSE (on training data): {}'.format(rmse))\n", 155 | "r2 = r2_score(y, lr.predict(x))\n", 156 | "print(\"R2 (on training data): {:.2}\".format(r2))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "To see how well we do, we plot _prediction vs. 
gold reality_:" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "fig,ax = plt.subplots()\n", 173 | "ax.set_xlabel('Predicted price')\n", 174 | "ax.set_ylabel('Actual price')\n", 175 | "ax.plot([y.min(), y.max()], [y.min(), y.max()], ':', lw=2, color=\"#f9a602\")\n", 176 | "ax.scatter(lr.predict(x), y, s=2)\n", 177 | "fig.savefig(\"Regression_FIG_02.png\")" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Now, we will use **cross-validation** for evaluating the regression quality:" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "scrolled": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "from sklearn.model_selection import KFold, cross_val_predict\n", 196 | "kf = KFold(n_splits=5)\n", 197 | "p = cross_val_predict(lr, x, y, cv=kf)\n", 198 | "rmse_cv = np.sqrt(mean_squared_error(p, y))\n", 199 | "print('RMSE on 5-fold CV: {:.2}'.format(rmse_cv))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "We now compare a few different regression models on _both training data and using cross-validation_:" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge \n", 216 | "\n", 217 | "for name, met in [\n", 218 | " ('linear regression', LinearRegression()),\n", 219 | " ('elastic-net(.5)', ElasticNet(alpha=0.5)),\n", 220 | " ('lasso(.5)', Lasso(alpha=0.5)),\n", 221 | " ('ridge(.5)', Ridge(alpha=0.5)),\n", 222 | "]:\n", 223 | " # Fit on the whole data:\n", 224 | " met.fit(x, y)\n", 225 | "\n", 226 | " # Predict on the whole data:\n", 227 | " p = met.predict(x)\n", 228 | " r2_train = r2_score(y, p)\n", 229 | "\n", 230 | " kf = 
KFold(n_splits=5)\n", 231 | " p = np.zeros_like(y)\n", 232 | " for train, test in kf.split(x):\n", 233 | " met.fit(x[train], y[train])\n", 234 | " p[test] = met.predict(x[test])\n", 235 | "\n", 236 | " r2_cv = r2_score(y, p)\n", 237 | " print('Method: {}'.format(name))\n", 238 | " print('R2 on training: {:.2}'.format(r2_train))\n", 239 | " print('R2 on 5-fold CV: {:.2}'.format(r2_cv))\n", 240 | " print('\\n')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "las = Lasso(normalize=True) \n", 250 | "alphas = np.logspace(-5, 2, 1000) \n", 251 | "alphas, coefs, _= las.path(x, y, alphas=alphas) \n", 252 | "\n", 253 | "fig,ax = plt.subplots() \n", 254 | "ax.plot(alphas, coefs.T) \n", 255 | "ax.set_xscale('log') \n", 256 | "ax.set_xlim(alphas.max(), alphas.min()) \n", 257 | "\n", 258 | "\n", 259 | "ax.set_xlabel('Lasso coefficient path as a function of alpha') \n", 260 | "ax.set_xlabel('Alpha') \n", 261 | "ax.set_ylabel('Coefficient weight') \n", 262 | "fig.savefig('REGRESSION_FIG_03.png')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "## Linear regression with Tensorflow" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Let's try and do the same with Tensorflow." 
277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# Batch size, epochs\n", 286 | "batch_size = 100\n", 287 | "n_epochs = 50000\n", 288 | "steps = 1000" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "# Creation of the scaffolding\n", 298 | "\n", 299 | "import tensorflow as tf\n", 300 | "tf.reset_default_graph()\n", 301 | "\n", 302 | "x = boston.data[:,5][:,None]\n", 303 | "y = np.reshape(boston.target, (-1, 1))\n", 304 | "\n", 305 | "nb_features = x.shape[1]\n", 306 | "\n", 307 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n", 308 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n", 309 | "\n", 310 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n", 311 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# Creation of the graph\n", 321 | "model_output = tf.matmul(X, A) + b\n", 322 | "\n", 323 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n", 324 | "\n", 325 | "# Uncomment to get Ridge or Lasso\n", 326 | "\"\"\"\n", 327 | "beta = 0.005\n", 328 | "regularizer = tf.nn.l2_loss(A)\n", 329 | "loss = loss + beta * regularizer\n", 330 | "\"\"\"\n", 331 | "\"\"\"\n", 332 | "beta = 0.5\n", 333 | "regularizer = tf.reduce_mean(tf.abs(A))\n", 334 | "loss = loss + beta * regularizer\n", 335 | "\"\"\"\n", 336 | "\n", 337 | "grad_speed = 1e-3\n", 338 | "my_opt = tf.train.GradientDescentOptimizer(grad_speed)\n", 339 | "train_step = my_opt.minimize(loss)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "# Run the optimization\n", 349 | 
"loss_vec = []\n", 350 | "with tf.Session() as sess:\n", 351 | " sess.run(tf.global_variables_initializer())\n", 352 | " for epoch in range(n_epochs):\n", 353 | " permut = np.random.permutation(len(x))\n", 354 | " for j in range(0, len(x), batch_size):\n", 355 | " batch = permut[j:j+batch_size]\n", 356 | " Xs = x[batch]\n", 357 | " Ys = y[batch]\n", 358 | "\n", 359 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n", 360 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n", 361 | " \n", 362 | " if epoch % steps == steps - 1:\n", 363 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n", 364 | " loss_vec.append(temp_loss)\n", 365 | "\n", 366 | " (A_, b_) = sess.run([A, b])\n", 367 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n", 368 | " print('Loss = %.8f' % temp_loss)\n", 369 | " print(\"\")\n", 370 | "\n", 371 | "\n", 372 | " [slope, y_intercept] = sess.run([A, b])\n", 373 | " prediction = sess.run(model_output, feed_dict={X: x})\n", 374 | " mse = mean_squared_error(y, prediction)\n", 375 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 376 | " rmse = np.sqrt(mse)\n", 377 | " print('RMSE (on training data): {}'.format(rmse))\n", 378 | " r2 = r2_score(y, prediction)\n", 379 | " print(\"R2 (on training data): {:.2}\".format(r2))\n", 380 | "\n", 381 | "best_fit = []\n", 382 | "for i in x:\n", 383 | " best_fit.append(slope[0]*i+y_intercept[0])" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "# Plot 1D best fit\n", 393 | "\n", 394 | "fig,ax = plt.subplots()\n", 395 | "ax.set_xlabel(\"Average number of rooms (RM)\")\n", 396 | "ax.set_ylabel(\"House Price\")\n", 397 | "\n", 398 | "ax.scatter(x, y, s=2, label='Data Points')\n", 399 | "ax.plot(x, np.array(best_fit), '-', lw=2, color=\"#f9a602\", label='Best fit line')\n", 400 | "ax.legend(loc='upper left')\n", 401 | "\n", 402 | 
"fig.savefig('REGRESSION_FIG_06.png')\n", 403 | "\n", 404 | "# Plot loss over time\n", 405 | "plt.figure()\n", 406 | "fig,ax = plt.subplots()\n", 407 | "ax.set_title('Loss per Epoch')\n", 408 | "ax.set_xlabel('Epoch')\n", 409 | "ax.set_ylabel('Loss')\n", 410 | "\n", 411 | "ax.plot(loss_vec, 'k-')\n", 412 | "\n", 413 | "fig.savefig('REGRESSION_FIG_07.png')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "What happens if we move to use all the features?" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "# Creation of the scaffolding\n", 430 | "\n", 431 | "import tensorflow as tf\n", 432 | "tf.reset_default_graph()\n", 433 | "\n", 434 | "x = boston.data\n", 435 | "y = np.reshape(boston.target, (-1, 1))\n", 436 | "\n", 437 | "nb_features = x.shape[1]\n", 438 | "\n", 439 | "X = tf.placeholder(shape=[None, nb_features], dtype=tf.float32, name=\"X\")\n", 440 | "Y = tf.placeholder(shape=[None, 1], dtype=tf.float32, name=\"y\")\n", 441 | "\n", 442 | "A = tf.Variable(tf.random_normal(shape=[nb_features, 1]), name=\"A\")\n", 443 | "b = tf.Variable(tf.random_normal(shape=[1,1]), name=\"b\")" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "# Creation of the graph\n", 453 | "model_output = tf.matmul(X, A) + b\n", 454 | "\n", 455 | "loss = tf.reduce_mean(tf.square(Y - model_output))\n", 456 | "\n", 457 | "# Uncomment to get Ridge or Lasso\n", 458 | "\"\"\"\n", 459 | "beta = 0.005\n", 460 | "regularizer = tf.nn.l2_loss(A)\n", 461 | "loss = loss + beta * regularizer\n", 462 | "\"\"\"\n", 463 | "\"\"\"\n", 464 | "beta = 0.5\n", 465 | "regularizer = tf.reduce_mean(tf.abs(A))\n", 466 | "loss = loss + beta * regularizer\n", 467 | "\"\"\"\n", 468 | "\n", 469 | "grad_speed = 5e-7\n", 470 | "my_opt = 
tf.train.GradientDescentOptimizer(grad_speed)\n", 471 | "train_step = my_opt.minimize(loss)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "# Run the optimization\n", 481 | "loss_vec = []\n", 482 | "with tf.Session() as sess:\n", 483 | " sess.run(tf.global_variables_initializer())\n", 484 | " for epoch in range(n_epochs):\n", 485 | " permut = np.random.permutation(len(x))\n", 486 | " for j in range(0, len(x), batch_size):\n", 487 | " batch = permut[j:j+batch_size]\n", 488 | " Xs = x[batch]\n", 489 | " Ys = y[batch]\n", 490 | "\n", 491 | " sess.run(train_step, feed_dict={X: Xs, Y: Ys})\n", 492 | " temp_loss = sess.run(loss, feed_dict={X: Xs, Y: Ys})\n", 493 | " \n", 494 | " if epoch % steps == steps - 1:\n", 495 | " temp_loss = sess.run(loss, feed_dict={X: x, Y: y})\n", 496 | " loss_vec.append(temp_loss)\n", 497 | "\n", 498 | " (A_, b_) = sess.run([A, b])\n", 499 | " print('Epoch #%i A = %s b = %s' % (epoch, np.transpose(A_), b_))\n", 500 | " print('Loss = %.8f' % temp_loss)\n", 501 | " print(\"\")\n", 502 | "\n", 503 | "\n", 504 | " [slope, y_intercept] = sess.run([A, b])\n", 505 | " prediction = sess.run(model_output, feed_dict={X: x})\n", 506 | " mse = mean_squared_error(y, prediction)\n", 507 | " print(\"Mean squared error (on training data): {:.3}\".format(mse))\n", 508 | " rmse = np.sqrt(mse)\n", 509 | " print('RMSE (on training data): {}'.format(rmse))\n", 510 | " r2 = r2_score(y, prediction)\n", 511 | " print(\"R2 (on training data): {:.2}\".format(r2))\n", 512 | "\n", 513 | "best_fit = []\n", 514 | "for i in x:\n", 515 | " best_fit.append(slope[0]*i+y_intercept[0])" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "# Plot loss over time\n", 525 | "plt.figure()\n", 526 | "fig,ax = plt.subplots()\n", 527 | "ax.set_title('Loss per Epoch')\n", 528 | 
"ax.set_xlabel('Epoch')\n", 529 | "ax.set_ylabel('Loss')\n", 530 | "\n", 531 | "ax.plot(loss_vec, 'k-')\n", 532 | "\n", 533 | "fig.savefig('REGRESSION_FIG_08.png')" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": { 539 | "collapsed": true 540 | }, 541 | "source": [ 542 | "## E2006 Dataset" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "Load data:\n" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": null, 555 | "metadata": {}, 556 | "outputs": [], 557 | "source": [ 558 | "from sklearn.datasets import load_svmlight_file\n", 559 | "data, target = load_svmlight_file('data/E2006.train')" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": {}, 565 | "source": [ 566 | "Compute error on training data to demonstrate that we can obtain near perfect scores:" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": {}, 573 | "outputs": [], 574 | "source": [ 575 | "lr = LinearRegression()\n", 576 | "lr.fit(data, target)\n", 577 | "pred = lr.predict(data) \n", 578 | "\n", 579 | "print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 580 | "print('R2 on training, {:.2}'.format(r2_score(target, pred)))" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "However, we do not do so well on cross-validation:" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "kf = KFold(n_splits=5)\n", 597 | "pred = cross_val_predict(lr, data, target, cv=kf)\n", 598 | "\n", 599 | "print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 600 | "print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | 
"Now, we try _an Elastic net_:" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "# Edit the lines below if you want to switch method: \n", 617 | "met = ElasticNet(alpha=0.1)\n", 618 | "met.fit(data, target)\n", 619 | "pred = met.predict(data)\n", 620 | "\n", 621 | "print('[EN 0.1] RMSE on training: {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 622 | "print('[EN 0.1] R2 on training: {:.2}'.format(r2_score(target, pred)))" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": {}, 628 | "source": [ 629 | "Not a perfect prediction on the training data anymore, but let us check the value on cross-validation:" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 639 | "\n", 640 | "print('[EN 0.1] RMSE on testing (5 fold): {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 641 | "print('[EN 0.1] R2 on testing (5 fold): {:.2}'.format(r2_score(target, pred)))" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "We now use `ElasticNetCV` to set parameters automatically:" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "from sklearn.linear_model import ElasticNetCV\n", 658 | "# Construct an ElasticNetCV object (use all available CPUs)\n", 659 | "met = ElasticNetCV(n_jobs=-1)\n", 660 | "\n", 661 | "met.fit(data, target)\n", 662 | "pred = met.predict(data)\n", 663 | "print('[EN CV] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 664 | "print('[EN CV] R2 on training, {:.2}'.format(r2_score(target, pred)))\n", 665 | "\n", 666 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 667 | "print('[EN 
CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 668 | "print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "This is a a pretty good general-purpose regression object:" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": { 682 | "scrolled": true 683 | }, 684 | "outputs": [], 685 | "source": [ 686 | "# Construct an ElasticNetCV object (use all available CPUs)\n", 687 | "met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])\n", 688 | "\n", 689 | "pred = cross_val_predict(met, data, target, cv=kf)\n", 690 | "\n", 691 | "print('[EN CV l1_ratio] RMSE on testing(5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))\n", 692 | "print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": {}, 698 | "source": [ 699 | "Now the final result:" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [ 708 | "fig, ax = plt.subplots()\n", 709 | "ax.scatter(target, pred, c='k', s=1)\n", 710 | "ax.plot([-5,-1], [-5,-1], 'r-', lw=2)\n", 711 | "ax.set_xlabel('Actual value')\n", 712 | "ax.set_ylabel('Predicted value')\n", 713 | "fig.savefig('REGRESSION_FIG_05.png')" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": null, 719 | "metadata": {}, 720 | "outputs": [], 721 | "source": [] 722 | } 723 | ], 724 | "metadata": { 725 | "anaconda-cloud": {}, 726 | "kernelspec": { 727 | "display_name": "Python 3", 728 | "language": "python", 729 | "name": "python3" 730 | }, 731 | "language_info": { 732 | "codemirror_mode": { 733 | "name": "ipython", 734 | "version": 3 735 | }, 736 | "file_extension": ".py", 737 | "mimetype": 
"text/x-python", 738 | "name": "python", 739 | "nbconvert_exporter": "python", 740 | "pygments_lexer": "ipython3", 741 | "version": "3.6.5" 742 | } 743 | }, 744 | "nbformat": 4, 745 | "nbformat_minor": 2 746 | } 747 | -------------------------------------------------------------------------------- /Chapter03/data/.gitignore: -------------------------------------------------------------------------------- 1 | E2006.train 2 | -------------------------------------------------------------------------------- /Chapter03/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | curl -O https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.train.bz2 3 | bunzip2 E2006.train.bz2 4 | 5 | -------------------------------------------------------------------------------- /Chapter04/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -O https://ia800107.us.archive.org/27/items/stackexchange/stackoverflow.com-Posts.7z 4 | 5 | p7zip -d stackoverflow.com-Posts.7z 6 | -------------------------------------------------------------------------------- /Chapter06/data/toy/01.txt: -------------------------------------------------------------------------------- 1 | This is a toy post about machine learning. Actually, it contains not much interesting stuff. -------------------------------------------------------------------------------- /Chapter06/data/toy/02.txt: -------------------------------------------------------------------------------- 1 | Imaging databases provide storage capabilities. -------------------------------------------------------------------------------- /Chapter06/data/toy/03.txt: -------------------------------------------------------------------------------- 1 | Most imaging databases save images permanently. 
2 | -------------------------------------------------------------------------------- /Chapter06/data/toy/04.txt: -------------------------------------------------------------------------------- 1 | Imaging databases store data. -------------------------------------------------------------------------------- /Chapter06/data/toy/05.txt: -------------------------------------------------------------------------------- 1 | Imaging databases store data. Imaging databases store data. Imaging databases store data. -------------------------------------------------------------------------------- /Chapter07/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 8 3 | ========= 4 | 5 | Support code for *Chapter 8: Recommendations*. 6 | 7 | The code refers to the second edition of the book and this code has been 8 | significantly refactored when compared to the first one. 9 | 10 | Ratings Prediction 11 | ------------------ 12 | 13 | Note that since the partition of the data into training and testing is random, 14 | everytime you run the code, the results will be different. 15 | 16 | 17 | load_ml100k.py 18 | Load data & partition into test/train 19 | norm.py 20 | Normalize the data 21 | corrneighbours.py 22 | Neighbour models based on ncrroaltoin 23 | regression.py 24 | Regression models 25 | stacked.py 26 | Stacked predictions 27 | averaged.py 28 | Averaging of predictions (mentioned in book, but code is not shown there). 
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from collections import defaultdict, namedtuple


def apriori(dataset, minsupport, maxsize):
    '''
    freqsets, support = apriori(dataset, minsupport, maxsize)

    Find frequent itemsets with the Apriori algorithm.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    minsupport : int
        Minimal support for frequent items
    maxsize : int
        Maximal size of frequent items to return

    Returns
    -------
    freqsets : sequence of frozensets
        All frequent itemsets of size 1 up to and including ``maxsize``
    support : dictionary
        This associates each itemset (represented as a frozenset) with a float
        (the support of that itemset)
    '''
    # For each element, record which transactions contain it; seed the
    # per-itemset transaction index with all singletons.
    baskets = defaultdict(list)
    pointers = defaultdict(list)

    for i, ds in enumerate(dataset):
        for ell in ds:
            pointers[ell].append(i)
            baskets[frozenset([ell])].append(i)

    # Convert pointer items to frozensets to speed up the set intersections
    # performed later; drop elements that can never reach minsupport.
    new_pointers = dict()
    for k in pointers:
        if len(pointers[k]) >= minsupport:
            new_pointers[k] = frozenset(pointers[k])
    pointers = new_pointers
    for k in baskets:
        baskets[k] = frozenset(baskets[k])

    # Valid are all elements whose support is >= minsupport
    valid = set()
    for el, c in baskets.items():
        if len(c) >= minsupport:
            valid.update(el)

    # Itemsets at first iteration are simply all singletons with valid elements:
    itemsets = [frozenset([v]) for v in valid]
    freqsets = []
    for i in range(maxsize - 1):
        print("At iteration {}, number of frequent baskets: {}".format(
            i, len(itemsets)))
        newsets = []
        for it in itemsets:
            ccounts = baskets[it]

            for v, pv in pointers.items():
                if v not in it:
                    csup = (ccounts & pv)
                    if len(csup) >= minsupport:
                        new = frozenset(it | frozenset([v]))
                        if new not in baskets:
                            newsets.append(new)
                            baskets[new] = csup
        freqsets.extend(itemsets)
        itemsets = newsets
        if not len(itemsets):
            break
    # BUGFIX: the original dropped the last generation of candidates, so
    # itemsets of size ``maxsize`` were never returned even though the
    # docstring promises them (with maxsize=2 only singletons came back).
    # Appending here fixes that; it is a no-op when the loop broke early
    # with an empty candidate list.
    freqsets.extend(itemsets)
    support = {}
    for k in baskets:
        support[k] = float(len(baskets[k]))
    return freqsets, support


# A namedtuple to collect all values that may be interesting for a rule
AssociationRule = namedtuple(
    'AssociationRule', ['antecendent', 'consequent', 'base', 'py_x', 'lift'])


def association_rules(dataset, freqsets, support, minlift):
    '''
    for assoc_rule in association_rules(dataset, freqsets, support, minlift):
        ...

    Yield association rules built from the output of ``apriori``.

    Parameters
    ----------
    dataset : sequence of sequences
        input dataset
    freqsets : sequence of frozensets
    support : dictionary
    minlift : float
        minimal lift of yielded rules

    Yields
    ------
    assoc_rule : AssociationRule
    '''
    nr_transactions = float(len(dataset))
    # Rules need a non-empty antecedent, so only itemsets of size > 1 apply.
    freqsets = [f for f in freqsets if len(f) > 1]
    for fset in freqsets:
        for f in fset:
            consequent = frozenset([f])
            antecendent = fset - consequent
            # Estimated P(consequent | antecedent):
            py_x = support[fset] / support[antecendent]
            # Baseline probability of the consequent:
            base = support[consequent] / nr_transactions
            lift = py_x / base
            if lift > minlift:
                yield AssociationRule(
                    antecendent, consequent, base, py_x, lift)
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

from collections import defaultdict
from itertools import chain
from gzip import GzipFile

# Minimal number of transactions an itemset must appear in to count as frequent
minsupport = 80


def rules_from_itemset(itemset, dataset, minlift=1.):
    '''Print every association rule derivable from ``itemset`` whose lift is
    strictly greater than ``minlift``.

    Parameters
    ----------
    itemset : frozenset
        A frequent itemset (should have at least two elements)
    dataset : sequence of frozensets
        The transactions
    minlift : float, optional
        Only rules with lift > minlift are printed
    '''
    nr_transactions = float(len(dataset))
    for item in itemset:
        consequent = frozenset([item])
        antecedent = itemset - consequent

        # base : fraction of transactions containing the consequent
        base = 0.0
        # acount : number of transactions containing the antecedent
        acount = 0.0
        # ccount : number of transactions containing the whole itemset
        ccount = 0.0
        for d in dataset:
            if item in d:
                base += 1
            if d.issuperset(itemset):
                ccount += 1
            if d.issuperset(antecedent):
                acount += 1
        base /= nr_transactions
        # P(consequent | antecedent), estimated from counts
        p_y_given_x = ccount / acount
        lift = p_y_given_x / base
        if lift > minlift:
            print('Rule {0} -> {1} has lift {2}'
                  .format(antecedent, consequent, lift))


def main():
    '''Run the naive Apriori demonstration on the retail dataset.

    Requires ``retail.dat.gz`` in the working directory (see ``download.sh``).
    '''
    dataset = [[int(tok) for tok in line.strip().split()]
               for line in GzipFile('retail.dat.gz')]

    counts = defaultdict(int)
    for elem in chain(*dataset):
        counts[elem] += 1

    # Only elements that have at least minsupport should be considered.
    valid = set(el for el, c in counts.items() if (c >= minsupport))

    # Filter the dataset to contain only valid elements
    # (This step is not strictly necessary, but will make the rest of the code
    # faster as the itemsets will be smaller):
    dataset = [[el for el in ds if (el in valid)] for ds in dataset]

    # Convert to frozenset for fast processing
    dataset = [frozenset(ds) for ds in dataset]

    itemsets = [frozenset([v]) for v in valid]
    freqsets = itemsets[:]
    for i in range(16):
        print("At iteration {}, number of frequent baskets: {}".format(
            i, len(itemsets)))
        nextsets = []

        tested = set()
        for it in itemsets:
            for v in valid:
                if v not in it:
                    # Create a new candidate set by adding v to it
                    c = (it | frozenset([v]))

                    # Check if we have tested it already:
                    if c in tested:
                        continue
                    tested.add(c)

                    # Count support by looping over dataset
                    # This step is slow.
                    # Check `apriori.py` for a better implementation.
                    support_c = sum(1 for d in dataset if d.issuperset(c))
                    # BUGFIX: use >= so that "at least minsupport" matches the
                    # singleton filter above and apriori.py (the original used
                    # a strict >, which dropped candidates at exactly
                    # minsupport).
                    if support_c >= minsupport:
                        nextsets.append(c)
        freqsets.extend(nextsets)
        itemsets = nextsets
        if not len(itemsets):
            break
    print("Finished!")

    for itemset in freqsets:
        if len(itemset) > 1:
            rules_from_itemset(itemset, dataset, minlift=4.)


if __name__ == '__main__':
    main()
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

def load():
    '''Load ML-100k data

    Returns the review matrix as a numpy array'''
    import numpy as np
    from scipy import sparse
    from os import path

    if not path.exists('data/ml-100k/u.data'):
        raise IOError("Data has not been downloaded.\nTry the following:\n\n\tcd data\n\t./download.sh")

    # The input is in the form of a CSC sparse matrix, so it's a natural fit to
    # load the data, but we then convert to a more traditional array before
    # returning
    data = np.loadtxt('data/ml-100k/u.data')
    # BUGFIX: np.loadtxt returns floats, but sparse index arrays must be
    # integers (scipy rejects/deprecates float indices).
    ij = data[:, :2].astype(int)
    ij -= 1  # original data is in 1-based system
    values = data[:, 2]
    reviews = sparse.csc_matrix((values, ij.T)).astype(float)
    return reviews.toarray()


def get_train_test(reviews=None, random_state=None):
    '''Split data into training & testing

    Holds out 10% of the non-zero entries for testing.

    Parameters
    ----------
    reviews : ndarray, optional
        Input data; when not given, it is loaded with ``load()``
    random_state : int or None, optional
        Seed for the random number generator, for reproducible splits

    Returns
    -------
    train : ndarray
        training data (the input with the held-out entries zeroed)
    test : ndarray
        testing data (zero everywhere except the held-out entries)
    '''
    import numpy as np
    import random
    r = random.Random(random_state)

    if reviews is None:
        reviews = load()
    # Positions of all observed (non-zero) ratings
    U, M = np.where(reviews)
    # Sample 10% of them as the test set
    test_idxs = np.array(r.sample(range(len(U)), len(U) // 10))
    train = reviews.copy()
    train[U[test_idxs], M[test_idxs]] = 0

    test = np.zeros_like(reviews)
    test[U[test_idxs], M[test_idxs]] = reviews[U[test_idxs], M[test_idxs]]

    return train, test
tr_predicted1.ravel(), 31 | tr_predicted2.ravel(), 32 | tr_predicted3.ravel(), 33 | tr_predicted4.ravel(), 34 | tr_predicted5.ravel(), 35 | ]).T 36 | 37 | return lr.predict(stack_te).reshape(train.shape) 38 | 39 | 40 | def main(): 41 | train,test = load_ml100k.get_train_test(random_state=12) 42 | predicted = predict(train) 43 | r2 = metrics.r2_score(test[test > 0], predicted[test > 0]) 44 | print('R2 stacked: {:.2%}'.format(r2)) 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /Chapter09/data/not_authorized.tsv: -------------------------------------------------------------------------------- 1 | 126213333123743744 2 | 126079414986485761 3 | 126076743613284354 4 | 126213333123743744 5 | 126079414986485761 6 | 126076743613284354 7 | 126049183865114624 8 | 125633065757310976 9 | 126213333123743744 10 | 126079414986485761 11 | 126076743613284354 12 | 126049183865114624 13 | 125633065757310976 14 | 126213333123743744 15 | 126079414986485761 16 | 126076743613284354 17 | 126049183865114624 18 | 125633065757310976 19 | 126213333123743744 20 | 126079414986485761 21 | 126076743613284354 22 | 126049183865114624 23 | 125633065757310976 24 | 126213333123743744 25 | 126079414986485761 26 | 126076743613284354 27 | 126049183865114624 28 | 125633065757310976 29 | 126213333123743744 30 | 126079414986485761 31 | 126076743613284354 32 | 126049183865114624 33 | 125633065757310976 34 | 125264731035537409 35 | 126153311521996800 36 | 126121175926571009 37 | 125988395787882497 38 | 125954651152592896 39 | 125799384976863232 40 | 125681375058735104 41 | 125675806977556480 42 | 125673358418391041 43 | 125659125886623744 44 | 126213333123743744 45 | 126079414986485761 46 | 126076743613284354 47 | 126049183865114624 48 | 125633065757310976 49 | 125264731035537409 50 | 126153311521996800 51 | 126121175926571009 52 | 125988395787882497 53 | 125954651152592896 54 | 125799384976863232 55 | 125681375058735104 56 
| 125675806977556480 57 | 125673358418391041 58 | 125659125886623744 59 | 125561930416013312 60 | 125475953509015552 61 | 125371779039502336 62 | 125368089159286784 63 | 125334519254482944 64 | 125309427422203904 65 | 126213333123743744 66 | 126079414986485761 67 | 126076743613284354 68 | 126049183865114624 69 | 125633065757310976 70 | 125264731035537409 71 | 126153311521996800 72 | 126121175926571009 73 | 125988395787882497 74 | 125954651152592896 75 | 125799384976863232 76 | 125681375058735104 77 | 125675806977556480 78 | 125673358418391041 79 | 125659125886623744 80 | 125561930416013312 81 | 125475953509015552 82 | 125371779039502336 83 | 125368089159286784 84 | 125334519254482944 85 | 125309427422203904 86 | 125204228967903232 87 | 126213333123743744 88 | 126079414986485761 89 | 126076743613284354 90 | 126049183865114624 91 | 125633065757310976 92 | 125264731035537409 93 | 126153311521996800 94 | 126121175926571009 95 | 125988395787882497 96 | 125954651152592896 97 | 125799384976863232 98 | 125681375058735104 99 | 125675806977556480 100 | 125673358418391041 101 | 125659125886623744 102 | 125561930416013312 103 | 125475953509015552 104 | 125371779039502336 105 | 125368089159286784 106 | 125334519254482944 107 | 125309427422203904 108 | 125204228967903232 109 | 126394795802370049 110 | 126386085164101634 111 | 126382776072146944 112 | 126380323733909504 113 | 126317201962700800 114 | 126229089651654656 115 | 126186795808456704 116 | 126110770864979968 117 | 126039090578735104 118 | 126029114850295809 119 | 126213333123743744 120 | 126079414986485761 121 | 126076743613284354 122 | 126049183865114624 123 | 125633065757310976 124 | 125264731035537409 125 | 126153311521996800 126 | 126121175926571009 127 | 125988395787882497 128 | 125954651152592896 129 | 125799384976863232 130 | 125681375058735104 131 | 125675806977556480 132 | 125673358418391041 133 | 125659125886623744 134 | 125561930416013312 135 | 125475953509015552 136 | 125371779039502336 137 | 
125368089159286784 138 | 125334519254482944 139 | 125309427422203904 140 | 125204228967903232 141 | 126394795802370049 142 | 126386085164101634 143 | 126382776072146944 144 | 126380323733909504 145 | 126317201962700800 146 | 126229089651654656 147 | 126186795808456704 148 | 126110770864979968 149 | 126039090578735104 150 | 126029114850295809 151 | 125994997609803776 152 | 125992594395250688 153 | 125988651426512899 154 | 125981074114359297 155 | 125980615664336896 156 | 125958702455988225 157 | 125932876721168384 158 | 125918906215968771 159 | 125725274317914112 160 | 125708240225959936 161 | 125641351848136704 162 | 125630016485732352 163 | 125629788563050496 164 | 125538769632886784 165 | 125347618862792705 166 | 125305567148388352 167 | 125196751387889665 168 | 126213333123743744 169 | 126079414986485761 170 | 126076743613284354 171 | 126049183865114624 172 | 125633065757310976 173 | 125264731035537409 174 | 126153311521996800 175 | 126121175926571009 176 | 125988395787882497 177 | 125954651152592896 178 | 125930962545672192 179 | 125910538550124545 180 | 125797001337122817 181 | 125232405517844481 182 | 126534770095169536 183 | 126520518609350656 184 | 126516914678808578 185 | 126494834449063936 186 | 126494280318582784 187 | 126494100252925954 188 | 126492852615262208 189 | 126488447098695680 190 | 126488384410619906 191 | 126487332865056768 192 | 126532210210783232 193 | 126520550876127232 194 | 126505594290057216 195 | 126497514168922112 196 | 126494895501348864 197 | 126491509527805952 198 | 126528316978102272 199 | 126528078057963520 200 | 126523549493112832 201 | 126520920352358401 202 | 126510284536942592 203 | 126504105530236928 204 | 126499521344712704 205 | 126497100866387969 206 | 126496853742198784 207 | 126494691016441857 208 | 126494569184505856 209 | 126493312650719232 210 | 126487788433584129 211 | 126534127435530240 212 | 126529490582118400 213 | 126528938326495232 214 | 126526465280970752 215 | 126526113131413504 216 | 126519715085549568 217 | 
126511257170886656 218 | 126504285436514304 219 | 126497446955188224 220 | 126495762568851456 221 | 126495208505479168 222 | 126494166145437696 223 | 126493860804308992 224 | 126492542610051072 225 | 126490549367738368 226 | 126484213737340928 227 | 126784810755690496 228 | 126700014385897472 229 | 126635317108289536 230 | 126795256225210368 231 | 126789710705213440 232 | 126728277896347649 233 | 126674460131606529 234 | 126671006302617600 235 | 126593636627513344 236 | 126519595682119681 237 | 126796467213058048 238 | 126734290850557952 239 | 126726063484178432 240 | 126679463839801344 241 | 126673062258147328 242 | 126637471676104704 243 | 126622818220785664 244 | 126622165595459584 245 | 126612152579657728 246 | 126506057613848576 247 | 126505970317787136 248 | 126495306681548800 249 | 126879662851887104 250 | 126877171926040576 251 | 126876654118240257 252 | 126876107881455616 253 | 126867350476697601 254 | 126863084433326080 255 | 126857095088840706 256 | 126883243726344193 257 | 126881376074076161 258 | 126858607789740032 259 | 126883335875203072 260 | 126883013236752384 261 | 126882832319651840 262 | 126878130353876992 263 | 126877869547855872 264 | 126875416760815616 265 | 126875059477426176 266 | 126870550546096128 267 | 126868828457144321 268 | 126868429796933632 269 | 126868271625539585 270 | 126867067776405506 271 | 126866413053939712 272 | 126865888724004864 273 | 126865837800951808 274 | 126865038085591041 275 | 126864886402777088 276 | 126864861576704000 277 | 126863772877996034 278 | 126863571912114177 279 | 126862618836221954 280 | 126860955605934081 281 | 126859710740701185 282 | 126853913591808002 283 | 126882080050262016 284 | 126881227729928193 285 | 126879417220874240 286 | 126875034135433216 287 | 126874145408561152 288 | 126873260385239040 289 | 126872615380987905 290 | 126872361462005760 291 | 126872241693667328 292 | 126872199620591617 293 | 126869762763522049 294 | 126868924590600192 295 | 126868586882007041 296 | 126868349396324352 297 | 
126867170742374400 298 | 126866474806673408 299 | 126866312130609152 300 | 126865987365634048 301 | 126864954140803072 302 | 126864673416032256 303 | 126863938339094531 304 | 126862853822099456 305 | 126862343148802048 306 | 126860270181171201 307 | 126860114610241536 308 | 126859857604247552 309 | 126506057613848576 310 | 126505970317787136 311 | 126495306681548800 312 | 126879662851887104 313 | 126877171926040576 314 | 126876654118240257 315 | 126876107881455616 316 | 126867350476697601 317 | 126863084433326080 318 | 126857095088840706 319 | 126883243726344193 320 | 126881376074076161 321 | 126858607789740032 322 | 126883335875203072 323 | 126883013236752384 324 | 126882832319651840 325 | 126878130353876992 326 | 126877869547855872 327 | 126875416760815616 328 | 126875059477426176 329 | 126870550546096128 330 | 126868828457144321 331 | 126868429796933632 332 | 126868271625539585 333 | 126867067776405506 334 | 126866413053939712 335 | 126865888724004864 336 | 126865837800951808 337 | 126865038085591041 338 | 126864886402777088 339 | 126864861576704000 340 | 126863772877996034 341 | 126863571912114177 342 | 126862618836221954 343 | 126860955605934081 344 | 126859710740701185 345 | 126853913591808002 346 | 126882080050262016 347 | 126881227729928193 348 | 126879417220874240 349 | 126875034135433216 350 | 126874145408561152 351 | 126873260385239040 352 | 126872615380987905 353 | 126872361462005760 354 | 126872241693667328 355 | 126872199620591617 356 | 126869762763522049 357 | 126868924590600192 358 | 126868586882007041 359 | 126868349396324352 360 | 126867170742374400 361 | 126866474806673408 362 | 126866312130609152 363 | 126865987365634048 364 | 126864954140803072 365 | 126864673416032256 366 | 126863938339094531 367 | 126862853822099456 368 | 126862343148802048 369 | 126860270181171201 370 | 126860114610241536 371 | 126859857604247552 372 | 126404574230740992 373 | 126350302113824769 374 | 126148685737361408 375 | 126040352237961217 376 | 125995158679461888 377 | 
125960325437722624 378 | 125643107260829697 379 | 125608381431025664 380 | 125523414298533888 381 | 125374540107886593 382 | 126405405667627008 383 | 126391082308206593 384 | 125945821240885248 385 | 125943204943114240 386 | 125476730067615744 387 | 125369698840887297 388 | 125202037293064192 389 | 126405821482532864 390 | 126405160934178816 391 | 126379730827083776 392 | 126370776013213697 393 | 126243528832593920 394 | 126225922159427584 395 | 126219340214304768 396 | 126113944891949056 397 | 126061182720278528 398 | 126042506717704192 399 | 126041773356232704 400 | 126016405085757440 401 | 126012833128390656 402 | 126009386022879232 403 | 125943078837161984 404 | 125887065861787648 405 | 125866627337162752 406 | 125866368758333440 407 | 125859792802693120 408 | 125250078108684288 409 | 126385587740610563 410 | 126360606042374144 411 | 126346705292640257 412 | 126260304819662849 413 | 126236984644612096 414 | 125973789526863872 415 | 125967413299773440 416 | 125957826500771840 417 | 125862601677737985 418 | 125699684693065728 419 | 125346522618535937 420 | 126525172969766912 421 | 126514474378203136 422 | 126511000907288576 423 | 126499965869625345 424 | 126497655785402368 425 | 126493192110612480 426 | 126489713782685696 427 | 126489263025033216 428 | 126496987192373248 429 | 126491870900666368 430 | 126491480087986176 431 | 126532019999096832 432 | 126531893649874945 433 | 126520914413236224 434 | 126520531934654465 435 | 126512842194161664 436 | 126509135842914304 437 | 126506232432439296 438 | 126492945057718272 439 | 126486051530354689 440 | 126497618258964480 441 | 126496237879959553 442 | 126492339559608320 443 | 126491356481859585 444 | 126487422249861120 445 | 126487385461633024 446 | 126779217911349248 447 | 126670032951443456 448 | 126583473929588736 449 | 126574432159408129 450 | 126803763603312640 451 | 126794825998663680 452 | 126732384602296320 453 | 126642779064504320 454 | 126611604925194240 455 | 126591976408748032 456 | 126798811262763009 457 | 
126761498885361664 458 | 126759986780057600 459 | 126752126880858112 460 | 126749587133308928 461 | 126745438136176640 462 | 126701862383661056 463 | 126689077230698496 464 | 126680181359378432 465 | 126679552310251521 466 | 126611107266834433 467 | 126610365852303361 468 | 126601340242767872 469 | 126880912754475008 470 | 126877362632667136 471 | 126862735953768448 472 | 126858393909608448 473 | 126870358816067584 474 | 126869855621218304 475 | 126866003094290434 476 | 126864575508381696 477 | 126881380503273472 478 | 126881167541665792 479 | 126880571233280000 480 | 126880429256093696 481 | 126880253145657344 482 | 126879867731062784 483 | 126879122298372097 484 | 126877998115852288 485 | 126877965064740864 486 | 126876452762296321 487 | 126876009797656576 488 | 126875887093293056 489 | 126874662268452864 490 | 126872221292576768 491 | 126871857277308930 492 | 126871511326924800 493 | 126871066760065024 494 | 126870943489466368 495 | 126868570226425856 496 | 126867320005066752 497 | 126866353561927680 498 | 126865005009309696 499 | 126861011813810176 500 | 126860964992794624 501 | 126859978941276161 502 | 126882743819833345 503 | 126881169169063937 504 | 126880644105121792 505 | 126880556775522304 506 | 126879958529343488 507 | 126879219484606464 508 | 126878250541645824 509 | 126877540928331777 510 | 126876463965278208 511 | 126874165105008640 512 | 126873756437200896 513 | 126873447912587264 514 | 126873004494954496 515 | 126872365211725824 516 | 126871907302785024 517 | 126871831583002625 518 | 126867611546943490 519 | 126867000030007296 520 | 126866827715420160 521 | 126866759792852992 522 | 126865704380153856 523 | 126865416671862785 524 | 126862832489861121 525 | 126862595117424641 526 | 126862150265352193 527 | 126871857277308930 528 | 126871511326924800 529 | 126871066760065024 530 | 126870943489466368 531 | 126868570226425856 532 | 126867320005066752 533 | 126866353561927680 534 | 126865005009309696 535 | 126861011813810176 536 | 126860964992794624 537 | 
126859978941276161 538 | 126882743819833345 539 | 126881169169063937 540 | 126880644105121792 541 | 126880556775522304 542 | 126879958529343488 543 | 126879219484606464 544 | 126878250541645824 545 | 126877540928331777 546 | 126876463965278208 547 | 126874165105008640 548 | 126873756437200896 549 | 126873447912587264 550 | 126873004494954496 551 | 126872365211725824 552 | 126877263311536128 553 | 126870792960086018 554 | 126877263311536128 555 | 126870792960086018 556 | -------------------------------------------------------------------------------- /Chapter09/twitterauth.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import sys 9 | 10 | CONSUMER_KEY = None 11 | CONSUMER_SECRET = None 12 | 13 | ACCESS_TOKEN_KEY = None 14 | ACCESS_TOKEN_SECRET = None 15 | 16 | if CONSUMER_KEY is None or CONSUMER_SECRET is None or ACCESS_TOKEN_KEY is None or ACCESS_TOKEN_SECRET is None: 17 | print("""\ 18 | When doing last code sanity checks for the book, Twitter 19 | was using the API 1.0, which did not require authentication. 20 | With its switch to version 1.1, this has now changed. 21 | 22 | It seems that you don't have already created your personal Twitter 23 | access keys and tokens. Please do so at https://dev.twitter.com 24 | and paste the keys/secrets into twitterauth.py. 
25 | 26 | Sorry for the inconvenience, 27 | The authors.""") 28 | 29 | sys.exit(1) 30 | -------------------------------------------------------------------------------- /Chapter10/README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Chapter 4 3 | ========= 4 | 5 | Support code for *Chapter 4: Topic Modeling* 6 | 7 | 8 | AP Data 9 | ------- 10 | 11 | To download the AP data, use the ``download_ap.sh`` script inside the ``data`` 12 | directory:: 13 | 14 | cd data 15 | ./download_ap.sh 16 | 17 | Word cloud creation 18 | ------------------- 19 | 20 | Word cloud creation requires that ``pytagcloud`` be installed (in turn, this 21 | requires ``pygame``). Since this is not an essential part of the chapter, the 22 | code will work even if you have not installed it (naturally, the cloud image 23 | will not be generated and a warning will be printed). 24 | 25 | 26 | Wikipedia processing 27 | -------------------- 28 | 29 | You will need **a lot of disk space**. The download of the Wikipedia text is 30 | 11GB and preprocessing it takes another 24GB to save it in the intermediate 31 | format that gensim uses for a total of 34GB! 32 | 33 | Run the following two commands inside the ``data/`` directory:: 34 | 35 | ./download_wp.sh 36 | ./preprocess-wikidata.sh 37 | 38 | As the filenames indicate, the first step will download the data and the second 39 | one will preprocess it. Preprocessing can take several hours, but it is 40 | feasible to run it on a modern laptop. Once the second step is finished, you 41 | may remove the input file if you want to save disk space 42 | (``data/enwiki-latest-pages-articles.xml.bz2``). 43 | 44 | To generate the model, you can run the ``wikitopics_create.py`` script, while 45 | the ``wikitopics_plot.py`` script will plot the most heavily discussed topic as 46 | well as the least heavily discussed one. The code is split into steps as the 47 | first one can take a very long time. 
Then it saves the results so that you can 48 | later explore them at leisure. 49 | 50 | You should not expect that your results will exactly match the results in the 51 | book, for two reasons: 52 | 53 | 1. The LDA algorithm is a probabilistic algorithm and can give different 54 | results every time it is run. 55 | 2. Wikipedia keeps changing. Thus, even your input data will be different. 56 | 57 | Scripts 58 | ------- 59 | 60 | blei_lda.py 61 | Computes LDA using the AP Corpus. 62 | wikitopics_create.py 63 | Create the topic model for Wikipedia using LDA (must download wikipedia database first) 64 | wikitopics_create_hdp.py 65 | Create the topic model for Wikipedia using HDP (must download wikipedia database first) 66 | -------------------------------------------------------------------------------- /Chapter10/Topic modeling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Topic Modeling\n", 8 | "\n", 9 | "We start with importing `gensim`" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "**IMPORTANT**: You cannot run this example only from within the notebook. You must first download the data on the command line." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import gensim\n", 28 | "from gensim import corpora, models, matutils" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Now the usual imports:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import matplotlib.pyplot as plt\n", 47 | "import numpy as np\n", 48 | "from os import path\n", 49 | "\n", 50 | "\n", 51 | "# Check that data exists\n", 52 | "if not path.exists('./data/ap/ap.dat'):\n", 53 | " print('Error: Expected data to be present at data/ap/')\n", 54 | " print('Please cd into ./data & run ./download_ap.sh')\n", 55 | "\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "We will generate 100 topics as in the book, but you can changes this setting here:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "NUM_TOPICS = 100" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Load the data" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "corpus = corpora.BleiCorpus('./data/ap/ap.dat', './data/ap/vocab.txt')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "Build the LDA model" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "scrolled": true 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "model = models.ldamodel.LdaModel(\n", 110 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, 
alpha=None)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "num_topics_used = [len(model[doc]) for doc in corpus]\n", 120 | "fig,ax = plt.subplots()\n", 121 | "ax.hist(num_topics_used, np.arange(42))\n", 122 | "ax.set_ylabel('Nr of documents')\n", 123 | "ax.set_xlabel('Nr of topics')\n", 124 | "fig.tight_layout()\n", 125 | "fig.savefig('Figure_04_01.png')\n", 126 | "fig" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "source": [ 135 | "We can do the same after changing the $\\alpha$ value: " 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "ALPHA = 1.0\n", 145 | "\n", 146 | "model1 = models.ldamodel.LdaModel(\n", 147 | " corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=ALPHA)\n", 148 | "num_topics_used1 = [len(model1[doc]) for doc in corpus]\n", 149 | "\n", 150 | "fig,ax = plt.subplots()\n", 151 | "ax.hist([num_topics_used, num_topics_used1], np.arange(42))\n", 152 | "ax.set_ylabel('Nr of documents')\n", 153 | "ax.set_xlabel('Nr of topics')\n", 154 | "\n", 155 | "# The coordinates below were fit by trial and error to look good\n", 156 | "ax.text(9, 223, r'default alpha')\n", 157 | "ax.text(26, 156, 'alpha=1.0')\n", 158 | "fig.tight_layout()\n", 159 | "fig.savefig('Figure_04_02.png')\n", 160 | "fig" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### Exploring the topic model\n", 168 | "\n", 169 | "We can explore the mathematical structure of the topics:\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "doc = corpus.docbyoffset(0)\n", 179 | "topics = model[doc]\n", 180 | "print(topics)" 181 | ] 182 | }, 183 | { 184 | "cell_type": 
"markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "This is not very informative, however. Another way to explore is to identify the most discussed topic, i.e., the one with the highest total weight:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": true 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)\n", 199 | "weight = topics.sum(1)\n", 200 | "max_topic = weight.argmax()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "Get the top 64 words for this topic.\n", 208 | "Without the argument, show_topic would return only 10 words" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "words = model.show_topic(max_topic, 64)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "One way to visualize the results is to build a _word cloud_. 
For this we use the `wordcloud` module:" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "from wordcloud import WordCloud\n", 236 | "\n", 237 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 238 | "wc = wc.generate_from_frequencies(dict(words))\n", 239 | "\n", 240 | "\n", 241 | "fig,ax = plt.subplots()\n", 242 | "\n", 243 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 244 | "fig" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "# NEWS DATA" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "Now, repeat the same exercise using alpha=1.0.\n", 259 | "\n", 260 | "You can edit the constant below to play around with this parameter" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "import nltk.stem\n", 270 | "\n", 271 | "nltk.download('stopwords')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "english_stemmer = nltk.stem.SnowballStemmer('english')\n", 283 | "stopwords = set(nltk.corpus.stopwords.words('english'))\n", 284 | "stopwords.update(['from:', 'subject:', 'writes:', 'writes'])" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "We need to add a little adaptor class:" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "class DirectText(corpora.textcorpus.TextCorpus):\n", 303 | "\n", 304 | " def get_texts(self):\n", 305 | " return self.input\n", 306 | "\n", 307 | " def __len__(self):\n", 308 | " return 
len(self.input)\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Load the data" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "import sklearn.datasets\n", 325 | "dataset = sklearn.datasets.load_mlcomp(\"20news-18828\", \"train\",\n", 326 | " mlcomp_root='./data')\n" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "We preprocess the data to split the data into words and remove stopwords:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "collapsed": true 341 | }, 342 | "outputs": [], 343 | "source": [ 344 | "otexts = dataset.data\n", 345 | "texts = dataset.data\n", 346 | "\n", 347 | "texts = [t.decode('utf-8', 'ignore') for t in texts]\n", 348 | "texts = [t.split() for t in texts]\n", 349 | "texts = [map(lambda w: w.lower(), t) for t in texts]\n", 350 | "texts = [filter(lambda s: not len(set(\"+-.?!()>@012345689\") & set(s)), t)\n", 351 | " for t in texts]\n", 352 | "texts = [filter(lambda s: (len(s) > 3) and (s not in stopwords), t)\n", 353 | " for t in texts]\n", 354 | "texts = [[english_stemmer.stem(w) for w in t] for t in texts]" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "We also remove words that are _too common_:" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": { 368 | "collapsed": true 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "from collections import defaultdict\n", 373 | "usage = defaultdict(int)\n", 374 | "for t in texts:\n", 375 | " for w in set(t):\n", 376 | " usage[w] += 1\n", 377 | "limit = len(texts) / 10\n", 378 | "too_common = [w for w in usage if usage[w] > limit]\n", 379 | "too_common = set(too_common)\n", 380 | "texts = [[w for w in t if w not in 
too_common] for t in texts]" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": { 387 | "scrolled": true 388 | }, 389 | "outputs": [], 390 | "source": [ 391 | "corpus = DirectText(texts)\n", 392 | "dictionary = corpus.dictionary\n", 393 | "try:\n", 394 | " dictionary['computer']\n", 395 | "except:\n", 396 | " pass\n", 397 | "\n", 398 | "model = models.ldamodel.LdaModel(\n", 399 | " corpus, num_topics=100, id2word=dictionary.id2token)\n", 400 | "\n", 401 | "thetas = np.zeros((len(texts), 100))\n", 402 | "for i, c in enumerate(corpus):\n", 403 | " for ti, v in model[c]:\n", 404 | " thetas[i, ti] += v" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "We compare all documents to each other **by the topics they contain**:" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "from scipy.spatial import distance\n", 421 | "distances = distance.squareform(distance.pdist(thetas))\n", 422 | "large = distances.max() + 1\n", 423 | "for i in range(len(distances)):\n", 424 | " distances[i, i] = large\n", 425 | "\n", 426 | "print(otexts[1])\n", 427 | "print()\n", 428 | "print()\n", 429 | "print()\n", 430 | "print(otexts[distances[1].argmin()])" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": { 436 | "collapsed": true 437 | }, 438 | "source": [ 439 | "# Modeling Wikipedia" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "Load the data\n", 447 | "\n", 448 | "Note that you **must have run the `wikitopics_create.py` script**. 
This will take a few hours" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "import gensim\n", 458 | "if not path.exists('wiki_lda.pkl'):\n", 459 | " import sys\n", 460 | " sys.stderr.write('''\\\n", 461 | "This script must be run after wikitopics_create.py!\n", 462 | "\n", 463 | "That script creates and saves the LDA model (this must onlly be done once).\n", 464 | "This script is responsible for the analysis.''')\n", 465 | " \n", 466 | "# Load the preprocessed Wikipedia corpus (id2word and mm)\n", 467 | "id2word = gensim.corpora.Dictionary.load_from_text(\n", 468 | " 'data/wiki_en_output_wordids.txt.bz2')\n", 469 | "mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')\n", 470 | "\n", 471 | "# Load the precomputed model\n", 472 | "model = gensim.models.ldamodel.LdaModel.load('wiki_lda.pkl')\n", 473 | "\n", 474 | "topics = np.load('topics.npy', mmap_mode='r')" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "Compute the number of topics mentioned in each document\n" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [ 490 | "lens = (topics > 0).sum(axis=1)\n", 491 | "print('Mean number of topics mentioned: {0:.3}'.format(np.mean(lens)))\n", 492 | "print('Percentage of articles mentioning less than 10 topics: {0:.1%}'.format(np.mean(lens <= 10)))\n", 493 | "\n", 494 | "# Weights will be the total weight of each topic\n", 495 | "weights = topics.sum(0)\n", 496 | "\n" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "Retrieve the most heavily used topic and plot it as a word cloud:\n" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": null, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "words = 
model.show_topic(weights.argmax(), 64)\n", 513 | "\n", 514 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 515 | "wc = wc.generate_from_frequencies(dict(words))\n", 516 | "\n", 517 | "fig,ax = plt.subplots()\n", 518 | "\n", 519 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 520 | "fig" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "fraction_mention = np.mean(topics[:,weights.argmax()] > 0)\n", 530 | "print(\"The most mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n", 531 | "total_weight = np.mean(topics[:,weights.argmax()])\n", 532 | "print(\"It represents {:.1%} of the total number of words.\".format(total_weight))\n" 533 | ] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "Retrieve the **least** heavily used topic and plot it as a word cloud:" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": null, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "words = model.show_topic(weights.argmin(), 64)\n", 549 | "\n", 550 | "wc = WordCloud(background_color='white', max_words=30, width=600, height=600)\n", 551 | "wc = wc.generate_from_frequencies(dict(words))\n", 552 | "fig,ax = plt.subplots()\n", 553 | "\n", 554 | "ax.imshow(wc, interpolation=\"bilinear\")\n", 555 | "fig" 556 | ] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "metadata": {}, 561 | "source": [ 562 | "Again, we can measure how often this topic used:" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "fraction_mention = np.mean(topics[:,weights.argmin()] > 0)\n", 572 | "print(\"The least mentioned topics is mentioned in {:.1%} of documents.\".format(fraction_mention))\n", 573 | "total_weight = np.mean(topics[:,weights.argmin()])\n", 574 | 
"print(\"It represents {:.1%} of the total number of words.\".format(total_weight))" 575 | ] 576 | } 577 | ], 578 | "metadata": { 579 | "kernelspec": { 580 | "display_name": "Python 3", 581 | "language": "python", 582 | "name": "python3" 583 | }, 584 | "language_info": { 585 | "codemirror_mode": { 586 | "name": "ipython", 587 | "version": 3 588 | }, 589 | "file_extension": ".py", 590 | "mimetype": "text/x-python", 591 | "name": "python", 592 | "nbconvert_exporter": "python", 593 | "pygments_lexer": "ipython3", 594 | "version": "3.6.2" 595 | } 596 | }, 597 | "nbformat": 4, 598 | "nbformat_minor": 2 599 | } 600 | -------------------------------------------------------------------------------- /Chapter10/data/.gitignore: -------------------------------------------------------------------------------- 1 | ap.tgz 2 | ap/ 3 | dataset-379-20news-18828_HJRZF.zip 4 | 379/ 5 | enwiki-latest-pages-articles.xml.bz2 6 | wiki_en_output_bow.mm 7 | wiki_en_output_bow.mm.gz 8 | wiki_en_output_bow.mm.index 9 | wiki_en_output_tfidf.mm 10 | wiki_en_output_tfidf.mm.gz 11 | wiki_en_output_tfidf.mm.index 12 | wiki_en_output_wordids.txt.bz2 13 | -------------------------------------------------------------------------------- /Chapter10/data/download_ap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget http://www.cs.columbia.edu/~blei/lda-c/ap.tgz 3 | tar xzf ap.tgz 4 | -------------------------------------------------------------------------------- /Chapter10/data/download_wp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2 3 | -------------------------------------------------------------------------------- /Chapter10/data/preprocess-wikidata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python -m gensim.scripts.make_wiki 
enwiki-latest-pages-articles.xml.bz2 wiki_en_output 4 | -------------------------------------------------------------------------------- /Chapter10/wikitopics_create.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from __future__ import print_function 9 | import logging 10 | import gensim 11 | import numpy as np 12 | 13 | NR_OF_TOPICS = 100 14 | 15 | # Set up logging in order to get progress information as the model is being built: 16 | logging.basicConfig( 17 | format='%(asctime)s : %(levelname)s : %(message)s', 18 | level=logging.INFO) 19 | 20 | # Load the preprocessed corpus (id2word & mm): 21 | id2word = gensim.corpora.Dictionary.load_from_text( 22 | 'data/wiki_en_output_wordids.txt.bz2') 23 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm') 24 | 25 | # Calling the constructor is enough to build the model 26 | # This call will take a few hours! 27 | model = gensim.models.ldamodel.LdaModel( 28 | corpus=mm, 29 | id2word=id2word, 30 | num_topics=NR_OF_TOPICS, 31 | update_every=1, 32 | chunksize=10000, 33 | passes=1) 34 | 35 | # Save the model so we do not need to learn it again. 36 | model.save('wiki_lda.pkl') 37 | 38 | # Compute the document/topic matrix 39 | topics = np.zeros((len(mm), model.num_topics)) 40 | for di,doc in enumerate(mm): 41 | doc_top = model[doc] 42 | for ti,tv in doc_top: 43 | topics[di,ti] += tv 44 | np.save('topics.npy', topics) 45 | 46 | # Alternatively, we create a sparse matrix and save that. 
This alternative 47 | # saves disk space, at the cost of slightly more complex code: 48 | 49 | ## from scipy import sparse, io 50 | ## sp = sparse.csr_matrix(topics) 51 | ## io.savemat('topics.mat', {'topics': sp}) 52 | -------------------------------------------------------------------------------- /Chapter10/wikitopics_create_hdp.py: -------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from __future__ import print_function 9 | import logging 10 | import gensim 11 | import numpy as np 12 | 13 | # Set up logging in order to get progress information as the model is being built: 14 | logging.basicConfig( 15 | format='%(asctime)s : %(levelname)s : %(message)s', 16 | level=logging.INFO) 17 | 18 | # Load the preprocessed corpus (id2word & mm): 19 | id2word = gensim.corpora.Dictionary.load_from_text( 20 | 'data/wiki_en_output_wordids.txt.bz2') 21 | mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm') 22 | 23 | # Calling the constructor is enough to build the model 24 | # This call will take a few hours! 25 | model = gensim.models.hdpmodel.HdpModel( 26 | corpus=mm, 27 | id2word=id2word, 28 | chunksize=10000) 29 | 30 | # Save the model so we do not need to learn it again. 
31 | model.save('wiki_hdp.pkl') 32 | 33 | # Compute the document/topic matrix 34 | topics = np.zeros((len(mm), model.num_topics)) 35 | for di,doc in enumerate(mm): 36 | doc_top = model[doc] 37 | for ti,tv in doc_top: 38 | topics[di,ti] += tv 39 | np.save('topics_hdp.npy', topics) 40 | -------------------------------------------------------------------------------- /Chapter12/README.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Chapter 10 3 | ========== 4 | 5 | Support code for *Chapter 10: Pattern Recognition & Computer Vision* 6 | 7 | Data 8 | ---- 9 | 10 | This chapter relies on a publicly available dataset (which can be downloaded 11 | using the ``download.sh`` script inside the ``data/`` directory) as well the 12 | dataset that is packaged with the repository at ``../SimpleImageDataset/``. 13 | 14 | Running ``download.sh`` will retrieve the other dataset into a directory 15 | ``AnimTransDistr/``. 16 | 17 | Scripts 18 | ------- 19 | 20 | chapter.py 21 | Code as written in the book. 22 | thresholded_figure.py 23 | Computes the thresholded figures, including after Gaussian blurring 24 | lena-ring.py 25 | Lena image with center in focus and blurred edges 26 | figure10.py 27 | Just paste two images next to each others 28 | features.py 29 | Contains the color histogram function from the book as well as a simple 30 | wrapper around ``mahotas.texture.haralick`` 31 | simple_classification.py 32 | Classify SimpleImageDataset with texture features + color histogram features 33 | large_classification.py 34 | Classify ``AnimTransDistr`` with both texture and SURF features. 35 | neighbors.py 36 | Computes image neighbors as well as the neighbor figure from the book. 
37 | 38 | -------------------------------------------------------------------------------- /Chapter12/ch12_3rd/chapter_12.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Computer Vision" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing. It is made available under the MIT License." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Generative Adversarial Networks" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Let's create a class for our GAN based on convolution networks." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import tensorflow as tf\n", 38 | "\n", 39 | "def match(logits, labels):\n", 40 | " logits = tf.clip_by_value(logits, 1e-7, 1. 
- 1e-7)\n", 41 | " return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))\n", 42 | "\n", 43 | "def batchnormalize(X, eps=1e-8, g=None, b=None):\n", 44 | " if X.get_shape().ndims == 4:\n", 45 | " mean = tf.reduce_mean(X, [0,1,2])\n", 46 | " std = tf.reduce_mean( tf.square(X-mean), [0,1,2] )\n", 47 | " X = (X-mean) / tf.sqrt(std+eps)\n", 48 | "\n", 49 | " if g is not None and b is not None:\n", 50 | " g = tf.reshape(g, [1,1,1,-1])\n", 51 | " b = tf.reshape(b, [1,1,1,-1])\n", 52 | " X = X*g + b\n", 53 | "\n", 54 | " elif X.get_shape().ndims == 2:\n", 55 | " mean = tf.reduce_mean(X, 0)\n", 56 | " std = tf.reduce_mean(tf.square(X-mean), 0)\n", 57 | " X = (X-mean) / tf.sqrt(std+eps)\n", 58 | "\n", 59 | " if g is not None and b is not None:\n", 60 | " g = tf.reshape(g, [1,-1])\n", 61 | " b = tf.reshape(b, [1,-1])\n", 62 | " X = X*g + b\n", 63 | "\n", 64 | " else:\n", 65 | " raise NotImplementedError\n", 66 | "\n", 67 | " return X\n", 68 | "\n", 69 | "class DCGAN():\n", 70 | " def __init__(\n", 71 | " self,\n", 72 | " image_shape=[28,28,1],\n", 73 | " dim_z=100,\n", 74 | " dim_y=10,\n", 75 | " dim_W1=1024,\n", 76 | " dim_W2=128,\n", 77 | " dim_W3=64,\n", 78 | " dim_channel=1,\n", 79 | " ):\n", 80 | "\n", 81 | " self.image_shape = image_shape\n", 82 | " self.dim_z = dim_z\n", 83 | " self.dim_y = dim_y\n", 84 | "\n", 85 | " self.dim_W1 = dim_W1\n", 86 | " self.dim_W2 = dim_W2\n", 87 | " self.dim_W3 = dim_W3\n", 88 | " self.dim_channel = dim_channel\n", 89 | "\n", 90 | " def build_model(self):\n", 91 | "\n", 92 | " Z = tf.placeholder(tf.float32, [None, self.dim_z])\n", 93 | " Y = tf.placeholder(tf.float32, [None, self.dim_y])\n", 94 | "\n", 95 | " image_real = tf.placeholder(tf.float32, [None]+self.image_shape)\n", 96 | " image_gen = self.generate(Z, Y)\n", 97 | "\n", 98 | " raw_real = self.discriminate(image_real, Y, False)\n", 99 | " raw_gen = self.discriminate(image_gen, Y, True)\n", 100 | "\n", 101 | " discrim_cost_real = 
match(raw_real, tf.ones_like(raw_real))\n", 102 | " discrim_cost_gen = match(raw_gen, tf.zeros_like(raw_gen))\n", 103 | " discrim_cost = discrim_cost_real + discrim_cost_gen\n", 104 | "\n", 105 | " gen_cost = match( raw_gen, tf.ones_like(raw_gen) )\n", 106 | "\n", 107 | " return Z, Y, image_real, image_gen, discrim_cost, gen_cost\n", 108 | "\n", 109 | " def create_conv2d(self, input, filters, kernel_size, name):\n", 110 | " layer = tf.layers.conv2d(\n", 111 | " inputs=input,\n", 112 | " filters=filters,\n", 113 | " kernel_size=kernel_size,\n", 114 | " strides=[2,2],\n", 115 | " name=\"Conv2d_\" + name,\n", 116 | " padding=\"SAME\")\n", 117 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n", 118 | " return layer\n", 119 | "\n", 120 | " def create_conv2d_transpose(self, input, filters, kernel_size, name, with_batch_norm):\n", 121 | " layer = tf.layers.conv2d_transpose(\n", 122 | " inputs=input,\n", 123 | " filters=filters,\n", 124 | " kernel_size=kernel_size,\n", 125 | " strides=[2,2],\n", 126 | " name=\"Conv2d_\" + name,\n", 127 | " padding=\"SAME\")\n", 128 | " if with_batch_norm:\n", 129 | " layer = batchnormalize(layer)\n", 130 | " layer = tf.nn.relu(layer)\n", 131 | " return layer\n", 132 | "\n", 133 | " def create_dense(self, input, units, name, leaky):\n", 134 | " layer = tf.layers.dense(\n", 135 | " inputs=input,\n", 136 | " units=units,\n", 137 | " name=\"Dense\" + name,\n", 138 | " )\n", 139 | " layer = batchnormalize(layer)\n", 140 | " if leaky:\n", 141 | " layer = tf.nn.leaky_relu(layer, name= \"LeakyRELU\" + name)\n", 142 | " else:\n", 143 | " layer = tf.nn.relu(layer, name=\"RELU_\" + name)\n", 144 | " return layer\n", 145 | "\n", 146 | " def discriminate(self, image, Y, reuse=False):\n", 147 | " with tf.variable_scope('discriminate', reuse=reuse):\n", 148 | " \n", 149 | " batch_size = Y.get_shape()[0]\n", 150 | " \n", 151 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n", 152 | " X = tf.concat(axis=3, values=[image, 
yb*tf.ones([1, 28, 28, self.dim_y])])\n", 153 | " \n", 154 | " h1 = self.create_conv2d(X, self.dim_W3, 5, \"Layer1\")\n", 155 | " h1 = tf.concat(axis=3, values=[h1, yb*tf.ones([1, 14, 14, self.dim_y])])\n", 156 | " \n", 157 | " h2 = self.create_conv2d(h1, self.dim_W2, 5, \"Layer2\")\n", 158 | " h2 = tf.reshape(h2, tf.stack([-1, 7*7*128]))\n", 159 | " h2 = tf.concat(axis=1, values=[h2, Y])\n", 160 | " \n", 161 | " h3 = self.create_dense(h2, self.dim_W1, \"Layer3\", True)\n", 162 | " h3 = tf.concat(axis=1, values=[h3, Y])\n", 163 | " \n", 164 | " h4 = self.create_dense(h3, 1, \"Layer4\", True)\n", 165 | " return h4\n", 166 | "\n", 167 | " def generate(self, Z, Y, reuse=False):\n", 168 | " with tf.variable_scope('generate', reuse=reuse):\n", 169 | "\n", 170 | " yb = tf.reshape(Y, tf.stack([-1, 1, 1, self.dim_y]))\n", 171 | " Z = tf.concat(axis=1, values=[Z,Y])\n", 172 | " h1 = self.create_dense(Z, self.dim_W1, \"Layer1\", False)\n", 173 | " h1 = tf.concat(axis=1, values=[h1, Y])\n", 174 | " h2 = self.create_dense(h1, self.dim_W2*7*7, \"Layer2\", False)\n", 175 | " h2 = tf.reshape(h2, tf.stack([-1,7,7,self.dim_W2]))\n", 176 | " h2 = tf.concat(axis=3, values=[h2, yb*tf.ones([1, 7, 7, self.dim_y])])\n", 177 | "\n", 178 | " h3 = self.create_conv2d_transpose(h2, self.dim_W3, 5, \"Layer3\", True)\n", 179 | " h3 = tf.concat(axis=3, values=[h3, yb*tf.ones([1, 14,14,self.dim_y])] )\n", 180 | "\n", 181 | " h4 = self.create_conv2d_transpose(h3, self.dim_channel, 7, \"Layer4\", False)\n", 182 | " x = tf.nn.sigmoid(h4)\n", 183 | " return x" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "We add 2 helper functions, one for transforming our data to one-hot encoding (without using Tensorflow, we could use it instead) and one to plot and save our sampled images." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "import imageio\n", 200 | "import numpy as np\n", 201 | "from matplotlib import pyplot as plt\n", 202 | "%matplotlib inline\n", 203 | "\n", 204 | "def one_hot(X, n):\n", 205 | " X = np.asarray(X).flatten()\n", 206 | " Xoh = np.zeros((len(X), n))\n", 207 | " Xoh[np.arange(len(X)), X] = 1.\n", 208 | " return Xoh\n", 209 | "\n", 210 | "def save_visualization(X, nh_nw, save_path='./sample.jpg'):\n", 211 | " h,w = X.shape[1], X.shape[2]\n", 212 | " img = np.zeros((h * nh_nw[0], w * nh_nw[1], 3))\n", 213 | "\n", 214 | " for n,x in enumerate(X):\n", 215 | " j = n // nh_nw[1]\n", 216 | " i = n % nh_nw[1]\n", 217 | " img[j*h:j*h+h, i*w:i*w+w, :] = x / 255\n", 218 | "\n", 219 | " imageio.imwrite(save_path, img)\n", 220 | " plt.imshow(img)\n", 221 | " plt.show()" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "Our hyperparameters and our data" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "import os\n", 238 | "import numpy as np\n", 239 | "\n", 240 | "n_epochs = 10\n", 241 | "learning_rate = 0.0002\n", 242 | "batch_size = 128\n", 243 | "image_shape = [28,28,1]\n", 244 | "dim_z = 10\n", 245 | "dim_y = 10\n", 246 | "dim_W1 = 1024\n", 247 | "dim_W2 = 128\n", 248 | "dim_W3 = 64\n", 249 | "dim_channel = 1\n", 250 | "\n", 251 | "visualize_dim=196\n", 252 | "\n", 253 | "from sklearn.datasets import fetch_mldata\n", 254 | "mnist = fetch_mldata('MNIST original')\n", 255 | "mnist.data.shape = (-1, 28, 28)\n", 256 | "mnist.data = mnist.data.astype(np.float32).reshape( [-1, 28, 28, 1]) / 255.\n", 257 | "mnist.num_examples = len(mnist.data)\n", 258 | "mnist.target = one_hot(mnist.target.astype(np.int8), dim_y)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": 
{}, 264 | "source": [ 265 | "Let's generate some images!" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "tf.reset_default_graph()\n", 275 | "dcgan_model = DCGAN(\n", 276 | " image_shape=image_shape,\n", 277 | " dim_z=dim_z,\n", 278 | " dim_W1=dim_W1,\n", 279 | " dim_W2=dim_W2,\n", 280 | " dim_W3=dim_W3,\n", 281 | " )\n", 282 | "Z_tf, Y_tf, image_tf, image_tf_sample, d_cost_tf, g_cost_tf, = dcgan_model.build_model()\n", 283 | "\n", 284 | "discrim_vars = list(filter(lambda x: x.name.startswith('discr'), tf.trainable_variables()))\n", 285 | "gen_vars = list(filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()))\n", 286 | "\n", 287 | "train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)\n", 288 | "train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)\n", 289 | "\n", 290 | "Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim,dim_z))\n", 291 | "Y_np_sample = one_hot( np.random.randint(10, size=[visualize_dim]), dim_y)\n", 292 | "\n", 293 | "step = 1000\n", 294 | "\n", 295 | "with tf.Session() as sess:\n", 296 | " sess.run(tf.global_variables_initializer())\n", 297 | " for epoch in range(n_epochs):\n", 298 | " permut = np.random.permutation(mnist.num_examples)\n", 299 | " trX = mnist.data[permut]\n", 300 | " trY = mnist.target[permut]\n", 301 | " Z = np.random.uniform(-1, 1, size=[mnist.num_examples, dim_z]).astype(np.float32)\n", 302 | "\n", 303 | " print(\"epoch: %i\" % epoch)\n", 304 | " for j in range(0, mnist.num_examples, batch_size):\n", 305 | " if j % step == 0:\n", 306 | " print(\" batch: %i\" % j)\n", 307 | "\n", 308 | " batch = permut[j:j+batch_size]\n", 309 | "\n", 310 | " Xs = trX[batch]\n", 311 | " Ys = trY[batch]\n", 312 | " Zs = Z[batch]\n", 313 | "\n", 314 | " if (j / batch_size) % 2 == 0:\n", 315 | " 
sess.run(train_op_discrim,\n", 316 | " feed_dict={\n", 317 | " Z_tf:Zs,\n", 318 | " Y_tf:Ys,\n", 319 | " image_tf:Xs\n", 320 | " })\n", 321 | " else:\n", 322 | " sess.run(train_op_gen,\n", 323 | " feed_dict={\n", 324 | " Z_tf:Zs,\n", 325 | " Y_tf:Ys\n", 326 | " })\n", 327 | "\n", 328 | " if j % step == 0:\n", 329 | " generated_samples = sess.run(\n", 330 | " image_tf_sample,\n", 331 | " feed_dict={\n", 332 | " Z_tf:Z_np_sample,\n", 333 | " Y_tf:Y_np_sample\n", 334 | " })\n", 335 | " generated_samples = generated_samples * 255\n", 336 | " save_visualization(generated_samples, (7,28), save_path='./B09124_11_sample_%03d_%04d.jpg' % (epoch, j / step))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.6.5" 364 | } 365 | }, 366 | "nbformat": 4, 367 | "nbformat_minor": 2 368 | } 369 | -------------------------------------------------------------------------------- /Chapter12/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | mkdir -p AnimTransDistr 4 | cd AnimTransDistr 5 | curl -O http://vision.stanford.edu/Datasets/AnimTransDistr.rar 6 | unrar x AnimTransDistr.rar 7 | # The following file is a weird file: 8 | rm Anims/104034.jpg 9 | -------------------------------------------------------------------------------- /Chapter12/forest.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/forest.jpeg -------------------------------------------------------------------------------- /Chapter12/scene00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/Chapter12/scene00.jpg -------------------------------------------------------------------------------- /Chapter13/chapter_13.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reinforcement learning with Tensorflow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This code is supporting material for the book `Building Machine Learning Systems with Python` by [Willi Richert](https://www.linkedin.com/in/willirichert/), [Luis Pedro Coelho](https://www.linkedin.com/in/luispedrocoelho/) and [Matthieu Brucher](https://www.linkedin.com/in/matthieubrucher/) published by PACKT Publishing.\n", 15 | "\n", 16 | "It is made available under the MIT License.\n", 17 | "\n", 18 | "All code examples use Python in version..." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import sys\n", 28 | "sys.version" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Utility functions" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import os\n", 45 | "\n", 46 | "CHART_DIR = \"charts\"\n", 47 | "if not os.path.exists(CHART_DIR):\n", 48 | " os.mkdir(CHART_DIR)\n", 49 | "\n", 50 | "def save_png(name):\n", 51 | " fn = 'B09124_13_%s.png'%name # please ignore, it just helps our publisher :-)\n", 52 | " plt.savefig(os.path.join(CHART_DIR, fn), bbox_inches=\"tight\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## Simple text games" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import gym\n", 69 | "import numpy as np\n", 70 | "\n", 71 | "env = gym.make('FrozenLake-v0')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "### Estimating the Q function the old fashion way" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "Let's make a table with some Q values for this environment" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# Start with an empty table\n", 95 | "Q = np.zeros((env.observation_space.n, env.action_space.n))\n", 96 | "# Set learning hyperparameters\n", 97 | "lr = .8\n", 98 | "y = .95\n", 99 | "num_episodes = 2000\n", 100 | "\n", 101 | "# Let's run!\n", 102 | "for i in range(num_episodes):\n", 103 | " # Reset environment and get first new observation (top left)\n", 104 | " s = env.reset()\n", 105 | " # Do 100 iterations to update the 
table\n", 106 | " for i in range(100):\n", 107 | " # Choose an action by picking the max of the table + additional random noise ponderated by the episode\n", 108 | " a = np.argmax(Q[s,:] + np.random.randn(1,env.action_space.n)//(i+1))\n", 109 | " # Get new state and reward from environment after chosen step \n", 110 | " s1, r, d,_ = env.step(a)\n", 111 | " # Update Q-Table with new knowledge\n", 112 | " Q[s,a] = Q[s,a] + lr*(r + y*np.max(Q[s1,:]) - Q[s,a])\n", 113 | " s = s1\n", 114 | " if d == True:\n", 115 | " break" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "print(\"Final Q-Table Values\")\n", 125 | "print(Q)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Test games with TF" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "import random\n", 142 | "import tensorflow as tf\n", 143 | "import matplotlib.pyplot as plt\n", 144 | "%matplotlib inline" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Let's create a new network." 
152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "y = 0.99\n", 161 | "e = 0.1 # 1 in 10 samples, we choose a new action for the network\n", 162 | "num_episodes = 2000\n", 163 | "learning_rate = 0.1" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "tf.reset_default_graph()\n", 173 | "\n", 174 | "# A simple one layer network\n", 175 | "inputs = tf.placeholder(shape=[None, 16], dtype=tf.float32, name=\"input\")\n", 176 | "Qout = tf.layers.dense(\n", 177 | " inputs=inputs,\n", 178 | " units=4,\n", 179 | " use_bias=False,\n", 180 | " name=\"dense\",\n", 181 | " kernel_initializer=tf.random_uniform_initializer(minval=0, maxval=.0125)\n", 182 | ")\n", 183 | "predict = tf.argmax(Qout, 1)\n", 184 | "\n", 185 | "# Our optimizer will try to optimize \n", 186 | "nextQ = tf.placeholder(shape=[None, 4], dtype=tf.float32, name=\"target\")\n", 187 | "loss = tf.reduce_sum(tf.square(nextQ - Qout))\n", 188 | "\n", 189 | "trainer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 190 | "updateModel = trainer.minimize(loss)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "We can now train the network, and check that it will get more and more successes as the training progresses." 
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# To keep track of our games and our results\n", 207 | "jList = []\n", 208 | "rList = []\n", 209 | "with tf.Session() as sess:\n", 210 | " sess.run(tf.global_variables_initializer())\n", 211 | "\n", 212 | " for i in range(num_episodes):\n", 213 | " s = env.reset()\n", 214 | " rAll = 0\n", 215 | " \n", 216 | " for j in range(100):\n", 217 | " a, targetQ = sess.run([predict, Qout], feed_dict={inputs:np.identity(16)[s:s+1]})\n", 218 | " # We randomly choose a new state that we may have not encountered before\n", 219 | " if np.random.rand(1) < e:\n", 220 | " a[0] = env.action_space.sample()\n", 221 | "\n", 222 | " s1, r, d, _ = env.step(a[0])\n", 223 | " \n", 224 | " # Obtain the Q' values by feeding the new state through our network\n", 225 | " Q1 = sess.run(Qout, feed_dict={inputs:np.identity(16)[s1:s1+1]})\n", 226 | " # Obtain maxQ' and set our target value for chosen action.\n", 227 | " targetQ[0, a[0]] = r + y*np.max(Q1)\n", 228 | " \n", 229 | " # Train our network using target and predicted Q values\n", 230 | " sess.run(updateModel, feed_dict={inputs:np.identity(16)[s:s+1], nextQ:targetQ})\n", 231 | " rAll += r\n", 232 | " s = s1\n", 233 | " if d == True:\n", 234 | " # Reduce chance of random action as we train the model.\n", 235 | " e = 1 / ((i // 50) + 10)\n", 236 | " break\n", 237 | " jList.append(j)\n", 238 | " rList.append(rAll)\n", 239 | "print(\"Percent of succesful episodes: %f%%\" % (sum(rList) / num_episodes))" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "We now display the evolution of the reward with each episode" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "from scipy.signal import lfilter\n", 256 | "\n", 257 | 
"plt.plot(lfilter(np.ones(20)/20, [1], rList))\n", 258 | "save_png(\"reward\")" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "We can also see that the survival increases, even if we take suboptimal paths:" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "plt.plot(jList)\n", 275 | "save_png(\"length\")" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "## Atari games" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "The code here was inspired by several tutorials and courses online:\n", 290 | "* https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26\n", 291 | "* https://github.com/tokb23/dqn\n", 292 | "* https://github.com/dennybritz/reinforcement-learning/blob/master/DQN/dqn.py" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "We can now design a network that can tackle more or less any of the Atari games available on the gym platform." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "import gym\n", 309 | "\n", 310 | "import os\n", 311 | "import six\n", 312 | "import numpy as np\n", 313 | "import tensorflow as tf\n", 314 | "import random\n", 315 | "import itertools\n", 316 | "from collections import deque, namedtuple\n", 317 | "\n", 318 | "CHART_DIR = \"charts\"\n", 319 | "if not os.path.exists(CHART_DIR):\n", 320 | " os.mkdir(CHART_DIR)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "We need a few helper functions, one to preprocess our images and shrink them and two others that will transpose the data. 
The reason is that we use the past images as additional channels, so the axis order is wrong." 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "def to_grayscale(img):\n", 337 | " return np.mean(img, axis=2).astype(np.uint8)\n", 338 | "\n", 339 | "def downsample(img):\n", 340 | " return img[::2, ::2]\n", 341 | "\n", 342 | "def preprocess(img):\n", 343 | " return to_grayscale(downsample(img))[None,:,:]\n", 344 | "\n", 345 | "def adapt_state(state):\n", 346 | " return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)]\n", 347 | "\n", 348 | "def adapt_batch_state(state):\n", 349 | " return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0\n", 350 | "\n", 351 | "def get_initial_state(frame):\n", 352 | " processed_frame = preprocess(frame)\n", 353 | " state = [processed_frame for _ in range(state_length)]\n", 354 | " return np.concatenate(state)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "We add a bunch of hyperparameters and constants" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "metadata": {}, 368 | "outputs": [], 369 | "source": [ 370 | "\n", 371 | "env_name = \"Breakout-v4\"\n", 372 | "\n", 373 | "width = 80 # Resized frame width\n", 374 | "height = 105 # Resized frame height\n", 375 | "\n", 376 | "n_episodes = 12000 # Number of runs for the agent\n", 377 | "state_length = 4 # Number of most frames we input to the network\n", 378 | "\n", 379 | "gamma = 0.99 # Discount factor\n", 380 | "\n", 381 | "exploration_steps = 1000000 # During all these steps, we progressively lower epsilon\n", 382 | "initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy\n", 383 | "final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy\n", 384 | "\n", 385 | "initial_random_search = 20000 # Number of steps to populate the replay memory before training starts\n", 386 
| "replay_memory_size = 400000 # Number of states we keep for training\n", 387 | "batch_size = 32 # Batch size\n", 388 | "network_update_interval = 10000 # The frequency with which the target network is updated\n", 389 | "train_skips = 4 # The agent selects 4 actions between successive updates\n", 390 | "\n", 391 | "learning_rate = 0.00025 # Learning rate used by RMSProp\n", 392 | "momentum = 0.95 # momentum used by RMSProp\n", 393 | "min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update\n", 394 | "\n", 395 | "network_path = 'saved_networks/' + env_name\n", 396 | "tensorboard_path = 'summary/' + env_name\n", 397 | "save_interval = 300000 # The frequency with which the network is saved" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "We use a class to train, save and restore our network. We will use one instance for the Q network and another one for the target network.\n", 405 | "get_trained_action() will be the method used to get a new action from the network." 
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "class Estimator():\n", 415 | " \"\"\"Q-Value Estimator neural network.\n", 416 | " This network is used for both the Q-Network and the Target Network.\n", 417 | " \"\"\"\n", 418 | "\n", 419 | " def __init__(self, env, scope=\"estimator\", summaries_dir=None):\n", 420 | " self.scope = scope\n", 421 | " self.num_actions = env.action_space.n\n", 422 | " self.epsilon = initial_epsilon\n", 423 | " self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps\n", 424 | " \n", 425 | " # Writes Tensorboard summaries to disk\n", 426 | " self.summary_writer = None\n", 427 | " with tf.variable_scope(scope):\n", 428 | " # Build the graph\n", 429 | " self.build_model()\n", 430 | " if summaries_dir:\n", 431 | " summary_dir = os.path.join(summaries_dir, \"summaries_%s\" % scope)\n", 432 | " if not os.path.exists(summary_dir):\n", 433 | " os.makedirs(summary_dir)\n", 434 | " self.summary_writer = tf.summary.FileWriter(summary_dir)\n", 435 | "\n", 436 | " def build_model(self):\n", 437 | " \"\"\"\n", 438 | " Builds the Tensorflow graph.\n", 439 | " \"\"\"\n", 440 | " self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name=\"X\")\n", 441 | " # The TD target value\n", 442 | " self.y = tf.placeholder(shape=[None], dtype=tf.float32, name=\"y\")\n", 443 | " # Integer id of which action was selected\n", 444 | " self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name=\"actions\")\n", 445 | "\n", 446 | " model = tf.keras.Sequential(self.scope)\n", 447 | " model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name=\"Layer1\"))\n", 448 | " model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name=\"Layer2\"))\n", 449 | " 
model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name=\"Layer3\"))\n", 450 | " model.add(tf.keras.layers.Flatten(name=\"Flatten\"))\n", 451 | " model.add(tf.keras.layers.Dense(512, activation='relu', name=\"Layer4\"))\n", 452 | " model.add(tf.keras.layers.Dense(self.num_actions, name=\"Output\"))\n", 453 | "\n", 454 | " self.predictions = model(self.X)\n", 455 | "\n", 456 | " a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0)\n", 457 | " q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1)\n", 458 | " \n", 459 | " # Calculate the loss\n", 460 | " self.losses = tf.squared_difference(self.y, q_value)\n", 461 | " self.loss = tf.reduce_mean(self.losses)\n", 462 | "\n", 463 | " # Optimizer Parameters from original paper\n", 464 | " self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient)\n", 465 | " self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step())\n", 466 | "\n", 467 | " # Summaries for Tensorboard\n", 468 | " self.summaries = tf.summary.merge([\n", 469 | " tf.summary.scalar(\"loss\", self.loss),\n", 470 | " tf.summary.histogram(\"loss_hist\", self.losses),\n", 471 | " tf.summary.histogram(\"q_values_hist\", self.predictions),\n", 472 | " tf.summary.scalar(\"max_q_value\", tf.reduce_max(self.predictions))\n", 473 | " ])\n", 474 | "\n", 475 | "\n", 476 | " def predict(self, sess, s):\n", 477 | " return sess.run(self.predictions, { self.X: s })\n", 478 | "\n", 479 | " def update(self, sess, s, a, y):\n", 480 | " feed_dict = { self.X: s, self.y: y, self.actions: a }\n", 481 | " summaries, global_step, _, loss = sess.run(\n", 482 | " [self.summaries, tf.train.get_global_step(), self.train_op, self.loss],\n", 483 | " feed_dict)\n", 484 | " if self.summary_writer:\n", 485 | " self.summary_writer.add_summary(summaries, global_step)\n", 486 | " return loss\n", 487 | "\n", 488 | " def 
get_action(self, sess, state):\n", 489 | " if self.epsilon >= random.random():\n", 490 | " action = random.randrange(self.num_actions)\n", 491 | " else:\n", 492 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n", 493 | "\n", 494 | " # Decay epsilon over time\n", 495 | " if self.epsilon > final_epsilon:\n", 496 | " self.epsilon -= self.epsilon_step\n", 497 | "\n", 498 | " return action\n", 499 | "\n", 500 | " def get_trained_action(self, state):\n", 501 | " action = np.argmax(self.predict(sess, adapt_state(state)))\n", 502 | " return action" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "We also create a function to copy parameters from one network to the other, a function to create an initial clean state as well as a function to create the per-episode scalar summary reports." 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [ 518 | "def copy_model_parameters(estimator1, estimator2):\n", 519 | " \"\"\"\n", 520 | " Copies the model parameters of one estimator to another.\n", 521 | " Args:\n", 522 | " estimator1: Estimator to copy the parameters from\n", 523 | " estimator2: Estimator to copy the parameters to\n", 524 | " \"\"\"\n", 525 | " e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n", 526 | " e1_params = sorted(e1_params, key=lambda v: v.name)\n", 527 | " e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n", 528 | " e2_params = sorted(e2_params, key=lambda v: v.name)\n", 529 | "\n", 530 | " update_ops = []\n", 531 | " for e1_v, e2_v in zip(e1_params, e2_params):\n", 532 | " op = e2_v.assign(e1_v)\n", 533 | " update_ops.append(op)\n", 534 | "\n", 535 | " return update_ops\n", 536 | "\n", 537 | "def create_memory(env):\n", 538 | " # Populate the replay memory with initial experience \n", 539 | " replay_memory = 
[]\n", 540 | " \n", 541 | " frame = env.reset()\n", 542 | " state = get_initial_state(frame)\n", 543 | "\n", 544 | " for i in range(replay_memory_init_size):\n", 545 | " action = np.random.choice(np.arange(env.action_space.n))\n", 546 | " frame, reward, done, _ = env.step(action)\n", 547 | " \n", 548 | " next_state = np.append(state[1:, :, :], preprocess(frame), axis=0)\n", 549 | " replay_memory.append(Transition(state, action, reward, next_state, done))\n", 550 | " if done:\n", 551 | " frame = env.reset()\n", 552 | " state = get_initial_state(frame)\n", 553 | " else:\n", 554 | " state = next_state\n", 555 | " \n", 556 | " return replay_memory\n", 557 | "\n", 558 | "\n", 559 | "def setup_summary():\n", 560 | " with tf.variable_scope(\"episode\"):\n", 561 | " episode_total_reward = tf.Variable(0., name=\"EpisodeTotalReward\")\n", 562 | " tf.summary.scalar('Total Reward', episode_total_reward)\n", 563 | " episode_avg_max_q = tf.Variable(0., name=\"EpisodeAvgMaxQ\")\n", 564 | " tf.summary.scalar('Average Max Q', episode_avg_max_q)\n", 565 | " episode_duration = tf.Variable(0., name=\"EpisodeDuration\")\n", 566 | " tf.summary.scalar('Duration', episode_duration)\n", 567 | " episode_avg_loss = tf.Variable(0., name=\"EpisodeAverageLoss\")\n", 568 | " tf.summary.scalar('Average Loss', episode_avg_loss)\n", 569 | " summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]\n", 570 | " summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]\n", 571 | " update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]\n", 572 | " summary_op = tf.summary.merge_all(scope=\"episode\")\n", 573 | " return summary_placeholders, update_ops, summary_op" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "We can now train our network (and save some final images from the trained network)" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 
585 | "execution_count": null, 586 | "metadata": {}, 587 | "outputs": [], 588 | "source": [ 589 | "from tqdm import tqdm\n", 590 | "\n", 591 | "env = gym.make(env_name)\n", 592 | "tf.reset_default_graph()\n", 593 | "\n", 594 | "# Create a glboal step variable\n", 595 | "global_step = tf.Variable(0, name='global_step', trainable=False)\n", 596 | "\n", 597 | "# Create estimators\n", 598 | "q_estimator = Estimator(env, scope=\"q\", summaries_dir=tensorboard_path)\n", 599 | "target_estimator = Estimator(env, scope=\"target_q\")\n", 600 | "\n", 601 | "copy_model = copy_model_parameters(q_estimator, target_estimator)\n", 602 | "\n", 603 | "summary_placeholders, update_ops, summary_op = setup_summary()\n", 604 | "\n", 605 | "# The replay memory\n", 606 | "replay_memory = create_memory(env)\n", 607 | "\n", 608 | "with tf.Session() as sess:\n", 609 | " sess.run(tf.global_variables_initializer())\n", 610 | "\n", 611 | " q_estimator.summary_writer.add_graph(sess.graph)\n", 612 | "\n", 613 | " saver = tf.train.Saver()\n", 614 | " # Load a previous checkpoint if we find one\n", 615 | " latest_checkpoint = tf.train.latest_checkpoint(network_path)\n", 616 | " if latest_checkpoint:\n", 617 | " print(\"Loading model checkpoint %s...\\n\" % latest_checkpoint)\n", 618 | " saver.restore(sess, latest_checkpoint)\n", 619 | "\n", 620 | " total_t = sess.run(tf.train.get_global_step())\n", 621 | "\n", 622 | " for episode in tqdm(range(n_episodes)):\n", 623 | " if total_t % save_interval == 0:\n", 624 | " # Save the current checkpoint\n", 625 | " saver.save(tf.get_default_session(), network_path)\n", 626 | "\n", 627 | " frame = env.reset()\n", 628 | " state = get_initial_state(frame)\n", 629 | "\n", 630 | " total_reward = 0\n", 631 | " total_loss = 0\n", 632 | " total_q_max = 0\n", 633 | "\n", 634 | " for duration in itertools.count(): \n", 635 | " # Maybe update the target estimator\n", 636 | " if total_t % network_update_interval == 0:\n", 637 | " sess.run(copy_model)\n", 638 | "\n", 639 
| " action = q_estimator.get_action(sess, state)\n", 640 | " frame, reward, terminal, _ = env.step(action)\n", 641 | "\n", 642 | " processed_frame = preprocess(frame)\n", 643 | " next_state = np.append(state[1:, :, :], processed_frame, axis=0)\n", 644 | "\n", 645 | " reward = np.clip(reward, -1, 1)\n", 646 | " replay_memory.append(Transition(state, action, reward, next_state, terminal))\n", 647 | " if len(replay_memory) > replay_memory_size:\n", 648 | " replay_memory.popleft()\n", 649 | "\n", 650 | " samples = random.sample(replay_memory, batch_size)\n", 651 | " states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))\n", 652 | "\n", 653 | " # Calculate q values and targets (Double DQN)\n", 654 | " adapted_state = adapt_batch_state(next_states_batch)\n", 655 | "\n", 656 | " q_values_next = q_estimator.predict(sess, adapted_state)\n", 657 | " best_actions = np.argmax(q_values_next, axis=1)\n", 658 | " q_values_next_target = target_estimator.predict(sess, adapted_state)\n", 659 | " targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions]\n", 660 | "\n", 661 | " # Perform gradient descent update\n", 662 | " states_batch = adapt_batch_state(states_batch)\n", 663 | " loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)\n", 664 | "\n", 665 | " total_q_max += np.max(q_values_next)\n", 666 | " total_loss += loss\n", 667 | " total_t += 1\n", 668 | " total_reward += reward\n", 669 | " if terminal:\n", 670 | " break\n", 671 | "\n", 672 | " stats = [total_reward, total_q_max / duration, duration, total_loss / duration]\n", 673 | " for i in range(len(stats)):\n", 674 | " sess.run(update_ops[i], feed_dict={\n", 675 | " summary_placeholders[i]: float(stats[i])\n", 676 | " })\n", 677 | " summary_str = sess.run(summary_op, )\n", 678 | " q_estimator.summary_writer.add_summary(summary_str, episode)\n", 679 | "\n", 680 | " 
env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode)))\n", 681 | "\n", 682 | " # Save the last checkpoint\n", 683 | " saver.save(tf.get_default_session(), network_path)" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": null, 689 | "metadata": {}, 690 | "outputs": [], 691 | "source": [] 692 | } 693 | ], 694 | "metadata": { 695 | "kernelspec": { 696 | "display_name": "Python 3", 697 | "language": "python", 698 | "name": "python3" 699 | }, 700 | "language_info": { 701 | "codemirror_mode": { 702 | "name": "ipython", 703 | "version": 3 704 | }, 705 | "file_extension": ".py", 706 | "mimetype": "text/x-python", 707 | "name": "python", 708 | "nbconvert_exporter": "python", 709 | "pygments_lexer": "ipython3", 710 | "version": "3.6.5" 711 | } 712 | }, 713 | "nbformat": 4, 714 | "nbformat_minor": 2 715 | } 716 | -------------------------------------------------------------------------------- /Chapter13/simple_breakout.py: -------------------------------------------------------------------------------- 1 | # Import the gym module 2 | import gym 3 | 4 | # Create a breakout environment 5 | env = gym.make('BreakoutDeterministic-v4') 6 | # Reset it, returns the starting frame 7 | frame = env.reset() 8 | # Render 9 | env.render() 10 | 11 | is_done = False 12 | while not is_done: 13 | # Perform a random action, returns the new frame, reward and whether the game is over 14 | frame, reward, is_done, _ = env.step(env.action_space.sample()) 15 | # Render 16 | env.render() 17 | -------------------------------------------------------------------------------- /Chapter13/tf_breakout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Built by merging different Q examples available online 4 | 5 | import gym 6 | 7 | import os 8 | import six 9 | import numpy as np 10 | import tensorflow as tf 11 | import random 12 | import itertools 13 | from collections import deque, 
namedtuple 14 | 15 | CHART_DIR = "charts" 16 | if not os.path.exists(CHART_DIR): 17 | os.mkdir(CHART_DIR) 18 | 19 | env_name = "Breakout-v4" 20 | 21 | width = 80 # Resized frame width 22 | height = 105 # Resized frame height 23 | 24 | n_episodes = 12000 # Number of runs for the agent 25 | state_length = 4 # Number of most frames we input to the network 26 | 27 | gamma = 0.99 # Discount factor 28 | 29 | exploration_steps = 1000000 # During all these steps, we progressively lower epsilon 30 | initial_epsilon = 1.0 # Initial value of epsilon in epsilon-greedy 31 | final_epsilon = 0.1 # Final value of epsilon in epsilon-greedy 32 | 33 | replay_memory_init_size = 1000 # Number of steps to populate the replay memory before training starts 34 | replay_memory_size = 400000 # Number of states we keep for training 35 | batch_size = 32 # Batch size 36 | network_update_interval = 10000 # The frequency with which the target network is updated 37 | train_skips = 4 # The agent selects 4 actions between successive updates 38 | 39 | learning_rate = 0.00025 # Learning rate used by RMSProp 40 | momentum = 0.95 # momentum used by RMSProp 41 | min_gradient = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update 42 | 43 | network_path = 'saved_networks/' + env_name 44 | tensorboard_path = 'summary/' + env_name 45 | save_interval = 300000 # The frequency with which the network is saved 46 | 47 | Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"]) 48 | 49 | def to_grayscale(img): 50 | return np.mean(img, axis=2).astype(np.uint8) 51 | 52 | def downsample(img): 53 | return img[::2, ::2] 54 | 55 | def preprocess(img): 56 | return to_grayscale(downsample(img))[None,:,:] 57 | 58 | def adapt_state(state): 59 | return [np.float32(np.transpose(state, (2, 1, 0)) / 255.0)] 60 | 61 | def adapt_batch_state(state): 62 | return np.transpose(np.array(state), (0, 3, 2, 1)) / 255.0 63 | 64 | def get_initial_state(frame): 65 | 
processed_frame = preprocess(frame) 66 | state = [processed_frame for _ in range(state_length)] 67 | return np.concatenate(state) 68 | 69 | class Estimator(): 70 | """Q-Value Estimator neural network. 71 | This network is used for both the Q-Network and the Target Network. 72 | """ 73 | 74 | def __init__(self, env, scope="estimator", summaries_dir=None): 75 | self.scope = scope 76 | self.num_actions = env.action_space.n 77 | self.epsilon = initial_epsilon 78 | self.epsilon_step = (initial_epsilon - final_epsilon) / exploration_steps 79 | 80 | # Writes Tensorboard summaries to disk 81 | self.summary_writer = None 82 | with tf.variable_scope(scope): 83 | # Build the graph 84 | self.build_model() 85 | if summaries_dir: 86 | summary_dir = os.path.join(summaries_dir, "summaries_%s" % scope) 87 | if not os.path.exists(summary_dir): 88 | os.makedirs(summary_dir) 89 | self.summary_writer = tf.summary.FileWriter(summary_dir) 90 | 91 | def build_model(self): 92 | """ 93 | Builds the Tensorflow graph. 
94 | """ 95 | self.X = tf.placeholder(shape=[None, width, height, state_length], dtype=tf.float32, name="X") 96 | # The TD target value 97 | self.y = tf.placeholder(shape=[None], dtype=tf.float32, name="y") 98 | # Integer id of which action was selected 99 | self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name="actions") 100 | 101 | model = tf.keras.Sequential(name=self.scope) 102 | model.add(tf.keras.layers.Convolution2D(filters=32, kernel_size=8, strides=(4, 4), activation='relu', input_shape=(width, height, state_length), name="Layer1")) 103 | model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=4, strides=(2, 2), activation='relu', name="Layer2")) 104 | model.add(tf.keras.layers.Convolution2D(filters=64, kernel_size=3, strides=(1, 1), activation='relu', name="Layer3")) 105 | model.add(tf.keras.layers.Flatten(name="Flatten")) 106 | model.add(tf.keras.layers.Dense(512, activation='relu', name="Layer4")) 107 | model.add(tf.keras.layers.Dense(self.num_actions, name="Output")) 108 | 109 | self.predictions = model(self.X) 110 | 111 | a_one_hot = tf.one_hot(self.actions, self.num_actions, 1.0, 0.0) 112 | q_value = tf.reduce_sum(tf.multiply(self.predictions, a_one_hot), reduction_indices=1) 113 | 114 | # Calculate the loss 115 | self.losses = tf.squared_difference(self.y, q_value) 116 | self.loss = tf.reduce_mean(self.losses) 117 | 118 | # Optimizer Parameters from original paper 119 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum, epsilon=min_gradient) 120 | self.train_op = self.optimizer.minimize(self.loss, global_step=tf.train.get_global_step()) 121 | 122 | # Summaries for Tensorboard 123 | self.summaries = tf.summary.merge([ 124 | tf.summary.scalar("loss", self.loss), 125 | tf.summary.histogram("loss_hist", self.losses), 126 | tf.summary.histogram("q_values_hist", self.predictions), 127 | tf.summary.scalar("max_q_value", tf.reduce_max(self.predictions)) 128 | ]) 129 | 130 | 131 | def predict(self, sess, s): 132 | 
return sess.run(self.predictions, { self.X: s }) 133 | 134 | def update(self, sess, s, a, y): 135 | feed_dict = { self.X: s, self.y: y, self.actions: a } 136 | summaries, global_step, _, loss = sess.run( 137 | [self.summaries, tf.train.get_global_step(), self.train_op, self.loss], 138 | feed_dict) 139 | if self.summary_writer: 140 | self.summary_writer.add_summary(summaries, global_step) 141 | return loss 142 | 143 | def get_action(self, sess, state): 144 | if self.epsilon >= random.random(): 145 | action = random.randrange(self.num_actions) 146 | else: 147 | action = np.argmax(self.predict(sess, adapt_state(state))) 148 | 149 | # Decay epsilon over time 150 | if self.epsilon > final_epsilon: 151 | self.epsilon -= self.epsilon_step 152 | 153 | return action 154 | 155 | def get_trained_action(self, state): 156 | action = np.argmax(self.predict(sess, adapt_state(state))) 157 | return action 158 | 159 | def copy_model_parameters(estimator1, estimator2): 160 | """ 161 | Copies the model parameters of one estimator to another. 
162 | Args: 163 | estimator1: Estimator to copy the paramters from 164 | estimator2: Estimator to copy the parameters to 165 | """ 166 | e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)] 167 | e1_params = sorted(e1_params, key=lambda v: v.name) 168 | e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)] 169 | e2_params = sorted(e2_params, key=lambda v: v.name) 170 | 171 | update_ops = [] 172 | for e1_v, e2_v in zip(e1_params, e2_params): 173 | op = e2_v.assign(e1_v) 174 | update_ops.append(op) 175 | 176 | return update_ops 177 | 178 | def create_memory(env): 179 | # Populate the replay memory with initial experience 180 | replay_memory = [] 181 | 182 | frame = env.reset() 183 | state = get_initial_state(frame) 184 | 185 | for i in range(replay_memory_init_size): 186 | action = np.random.choice(np.arange(env.action_space.n)) 187 | frame, reward, done, _ = env.step(action) 188 | 189 | next_state = np.append(state[1:, :, :], preprocess(frame), axis=0) 190 | replay_memory.append(Transition(state, action, reward, next_state, done)) 191 | if done: 192 | frame = env.reset() 193 | state = get_initial_state(frame) 194 | else: 195 | state = next_state 196 | 197 | return replay_memory 198 | 199 | 200 | def setup_summary(): 201 | with tf.variable_scope("episode"): 202 | episode_total_reward = tf.Variable(0., name="EpisodeTotalReward") 203 | tf.summary.scalar('Total Reward', episode_total_reward) 204 | episode_avg_max_q = tf.Variable(0., name="EpisodeAvgMaxQ") 205 | tf.summary.scalar('Average Max Q', episode_avg_max_q) 206 | episode_duration = tf.Variable(0., name="EpisodeDuration") 207 | tf.summary.scalar('Duration', episode_duration) 208 | episode_avg_loss = tf.Variable(0., name="EpisodeAverageLoss") 209 | tf.summary.scalar('Average Loss', episode_avg_loss) 210 | summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss] 211 | summary_placeholders = 
[tf.placeholder(tf.float32) for _ in range(len(summary_vars))] 212 | update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))] 213 | summary_op = tf.summary.merge_all(scope="episode") 214 | return summary_placeholders, update_ops, summary_op 215 | 216 | 217 | if __name__ == "__main__": 218 | from tqdm import tqdm 219 | 220 | env = gym.make(env_name) 221 | tf.reset_default_graph() 222 | 223 | # Create a glboal step variable 224 | global_step = tf.Variable(0, name='global_step', trainable=False) 225 | 226 | # Create estimators 227 | q_estimator = Estimator(env, scope="q", summaries_dir=tensorboard_path) 228 | target_estimator = Estimator(env, scope="target_q") 229 | 230 | copy_model = copy_model_parameters(q_estimator, target_estimator) 231 | 232 | summary_placeholders, update_ops, summary_op = setup_summary() 233 | 234 | # The replay memory 235 | replay_memory = create_memory(env) 236 | 237 | with tf.Session() as sess: 238 | sess.run(tf.global_variables_initializer()) 239 | 240 | q_estimator.summary_writer.add_graph(sess.graph) 241 | 242 | saver = tf.train.Saver() 243 | # Load a previous checkpoint if we find one 244 | latest_checkpoint = tf.train.latest_checkpoint(network_path) 245 | if latest_checkpoint: 246 | print("Loading model checkpoint %s...\n" % latest_checkpoint) 247 | saver.restore(sess, latest_checkpoint) 248 | 249 | total_t = sess.run(tf.train.get_global_step()) 250 | 251 | for episode in tqdm(range(n_episodes)): 252 | if total_t % save_interval == 0: 253 | # Save the current checkpoint 254 | saver.save(tf.get_default_session(), network_path) 255 | 256 | frame = env.reset() 257 | state = get_initial_state(frame) 258 | 259 | total_reward = 0 260 | total_loss = 0 261 | total_q_max = 0 262 | 263 | for duration in itertools.count(): 264 | # Maybe update the target estimator 265 | if total_t % network_update_interval == 0: 266 | sess.run(copy_model) 267 | 268 | action = q_estimator.get_action(sess, state) 269 | frame, 
reward, terminal, _ = env.step(action) 270 | 271 | processed_frame = preprocess(frame) 272 | next_state = np.append(state[1:, :, :], processed_frame, axis=0) 273 | 274 | reward = np.clip(reward, -1, 1) 275 | replay_memory.append(Transition(state, action, reward, next_state, terminal)) 276 | if len(replay_memory) > replay_memory_size: 277 | replay_memory.popleft() 278 | 279 | samples = random.sample(replay_memory, batch_size) 280 | states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples)) 281 | 282 | # Calculate q values and targets (Double DQN) 283 | adapted_state = adapt_batch_state(next_states_batch) 284 | 285 | q_values_next = q_estimator.predict(sess, adapted_state) 286 | best_actions = np.argmax(q_values_next, axis=1) 287 | q_values_next_target = target_estimator.predict(sess, adapted_state) 288 | targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * gamma * q_values_next_target[np.arange(batch_size), best_actions] 289 | 290 | # Perform gradient descent update 291 | states_batch = adapt_batch_state(states_batch) 292 | loss = q_estimator.update(sess, states_batch, action_batch, targets_batch) 293 | 294 | total_q_max += np.max(q_values_next) 295 | total_loss += loss 296 | total_t += 1 297 | total_reward += reward 298 | if terminal: 299 | break 300 | 301 | stats = [total_reward, total_q_max / duration, duration, total_loss / duration] 302 | for i in range(len(stats)): 303 | sess.run(update_ops[i], feed_dict={ 304 | summary_placeholders[i]: float(stats[i]) 305 | }) 306 | summary_str = sess.run(summary_op, ) 307 | q_estimator.summary_writer.add_summary(summary_str, episode) 308 | 309 | env.env.ale.saveScreenPNG(six.b('%s/test_image_%05i.png' % (CHART_DIR, episode))) 310 | 311 | # Save the last checkpoint 312 | saver.save(tf.get_default_session(), network_path) 313 | -------------------------------------------------------------------------------- /Chapter14/README.rst: 
-------------------------------------------------------------------------------- 1 | ========== 2 | Chapter 14 3 | ========== 4 | 5 | Support code for *Chapter 14: Big(ger) Data* 6 | 7 | Data 8 | ---- 9 | 10 | This chapter relies only on the image dataset that is packaged with the 11 | repository at ``../SimpleImageDataset/``. 12 | 13 | Scripts 14 | ------- 15 | 16 | chapter.py 17 | Code as written in the book 18 | jugfile.py 19 | Example jugfile 20 | image-classification.py 21 | Jugfile implementation of image classification from Chapter 10 22 | 23 | setup-aws.txt 24 | Commands to setup Amazon WebServices machine 25 | run-jugfile.sh 26 | Wrapper script to run jug file on jugfile.py 27 | run-image-classification.sh 28 | Wrapper script to run jug file on image-classification.py 29 | -------------------------------------------------------------------------------- /Chapter14/chapter.py: -------------------------------------------------------------------------------- 1 | from jug import TaskGenerator 2 | from glob import glob 3 | import mahotas as mh 4 | @TaskGenerator 5 | def compute_texture(im): 6 | from features import texture 7 | imc = mh.imread(im) 8 | return texture(mh.colors.rgb2gray(imc)) 9 | 10 | @TaskGenerator 11 | def chist_file(fname): 12 | from features import chist 13 | im = mh.imread(fname) 14 | return chist(im) 15 | 16 | import numpy as np 17 | to_array = TaskGenerator(np.array) 18 | hstack = TaskGenerator(np.hstack) 19 | 20 | haralicks = [] 21 | chists = [] 22 | labels = [] 23 | 24 | # Change this variable to point to 25 | # the location of the dataset is on disk 26 | basedir = '../SimpleImageDataset/' 27 | # Use glob to get all the images 28 | images = glob('{}/*.jpg'.format(basedir)) 29 | 30 | for fname in sorted(images): 31 | haralicks.append(compute_texture(fname)) 32 | chists.append(chist_file(fname)) 33 | # The class is encoded in the filename as xxxx00.jpg 34 | labels.append(fname[:-len('00.jpg')]) 35 | 36 | haralicks = to_array(haralicks) 37 | 
chists = to_array(chists) 38 | labels = to_array(labels) 39 | 40 | @TaskGenerator 41 | def accuracy(features, labels): 42 | from sklearn.linear_model import LogisticRegression 43 | from sklearn.pipeline import Pipeline 44 | from sklearn.preprocessing import StandardScaler 45 | from sklearn import cross_validation 46 | 47 | clf = Pipeline([('preproc', StandardScaler()), 48 | ('classifier', LogisticRegression())]) 49 | cv = cross_validation.LeaveOneOut(len(features)) 50 | scores = cross_validation.cross_val_score( 51 | clf, features, labels, cv=cv) 52 | return scores.mean() 53 | scores_base = accuracy(haralicks, labels) 54 | scores_chist = accuracy(chists, labels) 55 | 56 | combined = hstack([chists, haralicks]) 57 | scores_combined = accuracy(combined, labels) 58 | 59 | @TaskGenerator 60 | def print_results(scores): 61 | with open('results.image.txt', 'w') as output: 62 | for k,v in scores: 63 | output.write('Accuracy [{}]: {:.1%}\n'.format( 64 | k, v.mean())) 65 | 66 | print_results([ 67 | ('base', scores_base), 68 | ('chists', scores_chist), 69 | ('combined' , scores_combined), 70 | ]) 71 | 72 | @TaskGenerator 73 | def compute_lbp(fname): 74 | from mahotas.features import lbp 75 | imc = mh.imread(fname) 76 | im = mh.colors.rgb2grey(imc) 77 | return lbp(im, radius=8, points=6) 78 | 79 | lbps = [] 80 | for fname in sorted(images): 81 | # the rest of the loop as before 82 | lbps.append(compute_lbp(fname)) 83 | lbps = to_array(lbps) 84 | 85 | scores_lbps = accuracy(lbps, labels) 86 | combined_all = hstack([chists, haralicks, lbps]) 87 | scores_combined_all = accuracy(combined_all, labels) 88 | 89 | print_results([ 90 | ('base', scores_base), 91 | ('chists', scores_chist), 92 | ('lbps', scores_lbps), 93 | ('combined' , scores_combined), 94 | ('combined_all' , scores_combined_all), 95 | ]) 96 | -------------------------------------------------------------------------------- /Chapter14/features.py: 
-------------------------------------------------------------------------------- 1 | # This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import numpy as np 9 | import mahotas as mh 10 | 11 | 12 | def edginess_sobel(image): 13 | '''Measure the "edginess" of an image 14 | 15 | image should be a 2d numpy array (an image) 16 | 17 | Returns a floating point value which is higher the "edgier" the image is. 18 | 19 | ''' 20 | edges = mh.sobel(image, just_filter=True) 21 | edges = edges.ravel() 22 | return np.sqrt(np.dot(edges, edges)) 23 | 24 | def texture(im): 25 | '''Compute features for an image 26 | 27 | Parameters 28 | ---------- 29 | im : ndarray 30 | 31 | Returns 32 | ------- 33 | fs : ndarray 34 | 1-D array of features 35 | ''' 36 | im = im.astype(np.uint8) 37 | return mh.features.haralick(im).ravel() 38 | 39 | 40 | def color_histogram(im): 41 | '''Compute color histogram of input image 42 | 43 | Parameters 44 | ---------- 45 | im : ndarray 46 | should be an RGB image 47 | 48 | Returns 49 | ------- 50 | c : ndarray 51 | 1-D array of histogram values 52 | ''' 53 | 54 | # Downsample pixel values: 55 | im = im // 64 56 | 57 | # We can also implement the following by using np.histogramdd 58 | # im = im.reshape((-1,3)) 59 | # bins = [np.arange(5), np.arange(5), np.arange(5)] 60 | # hist = np.histogramdd(im, bins=bins)[0] 61 | # hist = hist.ravel() 62 | 63 | # Separate RGB channels: 64 | r,g,b = im.transpose((2,0,1)) 65 | 66 | pixels = 1 * r + 4 * g + 16 * b 67 | hist = np.bincount(pixels.ravel(), minlength=64) 68 | hist = hist.astype(float) 69 | return np.log1p(hist) 70 | 71 | -------------------------------------------------------------------------------- /Chapter14/image-classification.py: -------------------------------------------------------------------------------- 1 | # 
This code is supporting material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | import mahotas as mh 9 | import numpy as np 10 | from glob import glob 11 | from jug import TaskGenerator 12 | 13 | # We need to use the `features` module from chapter 10. 14 | from sys import path 15 | path.append('../ch10') 16 | 17 | 18 | # This is the jug-enabled version of the script ``figure18.py`` in Chapter 10 19 | 20 | basedir = '../SimpleImageDataset/' 21 | 22 | @TaskGenerator 23 | def compute_texture(im): 24 | '''Compute features for an image 25 | 26 | Parameters 27 | ---------- 28 | im : str 29 | filepath for image to process 30 | 31 | Returns 32 | ------- 33 | fs : ndarray 34 | 1-D array of features 35 | ''' 36 | from features import texture 37 | imc = mh.imread(im) 38 | return texture(mh.colors.rgb2grey(imc)) 39 | 40 | @TaskGenerator 41 | def chist(fname): 42 | from features import color_histogram 43 | im = mh.imread(fname) 44 | return color_histogram(im) 45 | 46 | @TaskGenerator 47 | def compute_lbp(fname): 48 | from mahotas.features import lbp 49 | imc = mh.imread(fname) 50 | im = mh.colors.rgb2grey(imc) 51 | return lbp(im, radius=8, points=6) 52 | 53 | 54 | @TaskGenerator 55 | def accuracy(features, labels): 56 | from sklearn.linear_model import LogisticRegression 57 | from sklearn.pipeline import Pipeline 58 | from sklearn.preprocessing import StandardScaler 59 | from sklearn import cross_validation 60 | # We use logistic regression because it is very fast. 
61 | # Feel free to experiment with other classifiers 62 | clf = Pipeline([('preproc', StandardScaler()), 63 | ('classifier', LogisticRegression())]) 64 | cv = cross_validation.LeaveOneOut(len(features)) 65 | scores = cross_validation.cross_val_score( 66 | clf, features, labels, cv=cv) 67 | return scores.mean() 68 | 69 | 70 | @TaskGenerator 71 | def print_results(scores): 72 | with open('results.image.txt', 'w') as output: 73 | for k,v in scores: 74 | output.write('Accuracy (LOO x-val) with Logistic Regression [{0}]: {1:.1%}\n'.format( 75 | k, v.mean())) 76 | 77 | 78 | to_array = TaskGenerator(np.array) 79 | hstack = TaskGenerator(np.hstack) 80 | 81 | haralicks = [] 82 | chists = [] 83 | lbps = [] 84 | labels = [] 85 | 86 | # Use glob to get all the images 87 | images = glob('{0}/*.jpg'.format(basedir)) 88 | for fname in sorted(images): 89 | haralicks.append(compute_texture(fname)) 90 | chists.append(chist(fname)) 91 | lbps.append(compute_lbp(fname)) 92 | labels.append(fname[:-len('00.jpg')]) # The class is encoded in the filename as xxxx00.jpg 93 | 94 | haralicks = to_array(haralicks) 95 | chists = to_array(chists) 96 | lbps = to_array(lbps) 97 | labels = to_array(labels) 98 | 99 | scores_base = accuracy(haralicks, labels) 100 | scores_chist = accuracy(chists, labels) 101 | scores_lbps = accuracy(lbps, labels) 102 | 103 | combined = hstack([chists, haralicks]) 104 | scores_combined = accuracy(combined, labels) 105 | 106 | combined_all = hstack([chists, haralicks, lbps]) 107 | scores_combined_all = accuracy(combined_all, labels) 108 | 109 | print_results([ 110 | ('base', scores_base), 111 | ('chists', scores_chist), 112 | ('lbps', scores_lbps), 113 | ('combined' , scores_combined), 114 | ('combined_all' , scores_combined_all), 115 | ]) 116 | 117 | -------------------------------------------------------------------------------- /Chapter14/jugfile.py: -------------------------------------------------------------------------------- 1 | # This code is supporting 
material for the book 2 | # Building Machine Learning Systems with Python 3 | # by Willi Richert and Luis Pedro Coelho 4 | # published by PACKT Publishing 5 | # 6 | # It is made available under the MIT License 7 | 8 | from jug import TaskGenerator 9 | from time import sleep 10 | 11 | 12 | @TaskGenerator 13 | def double(x): 14 | sleep(4) 15 | return 2 * x 16 | 17 | 18 | @TaskGenerator 19 | def add(a, b): 20 | return a + b 21 | 22 | 23 | @TaskGenerator 24 | def print_final_result(oname, value): 25 | with open(oname, 'w') as output: 26 | output.write("Final result: {0}\n".format(value)) 27 | 28 | input = 2 29 | y = double(input) 30 | z = double(y) 31 | 32 | y2 = double(7) 33 | z2 = double(y2) 34 | print_final_result('output.txt', add(z, z2)) 35 | -------------------------------------------------------------------------------- /Chapter14/run-image-classification.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | jug execute image-classification.py 4 | -------------------------------------------------------------------------------- /Chapter14/run-jugfile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | jug execute 4 | 5 | -------------------------------------------------------------------------------- /Chapter14/setup-aws.txt: -------------------------------------------------------------------------------- 1 | sudo yum update 2 | sudo yum -y install python-devel python-pip numpy scipy python-matplotlib 3 | sudo yum -y install gcc-c++ 4 | sudo yum -y install git 5 | sudo pip-python install -U pip 6 | sudo pip install scikit-learn jug mahotas 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Building Machine Learning Systems with Python - Third edition 2 | 3 | Building Machine Learning Systems with Python - Third edition 4 | 5 | This is the code repository for [Building Machine Learning Systems with Python - Third edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-third-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788623223), published by Packt. 6 | 7 | **Explore machine learning and deep learning techniques for building intelligent systems using scikit-learn and TensorFlow** 8 | 9 | ## What is this book about? 10 | Machine learning allows systems to learn without being explicitly programmed. 
Python is one of the most popular languages used to develop machine learning applications which take advantage of its extensive library support. This third edition of Building Machine Learning Systems with Python addresses recent developments in the field, by covering the most used datasets and libraries to help you build practical machine learning systems. 11 | 12 | This book covers the following exciting features: 13 | * Build a classification system that can be applied to text, image, and sound 14 | * Employ Amazon Web Services (AWS) to run analysis on the cloud 15 | * Solve problems related to regression using TensorFlow 16 | * Recommend products to users based on their past purchases 17 | * Explore the steps required to add collaborative filtering using TensorFlow 18 | 19 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1788623223) today! 20 | 21 | https://www.packtpub.com/ 23 | 24 | ## Instructions and Navigations 25 | All of the code is organized into folders. For example, Chapter01. 26 | 27 | The code will look like the following: 28 | ``` 29 | def fetch_posts(fn): 30 | for line in open(fn, "r"): 31 | post_id, text = line.split("\t") 32 | yield int(post_id), text.strip() 33 | 34 | ``` 35 | 36 | **Following is what you need for this book:** 37 | Building Machine Learning Systems with Python is for data scientists, machine learning developers, and Python developers who want to learn how to build increasingly complex machine learning systems. You will use Python's machine learning capabilities to develop effective solutions. Prior knowledge of Python programming is expected. 38 | 39 | With the following software and hardware list you can run all code files present in the book (Chapter 1-14). 
40 | 41 | ### Software and Hardware List 42 | 43 | | Chapter | Software required | OS required | 44 | | -------- | ------------------------------------------------------| -----------------------------------| 45 | | 1-14 | Python 3, NumPy, SciPy, scikit-learn (latest version) | Ubuntu/Linux, macOS or Windows | 46 | 47 | 48 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](https://www.packtpub.com/sites/default/files/downloads/BuildingMachineLearningSystemswithPythonThirdedition_ColorImages.pdf). 49 | 50 | ### Related products 51 | * Mastering Machine Learning Algorithms [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-algorithms?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788621113) [[Amazon]](https://www.amazon.com/dp/1788621115) 52 | 53 | * Machine Learning Solutions [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-solutions?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781788390040) [[Amazon]](https://www.amazon.com/dp/1788390040) 54 | 55 | ## Get to Know the Authors 56 | **Luis Pedro Coelho** 57 | is a computational biologist who analyzes DNA from microbial communities to characterize their behavior. He has also worked extensively in bioimage informatics―the application of machine learning techniques for the analysis of images of biological specimens. His main focus is on the processing and integration of large-scale datasets. He has a PhD from Carnegie Mellon University and has authored several scientific publications. In 2004, he began developing in Python and has contributed to several open source libraries. He is currently a faculty member at Fudan University in Shanghai. 
58 | 59 | **Willi Richert** 60 | has a PhD in machine learning/robotics, where he has used reinforcement learning, hidden Markov models, and Bayesian networks to let heterogeneous robots learn by imitation. Now at Microsoft, he is involved in various machine learning areas, such as deep learning, active learning, or statistical machine translation. Willi started as a child with BASIC on his Commodore 128. Later, he discovered Turbo Pascal, then Java, then C++—only to finally arrive at his true love: Python. 61 | 62 | **Matthieu Brucher** 63 | is a computer scientist who specializes in high-performance computing and computational modeling and currently works for JPMorgan in their quantitative research branch. He is also the lead developer of Audio ToolKit, a library for real-time audio signal processing. He has a PhD in machine learning and signals processing from the University of Strasbourg, two Master of Science degrees—one in digital electronics and signal processing and another in automation – from the University of Paris XI and Supelec, as well as a Master of Music degree from Bath Spa University. 64 | 65 | ## Other books by the authors 66 | * [Building Machine Learning Systems with Python](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781782161400) 67 | * [Building Machine Learning Systems with Python - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/building-machine-learning-systems-python-second-edition?utm_source=repository&utm_medium=github&utm_campaign=repository&utm_term=9781784392772) 68 | 69 | 70 | ### Suggestions and Feedback 71 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 
72 | 73 | -------------------------------------------------------------------------------- /SimpleImageDataset/building00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building04.jpg 
-------------------------------------------------------------------------------- /SimpleImageDataset/building05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building09.jpg -------------------------------------------------------------------------------- 
/SimpleImageDataset/building10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building15.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/building29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/building29.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene00.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene04.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene05.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene09.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene15.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/scene29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/scene29.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text00.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text00.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text01.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text02.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text03.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text04.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text05.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text05.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text06.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text07.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text08.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text09.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text10.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text11.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text12.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text13.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text14.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text15.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text15.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text16.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text17.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text18.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text19.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text20.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text20.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text21.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text22.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text23.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text24.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text25.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text25.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text26.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text27.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text28.jpg -------------------------------------------------------------------------------- /SimpleImageDataset/text29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Building-Machine-Learning-Systems-with-Python-Third-edition/41225b131f3215cc6e10ddba1dc1b27264e01ad3/SimpleImageDataset/text29.jpg -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: BMLS3 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - bottle=0.12.9=py36_0 7 | - jug=1.6.4=py_0 8 | - 
pyyaml=3.12=py36_1 9 | - yaml=0.1.6=0 10 | - bleach=1.5.0=py36_0 11 | - cairo=1.14.8=0 12 | - certifi=2016.2.28=py36_0 13 | - cycler=0.10.0=py36_0 14 | - dbus=1.10.20=0 15 | - decorator=4.1.2=py36_0 16 | - entrypoints=0.2.3=py36_0 17 | - expat=2.1.0=0 18 | - fontconfig=2.12.1=3 19 | - freetype=2.5.5=2 20 | - glib=2.50.2=1 21 | - graphviz=2.38.0=5 22 | - gst-plugins-base=1.8.0=0 23 | - gstreamer=1.8.0=0 24 | - harfbuzz=0.9.39=2 25 | - html5lib=0.9999999=py36_0 26 | - icu=54.1=0 27 | - ipykernel=4.6.1=py36_0 28 | - ipython=6.1.0=py36_0 29 | - ipython_genutils=0.2.0=py36_0 30 | - ipywidgets=6.0.0=py36_0 31 | - jbig=2.1=0 32 | - jedi=0.10.2=py36_2 33 | - jinja2=2.9.6=py36_0 34 | - jpeg=9b=0 35 | - jsonschema=2.6.0=py36_0 36 | - jupyter=1.0.0=py36_3 37 | - jupyter_client=5.1.0=py36_0 38 | - jupyter_console=5.2.0=py36_0 39 | - jupyter_core=4.3.0=py36_0 40 | - libffi=3.2.1=1 41 | - libgcc=5.2.0=0 42 | - libgfortran=3.0.0=1 43 | - libiconv=1.14=0 44 | - libpng=1.6.30=1 45 | - libsodium=1.0.10=0 46 | - libtiff=4.0.6=3 47 | - libtool=2.4.2=0 48 | - libxcb=1.12=1 49 | - libxml2=2.9.4=0 50 | - markupsafe=1.0=py36_0 51 | - matplotlib=2.0.2=np113py36_0 52 | - mistune=0.7.4=py36_0 53 | - mkl=2017.0.3=0 54 | - nbconvert=5.2.1=py36_0 55 | - nbformat=4.4.0=py36_0 56 | - notebook=5.0.0=py36_0 57 | - numpy=1.13.1=py36_0 58 | - openssl=1.0.2l=0 59 | - pandocfilters=1.4.2=py36_0 60 | - pango=1.40.3=1 61 | - path.py=10.3.1=py36_0 62 | - pcre=8.39=1 63 | - pexpect=4.2.1=py36_0 64 | - pickleshare=0.7.4=py36_0 65 | - pip=9.0.1=py36_1 66 | - pixman=0.34.0=0 67 | - prompt_toolkit=1.0.15=py36_0 68 | - ptyprocess=0.5.2=py36_0 69 | - pygments=2.2.0=py36_0 70 | - pyparsing=2.2.0=py36_0 71 | - pyqt=5.6.0=py36_2 72 | - python=3.6.2=0 73 | - python-dateutil=2.6.1=py36_0 74 | - python-graphviz=0.5.2=py36_0 75 | - pytz=2017.2=py36_0 76 | - pyzmq=16.0.2=py36_0 77 | - qt=5.6.2=5 78 | - qtconsole=4.3.1=py36_0 79 | - readline=6.2=2 80 | - scikit-learn=0.19.0=np113py36_0 81 | - scipy=0.19.1=np113py36_0 82 
| - setuptools=36.4.0=py36_1 83 | - simplegeneric=0.8.1=py36_1 84 | - sip=4.18=py36_0 85 | - six=1.10.0=py36_0 86 | - sqlite=3.13.0=0 87 | - terminado=0.6=py36_0 88 | - testpath=0.3.1=py36_0 89 | - tk=8.5.18=0 90 | - tornado=4.5.2=py36_0 91 | - traitlets=4.3.2=py36_0 92 | - wcwidth=0.1.7=py36_0 93 | - wheel=0.29.0=py36_0 94 | - widgetsnbextension=3.0.2=py36_0 95 | - xz=5.2.3=0 96 | - zeromq=4.1.5=0 97 | - zlib=1.2.11=0 98 | - pip: 99 | - ipython-genutils==0.2.0 100 | - jupyter-client==5.1.0 101 | - jupyter-console==5.2.0 102 | - jupyter-core==4.3.0 103 | - prompt-toolkit==1.0.15 104 | prefix: /home/luispedro/.conda/envs/BMLS3 105 | 106 | --------------------------------------------------------------------------------