├── LICENSE ├── README.md ├── chapter02 ├── data │ ├── least-squares.gnumeric │ ├── winequality-red.csv │ └── winequality-white.csv ├── ed2-ch2-s1.ipynb ├── ed2-ch2-s2.ipynb ├── ed2-ch2-s3.ipynb ├── ed2-ch2-s4.ipynb ├── ed2-ch2-s5.ipynb └── ed2-ch2-s7.ipynb ├── chapter03 ├── e2-ch3-s1.ipynb ├── e2-ch3-s2.ipynb └── e2-ch3-s3.ipynb ├── chapter04 ├── e2-ch4-s2.ipynb ├── ed2-ch4-s1.ipynb ├── ed2-ch4-s10.ipynb ├── ed2-ch4-s3.ipynb ├── ed2-ch4-s4.ipynb ├── ed2-ch4-s5.ipynb ├── ed2-ch4-s6.ipynb ├── ed2-ch4-s7.ipynb ├── ed2-ch4-s8.ipynb └── ed2-ch4-s9.ipynb ├── chapter05 ├── e2-ch5-s3.ipynb ├── ed2-ch5-s1.ipynb ├── ed2-ch5-s2.ipynb ├── ed2-ch5-s4.ipynb ├── ed2-ch5-s5.ipynb ├── ed2-ch5-s6.ipynb ├── ed2-ch5-s7.ipynb ├── ed2-ch5-s8.ipynb └── winequality-red.csv ├── chapter06 ├── SMSSpamCollection ├── ed2-ch6-s0.ipynb ├── ed2-ch6-s1.ipynb ├── ed2-ch6-s2.ipynb ├── ed2-ch6-s3.ipynb ├── ed2-ch6-s4.ipynb ├── sms.csv └── train.tsv ├── chapter07 ├── ed2-ch7-s1.ipynb └── pima-indians-diabetes.data ├── chapter08 ├── ad.DOCUMENTATION ├── ad.data ├── ad.names └── ed2-ch8-s1.ipynb ├── chapter09 ├── ad.data ├── ed2-ch9-s1.ipynb ├── ed2-ch9-s2.ipynb ├── ed2-ch9-s3.ipynb └── ed2-ch9-s4.ipynb ├── chapter10 └── e2-ch10-s1.ipynb ├── chapter11 ├── ed2-ch11-s1.ipynb └── ed2-ch11-s2.ipynb ├── chapter12 ├── ed2-ch12-s1.ipynb └── ed2-ch12-s2.ipynb ├── chapter13 ├── ed2-ch13-s1.ipynb ├── ed2-ch13-s2.ipynb ├── ed2-ch13-s3.ipynb ├── ed2-ch13-s4.ipynb └── tree.jpg └── chapter14 ├── ed2-ch14-s1.ipynb ├── ed2-ch14-s2.ipynb └── ed2-ch14-s3.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 
| copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mastering Machine Learning with scikit-learn - Second Edition 2 | This is the code repository for [Mastering Machine Learning with scikit-learn - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-scikit-learn-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781788299879), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish. 3 | ## About the Book 4 | This book examines a variety of machine learning models including k-nearest neighbors, logistic regression, naive Bayes, k-means, decision trees, and artificial neural networks. It discusses data preprocessing, hyperparameter optimization, and ensemble methods. You will build systems that classify documents, recognize images, detect ads, and more. 
You will learn to use scikit-learn’s API to extract features from categorical variables, text and images; evaluate model performance; and develop an intuition for how to improve your model’s performance. 5 | 6 | ## Instructions and Navigation 7 | All of the code is organized into folders. Each folder is named `chapter` followed by the two-digit chapter number. For example, chapter02. 8 | 9 | 10 | 11 | The code will look like the following: 12 | ``` 13 | Code words in text, database table names, folder names, filenames, file extensions, 14 | pathnames, dummy URLs, user input, and Twitter handles are shown as follows: "The 15 | package is named sklearn because scikit-learn is not a valid Python package name." 16 | # In[1]: 17 | import sklearn 18 | sklearn.__version__ 19 | # Out[1]: 20 | '0.18.1' 21 | ``` 22 | 23 | The examples in this book require Python >= 2.7 or >= 3.3 and pip, the PyPA-recommended tool for installing Python packages. The examples are intended to be executed in a Jupyter notebook or an IPython interpreter. Chapter 1, The Fundamentals of Machine Learning, shows how to install scikit-learn 0.18.1, its dependencies, and other libraries on Ubuntu, Mac OS, and Windows. 
24 | 25 | ## Related Products 26 | * [Mastering Machine Learning with scikit-learn](https://www.packtpub.com/big-data-and-business-intelligence/mastering-machine-learning-scikit-learn?utm_source=github&utm_medium=repository&utm_campaign=9781783988365) 27 | 28 | * [Learning scikit-learn: Machine Learning in Python](https://www.packtpub.com/big-data-and-business-intelligence/learning-scikit-learn-machine-learning-python?utm_source=github&utm_medium=repository&utm_campaign=9781783281930) 29 | 30 | * [Data Science and Machine Learning with Python - Hands On!](https://www.packtpub.com/big-data-and-business-intelligence/data-science-and-machine-learning-python-hands?utm_source=github&utm_medium=repository&utm_campaign=9781787280748) 31 | 32 | -------------------------------------------------------------------------------- /chapter02/data/least-squares.gnumeric: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Mastering-Machine-Learning-with-scikit-learn-Second-Edition/db39c2f407fbf515f67b368e0af5a7f042c0f0f7/chapter02/data/least-squares.gnumeric -------------------------------------------------------------------------------- /chapter02/ed2-ch2-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "data": { 14 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHRlJREFUeJzt3XucHGWd7/HPlyRDgISbjBggkiNCNCAXQXAQddigouJy\nOYiyoiBqdFdUXqKLh909BFk26FF0lV1dEAQhiFFALl5xlhHUUSEYgQABhECAkAgkQNDNkOR3/qhn\nqotmpqd7Qnf1zHzfr1e/pi5PVf366Zr+dT1V9ZQiAjMzM4BNyg7AzMzah5OCmZnlnBTMzCznpGBm\nZjknBTMzyzkpmJlZzklhFJH0E0nHlx3HUCQtltTdwu3NkBSSJrZqm1XbP0HSr0rY7mmSvlXCdrsl\nPVwYb+nnba1Ryj+TDU3SUmB7YD3wLPAT4KSIWBMRby8ztuFExO5lxzAUSXOBV0bEcYVpvcClEdHy\nL9iNERH/trHrkDQDeACYFBHrRhhHKZ93+h/5cET8ooztj3U+UmhP74qIKcBrgf2Afy45nprK+qVu\n1ihl/L1XgyunjUXEI2RHCntA9stW0ofT8B8lrSm8Ih3en1s1fV36lYykz0n6k6RnJN0p6cihti1p\nrqQfSPpeKn+rpL0K85dKOlXSbcCzkiamaYek+RNSM8fA9hZKmp7mvUrS9ZKelLRE0jE14uiVNE/S\n7yU9LelqSdsOUXYHSdek9d4n6SNp+qHAacB7Up38UdJZwBuBgfo6d7jYJL0krf9pSb8Hdqn1+Un6\nvqTHJD0l6UZJuxfmvUTStWldN0v612JTlKR/l7QszV8o6Y1Vn82laXigCe14SQ9JelzSPxXK7i/p\nlrSeFZLOSbNuTH9Xp/ffNUj8m0m6SNIqSXcCr6uaX/y895fUJ2m1pOVpP+wolA1J/yDp3rQ/nClp\nF0m/SbEtqCp/mKRFaX2/kbRnmn4J8HLg2hT3P6bpr0/lVqfPt7uwrl5JZ0n6NfAX4BW1PrdxLyL8\naqMXsBQ4JA1PBxYDZ6bxXrLD5upl5gB3A1tWTd8b+DOwTxp/N7AD2Y+B95A1T00bIo65wHPA0cAk\n4DNUmhsG4lyUYtxskNg/C9wOzAQE7AW8BNgCWAZ8kKz5ch/gcWDWEHH0Ao+QJcYtgCvImnwAZgAB\nTEzjNwL/CUwuvPe/KbyfSwdZ94cL4zVjAy4HFqRye6S4flXjszwRmApsCnwVWFSYd3l6bQ7MStv9\nVWH+cam+JgKnAI8Bk6vfS6EOzgc2S/W8Fnh1mt8HvD8NTwFeP1jdDRH/2cBNwLbpc74DeHiIfXVf\n4PUp3hnAXcDJhbIBXA1sCeyeYuwh+4LeCrgTOD6V3QdYCRwATACOT9vatHq7aXxH4AngHWT79lvS\neGfhc34obXciaR/2a4jPvewA/Kr6QLIdfg2wGniQ7Etu4Eu3l6qkAByU/oF2q5remdb13hrbWgQc\nPsS8ucBvC+ObAMuBNxbiPHGQ2Ae+JJYMtm6yZHRT1bT/Ak4fIo5e4OzC+CygP31Z5F9s6UtrPTC1\nUHYecFHh/QyXFIaMLW3vOeBVhXn/Ro2kULWerVOsWxXWNbMw/19rrQtYBexV/V4KdbBToezvBz53\nskR5BrBd1fryuquxzfuBQwvjcxgiKQyy7MnAVYXxAN5QGF8InFoY/zLw1TT8DdIPocL8JcCbB9su\ncCpwSVX5n1FJMr3A5zf2f3O8vNx81J6OiIitI2LniPiHiPjrYIVSc8wCsp3/nsL0ScAPgMsi4vLC\n9A8UDslXk/3a3a5GHMsGBiJiA/Aw2ZHGC+YPYjrwp0Gm7wwcMBBDiuN9wMvqiYMsUU4aJO4dgCcj\n4pmqsjvWWG8jsXWSJZ/qWAaVms/OTs1nT5N9kZHiHmxdy6qW/4yku1LT02qyZFLrs3qsMPwXsqMC\ngA8BuwF3p2aqw2qso9oO1P9+d5N0XWoue5osYVbHu6Iw/NdBx
gdi3hk4pepzmM7z972inYF3V5U/\nCJhWKFNrX7UCnyAcpSRtBvyQ7NfVT6pmfx14msIJakk7kzUxzAb6ImK9pEVkTTtDmV5YfhNgJ+DR\nwvxaXewuI2tzv2OQ6b+MiLfUWHbIOMjak58ja9YpTn8U2FbS1EJieDlZE89QsVZPGzI2SROAdWmb\ndxfWP5S/Aw4HDiFLCFuR/doXWbPWOrL6HEjmxbp+I/CPZJ/V4ojYIGlg2YZExL3AsenzOwr4gaSX\nUPuzG7CcShMm1H6/3wD+ABwbEc9IOpms6XEklgFnRcRZQ8wf7HO7JCI+UmOd7g66Tj5SGL0uBO6O\niC8WJ0r6KPBm4H3p1/2ALcj+Mf6cyn2QdAK7hn0lHaXs6qKTydqBf1tnfN8CzpS0qzJ7pi+j64Dd\nJL1f0qT0ep2kV9dY13GSZknaHPg88IOIWF8sEBHLgN8A8yRNTicmPwRcmoqsAGbo+VeerOD5Jx2H\njC1t70pgrqTNJc0ia+seylSy+nqC7LxBfhnpIOt6FfCBqmXXkX1WEyX9X7K2+IZJOk5SZ9oXVqfJ\nG9K6N1D7pOsC4P9I2kbSTsAnapSdSvZDZE16P38/kniT84GPSTog7TtbSHqnpKlpfvXndinwLklv\nS0dok5VddLHTRsQwbjkpjF7vBY7U8680eiNwLNk/zKOF6adFxJ1k7bZ9ZP9UrwF+Pcw2riZrZ18F\nvB84KiKeqzO+c8i+VH5O9mVxAdm5kWeAt6b4HyVr9vgC2cnYoVwCXJTKTgY+OUS5Y8nayh8FriI7\nTzFwLfv3098nJN2ahv8dOFrZ1TVfqyO2k8iaOB5L8Xy7RszfIWtueYTsJGp1Mj2J7OjhsfT+vkuW\nRCBrD/8p2VHEg8D/MPLmj0OBxZLWkL3f90bEXyPiL8BZwK9Tk8vrB1n2jLT9B8g+x0tqbOczZEdH\nz5B9qX9vhPESEbcAHwHOJdv37gNOKBSZB/xzivsz6QfB4WRXmP2ZrK4+i7/fRkTpRIzZ82iQm71K\niqOXUXiDWaMkfQF4WUS07R3rNj44k5qVQNn9EHum5pH9yZq6rio7LrOmJQVJ0yXdoOwmqcWSPpWm\nz5X0SLoKZpGkdzQrBrM2NpXsvMKzZE0tXyZrrjMrVdOajyRNI7sx6tZ0gmghcARwDLAmIr7UlA2b\nmdmINe2S1IhYTnZJG+kStbto7JpxMzNrsZacaFbWI+ONZJdAfprsSoKngVuAUyJi1SDLzCG7g5LJ\nkyfv+/KX17pEevzYsGEDm2ziU0HguihyXVS4LiruueeexyOis5Flmp4UJE0Bfkl2M8qVkrYnu/Eo\ngDPJmphOrLWOmTNnxpIlS5oa52jR29tLd3d32WG0BddFheuiwnVRIWlhROzXyDJNTafKulu4Apgf\nEVcCRMSKiFifbqY5H9i/mTGYmVn9mnn1kchuWLorIs4pTC/2R3IkL+wGwczMStLMvo/eQHYX7O2p\njx3I7jg8VtLeZM1HS4GPNjEGMzNrQDOvPvoVg3fg9eNmbdPMzDaOT9GbmVnOScHMzHJOCmZmlnNS\nMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpZzUjAzs5yTgpmZ5ZwUzMws\n56RgZmY5JwUzM8s5KZiZWc5JwczMck4KZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmOScFMzPLOSmY\nmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpZr\nWlKQNF3SDZLulLRY0qfS9G0lXS/p3vR3m2bFYGZmjWnmkcI64JSImAW8Hvi4pFnA54CeiNgV6Enj\nZmbWBpqWFCJieUTcmoafAe4CdgQOBy5OxS4GjmhWDGZm1hhFRPM3Is0AbgT2AB6KiK3TdAGrBsar\nlpkDzAHo7Ozcd8GCBU2Pc
zRYs2YNU6ZMKTuMtuC6qHBdVLguKg4++OCFEbFfI8s0PSlImgL8Ejgr\nIq6UtLqYBCStioia5xVmzpwZS5YsaWqco0Vvby/d3d1lh9EWXBcVrosK10WFpIaTQlOvPpI0CbgC\nmB8RV6bJKyRNS/OnASubGYOZmdWvmVcfCbgAuCsizinMugY4Pg0fD1zdrBjMzKwxE5u47jcA7wdu\nl7QoTTsNOBtYIOlDwIPAMU2MwczMGtC0pBARvwI0xOzZzdqumZmNnO9oNjOznJOCmZnlnBTMzCzn\npGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpZzUjAzs5yTgpk1VV9fH/PmzaOvr6/sUKwOzez7yMzG\nub6+PmbPnk1/fz8dHR309PTQ1dVVdlhWg48UzKxpent76e/vZ/369fT399Pb21t2SDYMJwUza5ru\n7m46OjqYMGECHR0dfvjNKODmIzNrmq6uLnp6evKnobnpqP05KZi1ib6+vjH55dnV1TWm3s9Y56Rg\n1gZ8Qtbahc8pmLUBn5C1duGkYNYGfELW2oWbj8zagE/IWrtwUjBrEz4ha+3AzUdmZpZzUjAzs5yT\ngpmZ5ZwUzMws11BSkLSNpD2bFYyZmZVr2KQgqVfSlpK2BW4Fzpd0TvNDMzOzVqvnSGGriHgaOAr4\nTkQcABzS3LDMzKwM9SSFiZKmAccA1zU5HjMzK1E9SeEM4GfAfRFxs6RXAPc2NywzMytDzTuaJU0A\npkdEfnI5Iu4H/nezAzMzs9areaQQEeuBY1sUi5mZlayevo9+Lelc4HvAswMTI+LWpkVlZmalqCcp\n7J3+fr4wLYC/efHDMTOzMg2bFCLi4FYEYmZm5aur62xJ7wR2ByYPTIuIzw+9BEi6EDgMWBkRe6Rp\nc4GPAH9OxU6LiB83HraZmTVDPXc0fxN4D/AJQMC7gZ3rWPdFwKGDTP9KROydXk4IZmZtpJ77FA6M\niA8AqyLiDKAL2G24hSLiRuDJjYzPzMxaqJ7mo7+mv3+RtAPwBDBtI7Z5kqQPALcAp0TEqsEKSZoD\nzAHo7Oz0g8yTNWvWuC4S10WF66LCdbFxFBG1C0j/AnwdmA38B9mVR9+KiH8ZduXSDOC6wjmF7YHH\n0zrOBKZFxInDrWfmzJmxZMmS4YqNCwPP8DXXRZHrosJ1USFpYUTs18gy9Vx9dGYavELSdcDkiHhq\nJAFGxIqBYUnn476UzMzaypBJQdJRNeYREVc2ujFJ0yJieRo9Erij0XWYmVnz1DpSeFeNeQHUTAqS\nvgt0A9tJehg4HeiWtHdafinw0UaCNTOz5hoyKUTEBzdmxRExWJ9JF2zMOs3MrLlqNR99utaCEeGn\nr5mZjTG1mo+mtiwKMzNrC7Waj85oZSBmZla+erq52EnSVZJWptcVknZqRXBmZtZa9XRz8W3gGmCH\n9Lo2TTMzszGmnqTQGRHfjoh16XUR0NnkuMzMrAT1JIUnJB0naUJ6HUfW/5GZmY0x9SSFE4FjgMeA\n5cDRwEbdw2BmZu2pnr6PHgT+tgWxmJlZyWrdvPZ1su4oBhURn2xKRGZmVppazUe3AAvJHsH5WuDe\n9Nob6Gh+aGZD6+vrY/78+fT19ZUditmYUuvmtYsBJP09cFBErEvj3wRuak14Zi/U19fH7NmzWbt2\nLfPnz6enp4eurq6ywzIbE+o50bwNsGVhfEqaZlaK3t5e+vv72bBhA/39/X7KltmLqJ7HcZ4N/EHS\nDYCANwFzmxmUWS3d3d10dHSwdu1aOjo6/JQtsxdRPVcffVvST4AD0qRTI+Kx5oZlNrSuri56enq4\n8MILOfHEE910ZPYiqudIgZQErm5yLGZ16+rqYu3atU4IZi+yes4pmJnZOOGkYGZmubqajyRNALYv\nlo+Ih5oVlJmZlWPYpCDpE8DpwApgQ5ocwJ5NjMvMzEpQz5HCp4CZEeGeUc3Mxrh6ziksA55
qdiBm\nZla+eo4U7gd6Jf0IWDswMSLOaVpUZmZWinqSwkPp1YE7wjMzG9PquaP5jFYEYmZm5av1PIWvRsTJ\nkq5lkOcqRIQfvGNmNsbUOlK4JP39UisCMTOz8tV6nsLC9PeXrQvHzMzK5G4uzMws56RgZma5upOC\npM2bGYiZmZVv2KQg6UBJdwJ3p/G9JP1n0yMzM7OWq+dI4SvA24AnACLij2SP5DQzszGmruajiFhW\nNWl9E2IxM7OS1dPNxTJJBwIhaRJZr6l3NTcsMzMrQz1HCh8DPg7sCDwC7J3Ga5J0oaSVku4oTNtW\n0vWS7k1/txlp4GZm9uIbNilExOMR8b6I2D4iXhoRx9X5bIWLgEOrpn0O6ImIXYGeNG5mZm2inquP\nLpa0dWF8G0kXDrdcRNwIPFk1+XDg4jR8MXBEA7GamVmT1XNOYc+IWD0wEhGrJO0zwu1tHxHL0/Bj\nZM99HpSkOcAcgM7OTnp7e0e4ybFlzZo1rovEdVHhuqhwXWycepLCJpK2iYhVkJ0XqHO5miIiJL2g\n99XC/POA8wBmzpwZ3d3dG7vJMaG3txfXRcZ1UeG6qHBdbJx6vty/DPRJ+j4g4GjgrBFub4WkaRGx\nXNI0YOUI12NmZk1Qz4nm7wBHASvImnyOiohLai81pGuA49Pw8cDVI1yPmZk1Qa2H7GwZEU+n5qLH\ngMsK87aNiOqTyNXLfxfoBraT9DBwOnA2sEDSh4AHgWM2/i2YmdmLpVbz0WXAYcBCnv/kNaXxV9Ra\ncUQcO8Ss2Y0EaGZmrVPrITuHSRLw5oh4qIUxmZlZSWqeU4iIAH7UolhslOvr62PevHn09fWVHYqZ\njVA9Vx/dKul1EXFz06OxUauvr4/Zs2fT399PR0cHPT09dHV1lR2WmTWonr6PDgB+K+lPkm6TdLuk\n25odmI0uvb299Pf3s379evr7+33zkNkoVc+RwtuaHoWNet3d3XR0dORHCr55yGx0qnVJ6mSyHlJf\nCdwOXBAR61oVmI0uXV1d9PT05HeTuunIbHSqdaRwMfAccBPwdmAW2bMUzAbV1dXlZGA2ytVKCrMi\n4jUAki4Aft+akMzMrCy1TjQ/NzDgZiMzs/Gh1pHCXpKeTsMCNkvjIruFYcumR2dmZi1V647mCa0M\nxMzMylfPfQpmZjZOOCmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpDDG+cE3ZtaIerrO\ntlHKD74xs0b5SGEM84NvzKxRTgpj2MCDbyZMmOAH35hZXdx8NIb5wTdm1ignhTHOD74xs0a4+cjM\nzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmZpYrpZsLSUuB\nZ4D1wLqI2K+MOMzM7PnK7Pvo4Ih4vMTtm5lZFTcfmZlZThHR+o1KDwCrgAD+KyLOG6TMHGAOQGdn\n574LFixobZBtas2aNUyZMqXsMNqC66LCdVHhuqg4+OCDFzbaPF9WUtgxIh6R9FLgeuATEXHjUOVn\nzpwZS5YsaV2AbWzg2QjmuihyXVS4LiokNZwUSmk+iohH0t+VwFXA/mXEYWZmz9fypCBpC0lTB4aB\ntwJ3tDoOMzN7oTKuPtoeuErSwPYvi4iflhCHmZlVaXlSiIj7gb1avV0zMxueL0k1M7Ock4KZmeWc\nFMzMLOekYGZmOScFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMz\nyzkpmJlZzknBzMxyTgpmZpZzUjAzs5yTgpmZ5ZwUzMws56RgZmY5JwUzM8s5KZiZWc5JwczMck4K\nZmaWc1IwM7Ock4KZmeWcFMzMLOekYGZmOScFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnl\nnBTMzCxXSlKQdKikJZLuk/S5MmIwM7MXanlSkDQB+A/
g7cAs4FhJs1odh5mZvVAZRwr7A/dFxP0R\n0Q9cDhxeQhxmZlZlYgnb3BFYVhh/GDigupCkOcCcNLpW0h0tiG002A54vOwg2oTrosJ1UeG6qJjZ\n6AJlJIW6RMR5wHkAkm6JiP1KDqktuC4qXBcVrosK10WFpFsaXaaM5qNHgOmF8Z3SNDMzK1kZSeFm\nYFdJ/0tSB/Be4JoS4jAzsyotbz6KiHWSTgJ+BkwALoyIxcMsdl7zIxs1XBcVrosK10WF66Ki4bpQ\nRDQjEDMzG4V8R7OZmeWcFMzMLNfWScHdYTyfpKWSbpe0aCSXmo1mki6UtLJ4v4qkbSVdL+ne9Heb\nMmNslSHqYq6kR9K+sUjSO8qMsRUkTZd0g6Q7JS2W9Kk0fdztFzXqouH9om3PKaTuMO4B3kJ2g9vN\nwLERcWepgZVI0lJgv4gYdzfmSHoTsAb4TkTskaZ9EXgyIs5OPxq2iYhTy4yzFYaoi7nAmoj4Upmx\ntZKkacC0iLhV0lRgIXAEcALjbL+oURfH0OB+0c5HCu4Ow3IRcSPwZNXkw4GL0/DFZP8EY94QdTHu\nRMTyiLg1DT8D3EXWY8K42y9q1EXD2jkpDNYdxoje5BgSwM8lLUzdgIx320fE8jT8GLB9mcG0gZMk\n3Zaal8Z8k0mRpBnAPsDvGOf7RVVdQIP7RTsnBXuhgyLitWQ9zH48NSMYEFk7aHu2hbbGN4BdgL2B\n5cCXyw2ndSRNAa4ATo6Ip4vzxtt+MUhdNLxftHNScHcYVSLikfR3JXAVWRPbeLYitaUOtKmuLDme\n0kTEiohYHxEbgPMZJ/uGpElkX4LzI+LKNHlc7heD1cVI9ot2TgruDqNA0hbpBBKStgDeCoz3nmOv\nAY5Pw8cDV5cYS6kGvgSTIxkH+4YkARcAd0XEOYVZ426/GKouRrJftO3VRwDp8qmvUukO46ySQyqN\npFeQHR1A1j3JZeOpPiR9F+gm6xZ5BXA68ENgAfBy4EHgmIgY8ydgh6iLbrImggCWAh8ttKuPSZIO\nAm4Cbgc2pMmnkbWlj6v9okZdHEuD+0VbJwUzM2utdm4+MjOzFnNSMDOznJOCmZnlnBTMzCznpGBm\nZjknBWsLktanXhwXS/qjpFMkbZLm7Sfpa03e/hGSZm3kOhqOU9KPJW09gm11S7qu0eXMhtPyx3Ga\nDeGvEbE3gKSXApcBWwKnR8QtQLO7Cj8CuA6ouxdeSRMjYt3A+EjijIgx38W1jS4+UrC2k7rxmEPW\nkZeKv4ol7S+pT9IfJP1G0sw0/QRJP0z95y+VdJKkT6dyv5W0bSq3i6Sfpk4Fb5L0KkkHAn8L/L90\ntLLLYOXS8hdJ+qak3wFfLMZdFefc1AFZr6T7JX1ysPeaYt1O0gxJd0k6Px0t/VzSZqnMKyX9Ih1B\n3Sppl7T4FEk/kHS3pPnprlYk7Svplyn2nxW6fPiksv72b5N0+Yv5mdkYEhF++VX6i6zP9+ppq8l6\nuOwGrkvTtgQmpuFDgCvS8AnAfcBUoBN4CvhYmvcVsg7CAHqAXdPwAcB/p+GLgKML265V7jpgwiDx\nFuOcC/wG2JTszuMngEmDLLM0zZ8BrAP2TtMXAMel4d8BR6bhycDmaVtPkfUJtgnQBxwETErb7Uzl\n30PWGwDAo8CmaXjrsj9zv9rz5eYjG222Ai6WtCvZrfuTCvNuiKwv+WckPQVcm6bfDuyZepA8EPh+\n+lEN2Zf289RR7vsRsb6OWH8UEWuBtZJWkiW4h2uUfyAiFqXhhcCM1N/VjhFxFUBE/E+KEeD3EfFw\nGl9EllhWA3sA16cyE8h6xwS4DZgv6YdkXYSYvYCTgrWl1NfTerIeLl9dmHUm2Zf/kcr6je8tzFtb\nGN5QGN9Atq9vAqyOdO6ihuHKPVvHW6iOZz3D/79Vl99sBOsXsDgiugYp/07gTcC7gH+S9JoonBMx\nA59TsDYkqRP4JnB
uRFR3zrUVlS7UT2hkvZH1L/+ApHen7UjSXmn2M2RNT8OVa6l05POwpCNSLJtK\n2rzGIkuATkldqfwkSbunK7mmR8QNwKlk9TilyeHbKOSkYO1is4FLUoFfAD8Hzhik3BeBeZL+wMiO\ndN8HfEjSH4HFVB7xejnw2XRiepca5crwfuCTkm4jO1/wsqEKRvbo2qOBL6TYF5E1hU0ALpV0O/AH\n4GsRsbrpkduo415Szcws5yMFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCz3/wGr\nc/riLTZG8AAAAABJRU5ErkJggg==\n", 15 | "text/plain": [ 16 | "" 17 | ] 18 | }, 19 | "metadata": {}, 20 | "output_type": "display_data" 21 | } 22 | ], 23 | "source": [ 24 | "# \"np\" and \"plt\" are common aliases for NumPy and Matplotlib, respectively.\n", 25 | "import numpy as np\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "\n", 28 | "# X represents the features of our training data, the diameters of the pizzas.\n", 29 | "# A scikit-learn convention is to name the matrix of feature vectors X. \n", 30 | "# Uppercase letters indicate matrices, and lowercase letters indicate vectors.\n", 31 | "X = np.array([6, 8, 10, 14, 18]).reshape(-1, 1)\n", 32 | "\n", 33 | "# y is a vector representing the prices of the pizzas.\n", 34 | "y = [7, 9, 13, 17.5, 18]\n", 35 | "\n", 36 | "plt.figure()\n", 37 | "plt.title('Pizza price plotted against diameter')\n", 38 | "plt.xlabel('Diameter in inches')\n", 39 | "plt.ylabel('Price in dollars')\n", 40 | "plt.plot(X, y, 'k.')\n", 41 | "plt.axis([0, 25, 0, 25])\n", 42 | "plt.grid(True)\n", 43 | "plt.show()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": true, 51 | "deletable": true, 52 | "editable": true 53 | }, 54 | "outputs": [], 55 | "source": [] 56 | } 57 | ], 58 | "metadata": { 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | "nbconvert_exporter": "python", 73 | "pygments_lexer": "ipython3", 74 | "version": "3.5.2" 75 | } 
76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 2 79 | } 80 | -------------------------------------------------------------------------------- /chapter02/ed2-ch2-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import numpy as np\n", 14 | "from sklearn.linear_model import LinearRegression" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 4, 20 | "metadata": { 21 | "collapsed": false, 22 | "deletable": true, 23 | "editable": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "X = np.array([6, 8, 10, 14, 18]).reshape(-1, 1)\n", 28 | "y = [7, 9, 13, 17.5, 18]" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 6, 34 | "metadata": { 35 | "collapsed": false, 36 | "deletable": true, 37 | "editable": true 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "A 12\" pizza should cost: $13.68\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "# Create an instance of the estimator, LinearRegression\n", 50 | "model = LinearRegression()\n", 51 | "# Fit the model on the training data\n", 52 | "model.fit(X, y)\n", 53 | "# Predict the price of a pizza with a diameter that has never been seen before\n", 54 | "test_pizza = np.array([[12]])\n", 55 | "predicted_price = model.predict(test_pizza)[0]\n", 56 | "print('A 12\" pizza should cost: $%.2f' % predicted_price)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Python 2", 72 | "language": "python", 73 | "name": "python2" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | 
"name": "ipython", 78 | "version": 2 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython2", 85 | "version": "2.7.12" 86 | } 87 | }, 88 | "nbformat": 4, 89 | "nbformat_minor": 2 90 | } 91 | -------------------------------------------------------------------------------- /chapter02/ed2-ch2-s4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import numpy as np\n", 14 | "from sklearn.linear_model import LinearRegression" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 4, 20 | "metadata": { 21 | "collapsed": false, 22 | "deletable": true, 23 | "editable": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "X_train = np.array([6, 8, 10, 14, 18]).reshape(-1, 1)\n", 28 | "y = [7, 9, 13, 17.5, 18]" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 5, 34 | "metadata": { 35 | "collapsed": false, 36 | "deletable": true, 37 | "editable": true 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "A 12\" pizza should cost: $13.68\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "# Create an instance of the estimator, LinearRegression\n", 50 | "model = LinearRegression()\n", 51 | "# Fit the model on the training data\n", 52 | "model.fit(X, y)\n", 53 | "# Predict the price of a pizza with a diameter that has never been seen before\n", 54 | "test_pizza = np.array([[12]])\n", 55 | "predicted_price = model.predict(test_pizza)[0]\n", 56 | "print('A 12\" pizza should cost: $%.2f' % predicted_price)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 6, 62 | "metadata": { 63 | "collapsed": false, 64 | 
"deletable": true, 65 | "editable": true 66 | }, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Residual sum of squares: 1.75\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "print('Residual sum of squares: %.2f' % np.mean((model.predict(X) - y) ** 2))" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 2", 93 | "language": "python", 94 | "name": "python2" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 2 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython2", 106 | "version": "2.7.12" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /chapter02/ed2-ch2-s5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "11.2\n", 17 | "23.2\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "\n", 24 | "X = np.array([[6], [8], [10], [14], [18]]).reshape(-1, 1)\n", 25 | "x_bar = X.mean()\n", 26 | "print(x_bar)\n", 27 | "\n", 28 | "# Note that we subtract one from the number of training instances when calculating the sample variance. \n", 29 | "# This technique is called Bessel's correction. 
It corrects the bias in the estimation of the population variance\n", 30 | "# from a sample.\n", 31 | "variance = ((X - x_bar)**2).sum() / (X.shape[0] - 1)\n", 32 | "print(variance)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": { 39 | "collapsed": false, 40 | "deletable": true, 41 | "editable": true 42 | }, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "23.2\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "print(np.var(X, ddof=1))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 8, 59 | "metadata": { 60 | "collapsed": false, 61 | "deletable": true, 62 | "editable": true 63 | }, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "22.65\n", 70 | "22.65\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "# We previously used a list to represent y.\n", 76 | "# Here we switch to a NumPy ndarray, which provides a method to calculate the sample mean.\n", 77 | "y = np.array([7, 9, 13, 17.5, 18])\n", 78 | "\n", 79 | "y_bar = y.mean()\n", 80 | "# We transpose X because both operands must be row vectors\n", 81 | "covariance = np.multiply((X - x_bar).transpose(), y - y_bar).sum() / (X.shape[0] - 1)\n", 82 | "print(covariance)\n", 83 | "print(np.cov(X.transpose(), y)[0][1])" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true, 91 | "deletable": true, 92 | "editable": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": 
"3.5.2" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /chapter02/ed2-ch2-s7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "data": { 14 | "text/plain": [ 15 | "0.6620052929422553" 16 | ] 17 | }, 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "output_type": "execute_result" 21 | } 22 | ], 23 | "source": [ 24 | "import numpy as np\n", 25 | "from sklearn.linear_model import LinearRegression\n", 26 | "\n", 27 | "X_train = np.array([6, 8, 10, 14, 18]).reshape(-1, 1)\n", 28 | "y_train = [7, 9, 13, 17.5, 18]\n", 29 | "\n", 30 | "X_test = np.array([8, 9, 11, 16, 12]).reshape(-1, 1)\n", 31 | "y_test = [11, 8.5, 15, 18, 11]\n", 32 | "\n", 33 | "model = LinearRegression()\n", 34 | "model.fit(X_train, y_train)\n", 35 | "r_squared = model.score(X_test, y_test)\n", 36 | "print(r_squared)" 37 | ] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.5.2" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /chapter03/e2-ch3-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 
| "data": { 12 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8ZXV9//HXW3DQECBQMEVFYcQNF9CM4C4RpdafFVwK\nqFVQ67i0EgZry6Sj5Kchcf1N4y4oFesyIBUXqnYUR9BW0BkEBxWqhH0VmAAxlAHm8/vjfO9w5s5N\ncmeSc+89Oe/n43EfuWd/5zuZ87nne849RxGBmZlV10PaHcDMzNrLhcDMrOJcCMzMKs6FwMys4lwI\nzMwqzoXAzKziXAiso0h6jKRJSTs0Me++kkLSjq3Itr1amVPS5yS9r8l5vyRpuKAcIWn/ItZt88+F\noANJulrSS+rGHSfpZ+3K1CxJQ5K+0mB8UzuGiLg2Iroj4oGisnQSScslfb9u3O+nGXfMbOuLiHdE\nxAfnKVvbd+aSFkn6uKTr0weEqyX9SzszLUQuBGbtdQHw3NoRkKS9gYcCz6gbt3+at2qWA0uAg4Fd\ngEOBi9sZaCFyISip+k9r+cN8SYemT1D/KOlWSTdJOlLSyyX9j6Q7JA3mlj1Y0s8lTaR5PyVpUd22\n3pE+lU5I+rQkzSH7QySdJOlKSbdLOkvSHmnaFt0okvaTdIGkuyX9KG27/lP+GyRdK+k2Sf+clnsZ\nMAgcnT5JXprGHydpPK3vKklvmCbjdreJpB0kfSzlGQf+zwzN8UuyHf9BafgFwBrgirpxV0bEjWn9\nT5L0w/TveIWko3K5tujuSX8DN0m6UdLfNviUv7uk/0jtcZGkx6XlakXn0tR+R0vaU9K56fe9Q9JP\nJc20D3l5auvbJH00/bsvSss+LZfxEZKmJO3VYB3PAs6JiBsjc3VEfDm37CMl/bukP6Z/z+Nz074n\n6eO54VWSTp8hb2W5ECxcfw48DHgU8H7gNOBvgD6yHcv7JO2X5n0AWAbsCTwHOAx4V936XkH2n/Lp\nwFHAX8wh27uBI4EXAY8ENgCfnmberwG/AP4MGALe2GCe5wNPTLnfL+nJEfEDYAQ4M3U1HShpZ+AT\nwF9GxC7Ac4FLptnuXNrkbWnaM8g+zb52mm0QERuBi4AXplEvBH4K/Kxu3AUA6Xf4YWqXRwDHAJ+R\ndED9ulMxPBF4CdkRxaENIhwD/F9gd+APwCkpV23bB6b2OxN4D3A9sBfQS1ZoZ7pHzavS7/9M4Ajg\nLen3XUX2t1jzOuC8iPhjg3VcCJwo6V2Snpb/AJKK0HeBS8n+zg8DTpBU+3d4C/BGSS9OBf9gYGCG\nvNUVEX512Au4GpgEJnKvKeBnuXkC2D83/CVgOL0/FLgH2CEN75LmPyQ3/zrgyGm2fwLZp7D8tp6f\nGz4LOGmaZYeAjXXZJ/J5gd8Bh+WW2Ru4D9gR2DfNuyPwGOB+oCs371eAr6T3tXkfnZv+C+CYXJav\n5KbtnLK8Bnj4Nv6bNN0mwI+Bd+SmHV77nWZos3PS+0uBxwMvqxt3bHp/NPDTuuU/D5zc4O/gdGA0\nN9/+df8OXwK+kJv+cuDyGf7GPgB8Oz9uhvYK4GW54XeR7ewBDgGuBZSG1wJHTbOeHYC/A/4LuBe4\nMdcWhwDX1s2/HPjX3PBrgOuA2/L/Xn5t+fIRQec6MiJ6ai+2/jQ6m9vjwROu96Sft+Sm3wN0A0h6\nQjrkv1nSXWSfpPesW9/NufdTtWWncVY+e8qf91jgnNTFMEFWGB4g+5SZ90jgjoiYyo27rsH2msoW\nEX8i25G+A7gpdYk8qdG8c2yTR9blvKbRNnIuAJ6fusf2iojfA/9Ndu5gD+CpPHh+4LHAIbW2S+33\nBrIjwHr1Oba77ZKPkh01rE5dPifN8nvVt8EjASLiorStQ1P77w98p9EKIuKBiPh0RDwP6CE7Yjld\n0pPJ2uKRdW0xyJZ/R98
lKyZXRETHX2zRLi4E5TUFdOWGG+0ImvVZ4HLg8RGxK9l/pu0+B9CE68i6\nZ/LF4mERcUPdfDcBe0jK/577bMN2tuq2iIj/jIiXkh2FXE7WZdbIXNrkprqcj5ll/p8Du5F1Kf1X\nynkX2afftwE3RsRVad7rgPPr2q47It45TY5H54a3pe22EhF3R8R7ImIx8EqyLpvDZlikvg1uzA2f\nQdY99Ebg7Ij43ya2f09EfJqsK/EAsra4qq4tdomIl+cWO4Xsg8bekl7XzO9ZRS4E5XUJ8Pp0YvJl\nZP3t22sX4C5gMn1Ca7RTmU+fA06R9FgASXtJOqJ+poi4hqzbYCidZHwO8FfbsJ1bgH1rJzQl9Uo6\nIvWz30vW/bZpmmXn0iZnAcdLerSk3YEZPzlHxD1kv+eJZOcHan6WxuWvFjoXeIKkN0p6aHo9K31C\nbpTjzZKenIppU98vyLkFWFwbkPQKSfunfvo7yY7ipms/gPdK2l3SPmR982fmpn2F7BzC3wBfbrRw\n2uYJyi5+eLikHSUdS/Zv8yuybsC7Jf1Tmr6DpKdKelZa9oXAm4E3AccCn5T0qG1sg0pwISivAbKd\nYq1r4FtzWNc/AK8H7ib7hHzmzLPP2RhZV8BqSXeTnRA8ZJp530B2svZ2YDhlu7fJ7Xwj/bxd0sVk\nf+8nkn0yvYOseE63g59Lm5wG/CdZ3/7FwDebWOZ8spO/+e6Ln6ZxmwtBRNxNds7hmPR73Ax8GNip\nfoUR8X2yk+NryLp0LkyTmm2/IeCM1O1yFNm5ix+RFdCfA5+JiDUzLP9tsnNRlwD/AXwxl+06srYJ\ntix+9aaAj5P9nreRnS94TUSMp67PV5BdXXVVmv4FYDdJu5IVmL+PiBsi4qdp+/+aP+FsmdrJGrNS\nkHQm2QnNk9udpWzSUcNlwE4RcX8H5DmdrNtrRbuzVJ2PCKyjpW6Px6Vr0F9GdhniXI5+KkXSqyTt\nlLqoPgx8t0OKwL7Aq8kdJVj7uBBYp/tz4Cdk3RGfAN4ZEb9qa6JyeTtwK3AlWZ9+0ed/ZiXpg2RH\nJh/NnQS3NnLXkJlZxfmIwMys4jr69r01e+65Z+y7777zsq4//elP7LzzzvOyrqKUISOUI2cZMkI5\ncpYhI5QjZ6syrlu37raIaHQPpy21+6vNzbz6+vpivqxZs2be1lWUMmSMKEfOMmSMKEfOMmSMKEfO\nVmUE1oZvMWFmZrNxITAzqzgXAjOzinMhMDOrOBcCW5Ci7vsx9cNm9iAXAltwhoaGWLZs2eadf0Sw\nbNkyhoaG2hvMrEO5ENiCEhFMTEwwNjbGsmXLAFi2bBljY2NMTEz4yMCsgVJ8ocysWZJYuXIlAGNj\nY+yzzz6MjY0xMDDAypUr8R2IzbbmIwJbcPLFoMZFwGx6LgS24NTOCeTlzxmY2ZYKLQSSlkn6jaTL\nJH1d0sMkfUnSVZIuSa+Disxg1VIrArXuoL6+PgYGBjafM3AxMNtaYecI0rNBjwcOiIh7JJ1F9ng9\ngPdGxNlFbduqSxI9PT2bzwmcf/75m7uJenp63D1k1kDRJ4t3BB4u6T6gi+wZq2aFGhoaIiI27/Rr\n5wxcBMwaK6xrKCJuAD4GXAvcBNwZEavT5FMk/VrSSklbPXTbbK7qd/ouAmbTK+wJZekZqf8OHA1M\nAN8AzgbOA24GFgGnAldGxAcaLL8UWArQ29vbt2rVqnnJNTk5SXd397ysqyhlyAjlyFmGjFCOnGXI\nCOXI2aqM/f396yJiyawzNnOv6u15AX8NfDE3/CbgM3XzHAqcO9u6/DyCzlSGnGXIGFGOnEVm3LRp\n04zD26LqbZlHBzyP4Frg2ZK6lB2XHwb8TtLeAGnckWQPsTazivItQdqvyHMEF5F1BV0MrE/bOhX4\nqqT1adyewHBRGcyss0XdLUFqRcC3BGmtQq8aioiTgZPrRr+4yG2aWXnU3xJkbGwMwLcEa
TF/s9jM\n2sq3BGk/FwIza6tad1CevwXeWi4EZtY2+XMCAwMDbNq0ybcEaQPfhtrM2qb+liD5biLfEqR1XAjM\nrK18S5D2c9eQmbWdbwnSXi4EZmYV50JgZlZxLgRmZhXnQmBmVnEuBGZmFedCYGZWcS4EZmYV50Jg\nZlZxLgRmZhXnQmBmVnEuBGZmFedCYGbWIepvu92q23AXWggkLZP0G0mXSfq6pIdJ2k/SRZL+IOlM\nSYuKzGBmVgZDQ0NbPIOh9qyGoaGhwrddWCGQ9CjgeGBJRDwV2AE4BvgwsDIi9gc2AG8tKoOZWRlE\nBBMTE1s8kKf2wJ6JiYnCjwyKfh7BjsDDJd0HdAE3kT28/vVp+hnAEPDZgnOYmXWs/AN5xsbGGBsb\nA9jigT2Fbr/ISiNpADgFuAdYDQwAF6ajASTtA3w/HTHUL7sUWArQ29vbt2rVqnnJNDk5SXd397ys\nqyhlyAjlyFmGjFCOnGXICOXIOVPGdevWbX7f19c3p+309/evi4gls84YEYW8gN2BHwN7AQ8FvgX8\nDfCH3Dz7AJfNtq6+vr6YL2vWrJm3dRWlDBkjypGzDBkjypGzDBkjypGzUcZNmzbFwMBAAJtfAwMD\nsWnTpu3eDrA2mthfF3my+CXAVRHxx4i4D/gm8DygR1KtS+rRwA0FZjAz63iROycwMDDApk2bGBgY\n2OKcQZGKPEdwLfBsSV1kXUOHAWuBNcBrgVXAscC3C8xgZtbxJNHT07PFOYHaOYOenp7CzxEUVggi\n4iJJZwMXA/cDvwJOBf4DWCVpOI37YlEZzMzKYmhoiIjYvNOvFYNWPL+50KuGIuJk4OS60ePAwUVu\n18ysjOp3+q0oAuBvFpuZVZ4LgZlZxbkQmJlVnAuBmVnFuRCYmVWcC4GZWcW5EJiZVZwLgZlZxbkQ\nmJlVnAuBmVnFuRCYmVWcC4GZWcW5EJiZVZwLgZlZxbkQmJlVnAuBmVnFuRCYmVVcYU8ok/RE4Mzc\nqMXA+4Ee4G3AH9P4wYj4XlE5zMxsZkU+s/gK4CAASTsANwDnAG8GVkbEx4ratpmZNa9VXUOHAVdG\nxDUt2p6ZmTVJEVH8RqTTgYsj4lOShoDjgLuAtcB7ImJDg2WWAksBent7+1atWjUvWSYnJ+nu7p6X\ndRWlDBmhHDnLkBHKkbMMGaEcOVuVsb+/f11ELJl1xogo9AUsAm4DetNwL7AD2dHIKcDps62jr68v\n5suaNWvmbV1FKUPGiHLkLEPGiHLkLEPGiHLkbFVGYG00sZ9uRdfQX5IdDdySCs8tEfFARGwCTgMO\nbkEGMzObRisKweuAr9cGJO2dm/Yq4LIWZDAzs2kUdtUQgKSdgZcCb8+N/oikg4AArq6bZmZmLVZo\nIYiIPwF/VjfujUVu08zMto2/WWxmVnEuBGZmFedCYGZWcS4EZmYV50JgZlZxs141JOmZDUbfCVwT\nEffPfyQzM2ulZi4f/QzwTODXgICnAr8BdpP0zohYXWA+MzMrWDNdQzcCz4iIJRHRBzwDGCf7othH\nigxnZmbFa6YQPCEiflMbiIjfAk+KiPHiYpmZWas00zX0G0mfBWr3gT4a+K2knYD7CktmZmYt0cwR\nwXHAH4AT0ms8jbsP6C8qmJmZtUYzRwQHRMTHgY/XRkh6RUScC0wWlszMzFqimSOC0yQ9tTYg6Rjg\nfcVFMjOzVmrmiOC1wNmSXg+8AHgTcHihqczMrGVmLQQRMZ6OAr4FXAscHhH3FJ7MzMxaYtpCIGk9\n2cNjavYge9bwRZKIiKcXHc7MzIo30xHBK1qWwszM2mbaQhAR18xlxZKeCJyZG7UYeD/w5TR+X7JH\nVR4VERvmsi0zM9t+hd19NCKuiIiDIuIgoA+YAs4BTgLOi4jHA+elYTMza5NW3Yb6MODKdJRxBHBG\nGn8GcGSLMpiZWQOtKgTHAF9P73sj4qb0/magt0UZz
MysAUXEzDNIrwY+DDyC7DbUAiIidm1qA9Ii\nsjuYPiUibpE0ERE9uekbImL3BsstBZYC9Pb29q1atap+lu0yOTlJd3f3vKyrKGXICOXIWYaMUI6c\nZcgI5cjZqoz9/f3rImLJrDNGxIwvsvsMPXm2+WZY/ghgdW74CmDv9H5v4IrZ1tHX1xfzZc2aNfO2\nrqKUIWNEOXKWIWNEOXKWIWNEOXK2KiOwNprYTzfTNXRLRPxuu8pR5nU82C0E8B3g2PT+WODbc1i3\nmZnNUTO3mFgr6UyybxbfWxsZEd+cbUFJO5M9wObtudEfAs6S9FbgGuCobUpsZmbzqplCsCvZpZ/5\n+wsFMGshiIg/AX9WN+52squIzMysAzRzr6E3tyKImZm1x0z3GvrHiPiIpE+y5T2HAIiI4wtNZmZm\nLTHTEUHtBPHaVgQxM7P2mOleQ99NP8+Ybh4zMyu/Vn2z2MzMOpQLgS1oGzdubHeEBcNtuXDNWggk\nPa+ZcWadZnR0lPXr1zM6OtruKKXntlzYmjki+GST48w6xujoKMPDwwAMDw97BzYHbsuFb6bLR58D\nPBfYS9KJuUm7kj2y0qwj1XZcU1NTAExNTW3ekS1fvryd0UrHbVkNM10+ugjoTvPskht/F/DaIkOZ\nba/6HVeNd2Dbzm1ZHTNdPno+cL6kL8UcH1tp1grj4+MMDg5OO31qaorBwUGOPvpoFi9e3MJk5eO2\nrJZmzhHsJOlUSasl/bj2KjyZ2TZavHgxIyMjdHV1NZze1dXFyMiId1xNcFtWSzM3nfsG8DngC8AD\nxcYxm5taV0V9l0ZXVxcrVqxwV8Y2cFtWRzOF4P6I+GzhSczmSX4HBt5xzYXbshpmumpoj/T2u5Le\nBZzDls8juKPgbGbbLb+j8o5rbtyWC99MRwTryO46qjT83ty0ANw5aB1t+fLlrF69msMPP3z2mW1G\nbsuFbaarhvZrZRCzIixatKjdERYMt+XCNes5AkmvbjD6TmB9RNw6/5HMzKyVmjlZ/FbgOcCaNHwo\nWbfRfpI+EBH/Nt2CknrIrjZ6Kll30luAvwDeBvwxzTYYEd/brvRmZjZnzRSCHYEnR8QtAJJ6gS8D\nhwAXANMWAmAM+EFEvFbSIqCLrBCsjIiPzSm5mZnNi2YKwT61IpDcmsbdIem+6RaStBvwQuA4gIjY\nCGyUNN0iZmbWBorY6nHEW84gfQZ4DNkXywBeA1xPdhXRuRHRP81yBwGnAr8FDiTrThpIyx1Hds+i\ntcB7ImJDg+WXAksBent7+1atWrWNv1pjk5OTdHd3z8u6ilKGjFCOnGXICOXIWYaMUI6crcrY39+/\nLiKWzDpjRMz4Irt89LXAyvR6LamAzLLcEuB+4JA0PAZ8EOglu3vpQ4BTgNNnW1dfX1/MlzVr1szb\nuupt2rRpxuFmFZlxPpUhZxkyRpQjZxkyRpQjZ6syAmtjlv1rRMzeNZRWdnZ6bYvrgesj4qI0fDZw\nUuS6mSSdBpy7jevtSENDQ0xMTLBy5UokEREsW7aMnp4ehoaG2h3PzGxa0950TtLP0s+7Jd2Ve90t\n6a7ZVhwRNwPXSXpiGnUY8FtJe+dmexVw2Rzyd4SIYGJigrGxMZYtW7a5CIyNjTExMVE7QjIz60gz\nfaHs+ennLtPN04R3A19NVwyNA28GPpHOHwRwNfD2Oay/I0hi5cqVAIyNjTE2NgbAwMDA5iMEM7NO\n1dTD6yU9X9Kb0/s9JTX1reOIuCQilkTE0yPiyIjYEBFvjIinpXGvjIib5vILdIp8MahxETCzMmjm\n4fUnA/8E1O40tQj4SpGhyqjWHZRX6yYyM+tkzRwRvAp4JfAngIi4kS0fXVl5+XMCAwMDbNq0iYGB\ngS3OGZiZdapmvlC2MSJCUnYtqbRzwZlKRxI9PT1bnBOodRP19PS4e8jMOlozheAsSZ8HeiS9jex+\nQacVG6t8hoaGi
IjNO/1aMXARMLNO18z3CD4m6aVk3wR+IvD+iPhh4clKqH6n7yJgZmUw0xPKTgD+\nG7g47fi98zczW4BmOiJ4NPAvwJMkrQf+i6ww/Hf4MZVmZgvGTF8o+weA9GWwJcBzyb4QdqqkiYg4\noDURzcysSM2cLH44sCuwW3rdCKwvMpSZmbXOTOcITgWeAtwNXETWLfT/osEto83MrLxm+kLZY4Cd\ngJuBG8juJjrRilBmZtY6M50jeJmy6x+fQnZ+4D3AUyXdAfw8Ik5uUUYzMyvQjOcI0rMILpM0AdyZ\nXq8ADgZcCMzMFoCZzhEcT3Yk8FzgPtKlo8Dp+GSxmdmCMdMRwb5kzyletlBuFW1mZlub6RzBia0M\nYmZm7dHUg2nMzGzhKrQQSOqRdLakyyX9TtJzJO0h6YeSfp9+7l5kBjMzm1nRRwRjwA8i4knAgcDv\ngJOA8yLi8cB5adjMzNqksEIgaTfghcAXASJiY0RMAEcAZ6TZzgCOLCqDmZnNTkU9RlHSQcCpwG/J\njgbWAQPADRHRk+YRsKE2XLf8UmApQG9vb9+qVavmJdfk5CTd3d3zsq6ilCEjlCNnGTJCOXKWISOU\nI2erMvb396+LiCWzzhgRhbzI7lh6P3BIGh4DPghM1M23YbZ19fX1xXxZs2bNvK2rKK3IeOWVV855\nHW7L+VOGnGXIGFGOnK3KCKyNJvbXRZ4juB64PiIuSsNnA88EbpG0N0D6eWuBGayB0dFRHve4xzE6\nOtruKGbWAQorBBFxM3CdpCemUYeRdRN9Bzg2jTsW+HZRGWxro6OjDA8PAzA8POxiYGZNPY9gLt4N\nfDU93Gac7ME2DwHOkvRW4BrgqIIzWFIrAlNTUwBMTU1tLgrLly9vZzQza6NCC0FEXEJ2rqDeYUVu\n17ZWXwRqXAzMzN8sroDx8XEGBwe3KgI1U1NTDA4OMj4+3uJkZtYJXAgqYPHixYyMjNDV1dVweldX\nFyMjIyxevLjFycysE7gQVMTy5ctZsWLFVsWgq6uLFStWuFvIrMJcCCqkvhi4CJgZFH/VkHWY2k5/\ncHDQRcDMABeCSlq+fDlHH320zwmYGeCuocpyETCzGhcCM7OKcyEwM6s4FwIzs4pzITAzqzgXAjOz\ninMhMDOrOBcCM7OKcyEwM6s4FwIzs4pzITAzqzgXAjOziiu0EEi6WtJ6SZdIWpvGDUm6IY27RNLL\ni8xgZmYza8XdR/sj4ra6cSsj4mMt2LaZmc3CXUNmZhWniChu5dJVwAYggM9HxKmShoDjgLuAtcB7\nImJDg2WXAksBent7+1atWjUvmSYnJ+nu7p6XdU1n48aNLFq0aLuXb0XG+VCGnGXICOXIWYaMUI6c\nrcrY39+/LiKWzDpjRBT2Ah6Vfj4CuBR4IdAL7EB2NHIKcPps6+nr64v5smbNmnlbVyMjIyMBxMjI\nyHavo+iM86UMOcuQMaIcOcuQMaIcOVuVEVgbTeyrC+0aiogb0s9bgXOAgyPiloh4ICI2AacBBxeZ\noZVGR0cZHh4GYHh4mNHR0TYnMjObXWGFQNLOknapvQcOBy6TtHdutlcBlxWVoZVqRWBqagqAqakp\nFwMzK4UirxrqBc6RVNvO1yLiB5L+TdJBZOcNrgbeXmCGlqgvAjW1YgD4IfFm1rEKKwQRMQ4c2GD8\nG4vaZjuMj48zODg47fSpqSkGBwf9sHgz61i+fHSOFi9ezMjICF1dXQ2nd3V1MTIy4iJgZh3LhWAe\nLF++nBUrVmxVDLq6ulixYoW7hcyso7kQzJP6YuAiYGZl0YpbTFRGbac/ODjoImBmpeFCMM+WL1/u\nE8NmViruGiqAi4CZlYkLgZlZxbkQmJlVnAuBmVnFuRCYmVWcC4GZWcW5EJiZVZwLgZlZxbkQmJlV\nnAuBmVnFuRCYmVWcC4GZWcUVetM5SVcDdwMPAPdHxBJJewBnAvuSParyqIjYUGQ
OMzObXiuOCPoj\n4qCIWJKGTwLOi4jHA+elYTMza5N2dA0dAZyR3p8BHNmGDGZmligiilu5dBWwAQjg8xFxqqSJiOhJ\n0wVsqA3XLbsUWArQ29vbt2rVqnnJdNddd7HrrrvOy7qKMjk5SXd3d7tjzKoMOcuQEcqRswwZoRw5\nW5Wxv79/Xa43ZlpFP5jm+RFxg6RHAD+UdHl+YkSEpIaVKCJOBU4FWLJkSRx66KFzDjM6OsqiRYvY\nuHFjRz897Cc/+Qnz8fsWrQw5y5ARypGzDBmhHDk7LWOhXUMRcUP6eStwDnAwcIukvQHSz1uLzFAz\nOjrK8PAwAMPDw4yOjrZis2ZmHa+wQiBpZ0m71N4DhwOXAd8Bjk2zHQt8u6gMNbUiMDU1BcDU1JSL\ngZlZUmTXUC9wTnYagB2Br0XEDyT9EjhL0luBa4CjCsywVRGoqRUDoKO7iczMilZYIYiIceDABuNv\nBw4rart54+PjDA4OTjt9amqKwcFBP2zezCptQX+zePHixYyMjNDV1dVweldXFyMjIy4CZlZpC7oQ\nQNbts2LFiq2KQVdXFytWrHC3kJlVXtGXj3aE2s6+dk7ARcDM7EGVKASw5QlhFwEzswdVphBAVgxW\nr17N4Ycf3u4oZmYdY8GfI6i3aNGidkcwM+solSsEZma2JRcCM7OKcyEwM6s4FwIzs4pzITAzqzgX\nAjOziiv0CWXzRdIfye5UOh/2BG6bp3UVpQwZoRw5y5ARypGzDBmhHDlblfGxEbHXbDOVohDMJ0lr\nm3l0WzuVISOUI2cZMkI5cpYhI5QjZ6dldNeQmVnFuRCYmVVcFQvBqe0O0IQyZIRy5CxDRihHzjJk\nhHLk7KiMlTtHYGZmW6riEYGZmeW4EJiZVdyCKwSSTpd0q6TLcuOGJN0g6ZL0enkav6+ke3LjP9eu\njGn8uyVdLuk3kj6SG79c0h8kXSHpLzotY7vacbqcks7MZbla0iW5aR3RltNl7MC2PEjShSnLWkkH\np/GS9InUlr+W9MwOzHiopDtzbfn+VmScIeeBkn4uab2k70raNTet5X+XW4iIBfUCXgg8E7gsN24I\n+IcG8+6bn6/NGfuBHwE7peFHpJ8HAJcCOwH7AVcCO3RYxra043Q566Z/HHh/p7XlDBk7qi2B1cBf\npvcvB36Se/99QMCzgYs6MOOhwLkd1Ja/BF6U3r8F+GA7/y7zrwV3RBARFwB3tDvHTKbJ+E7gQxFx\nb5rn1jT+CGBVRNwbEVcBfwAO7rCMbTPTv7ckAUcBX0+jOqktp8vYNtPkDKD2yXU34Mb0/gjgy5G5\nEOiRtHc2meJrAAAE3klEQVSHZWybaXI+Abggvf8h8Jr0vi1/l3kLrhDM4O/TIezpknbPjd9P0q8k\nnS/pBW1Ll/2RvEDSRSnLs9L4RwHX5ea7Po1rh+kyQue0Y94LgFsi4vdpuJPasqY+I3RWW54AfFTS\ndcDHgNrDvjupLafLCPAcSZdK+r6kp7Qn3ma/IdvpA/w1sE963/a2rEoh+CzwOOAg4CayQ3HS+8dE\nxDOAE4Gv5fvtWmxHYA+yw+z3AmelT4udZLqMndSOea+jAz5pz6I+Y6e15TuBZRGxD7AM+GIbs0xn\nuowXk91r50Dgk8C32pSv5i3AuyStA3YBNrY5z2aVKAQRcUtEPBARm4DTSIdd6VDs9vR+HVnf3BPa\nFPN64JvpUPsXwCayG1PdwIOfHAAenca1Q8OMHdaOAEjaEXg1cGZudCe1ZcOMHdiWxwLfTO+/wYNd\nFp3Ulg0zRsRdETGZ3n8PeKikPdsTESLi8og4PCL6yIr/lWlS29uyEoWgru/yVcBlafxeknZI7xcD\njwfGW58QyD6t9KcsTwAWkd2d8DvAMZJ2krRfyviLTsrYYe1Y8xLg8oi4Pjeuk9oSGmTswLa8EXhR\nev9ioNaF9R3gTenqoWcDd0bETe0IyDQZJf1
57ag6XUn0EOD2tiTMMjwi/XwIsAKoXRHW/r/LdpxR\nL/JFVmlvAu4j+wT7VuDfgPXAr1Oj753mfQ1Zv90lZIeRf9XGjIuAr5AVqYuBF+fm/2eyTw9XkK6O\n6KSM7WrH6XKm8V8C3tFg/o5oy+kydlpbAs8H1pFd1XIR0JfmFfDp1JbrgSUdmPHvU1teClwIPLfN\nbTkA/E96fYh0Z4d2/V3mX77FhJlZxVWia8jMzKbnQmBmVnEuBGZmFedCYGZWcS4EZmYV50JgpSdp\nsm74OEmfmmWZV0o6aZZ5DpV07jTTTpDUNc20L0g6YLbcZp3ChcAqKSK+ExEfmsMqTgAaFoKI+NuI\n+O0c1m3WUi4EtqClb+r+u6Rfptfz0vjNRw2SHpfuZ79e0nDdEUa3pLOVPYPhq+mbtMcDjwTWSFrT\nYJs/kbQkvZ+UdEq68dmFknobzN8t6V/T9n8t6TW5ZT+q7NkPP5J0cFr3uKRXFtBcVlEuBLYQPFwP\nPnzkEuADuWljwMqIeBbZt3a/0GD5MWAsIp5G9i3QvGeQffo/AFgMPC8iPkF2W4P+iOifJdvOwIWR\n3fjsAuBtDeZ5H9ktGp4WEU8Hfpxb9scR8RTgbmAYeCnZbVI+0GA9Zttlx3YHMJsH90TEQbUBSccB\nS9LgS4ADcjdy3VVSd93yzwGOTO+/RnYr45pfRLoXUCoy+wI/24ZsG4HaeYZ1ZDvyei8BjqkNRMSG\n3LI/SO/XA/dGxH2S1qccZvPChcAWuocAz46I/82P3IY7fN+be/8A2/5/5r548D4u27p8ftlNtSwR\nsSndudRsXrhryBa61cC7awOSDmowz4U8+LSoYxpMb+RusnvKz4cfAn9XG9CWD04yK5wLgS10xwNL\n0knY3wLvaDDPCcCJkn4N7A/c2cR6TwV+0Ohk8XYYBnaXdJmkS0m3+jZrFd991CovfR/gnogISccA\nr4uII2ZbzmyhcD+jGfQBn0oPMZkge6SgWWX4iMDMrOJ8jsDMrOJcCMzMKs6FwMys4lwIzMwqzoXA\nzKzi/j+JHU7ybsLJHgAAAABJRU5ErkJggg==\n", 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "\n", 25 | "X_train = np.array([\n", 26 | " [158, 64],\n", 27 | " [170, 86],\n", 28 | " [183, 84],\n", 29 | " [191, 80],\n", 30 | " [155, 49],\n", 31 | " [163, 59],\n", 32 | " [180, 67],\n", 33 | " [158, 54],\n", 34 | " [170, 67]\n", 35 | "])\n", 36 | "y_train = ['male', 'male', 'male', 'male', 'female', 'female', 'female', 'female', 'female']\n", 37 | "\n", 38 | "plt.figure()\n", 39 | "plt.title('Human Heights and Weights by Sex')\n", 40 | "plt.xlabel('Height in cm')\n", 41 | "plt.ylabel('Weight in kg')\n", 42 | "\n", 43 | "for i, x in enumerate(X_train):\n", 44 | " plt.scatter(x[0], x[1], c='k', marker='x' if y_train[i] == 'male' else 'D')\n", 45 | "plt.grid(True)\n", 46 | 
"plt.show()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 2", 62 | "language": "python", 63 | "name": "python2" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 2 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython2", 75 | "version": "2.7.12" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /chapter03/e2-ch3-s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 27, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from sklearn.neighbors import KNeighborsRegressor\n", 14 | "\n", 15 | "X_train = np.array([\n", 16 | " [158, 64, 1],\n", 17 | " [170, 86, 1],\n", 18 | " [183, 84, 1],\n", 19 | " [191, 80, 1],\n", 20 | " [155, 49, 0],\n", 21 | " [163, 59, 0],\n", 22 | " [180, 67, 0],\n", 23 | " [158, 54, 0],\n", 24 | " [170, 67, 0]\n", 25 | "])\n", 26 | "y_train = [7, 12, 29, 18, 11, 16, 29, 22, 36]\n", 27 | "\n", 28 | "X_test = np.array([\n", 29 | " [160, 66, 1],\n", 30 | " [196, 87, 1],\n", 31 | " [168, 68, 0],\n", 32 | " [177, 74, 0]\n", 33 | "])\n", 34 | "y_test = [9, 13, 26, 21]\n", 35 | "\n", 36 | "K = 1\n", 37 | "clf = KNeighborsRegressor(n_neighbors=K)\n", 38 | "clf.fit(X_train, y_train)\n", 39 | "predictions = clf.predict(np.array(X_test))\n", 40 | "predictions" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 35, 46 | "metadata": { 47 | "collapsed": false 48 | 
}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "array([ 7., 18., 36., 29.])" 54 | ] 55 | }, 56 | "execution_count": 35, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 38, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "-0.0919377652051\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "from sklearn.metrics import r2_score\n", 80 | "print(r2_score(y_test, predictions))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "# todo, instead, weight from height and gender" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 47, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Predicted weights: [ 59. 77. 
70.66666667 72.66666667]\n", 115 | "Actual weights: [66, 87, 68, 74]\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "import numpy as np\n", 121 | "import matplotlib.pyplot as plt\n", 122 | "from sklearn.neighbors import KNeighborsRegressor\n", 123 | "\n", 124 | "X_train = np.array([\n", 125 | " [158, 1],\n", 126 | " [170, 1],\n", 127 | " [183, 1],\n", 128 | " [191, 1],\n", 129 | " [155, 0],\n", 130 | " [163, 0],\n", 131 | " [180, 0],\n", 132 | " [158, 0],\n", 133 | " [170, 0]\n", 134 | "])\n", 135 | "y_train = [64, 86, 84, 80, 49, 59, 67, 54, 67]\n", 136 | "\n", 137 | "X_test = np.array([\n", 138 | " [160, 1],\n", 139 | " [196, 1],\n", 140 | " [168, 0],\n", 141 | " [177, 0]\n", 142 | "])\n", 143 | "y_test = [66, 87, 68, 74]\n", 144 | "\n", 145 | "K = 3\n", 146 | "clf = KNeighborsRegressor(n_neighbors=K)\n", 147 | "clf.fit(X_train, y_train)\n", 148 | "predictions = clf.predict(np.array(X_test))\n", 149 | "print('Predicted weights: %s' % predictions)\n", 150 | "print('Actual weights: %s' % y_test)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 44, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "0.616744186047\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "from sklearn.metrics import r2_score\n", 170 | "print(r2_score(y_test, predictions))" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": true 178 | }, 179 | "outputs": [], 180 | "source": [] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 2", 195 | "language": "python", 196 | "name": "python2" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | 
"version": 2 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython2", 208 | "version": "2.7.12" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /chapter04/e2-ch4-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "[[ 0. -0.70710678 -1.38873015 0.52489066 0.59299945 -1.35873244]\n", 15 | " [ 0. -0.70710678 0.46291005 0.87481777 0.81537425 1.01904933]\n", 16 | " [ 0. 1.41421356 0.9258201 -1.39970842 -1.4083737 0.33968311]]\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "from sklearn import preprocessing\n", 22 | "import numpy as np\n", 23 | "X = np.array([\n", 24 | " [0., 0., 5., 13., 9., 1.],\n", 25 | " [0., 0., 13., 15., 10., 15.],\n", 26 | " [0., 3., 15., 2., 0., 11.]\n", 27 | "])\n", 28 | "print(preprocessing.scale(X))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.2" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s1.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "[[ 0. 1. 0.]\n", 15 | " [ 0. 0. 1.]\n", 16 | " [ 1. 0. 0.]]\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "from sklearn.feature_extraction import DictVectorizer\n", 22 | "onehot_encoder = DictVectorizer()\n", 23 | "X = [\n", 24 | " {'city': 'New York'},\n", 25 | " {'city': 'San Francisco'},\n", 26 | " {'city': 'Chapel Hill'}\n", 27 | "]\n", 28 | "print(onehot_encoder.fit_transform(X).toarray())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.2" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s10.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 55, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "(4096,)\n", 15 | "[ 0. 0. 0.77542615 ..., 0. 0. 0. 
]\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "# Append the path to caffe/python to your PYTHONPATH\n", 21 | "import sys\n", 22 | "sys.path.append('/home/gavin/caffe/python')\n", 23 | "\n", 24 | "import os\n", 25 | "import caffe\n", 26 | "import numpy as np\n", 27 | "\n", 28 | "CAFFE_DIR = '/home/gavin/caffe'\n", 29 | "MEAN_PATH = os.path.join(CAFFE_DIR, 'python/caffe/imagenet/ilsvrc_2012_mean.npy')\n", 30 | "PROTOTXT_PATH = os.path.join(CAFFE_DIR, 'models/bvlc_reference_caffenet/deploy.prototxt')\n", 31 | "CAFFEMODEL_PATH = os.path.join(CAFFE_DIR, 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel')\n", 32 | "IMAGE_PATH = 'img/zipper-1.jpg'\n", 33 | "\n", 34 | "caffe.set_mode_cpu()\n", 35 | "net = caffe.Net(PROTOTXT_PATH, CAFFEMODEL_PATH, caffe.TEST)\n", 36 | "transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n", 37 | "transformer.set_transpose('data', (2, 0, 1))\n", 38 | "transformer.set_mean('data', np.load(MEAN_PATH).mean(1).mean(1))\n", 39 | "transformer.set_raw_scale('data', 255)\n", 40 | "transformer.set_channel_swap('data', (2,1,0))\n", 41 | "\n", 42 | "net.blobs['data'].reshape(1, 3, 227, 227)\n", 43 | "net.blobs['data'].data[0] = transformer.preprocess('data', caffe.io.load_image(IMAGE_PATH))\n", 44 | "net.forward()\n", 45 | "features = net.blobs['fc7'].data.reshape(-1,)\n", 46 | "print(features.shape)\n", 47 | "print(features)" 48 | ] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 2", 54 | "language": "python", 55 | "name": "python2" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 2 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython2", 67 | "version": "2.7.12" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- 
/chapter04/ed2-ch4-s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "corpus = [\n", 12 | " 'UNC played Duke in basketball',\n", 13 | " 'Duke lost the basketball game'\n", 14 | "]" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 14, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "[[1 1 0 1 0 1 0 1]\n", 29 | " [1 1 1 0 1 0 1 0]]\n", 30 | "{'played': 5, 'the': 6, 'in': 3, 'lost': 4, 'game': 2, 'basketball': 0, 'unc': 7, 'duke': 1}\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "from sklearn.feature_extraction.text import CountVectorizer\n", 36 | "vectorizer = CountVectorizer()\n", 37 | "print(vectorizer.fit_transform(corpus).todense())\n", 38 | "print(vectorizer.vocabulary_)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 15, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[[0 1 1 0 1 0 1 0 0 1]\n", 53 | " [0 1 1 1 0 1 0 0 1 0]\n", 54 | " [1 0 0 0 0 0 0 1 0 0]]\n", 55 | "{'played': 6, 'the': 8, 'in': 4, 'game': 3, 'lost': 5, 'ate': 0, 'sandwich': 7, 'basketball': 1, 'unc': 9, 'duke': 2}\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "corpus.append('I ate a sandwich')\n", 61 | "print(vectorizer.fit_transform(corpus).todense())\n", 62 | "print(vectorizer.vocabulary_)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 17, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "Distance between 1st and 2nd documents: [[ 2.44948974]]\n", 77 | "Distance between 1st and 3rd documents: [[ 2.64575131]]\n", 78 
| "Distance between 2nd and 3rd documents: [[ 2.64575131]]\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "from sklearn.metrics.pairwise import euclidean_distances\n", 84 | "X = vectorizer.fit_transform(corpus).todense()\n", 85 | "print('Distance between 1st and 2nd documents:', euclidean_distances(X[0], X[1]))\n", 86 | "print('Distance between 1st and 3rd documents:', euclidean_distances(X[0], X[2]))\n", 87 | "print('Distance between 2nd and 3rd documents:', euclidean_distances(X[1], X[2]))" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 18, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "[[0 1 1 0 0 1 0 1]\n", 102 | " [0 1 1 1 1 0 0 0]\n", 103 | " [1 0 0 0 0 0 1 0]]\n", 104 | "{'played': 5, 'game': 3, 'lost': 4, 'ate': 0, 'sandwich': 6, 'basketball': 1, 'unc': 7, 'duke': 2}\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "vectorizer = CountVectorizer(stop_words='english')\n", 110 | "print(vectorizer.fit_transform(corpus).todense())\n", 111 | "print(vectorizer.vocabulary_)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 19, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "[[1 0 0 1]\n", 126 | " [0 1 1 0]]\n", 127 | "{'ate': 0, 'eaten': 1, 'sandwich': 2, 'sandwiches': 3}\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "corpus = [\n", 133 | " 'He ate the sandwiches',\n", 134 | " 'Every sandwich was eaten by him'\n", 135 | "]\n", 136 | "vectorizer = CountVectorizer(binary=True, stop_words='english')\n", 137 | "print(vectorizer.fit_transform(corpus).todense())\n", 138 | "print(vectorizer.vocabulary_)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 20, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "corpus = [\n", 150 
| " 'I am gathering ingredients for the sandwich.',\n", 151 | " 'There were many wizards at the gathering.'\n", 152 | "]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 23, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "gather\n", 167 | "gathering\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "from nltk.stem.wordnet import WordNetLemmatizer\n", 173 | "\n", 174 | "lemmatizer = WordNetLemmatizer()\n", 175 | "print(lemmatizer.lemmatize('gathering', 'v'))\n", 176 | "print(lemmatizer.lemmatize('gathering', 'n'))" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 25, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "gather\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "from nltk.stem import PorterStemmer\n", 196 | "\n", 197 | "stemmer = PorterStemmer()\n", 198 | "print(stemmer.stem('gathering'))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | "source": [] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python 3", 214 | "language": "python", 215 | "name": "python3" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 3 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.5.2" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 2 232 | } 233 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "Stemmed: [['He', 'ate', 'the', 'sandwich'], ['everi', 'sandwich', 'wa', 'eaten', 'by', 'him']]\n", 17 | "Lemmatized: [['He', 'eat', 'the', 'sandwich'], ['Every', 'sandwich', 'be', 'eat', 'by', 'him']]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "from nltk import word_tokenize\n", 23 | "from nltk.stem import PorterStemmer\n", 24 | "from nltk.stem.wordnet import WordNetLemmatizer\n", 25 | "from nltk import pos_tag\n", 26 | "\n", 27 | "wordnet_tags = ['n', 'v']\n", 28 | "corpus = [\n", 29 | " 'He ate the sandwiches',\n", 30 | " 'Every sandwich was eaten by him'\n", 31 | "]\n", 32 | "stemmer = PorterStemmer()\n", 33 | "print('Stemmed:', [[stemmer.stem(token) for token in word_tokenize(document)] for document in corpus])\n", 34 | "\n", 35 | "\n", 36 | "def lemmatize(token, tag):\n", 37 | " if tag[0].lower() in ['n', 'v']:\n", 38 | " return lemmatizer.lemmatize(token, tag[0].lower())\n", 39 | " return token\n", 40 | "\n", 41 | "lemmatizer = WordNetLemmatizer()\n", 42 | "tagged_corpus = [pos_tag(word_tokenize(document)) for document in corpus]\n", 43 | "print('Lemmatized:', [[lemmatize(token, tag) for token, tag in document] for document in tagged_corpus])" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": true, 51 | "deletable": true, 52 | "editable": true 53 | }, 54 | "outputs": [], 55 | "source": [] 56 | } 57 | ], 58 | "metadata": { 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | 
"nbconvert_exporter": "python", 73 | "pygments_lexer": "ipython3", 74 | "version": "3.5.2" 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 2 79 | } 80 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 24, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "[2 1 3 1 1]\n", 17 | "Token indices {'ate': 0, 'sandwich': 2, 'dog': 1, 'wizard': 4, 'transfigured': 3}\n", 18 | "The token \"ate\" appears 2 times\n", 19 | "The token \"sandwich\" appears 3 times\n", 20 | "The token \"dog\" appears 1 times\n", 21 | "The token \"wizard\" appears 1 times\n", 22 | "The token \"transfigured\" appears 1 times\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import numpy as np\n", 28 | "from sklearn.feature_extraction.text import CountVectorizer\n", 29 | "\n", 30 | "corpus = ['The dog ate a sandwich, the wizard transfigured a sandwich, and I ate a sandwich']\n", 31 | "vectorizer = CountVectorizer(stop_words='english')\n", 32 | "frequencies = np.array(vectorizer.fit_transform(corpus).todense())[0]\n", 33 | "print(frequencies)\n", 34 | "print('Token indices %s' % vectorizer.vocabulary_)\n", 35 | "for token, index in vectorizer.vocabulary_.items():\n", 36 | " print('The token \"%s\" appears %s times' % (token, frequencies[index]))" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [] 47 | } 48 | ], 49 | "metadata": { 50 | "kernelspec": { 51 | "display_name": "Python 3", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 
3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.5.2" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 2 70 | } 71 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "[[ 0.75458397 0.37729199 0.53689271 0. 0. ]\n", 17 | " [ 0. 0. 0.44943642 0.6316672 0.6316672 ]]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 23 | "corpus = [\n", 24 | " 'The dog ate a sandwich and I ate a sandwich',\n", 25 | " 'The wizard transfigured a sandwich'\n", 26 | "]\n", 27 | "vectorizer = TfidfVectorizer(stop_words='english')\n", 28 | "print(vectorizer.fit_transform(corpus).todense())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true, 36 | "deletable": true, 37 | "editable": true 38 | }, 39 | "outputs": [], 40 | "source": [] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.5.2" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- 
/chapter04/ed2-ch4-s7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [ 12 | { 13 | "name": "stdout", 14 | "output_type": "stream", 15 | "text": [ 16 | "[[-1. 0. 0. 0. 0. 0.]\n", 17 | " [ 0. 0. 0. 1. 0. 0.]\n", 18 | " [ 0. 0. 0. 0. -1. 0.]\n", 19 | " [ 0. 1. 0. 0. 0. 0.]]\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from sklearn.feature_extraction.text import HashingVectorizer\n", 25 | "\n", 26 | "corpus = ['the', 'ate', 'bacon', 'cat']\n", 27 | "vectorizer = HashingVectorizer(n_features=6)\n", 28 | "print(vectorizer.transform(corpus).todense())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "collapsed": true, 36 | "deletable": true, 37 | "editable": true 38 | }, 39 | "outputs": [], 40 | "source": [] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.5.2" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Dimensions: 300\n", 15 | "[ 0.0123291 0.20410156 -0.28515625 0.21679688 0.11816406 
0.08300781\n", 16 | " 0.04980469 -0.00952148 0.22070312 -0.12597656 0.08056641 -0.5859375\n", 17 | " -0.00445557 -0.296875 -0.01312256 -0.08349609 0.05053711 0.15136719\n", 18 | " -0.44921875 -0.0135498 0.21484375 -0.14746094 0.22460938 -0.125\n", 19 | " -0.09716797 0.24902344 -0.2890625 0.36523438 0.41210938 -0.0859375\n", 20 | " -0.07861328 -0.19726562 -0.09082031 -0.14160156 -0.10253906 0.13085938\n", 21 | " -0.00346375 0.07226562 0.04418945 0.34570312 0.07470703 -0.11230469\n", 22 | " 0.06738281 0.11230469 0.01977539 -0.12353516 0.20996094 -0.07226562\n", 23 | " -0.02783203 0.05541992 -0.33398438 0.08544922 0.34375 0.13964844\n", 24 | " 0.04931641 -0.13476562 0.16308594 -0.37304688 0.39648438 0.10693359\n", 25 | " 0.22167969 0.21289062 -0.08984375 0.20703125 0.08935547 -0.08251953\n", 26 | " 0.05957031 0.10205078 -0.19238281 -0.09082031 0.4921875 0.03955078\n", 27 | " -0.07080078 -0.0019989 -0.23046875 0.25585938 0.08984375 -0.10644531\n", 28 | " 0.00105286 -0.05883789 0.05102539 -0.0291748 0.19335938 -0.14160156\n", 29 | " -0.33398438 0.08154297 -0.27539062 0.10058594 -0.10449219 -0.12353516\n", 30 | " -0.140625 0.03491211 -0.11767578 -0.1796875 -0.21484375 -0.23828125\n", 31 | " 0.08447266 -0.07519531 -0.25976562 -0.21289062 -0.22363281 -0.09716797\n", 32 | " 0.11572266 0.15429688 0.07373047 -0.27539062 0.14257812 -0.0201416\n", 33 | " 0.10009766 -0.19042969 -0.09375 0.14160156 0.17089844 0.3125\n", 34 | " -0.16699219 -0.08691406 -0.05004883 -0.24902344 -0.20800781 -0.09423828\n", 35 | " -0.12255859 -0.09472656 -0.390625 -0.06640625 -0.31640625 0.10986328\n", 36 | " -0.00156403 0.04345703 0.15625 -0.18945312 -0.03491211 0.03393555\n", 37 | " -0.14453125 0.01611328 -0.14160156 -0.02392578 0.01501465 0.07568359\n", 38 | " 0.10742188 0.12695312 0.10693359 -0.01184082 -0.24023438 0.0291748\n", 39 | " 0.16210938 0.19921875 -0.28125 0.16699219 -0.11621094 -0.25585938\n", 40 | " 0.38671875 -0.06640625 -0.4609375 -0.06176758 -0.14453125 -0.11621094\n", 41 | " 
0.05688477 0.03588867 -0.10693359 0.18847656 -0.16699219 -0.01794434\n", 42 | " 0.10986328 -0.12353516 -0.16308594 -0.14453125 0.12890625 0.11523438\n", 43 | " 0.13671875 0.05688477 -0.08105469 -0.06152344 -0.06689453 0.27929688\n", 44 | " -0.19628906 0.07226562 0.12304688 -0.20996094 -0.22070312 0.21386719\n", 45 | " -0.1484375 -0.05932617 0.05224609 0.06445312 -0.02636719 0.13183594\n", 46 | " 0.19433594 0.27148438 0.18652344 0.140625 0.06542969 -0.14453125\n", 47 | " 0.05029297 0.08837891 0.12255859 0.26757812 0.0534668 -0.32226562\n", 48 | " -0.20703125 0.18164062 0.04418945 -0.22167969 -0.13769531 -0.04174805\n", 49 | " -0.00286865 0.04077148 0.07275391 -0.08300781 0.08398438 -0.3359375\n", 50 | " -0.40039062 0.01757812 -0.18652344 -0.0480957 -0.19140625 0.10107422\n", 51 | " 0.09277344 -0.30664062 -0.19921875 -0.0168457 0.12207031 0.14648438\n", 52 | " -0.12890625 -0.23535156 -0.05371094 -0.06640625 0.06884766 -0.03637695\n", 53 | " 0.2109375 -0.06005859 0.19335938 0.05151367 -0.05322266 0.02893066\n", 54 | " -0.27539062 0.08447266 0.328125 0.01818848 0.01495361 0.04711914\n", 55 | " 0.37695312 -0.21875 -0.03393555 0.01116943 0.36914062 0.02160645\n", 56 | " 0.03466797 0.07275391 0.16015625 -0.16503906 -0.296875 0.15039062\n", 57 | " -0.29101562 0.13964844 0.00448608 0.171875 -0.21972656 0.09326172\n", 58 | " -0.19042969 0.01599121 -0.09228516 0.15722656 -0.14160156 -0.0534668\n", 59 | " 0.03613281 0.23632812 -0.15136719 -0.00689697 -0.27148438 -0.07128906\n", 60 | " -0.16503906 0.18457031 -0.08398438 0.18554688 0.11669922 0.02758789\n", 61 | " -0.04760742 0.17871094 0.06542969 -0.03540039 0.22949219 0.02697754\n", 62 | " -0.09765625 0.26953125 0.08349609 -0.13085938 -0.10107422 -0.00738525\n", 63 | " 0.07128906 0.14941406 -0.20605469 0.18066406 -0.15820312 0.05932617\n", 64 | " 0.28710938 -0.04663086 0.15136719 0.4921875 -0.27539062 0.05615234]\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "# See https://radimrehurek.com/gensim/install.html for gensim 
installation instructions\n", 70 | "# Download and gunzip the word2vec embeddings from \n", 71 | "# https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?usp=sharing\n", 72 | "import gensim\n", 73 | "\n", 74 | "# The model is large\n", 75 | "model = gensim.models.KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True)\n", 76 | "\n", 77 | "# Let's inspect the embedding for \"cat\"\n", 78 | "embedding = model.word_vec('cat')\n", 79 | "print(\"Dimensions: %s\" % embedding.shape)\n", 80 | "print(embedding)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "0.760945708978\n", 95 | "0.172112036738\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "# The vectors for semantically similar words are more similar than the vectors for semantically dissimilar words\n", 101 | "print(model.similarity('cat', 'dog'))\n", 102 | "print(model.similarity('cat', 'sandwich'))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "[(u'dog', 0.7762665152549744)]\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "# Kitten is to cat as puppy is to...\n", 122 | "print(model.most_similar(positive=['puppy', 'cat'], negative=['kitten'], topn=1))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 10, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "(u'saddles', 0.5282258987426758)\n", 137 | "(u'horseman', 0.5179383158683777)\n", 138 | "(u'jockey', 0.48861297965049744)\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "# Palette is to painter as saddle is to...\n", 
144 | "for i in model.most_similar(positive=['saddle', 'painter'], negative=['palette'], topn=3):\n", 145 | " print(i)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [] 156 | } 157 | ], 158 | "metadata": { 159 | "kernelspec": { 160 | "display_name": "Python 2", 161 | "language": "python", 162 | "name": "python2" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 2 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython2", 174 | "version": "2.7.12" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 2 179 | } 180 | -------------------------------------------------------------------------------- /chapter04/ed2-ch4-s9.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Digit: 0\n", 15 | "[[ 0. 0. 5. 13. 9. 1. 0. 0.]\n", 16 | " [ 0. 0. 13. 15. 10. 15. 5. 0.]\n", 17 | " [ 0. 3. 15. 2. 0. 11. 8. 0.]\n", 18 | " [ 0. 4. 12. 0. 0. 8. 8. 0.]\n", 19 | " [ 0. 5. 8. 0. 0. 9. 8. 0.]\n", 20 | " [ 0. 4. 11. 0. 1. 12. 7. 0.]\n", 21 | " [ 0. 2. 14. 5. 10. 12. 0. 0.]\n", 22 | " [ 0. 0. 6. 13. 10. 0. 0. 0.]]\n", 23 | "Feature vector:\n", 24 | " [[ 0. 0. 5. 13. 9. 1. 0. 0. 0. 0. 13. 15. 10. 15.\n", 25 | " 5. 0. 0. 3. 15. 2. 0. 11. 8. 0. 0. 4. 12. 0.\n", 26 | " 0. 8. 8. 0. 0. 5. 8. 0. 0. 9. 8. 0. 0. 4.\n", 27 | " 11. 0. 1. 12. 7. 0. 0. 2. 14. 5. 10. 12. 0. 0.\n", 28 | " 0. 0. 6. 13. 10. 0. 0. 
0.]]\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "from sklearn import datasets\n", 34 | "\n", 35 | "digits = datasets.load_digits()\n", 36 | "print('Digit: %s' % digits.target[0])\n", 37 | "print(digits.images[0])\n", 38 | "print('Feature vector:\\n %s' % digits.images[0].reshape(-1, 64))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 2", 54 | "language": "python", 55 | "name": "python2" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 2 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython2", 67 | "version": "2.7.12" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- /chapter05/e2-ch5-s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Predicted: [ 10.0625], Target: [11]\n", 15 | "R-squared: 0.77\n", 16 | "Predicted: [ 10.28125], Target: [8.5]\n", 17 | "R-squared: 0.77\n", 18 | "Predicted: [ 13.09375], Target: [15]\n", 19 | "R-squared: 0.77\n", 20 | "Predicted: [ 18.14583333], Target: [18]\n", 21 | "R-squared: 0.77\n", 22 | "Predicted: [ 13.3125], Target: [11]\n", 23 | "R-squared: 0.77\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "from sklearn.linear_model import LinearRegression\n", 29 | "\n", 30 | "X = [[6, 2], [8, 1], [10, 0], [14, 2], [18, 0]]\n", 31 | "y = [[7], [9], [13], [17.5], [18]]\n", 32 | "model = LinearRegression()\n", 33 | 
"model.fit(X, y)\n", 34 | "X_test = [[8, 2], [9, 0], [11, 2], [16, 2], [12, 0]]\n", 35 | "y_test = [[11], [8.5], [15], [18], [11]]\n", 36 | "predictions = model.predict(X_test)\n", 37 | "for i, prediction in enumerate(predictions):\n", 38 | " print('Predicted: %s, Target: %s' % (prediction, y_test[i]))\n", 39 | " print('R-squared: %.2f' % model.score(X_test, y_test))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "Python 2", 55 | "language": "python", 56 | "name": "python2" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 2 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython2", 68 | "version": "2.7.12" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 2 73 | } 74 | -------------------------------------------------------------------------------- /chapter05/ed2-ch5-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "[[ 1.1875 ]\n", 15 | " [ 1.01041667]\n", 16 | " [ 0.39583333]]\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "from numpy.linalg import inv\n", 22 | "from numpy import dot, transpose\n", 23 | "\n", 24 | "X = [[1, 6, 2], [1, 8, 1], [1, 10, 0], [1, 14, 2], [1, 18, 0]]\n", 25 | "y = [[7], [9], [13], [17.5], [18]]\n", 26 | "print(dot(inv(dot(transpose(X), X)), dot(transpose(X), y)))" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | 
"source": [] 37 | } 38 | ], 39 | "metadata": { 40 | "kernelspec": { 41 | "display_name": "Python 2", 42 | "language": "python", 43 | "name": "python2" 44 | }, 45 | "language_info": { 46 | "codemirror_mode": { 47 | "name": "ipython", 48 | "version": 2 49 | }, 50 | "file_extension": ".py", 51 | "mimetype": "text/x-python", 52 | "name": "python", 53 | "nbconvert_exporter": "python", 54 | "pygments_lexer": "ipython2", 55 | "version": "2.7.12" 56 | } 57 | }, 58 | "nbformat": 4, 59 | "nbformat_minor": 2 60 | } 61 | -------------------------------------------------------------------------------- /chapter05/ed2-ch5-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "[[ 1.1875 ]\n", 15 | " [ 1.01041667]\n", 16 | " [ 0.39583333]]\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "from numpy.linalg import lstsq\n", 22 | "\n", 23 | "X = [[1, 6, 2], [1, 8, 1], [1, 10, 0], [1, 14, 2], [1, 18, 0]]\n", 24 | "y = [[7], [9], [13], [17.5], [18]]\n", 25 | "print(lstsq(X, y)[0])" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [] 36 | } 37 | ], 38 | "metadata": { 39 | "kernelspec": { 40 | "display_name": "Python 2", 41 | "language": "python", 42 | "name": "python2" 43 | }, 44 | "language_info": { 45 | "codemirror_mode": { 46 | "name": "ipython", 47 | "version": 2 48 | }, 49 | "file_extension": ".py", 50 | "mimetype": "text/x-python", 51 | "name": "python", 52 | "nbconvert_exporter": "python", 53 | "pygments_lexer": "ipython2", 54 | "version": "2.7.12" 55 | } 56 | }, 57 | "nbformat": 4, 58 | "nbformat_minor": 2 59 | } 60 | -------------------------------------------------------------------------------- 
/chapter05/ed2-ch5-s6.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "R-squared: 0.398550890379\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from sklearn.linear_model import LinearRegression\n", 20 | "import pandas as pd\n", 21 | "import matplotlib.pylab as plt\n", 22 | "from sklearn.cross_validation import train_test_split\n", 23 | "\n", 24 | "df = pd.read_csv('./winequality-red.csv', sep=';')\n", 25 | "X = df[list(df.columns)[:-1]]\n", 26 | "y = df['quality']\n", 27 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n", 28 | "regressor = LinearRegression()\n", 29 | "regressor.fit(X_train, y_train)\n", 30 | "y_predictions = regressor.predict(X_test)\n", 31 | "print('R-squared: %s' % regressor.score(X_test, y_test))" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 2", 47 | "language": "python", 48 | "name": "python2" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 2 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython2", 60 | "version": "2.7.12" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 2 65 | } 66 | -------------------------------------------------------------------------------- /chapter05/ed2-ch5-s7.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | 
"outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "0.290041628842\n", 15 | "[ 0.13200871 0.31858135 0.34955348 0.369145 0.2809196 ]\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "from sklearn.model_selection import cross_val_score\n", 22 | "from sklearn.linear_model import LinearRegression\n", 23 | "\n", 24 | "df = pd.read_csv('./winequality-red.csv', sep=';')\n", 25 | "X = df[list(df.columns)[:-1]]\n", 26 | "y = df['quality']\n", 27 | "regressor = LinearRegression()\n", 28 | "scores = cross_val_score(regressor, X, y, cv=5)\n", 29 | "print(scores.mean())\n", 30 | "print(scores)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 2", 46 | "language": "python", 47 | "name": "python2" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 2 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython2", 59 | "version": "2.7.12" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /chapter05/ed2-ch5-s8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from sklearn.datasets import load_boston\n", 13 | "from sklearn.linear_model import SGDRegressor\n", 14 | "from sklearn.model_selection import cross_val_score\n", 15 | "from sklearn.preprocessing import StandardScaler\n", 16 | "from sklearn.model_selection import 
train_test_split\n", 17 | "\n", 18 | "data = load_boston()\n", 19 | "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 29, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "Cross validation r-squared scores: [ 0.55323539 0.77067053 0.78551352 0.69416906 0.53274918]\n", 34 | "Average cross validation r-squared score: 0.667267533715\n", 35 | "Test set r-squared score 0.733718249165\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "X_scaler = StandardScaler()\n", 41 | "y_scaler = StandardScaler()\n", 42 | "X_train = X_scaler.fit_transform(X_train)\n", 43 | "y_train = y_scaler.fit_transform(y_train.reshape(-1, 1))\n", 44 | "X_test = X_scaler.transform(X_test)\n", 45 | "y_test = y_scaler.transform(y_test.reshape(-1, 1))\n", 46 | "regressor = SGDRegressor(loss='squared_loss')\n", 47 | "scores = cross_val_score(regressor, X_train, y_train, cv=5)\n", 48 | "print('Cross validation r-squared scores: %s' % scores)\n", 49 | "print('Average cross validation r-squared score: %s' % np.mean(scores))\n", 50 | "regressor.fit(X_train, y_train)\n", 51 | "print('Test set r-squared score %s' % regressor.score(X_test, y_test))" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 2", 67 | "language": "python", 68 | "name": "python2" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 2 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython2", 80 | "version": "2.7.12" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | 
-------------------------------------------------------------------------------- /chapter06/ed2-ch6-s0.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | " 0 1\n", 15 | "0 ham Go until jurong point, crazy.. Available only ...\n", 16 | "1 ham Ok lar... Joking wif u oni...\n", 17 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 18 | "3 ham U dun say so early hor... U c already then say...\n", 19 | "4 ham Nah I don't think he goes to usf, he lives aro...\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import pandas as pd\n", 25 | "df = pd.read_csv('./SMSSpamCollection', delimiter='\\t', header=None)\n", 26 | "print(df.head())" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 5, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Number of spam messages: 747\n", 41 | "Number of ham messages: 4825\n" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "print('Number of spam messages: %s' % df[df[0] == 'spam'][0].count())\n", 47 | "print('Number of ham messages: %s' % df[df[0] == 'ham'][0].count())" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 35, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "Predicted: ham, message: Now thats going to ruin your thesis!\n", 62 | "Predicted: ham, message: Ok...\n", 63 | "Predicted: ham, message: Its a part of checking IQ\n", 64 | "Predicted: spam, message: Ringtone Club: Gr8 new polys direct to your mobile every week !\n", 65 | "Predicted: ham, message: Talk sexy!! 
Make new friends or fall in love in the worlds most discreet text dating service. Just text VIP to 83110 and see who you could meet.\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "import numpy as np\n", 71 | "import pandas as pd\n", 72 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 73 | "from sklearn.linear_model.logistic import LogisticRegression\n", 74 | "from sklearn.model_selection import train_test_split, cross_val_score\n", 75 | "\n", 76 | "X = df[1].values\n", 77 | "y = df[0].values\n", 78 | "X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y)\n", 79 | "vectorizer = TfidfVectorizer()\n", 80 | "X_train = vectorizer.fit_transform(X_train_raw)\n", 81 | "X_test = vectorizer.transform(X_test_raw)\n", 82 | "classifier = LogisticRegression()\n", 83 | "classifier.fit(X_train, y_train)\n", 84 | "predictions = classifier.predict(X_test)\n", 85 | "for i, prediction in enumerate(predictions[:5]):\n", 86 | " print('Predicted: %s, message: %s' % (prediction, X_test_raw[i]))" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 36, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "[[4 1]\n", 101 | " [2 3]]\n" 102 | ] 103 | }, 104 | { 105 | "data": { 106 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQkAAAD3CAYAAAAOh6G5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF/xJREFUeJzt3X20XXV95/H3Jw8QIBGUUIEQjOXBjmXJU6SiU0vVIiAF\npgMjilosmpGOTxXbAaSATB3b6Zo+sLBiEAXUoVCUrhTDSqnVAVyAJDEgAcRgZQikhgAC4SEm937m\nj72vHi737LPvzTl3n3vP57XWXjl779/97d85ued7f0/7t2WbiIh2ZjRdgIjobwkSEVEpQSIiKiVI\nRESlBImIqJQgERGVEiQiphlJMyV9X9INY5zbUdI1ktZJukPSok75JUhETD8fA+5rc+4M4Enb+wN/\nDfxFp8wSJBomaSdJ/yTpKUn/sB35nCbpn7tZtqZI+k1JP2y6HFORpH2AdwBfbJPkRODK8vV1wFsl\nqSrPBImaJL1b0kpJmyVtkHSjpP/YhaxPBl4J7G77lIlmYvtrto/uQnl6SpIl7V+VxvYttl8zWWWa\nZv4G+BNguM35BcDDALa3AU8Bu1dlOKubpZuuJH0COBv4ELAC+DlwDEVUvnU7s38V8ED5HzbwJM0a\npM/i7b+9ix9/YqhW2lV3b1kLvNByaKntpSM7ko4HNtpeJemorhXSdraKDdgV2AycUpFmR4oI/mi5\n/Q2wY3nuKGA9cBawEdgAvL8892mKgLO1vMYZwIXAV1vyXgQYmFXunw78GHgG+DfgtJbjt7b83BuB\nOyn+UtwJvLHl3HeA/wF8t8znn4H5bd7bSPn/pKX8JwHHAQ8ATwDntqQ/ArgN+FmZ9hJgh/LczeV7\nebZ8v+9syf+/A/8OfGXkWPkz+5XXOKzc3xt4DDiq6d+NbmyHvW5Hb92wX60NWNnhd/Wz5Wf5k/Kz\nfK71d6lMswI4snw9C9gEqCrfNDc6OxKYA1xfkeZTwBuAQ4CDKb4o57Wc35Mi2CygCASfk/Ry2xcA\n/xO4xvZc25dXFUTSLsDFwLG251EEgjVjpHsF8M0y7e7AXwHflNRarXw38H7gV4AdgE9WXHpPis9g\nAXA+cBnwHuBw4DeBP5X06jLtEPBHwHyKz+6twB8C2H5zmebg8v1e05L/KyhqVUtaL2z7QYoA8lVJ\nOwNfBq60/Z2K8k4hZsjDtbaOOdnn2N7H9iLgVOBfbb9nVLJlwO+Xr08u01Te5Zkg0dnuwCZXV4FP\nAy6yvdH2YxQ1hPe2nN9ant9qeznFX9GJtrmHgYMk7WR7g+21Y6R5B/Aj21+xvc321cD9wO+2pPmy\n7QdsPw9cSxHg2tkKfMb2VuDvKQLA39p+prz+vRTBEdurbN9eXvcnwBeA36rxni6wvaUsz4vYvgxY\nB9wB7EURlKcFA8O41jZRki6SdEK5ezmwu6R1wEgzulL6JDp7HJjfoa28N/BQy/5D5bFf5DHqZ58D\n5o63ILaflfROir/6l0v6LnCW7fs7lGekTAta9v99HOV53PZIw3nkS/zTlvPPj/y8pAMpai6LgZ0p\nfsdWVb0v4DHbL3RIcxnFX8Eltrd0SDtlGLPV9fokxpVvUdP6Tvn6/JbjLwDj6iBPTaKz24AtFO3w\ndh6lqCqP2Lc8NhHPUny5RuzZetL2Ctu/Q/EX9X6KL0+n8oyU6ZEJlmk8Pk9RrgNsvww4F6gcYoPq\nP5OS5lL081wOXFg2p6aNXtcktleCRAe2n6Joh39O0kmSdpY0W9J5kp4oq20PAedJ2kPS/DL9Vyd4\nyTXAmyXtK2lX4JyRE5JeKenEsm9iC0WzZazG6nLgwHLYdlZZ+3gt8JIZeD0wD3ga2Czp14AzR53/\nKfCrNfJ5uaSNku4B/pai0+4DFH0tl3azwE0yMIRrbU1JkKjB9v+maL+dR9Gz/jDFl3cJxZdvPsWI\nw93AD4DVwJ9N8Fo3AdeUea3ixV/sGWU5HqXo8f8tXvolxPbjw
PEUIyqPU4xMHG9700TKNE6fpOgU\nfYailnPNqPMXAldK+pmk/1KRz7MUw8zzyn9H3ucngMMkndbNQjep32sS6tCxGWOQdCRwoe23l/vn\nANj+bKMFm2bK+wpusH1Qw0XpmYMP3sErls+vlXavfTassr24x0V6idQkJuYXs9ZK63lxp2BEbcM1\nt6ZkdCOiQW64v6GOBImJeQRY2LK/D5MzchDTjA1b+ztGJEhM0J3AAeUsw0coZre9u9kixdQkhjqO\nEDcrfRITUE6M+jDFPPj7gGvbzHyMCZJ0NcUclddIWi/pjKbL1AsGhl1va0pqEhNUTq9e3nQ5pivb\n72q6DJOl32sSCRIRDSomUyVIRESFYSdIREQbqUlERCUjtnpm08WolNGN7SBpSedUsT2m+2c8UpOo\nszUlQWL7TOtf4D4xzT9jMeQZtbampLkR0aBiZar+/lvdV0Fi/itmetHC2U0Xo7Z9F8xi8cFz+nxS\n7Ys9cPfOnRP1kTnszMv0iin1Gb/As/zcW2q3D9JxOQ6LFs7meysWdk4YE/b2vauWsoxuuMPfqp3W\nVqNNiTr6KkhEDKLh1CQioh0jfu7+/hr2d+kiprl0XEZER0OZlh0R7RgxlJpERFQZzuhGRLRTTMtO\nkIiINqbCDV4JEhENsun7yVT9XbqIaU8M19w65iTNkfQ9SXdJWivp02OkOV3SY5LWlNsHOuWbmkRE\ng0xXaxJbgLfY3ixpNnCrpBtt3z4q3TW2P1w30wSJiIZ1q+PSxTM7N5e7s8ttu2+OS3MjokFGDLve\nVoekmZLWABuBm2zfMUay/yzpbknXSep4R2WCRETDhphRawPmS1rZsr1kQR7bQ7YPoXiq3BGSRj9s\n+Z+ARbZfB9wEXNmpfGluRDRonEOgm+o+Vdz2zyR9GzgGuKfl+OMtyb4I/K9OeaUmEdGg4gleM2pt\nnUjaQ9Ju5eudgN8B7h+VZq+W3RMonkBXKTWJiIZ1cWWqvYArJc2kqABca/sGSRcBK20vAz4q6QRg\nG/AEcHqnTBMkIhpkq2v3bti+Gzh0jOPnt7w+BzhnPPkmSEQ0rN9nXCZIRDSoWHQm60lERFtZCDci\nKhhyF2hEtDcy47KfJUhENCwL4UZEW8V6EqlJRESFNDcioq2iTyLNjYiokAcGR0RbRmwbzhBoRFTI\njMuIaCujGxHRUTouI6KtzLiMiI7SJxERbRXL1yVIREQ7zhBoRFTIojMR0VGaGxHR1lTok+jpAK2k\nYyT9UNI6SWf38loRU1U3H/PXCz2rSZRr/3+O4gEh64E7JS2zfW+vrhkx1Qz6PIkjgHW2fwwg6e+B\nE4EEiYgRhm0DPONyAfBwy/564Dd6eL2IKWcq9Ek03nFZPhl5CcC+CxovTsSk6/cg0ct6ziPAwpb9\nfcpjL2J7qe3FthfvsXt/TyqJ6LaRPol+7rjsZZC4EzhA0qsl7QCcCizr4fUipiRbtbam9Kx+b3ub\npA8DK4CZwJdsr+3V9SKmqoGecWl7ObC8l9eImMrs7vVJSJoD3AzsSPHdvs72BaPS7AhcBRwOPA68\n0/ZPqvJNT2FEo8TQcNda/VuAt9jeLGk2cKukG23f3pLmDOBJ2/tLOhX4C+CdVZn29wBtxADoVp+E\nC5vL3dnl5lHJTgSuLF9fB7xVUmXmCRIRDRqZJ1FzdGO+pJUt25LR+UmaKWkNsBG4yfYdo5L8Yv6S\n7W3AU8DuVWVMcyOiSS76JWraZHtxZXb2EHCIpN2A6yUdZPue7SliahIRDRtGtbbxsP0z4NvAMaNO\n/WL+kqRZwK4UHZhtJUhENMh0r09C0h5lDQJJO1HcXHn/qGTLgN8vX58M/KtdXZdJcyOiUV2dTbkX\ncGV5B/YM4FrbN0i6CFhpexlwOfAVSeuAJygmOVZKkIho2PBwd4KE7buBQ8c4fn7L6xeAU8aTb4JE\nRINsGp1yXUeCRETD+v0u0
ASJiIaNYwi0EQkSEQ1LcyMi2jLN3gZeR4JERMP6vLWRIBHRKIO7NATa\nKwkSEQ2bss0NSS+r+kHbT3e/OBGDZyqPbqylaC61hrmRfQP79rBcEQNh5N6NftY2SNhe2O5cRHSJ\ngT4PErXuApV0qqRzy9f7SDq8t8WKGBx2va0pHYOEpEuA3wbeWx56Dri0l4WKGCiuuTWkzujGG20f\nJun7ALafKJ+jERHbTdNiCHSrpBmUsUzS7sBwT0sVMSimwF2gdfokPgd8HdhD0qeBWymW4Y6Ibpjq\nzQ3bV0laBbytPHTK9i6sGRGt+rsmUXfG5UxgK0U8y7qYEd3U55Op6oxufAq4Gtib4sng/0fSOb0u\nWMTAmOrNDeB9wKG2nwOQ9Bng+8Bne1mwiIEwTW7w2jAq3azyWER0Q583N6pu8PpriuI/AayVtKLc\nPxq4c3KKFzEA+nwItKomMTKCsRb4Zsvx28dIGxETpKlak7B9+WQWJGIgNdwpWUfHPglJ+wGfAV4L\nzBk5bvvAHpYrYkCo75sbdeY8XAF8mWLGx7HAtcA1PSxTxGDp8yHQOkFiZ9srAGw/aPs8imAREd0w\nXHNrSJ0h0C3lDV4PSvoQxaPL5/W2WBEDYposOvNHwC7AR4E3AR8E/qCXhYoYJHK9rWM+0kJJ35Z0\nr6S1kj42RpqjJD0laU25nT9WXq3q3OB1R/nyGX658ExEdEv3+hu2AWfZXi1pHrBK0k227x2V7hbb\nx9fNtGoy1fVUFN/279W9SF33ProHh194ZrezjRazlz/WdBGmvaGP3trIdW1voJwNbfsZSfcBC4DR\nQWJcqmoSl2xPxhFRzzgmU82XtLJlf6ntpWPmKS0CDgXuGOP0kZLuAh4FPml7bdVFqyZTfatTiSOi\nC+p3XG6yvbhTIklzKRaK+vgYz8dZDbzK9mZJxwH/CBxQlV/WhohokunqEKik2RQB4mu2v/GSy9lP\n295cvl4OzJY0vyrPBImIhnVxdEPA5cB9tv+qTZo9y3RIOoIiBjxelW/tZ4FK2tH2lrrpI6Km7o1u\nvIliBPIHktaUx86lfNqe7UuBk4EzJW0DngdOtauf6lHn3o0jKKLTrsC+kg4GPmD7IxN9JxHRoktB\nwvatdFgw0/YljHNQok5z42LgeMoqie27KB7WExHbqW5To8nbyes0N2bYfqhsxowY6lF5IgZPn0/L\nrhMkHi6bHJY0E/gI8EBvixUxQKb6ehLAmRRNjn2BnwL/Uh6LiC5Qnz8Pr869GxuBUyehLBGDp+H+\nhjrqjG5cxhgVIttLelKiiEEz1YMERfNixBzgPwEP96Y4EQNoqgcJ2y9aqk7SVygeGhwRXdDvzY2J\nTMt+NfDKbhckIvpTnT6JJ/llhWgGxcN6zu5loSIGSp/XJCqDRHkjyMEU61oCDHea5x0R4+D+HwKt\nbG6UAWG57aFyS4CI6LZpsKT+GkmH9rwkEQNITOF7NyTNsr2NYgmsOyU9CDxL8b5s+7BJKmPE9Nbn\n9fOqPonvAYcBJ0xSWSIGzxSfcSkonto1SWWJGExTOEjsIekT7U62Wx4rIsan30c3qoLETGAuHVa6\niYjtNIVrEhtsXzRpJYkYRA0Pb9bRsU8iInprKndcvnXSShExyKZqkLD9xGQWJGJQTeWaRERMhgSJ\niGin6SnXdSRIRDQtQSIiqqQmERHVEiQiolKfB4mJrHEZEd3SxWeBSloo6duS7pW0VtLHxkgjSRdL\nWifpbkkdl3xITSKiad2rSWwDzrK9WtI8YJWkm2zf25LmWOCAcvsN4PPlv22lJhHRMA3X2zqxvcH2\n6vL1M8B9wIJRyU4ErnLhdmA3SXtV5ZuaRETDxjG6MV/Sypb9pbaXjpmntIhiVbk7Rp1awIsfrrW+\nPLah3UUTJCKaNL67QDfZXtwpkaS5wNeBj9t+euKFKyRIRDSti6MbkmZTBIiv2f7GGEkeARa
27O/D\nLx+ZMab0SUQ0qJurZZfPybkcuK9i5bhlwPvKUY43AE/ZbtvUgB7WJCR9CTge2Gj7oF5dJ2LK615N\n4k3Ae4EfSFpTHjsX2BfA9qXAcuA4YB3wHPD+Tpn2srlxBXAJcFUPrxEx5alLz7yyfSsdFosqH7D1\n38aTb8+ChO2byx7WiGhnCjzmLx2XEU3r82nZjQcJSUuAJQCz57684dJETL5+vwu08dEN20ttL7a9\neNacXZouTsTk6/MHBjdek4gYaFNgZaqe1SQkXQ3cBrxG0npJZ/TqWhFT2qDWJGy/q1d5R0wXI5Op\n+lmaGxEN03B/R4kEiYgmTfHH/EXEJMhkqoiolppERFRJx2VEtGegSzd49UqCRETD0icREW1lnkRE\nVLPT3IiIaqlJRES1BImIqJKaRES0ZyD3bkRElQyBRkS1jG5ERJX0SUREe7lVPCKqFDMu+ztKJEhE\nNC0dlxFRJTWJiGjP7vt5Eo0/nCdi0Mn1tlp5SV+StFHSPW3OHyXpKUlryu38TnmmJhHRtO42N64A\nLgGuqkhzi+3j62aYIBHRpC4/Vdz2zZIWdS/HNDcimjeypkSnrXuOlHSXpBsl/XqnxKlJRDSt/vd/\nvqSVLftLbS8d59VWA6+yvVnSccA/AgdU/UCCRETDxjEEusn24u25lu2nW14vl/R3kubb3tTuZxIk\nIppkYGjyhkAl7Qn81LYlHUHR5fB41c8kSEQ0SLirk6kkXQ0cRdE0WQ9cAMwGsH0pcDJwpqRtwPPA\nqXZ1ARIkIprWxSBh+10dzl9CMURaW4JERNMyLTsi2jK5wSsiquUGr4ioliAREW3ZMNzf7Y0EiYim\n9XeMSJCIaFr6JCKiWoJERLSVJ3iNz/Ob1m9a84WzHmq6HOMwH2h7Y0xf+kLTBRi3qfcZw6vqJ+36\nbeBd11dBwvYeTZdhPCSt3N678qLaQHzGCRIR0ZaBof4e3kiQiGiUwQkS09l4VwWK8Zv+n3GfNzey\nxuV26LR0mKShctnyeyT9g6SdJ3qtcin0G8rXJ0g6uyLtbpL+cALXuFDSJ+seH5XmCkknj+Nai9ot\n+95qAsuzTS0joxt1toYkSPTW87YPsX0Q8HPgQ60nVRj3/4HtZbb/vCLJbsC4g0Q0ZPIXwh2XBInJ\ncwuwf/kX9IeSrgLuARZKOlrSbZJWlzWOuQCSjpF0v6TVwO+NZCTpdEmXlK9fKen6cvXjuyS9Efhz\nYL+yFvOXZbo/lnSnpLslfbolr09JekDSrcBrOr0JSR8s87lL0tdH1Y7eJmllmd/xZfqZkv6y5dr/\ndXs/yGknQSIkzQKOBX5QHjoA+Dvbvw48C5wHvM32YcBK4BOS5gCXAb8LHA7s2Sb7i4H/a/tg4DBg\nLXA28GBZi/ljSUeX1zwCOAQ4XNKbJR0OnFoeOw54fY238w3bry+vdx9wRsu5ReU13gFcWr6HM4Cn\nbL++zP+Dkl5d4zqDwYahoXpbQ9Jx2Vs7SVpTvr4FuBzYG3jI9u3l8TcArwW+KwlgB+A24NeAf7P9\nIwBJXwWWjHGNtwDvA7A9BDwl6eWj0hxdbt8v9+dSBI15wPW2nyuvsazGezpI0p9RNGnmAitazl1r\nexj4kaQfl+/haOB1Lf0Vu5bXfqDGtQZDn3dcJkj01vO2D2k9UAaCZ1sPATeNXptQ0ot+bjsJ+Kzt\nF823lPTxCeR1BXCS7bsknU6x6OqI0b/tLq/9EdutwYRuP2VqSuvzIJHmRvNuB94kaX8ASbtIOhC4\nH1gkab8yXbsFTr8FnFn+7ExJuwLPUNQSRqwA/qClr2OBpF8BbgZOkrSTpHkUTZtO5gEbJM0GTht1\n7hRJM8oy/yrww/LaZ5bpkXSgpF1qXGdA1BzZaHB0IzWJhtl+rPyLfLWkHcvD59l+QNIS4JuSnqNo\nrswbI4uPAUslnQEMAWfavk3Sd8shxhvLfon/ANxW1mQ
2A++xvVrSNcBdwEbgzhpF/lPgDuCx8t/W\nMv0/4HvAy4AP2X5B0hcp+ipWq7j4Y8BJ9T6dAWBwn0+mUocl9yOih3adtYePfFm9mLniyS+uauI+\nltQkIprW53+oEyQimjQyBNrHEiQiGuYshBsR7WXRmYioMgWWr8s8iYimebjeVoOkL0na2O4O2/Km\nwoslrSvvpTmsU54JEhENMuBh19pqugI4puL8sRTT4g+gmOb/+U4ZJkhENMnuak3C9s3AExVJTgSu\ncuF2YDdJe1XlmT6JiIZ5codAFwAPt+yvL49taPcDCRIRDXqGJ1f8i6+bXzP5HEkrW/aXTsbKXQkS\nEQ2yXdV/0AuPAAtb9vcpj7WVPomIwbIMeF85yvEGigWB2jY1IDWJiGlF0tUUa3zMl7QeuACYDWD7\nUmA5xSpk64DngPd3zDN3gUZElTQ3IqJSgkREVEqQiIhKCRIRUSlBIiIqJUhERKUEiYiolCAREZX+\nP5J3mS9q2rd8AAAAAElFTkSuQmCC\n", 107 | "text/plain": [ 108 | "" 109 | ] 110 | }, 111 | "metadata": {}, 112 | "output_type": "display_data" 113 | } 114 | ], 115 | "source": [ 116 | "from sklearn.metrics import confusion_matrix\n", 117 | "import matplotlib.pyplot as plt\n", 118 | "\n", 119 | "y_test = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]\n", 120 | "y_pred = [0, 1, 0, 0, 0, 0, 0, 1, 1, 1]\n", 121 | "confusion_matrix = confusion_matrix(y_test, y_pred)\n", 122 | "print(confusion_matrix)\n", 123 | "plt.matshow(confusion_matrix)\n", 124 | "plt.title('Confusion matrix')\n", 125 | "plt.colorbar()\n", 126 | "plt.ylabel('True label')\n", 127 | "plt.xlabel('Predicted label')\n", 128 | "plt.show()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 2", 144 | "language": "python", 145 | "name": "python2" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 2 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython2", 157 | "version": "2.7.12" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 2 162 | } 163 | -------------------------------------------------------------------------------- 
/chapter06/ed2-ch6-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 19, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Fitting 3 folds for each of 576 candidates, totalling 1728 fits\n" 15 | ] 16 | }, 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 4.5s\n", 22 | "[Parallel(n_jobs=-1)]: Done 192 tasks | elapsed: 23.5s\n", 23 | "[Parallel(n_jobs=-1)]: Done 442 tasks | elapsed: 57.2s\n", 24 | "[Parallel(n_jobs=-1)]: Done 792 tasks | elapsed: 1.8min\n", 25 | "[Parallel(n_jobs=-1)]: Done 1242 tasks | elapsed: 2.9min\n", 26 | "[Parallel(n_jobs=-1)]: Done 1728 out of 1728 | elapsed: 6.0min finished\n" 27 | ] 28 | }, 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "Best score: 0.983\n", 34 | "Best parameters set:\n", 35 | "\tclf__C: 10\n", 36 | "\tclf__penalty: 'l2'\n", 37 | "\tvect__max_df: 0.25\n", 38 | "\tvect__max_features: 5000\n", 39 | "\tvect__ngram_range: (1, 2)\n", 40 | "\tvect__stop_words: None\n", 41 | "\tvect__use_idf: True\n", 42 | "Accuracy: 0.983488872936\n", 43 | "Precision: 0.99375\n", 44 | "Recall: 0.878453038674\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "import pandas as pd\n", 50 | "from sklearn.preprocessing import LabelEncoder\n", 51 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 52 | "from sklearn.linear_model.logistic import LogisticRegression\n", 53 | "from sklearn.grid_search import GridSearchCV\n", 54 | "from sklearn.pipeline import Pipeline\n", 55 | "from sklearn.model_selection import train_test_split\n", 56 | "from sklearn.metrics import precision_score, recall_score, accuracy_score\n", 57 | "\n", 58 | "\n", 59 | "pipeline = Pipeline([\n", 60 | " ('vect', 
TfidfVectorizer(stop_words='english')),\n", 61 | " ('clf', LogisticRegression())\n", 62 | "])\n", 63 | "parameters = {\n", 64 | " 'vect__max_df': (0.25, 0.5, 0.75),\n", 65 | " 'vect__stop_words': ('english', None),\n", 66 | " 'vect__max_features': (2500, 5000, None),\n", 67 | " 'vect__ngram_range': ((1, 1), (1, 2)),\n", 68 | " 'vect__use_idf': (True, False),\n", 69 | " 'clf__penalty': ('l1', 'l2'),\n", 70 | " 'clf__C': (0.01, 0.1, 1, 10),\n", 71 | "}\n", 72 | "\n", 73 | "if __name__ == \"__main__\":\n", 74 | " df = pd.read_csv('./SMSSpamCollection', delimiter='\\t', header=None)\n", 75 | " X = df[1].values\n", 76 | " y = df[0].values\n", 77 | " label_encoder = LabelEncoder()\n", 78 | " y = label_encoder.fit_transform(y)\n", 79 | " X_train, X_test, y_train, y_test = train_test_split(X, y)\n", 80 | " \n", 81 | " grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, scoring='accuracy', cv=3)\n", 82 | " grid_search.fit(X_train, y_train)\n", 83 | " \n", 84 | " print('Best score: %0.3f' % grid_search.best_score_)\n", 85 | " print('Best parameters set:')\n", 86 | " best_parameters = grid_search.best_estimator_.get_params()\n", 87 | " for param_name in sorted(parameters.keys()):\n", 88 | " print('\\t%s: %r' % (param_name, best_parameters[param_name]))\n", 89 | " \n", 90 | " predictions = grid_search.predict(X_test)\n", 91 | " print('Accuracy: %s' % accuracy_score(y_test, predictions))\n", 92 | " print('Precision: %s' % precision_score(y_test, predictions))\n", 93 | " print('Recall: %s' % recall_score(y_test, predictions))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 2", 109 | "language": "python", 110 | "name": "python2" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 2 116 | }, 117 | 
"file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython2", 122 | "version": "2.7.12" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 2 127 | } 128 | -------------------------------------------------------------------------------- /chapter06/ed2-ch6-s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "PhraseId 156060\n", 15 | "SentenceId 156060\n", 16 | "Phrase 156060\n", 17 | "Sentiment 156060\n", 18 | "dtype: int64\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "df = pd.read_csv('./train.tsv', header=0, delimiter='\\t')\n", 25 | "print(df.count())" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 5, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | " PhraseId SentenceId Phrase \\\n", 40 | "0 1 1 A series of escapades demonstrating the adage ... \n", 41 | "1 2 1 A series of escapades demonstrating the adage ... 
\n", 42 | "2 3 1 A series \n", 43 | "3 4 1 A \n", 44 | "4 5 1 series \n", 45 | "\n", 46 | " Sentiment \n", 47 | "0 1 \n", 48 | "1 2 \n", 49 | "2 2 \n", 50 | "3 2 \n", 51 | "4 2 \n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(df.head())" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 6, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "0 A series of escapades demonstrating the adage ...\n", 71 | "1 A series of escapades demonstrating the adage ...\n", 72 | "2 A series\n", 73 | "3 A\n", 74 | "4 series\n", 75 | "5 of escapades demonstrating the adage that what...\n", 76 | "6 of\n", 77 | "7 escapades demonstrating the adage that what is...\n", 78 | "8 escapades\n", 79 | "9 demonstrating the adage that what is good for ...\n", 80 | "Name: Phrase, dtype: object\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "print(df['Phrase'].head(10))" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 7, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "count 156060.000000\n", 100 | "mean 2.063578\n", 101 | "std 0.893832\n", 102 | "min 0.000000\n", 103 | "25% 2.000000\n", 104 | "50% 2.000000\n", 105 | "75% 3.000000\n", 106 | "max 4.000000\n", 107 | "Name: Sentiment, dtype: float64\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "print(df['Sentiment'].describe())" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 8, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "2 79582\n", 127 | "3 32927\n", 128 | "1 27273\n", 129 | "4 9206\n", 130 | "0 7072\n", 131 | "Name: Sentiment, dtype: int64\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "print(df['Sentiment'].value_counts())" 137 | ] 138 | }, 139 | { 
140 | "cell_type": "code", 141 | "execution_count": 9, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "2 0.509945\n", 151 | "3 0.210989\n", 152 | "1 0.174760\n", 153 | "4 0.058990\n", 154 | "0 0.045316\n", 155 | "Name: Sentiment, dtype: float64\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "print(df['Sentiment'].value_counts()/df['Sentiment'].count())" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 19, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "Fitting 3 folds for each of 24 candidates, totalling 72 fits\n" 175 | ] 176 | }, 177 | { 178 | "name": "stderr", 179 | "output_type": "stream", 180 | "text": [ 181 | "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 1.5min\n", 182 | "[Parallel(n_jobs=-1)]: Done 72 out of 72 | elapsed: 3.3min finished\n" 183 | ] 184 | }, 185 | { 186 | "name": "stdout", 187 | "output_type": "stream", 188 | "text": [ 189 | "Best score: 0.617\n", 190 | "Best parameters set:\n", 191 | "tclf__C: 10\n", 192 | "tvect__max_df: 0.25\n", 193 | "tvect__ngram_range: (1, 2)\n", 194 | "tvect__use_idf: False\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 200 | "from sklearn.linear_model.logistic import LogisticRegression\n", 201 | "from sklearn.model_selection import train_test_split\n", 202 | "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix\n", 203 | "from sklearn.pipeline import Pipeline\n", 204 | "from sklearn.model_selection import GridSearchCV\n", 205 | "\n", 206 | "\n", 207 | "df = pd.read_csv('./train.tsv', header=0, delimiter='\\t')\n", 208 | "X, y = df['Phrase'], df['Sentiment'].as_matrix()\n", 209 | "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5)\n", 210 | 
"grid_search = main(X_train, y_train)\n", 211 | "pipeline = Pipeline([\n", 212 | " ('vect', TfidfVectorizer(stop_words='english')),\n", 213 | " ('clf', LogisticRegression())\n", 214 | "])\n", 215 | "parameters = {\n", 216 | " 'vect__max_df': (0.25, 0.5),\n", 217 | " 'vect__ngram_range': ((1, 1), (1, 2)),\n", 218 | " 'vect__use_idf': (True, False),\n", 219 | " 'clf__C': (0.1, 1, 10),\n", 220 | "}\n", 221 | "grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, scoring='accuracy')\n", 222 | "grid_search.fit(X_train, y_train)\n", 223 | "print('Best score: %0.3f' % grid_search.best_score_)\n", 224 | "print('Best parameters set:')\n", 225 | "best_parameters = grid_search.best_estimator_.get_params()\n", 226 | "for param_name in sorted(parameters.keys()):\n", 227 | " print('t%s: %r' % (param_name, best_parameters[param_name]))" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 20, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "Accuracy: 0.636255286428\n", 242 | "Confusion Matrix:\n", 243 | "[[ 1124 1725 628 65 10]\n", 244 | " [ 923 6049 6132 583 34]\n", 245 | " [ 197 3131 32658 3640 137]\n", 246 | " [ 15 398 6530 8234 1301]\n", 247 | " [ 3 43 530 2358 1582]]\n", 248 | "Classification Report:\n", 249 | " precision recall f1-score support\n", 250 | "\n", 251 | " 0 0.50 0.32 0.39 3552\n", 252 | " 1 0.53 0.44 0.48 13721\n", 253 | " 2 0.70 0.82 0.76 39763\n", 254 | " 3 0.55 0.50 0.53 16478\n", 255 | " 4 0.52 0.35 0.42 4516\n", 256 | "\n", 257 | "avg / total 0.62 0.64 0.62 78030\n", 258 | "\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "predictions = grid_search.predict(X_test)\n", 264 | "\n", 265 | "print('Accuracy: %s' % accuracy_score(y_test, predictions))\n", 266 | "print('Confusion Matrix:')\n", 267 | "print(confusion_matrix(y_test, predictions))\n", 268 | "print('Classification Report:')\n", 269 | 
"print(classification_report(y_test, predictions))" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "collapsed": true 277 | }, 278 | "outputs": [], 279 | "source": [] 280 | } 281 | ], 282 | "metadata": { 283 | "kernelspec": { 284 | "display_name": "Python 2", 285 | "language": "python", 286 | "name": "python2" 287 | }, 288 | "language_info": { 289 | "codemirror_mode": { 290 | "name": "ipython", 291 | "version": 2 292 | }, 293 | "file_extension": ".py", 294 | "mimetype": "text/x-python", 295 | "name": "python", 296 | "nbconvert_exporter": "python", 297 | "pygments_lexer": "ipython2", 298 | "version": "2.7.12" 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 2 303 | } 304 | -------------------------------------------------------------------------------- /chapter06/ed2-ch6-s4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "0.0\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import numpy as np\n", 20 | "from sklearn.metrics import hamming_loss, jaccard_similarity_score\n", 21 | "\n", 22 | "print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[0.0, 1.0], [1.0, 1.0]])))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 4, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "0.25\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[1.0, 1.0], [1.0, 1.0]])))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 5, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": 
"stream", 54 | "text": [ 55 | "0.5\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "print(hamming_loss(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[1.0, 1.0], [0.0, 1.0]])))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "1.0\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[0.0, 1.0], [1.0, 1.0]])))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "0.75\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[1.0, 1.0], [1.0, 1.0]])))" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 8, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "0.5\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "print(jaccard_similarity_score(np.array([[0.0, 1.0], [1.0, 1.0]]), np.array([[1.0, 1.0], [0.0, 1.0]])))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 2", 133 | "language": "python", 134 | "name": "python2" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 2 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython2", 146 | "version": "2.7.12" 147 | } 148 | }, 149 | 
"nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /chapter08/ad.DOCUMENTATION: -------------------------------------------------------------------------------- 1 | 1. Title of Database: Internet advertisements 2 | 3 | 2. Sources: 4 | (a) Creator & donor: Nicholas Kushmerick 5 | (c) Generated: April-July 1998 6 | 7 | 3. Past Usage: 8 | N. Kushmerick (1999). "Learning to remove Internet advertisements", 9 | 3rd Int Conf Autonomous Agents. Available at 10 | www.cs.ucd.ie/staff/nick/research/download/kushmerick-aa99.ps.gz. 11 | Accuracy >97% using C4.5rules in predicting whether an image is an 12 | advertisement. 13 | 14 | 4. This dataset represents a set of possible advertisements on 15 | Internet pages. The features encode the geometry of the image (if 16 | available) as well as phrases occurring in the URL, the image's URL and 17 | alt text, the anchor text, and words occurring near the anchor text. 18 | The task is to predict whether an image is an advertisement ("ad") or 19 | not ("nonad"). 20 | 21 | 5. Number of Instances: 3279 (2821 nonads, 458 ads) 22 | 23 | 6. Number of Attributes: 1558 (3 continuous; others binary; this is the 24 | "STANDARD encoding" mentioned in the [Kushmerick, 99].) 25 | One or more of the three continuous features are missing in 28% 26 | of the instances; missing values should be interpreted as "unknown". 27 | 28 | 7. See [Kushmerick, 99] for details of the attributes; in 29 | ".names" format: 30 | 31 | height: continuous. | possibly missing 32 | width: continuous. | possibly missing 33 | aratio: continuous. | possibly missing 34 | local: 0,1. 35 | | 457 features from url terms, each of the form "url*term1+term2..."; 36 | | for example: 37 | url*images+buttons: 0,1. 38 | ... 39 | | 495 features from origurl terms, in same form; for example: 40 | origurl*labyrinth: 0,1. 41 | ... 
42 | | 472 features from ancurl terms, in same form; for example: 43 | ancurl*search+direct: 0,1. 44 | ... 45 | | 111 features from alt terms, in same form; for example: 46 | alt*your: 0,1. 47 | ... 48 | | 19 features from caption terms 49 | caption*and: 0,1. 50 | ... 51 | 52 | 8. Missing Attribute Values: how many per each attribute? 53 | 28% of instances are missing some of the continuous attributes. 54 | 55 | 9. Class Distribution: number of instances per class 56 | 2821 nonads, 458 ads. 57 | -------------------------------------------------------------------------------- /chapter08/ad.names: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Mastering-Machine-Learning-with-scikit-learn-Second-Edition/db39c2f407fbf515f67b368e0af5a7f042c0f0f7/chapter08/ad.names -------------------------------------------------------------------------------- /chapter08/ed2-ch8-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "from sklearn.tree import DecisionTreeClassifier\n", 13 | "from sklearn.model_selection import train_test_split\n", 14 | "from sklearn.metrics import classification_report\n", 15 | "from sklearn.pipeline import Pipeline\n", 16 | "from sklearn.grid_search import GridSearchCV\n", 17 | "\n", 18 | "df = pd.read_csv('./ad.data', header=None)\n", 19 | "\n", 20 | "explanatory_variable_columns = set(df.columns.values)\n", 21 | "explanatory_variable_columns.remove(len(df.columns.values)-1)\n", 22 | "response_variable_column = df[len(df.columns.values)-1] # The last column describes the classes\n", 23 | "\n", 24 | "y = [1 if e == 'ad.' 
else 0 for e in response_variable_column]\n", 25 | "X = df[list(explanatory_variable_columns)].copy()\n", 26 | "X.replace(to_replace=' *?', value=-1, regex=True, inplace=True)\n", 27 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n", 28 | "\n", 29 | "pipeline = Pipeline([\n", 30 | " ('clf', DecisionTreeClassifier(criterion='entropy'))\n", 31 | "])\n", 32 | "parameters = {\n", 33 | " 'clf__max_depth': (150, 155, 160),\n", 34 | " 'clf__min_samples_split': (2, 3),\n", 35 | " 'clf__min_samples_leaf': (1, 2, 3)\n", 36 | "}\n", 37 | "\n", 38 | "grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, scoring='f1')\n", 39 | "grid_search.fit(X_train, y_train)\n", 40 | "\n", 41 | "best_parameters = grid_search.best_estimator_.get_params()\n", 42 | "print('Best score: %0.3f' % grid_search.best_score_)\n", 43 | "print('Best parameters set:')\n", 44 | "for param_name in sorted(parameters.keys()):\n", 45 | " print('t%s: %r' % (param_name, best_parameters[param_name]))\n", 46 | "\n", 47 | "predictions = grid_search.predict(X_test)\n", 48 | "print(classification_report(y_test, predictions))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 12, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Fitting 3 folds for each of 18 candidates, totalling 54 fits\n" 63 | ] 64 | }, 65 | { 66 | "name": "stderr", 67 | "output_type": "stream", 68 | "text": [ 69 | "[Parallel(n_jobs=-1)]: Done 42 tasks | elapsed: 5.4s\n", 70 | "[Parallel(n_jobs=-1)]: Done 54 out of 54 | elapsed: 6.6s finished\n" 71 | ] 72 | }, 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "Best score: 0.887\n", 78 | "Best parameters set:\n", 79 | "tclf__max_depth: 150\n", 80 | "tclf__min_samples_leaf: 1\n", 81 | "tclf__min_samples_split: 3\n", 82 | " precision recall f1-score support\n", 83 | "\n", 84 | " 0 0.98 0.99 0.98 717\n", 85 | " 1 0.92 0.83 
0.87 103\n", 86 | "\n", 87 | "avg / total 0.97 0.97 0.97 820\n", 88 | "\n" 89 | ] 90 | } 91 | ], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [] 102 | } 103 | ], 104 | "metadata": { 105 | "kernelspec": { 106 | "display_name": "Python 2", 107 | "language": "python", 108 | "name": "python2" 109 | }, 110 | "language_info": { 111 | "codemirror_mode": { 112 | "name": "ipython", 113 | "version": 2 114 | }, 115 | "file_extension": ".py", 116 | "mimetype": "text/x-python", 117 | "name": "python", 118 | "nbconvert_exporter": "python", 119 | "pygments_lexer": "ipython2", 120 | "version": "2.7.12" 121 | } 122 | }, 123 | "nbformat": 4, 124 | "nbformat_minor": 2 125 | } 126 | -------------------------------------------------------------------------------- /chapter09/ed2-ch9-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Original sample: [60 84 64 59 58 30 1 97 58 34]\n", 15 | "Sample mean: 54.5\n", 16 | "Number of bootstrap re-samples: 100\n", 17 | "Example re-sample: [30 59 97 58 60 84 58 34 64 58]\n", 18 | "Mean of re-samples' means: 54.183\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "import numpy as np\n", 24 | "\n", 25 | "# Sample 10 integers\n", 26 | "sample = np.random.randint(low=1, high=100, size=10)\n", 27 | "print('Original sample: %s' % sample)\n", 28 | "print('Sample mean: %s' % sample.mean())\n", 29 | "\n", 30 | "# Bootstrap re-sample 100 times by re-sampling with replacement from the original sample\n", 31 | "resamples = [np.random.choice(sample, size=sample.shape) for i in range(100)]\n", 32 | "print('Number of bootstrap re-samples: %s' % len(resamples))\n", 33 | 
"print('Example re-sample: %s' % resamples[0])\n", 34 | "\n", 35 | "resample_means = np.array([resample.mean() for resample in resamples])\n", 36 | "print('Mean of re-samples\\' means: %s' % resample_means.mean())" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [] 47 | } 48 | ], 49 | "metadata": { 50 | "kernelspec": { 51 | "display_name": "Python 2", 52 | "language": "python", 53 | "name": "python2" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 2 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython2", 65 | "version": "2.7.12" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 2 70 | } 71 | -------------------------------------------------------------------------------- /chapter09/ed2-ch9-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 97, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | " precision recall f1-score support\n", 15 | "\n", 16 | " 0 0.73 0.66 0.69 127\n", 17 | " 1 0.68 0.75 0.71 123\n", 18 | "\n", 19 | "avg / total 0.71 0.70 0.70 250\n", 20 | "\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "from sklearn.tree import DecisionTreeClassifier\n", 26 | "from sklearn.ensemble import RandomForestClassifier\n", 27 | "from sklearn.datasets import make_classification\n", 28 | "from sklearn.model_selection import train_test_split\n", 29 | "from sklearn.metrics import classification_report\n", 30 | "\n", 31 | "X, y = make_classification(\n", 32 | " n_samples=1000, n_features=100, n_informative=20, n_clusters_per_class=2, random_state=11)\n", 33 | "X_train, X_test, y_train, 
y_test = train_test_split(X, y, random_state=11)\n", 34 | "\n", 35 | "clf = DecisionTreeClassifier(random_state=11)\n", 36 | "clf.fit(X_train, y_train)\n", 37 | "predictions = clf.predict(X_test)\n", 38 | "print(classification_report(y_test, predictions))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 98, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | " precision recall f1-score support\n", 53 | "\n", 54 | " 0 0.74 0.83 0.79 127\n", 55 | " 1 0.80 0.70 0.75 123\n", 56 | "\n", 57 | "avg / total 0.77 0.77 0.77 250\n", 58 | "\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "clf = RandomForestClassifier(n_estimators=10, random_state=11)\n", 64 | "clf.fit(X_train, y_train)\n", 65 | "predictions = clf.predict(X_test)\n", 66 | "print(classification_report(y_test, predictions))" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python 2", 82 | "language": "python", 83 | "name": "python2" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 2 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython2", 95 | "version": "2.7.12" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /chapter09/ed2-ch9-s4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 61, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from sklearn.model_selection 
# chapter09/ed2-ch9-s4.ipynb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.base import clone, BaseEstimator, TransformerMixin, ClassifierMixin


class StackingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Two-level stacking ensemble.

    The base classifiers are fitted on the training data, their predicted
    class probabilities are concatenated column-wise into "meta-features",
    and a meta-classifier is fitted on those meta-features.

    Parameters
    ----------
    classifiers : list of estimators
        Base classifiers. Each must implement ``fit`` and ``predict_proba``.
        They are cloned in ``fit``; the objects passed in are never refitted.
    meta_classifier : estimator, optional
        Classifier trained on the meta-features. Defaults to a
        ``DecisionTreeClassifier()``, which is what the class always used
        before this parameter existed.

    Attributes
    ----------
    classifiers_ : list
        Fitted clones of ``classifiers`` (set by ``fit``).
    meta_classifier_ : estimator
        Fitted clone of ``meta_classifier`` (set by ``fit``).
    classes_ : array
        Class labels, taken from the fitted meta-classifier.
    """

    def __init__(self, classifiers, meta_classifier=None):
        self.classifiers = classifiers
        # The default is resolved here rather than in fit() so that
        # `self.meta_classifier` is still an estimator right after
        # construction, as it was before the parameter was introduced.
        self.meta_classifier = DecisionTreeClassifier() if meta_classifier is None else meta_classifier

    def fit(self, X, y):
        """Fit clones of the base classifiers, then the meta-classifier.

        Returns ``self`` so that calls can be chained.
        """
        # Work on clones: fitting the ensemble must not silently refit
        # estimator objects that the caller still holds and may reuse.
        self.classifiers_ = [clone(clf).fit(X, y) for clf in self.classifiers]

        # NOTE(review): the meta-features come from the same samples the base
        # classifiers were just trained on, so the meta-classifier sees
        # optimistic (in-sample) probabilities. Out-of-fold predictions would
        # avoid that; kept as-is to preserve the example's behaviour.
        self.meta_classifier_ = clone(self.meta_classifier).fit(self._get_meta_features(X), y)
        self.classes_ = self.meta_classifier_.classes_
        return self

    def _get_meta_features(self, X):
        """Return the base classifiers' class probabilities, side by side.

        Each base classifier contributes its ``predict_proba(X)`` columns, in
        the order the classifiers were given.
        """
        probas = [clf.predict_proba(X) for clf in self.classifiers_]
        return np.concatenate(probas, axis=1)

    def transform(self, X):
        """Return the meta-features for ``X``.

        Defined so that the ``fit_transform`` inherited from
        ``TransformerMixin`` works instead of failing on a missing method.
        """
        return self._get_meta_features(X)

    def predict(self, X):
        """Predict class labels with the meta-classifier."""
        return self.meta_classifier_.predict(self._get_meta_features(X))

    def predict_proba(self, X):
        """Predict class probabilities with the meta-classifier."""
        return self.meta_classifier_.predict_proba(self._get_meta_features(X))


# Synthetic data, seeded so the accuracies printed below are comparable
# between the individual models and the stacked ensemble.
X, y = make_classification(
    n_samples=1000, n_features=50, n_informative=30, n_clusters_per_class=3, random_state=11)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11)

# Baseline 1: logistic regression on its own.
lr = LogisticRegression()
lr.fit(X_train, y_train)
print('Logistic regression accuracy: %s' % lr.score(X_test, y_test))

# Baseline 2: k-nearest neighbours with its default settings, scored on the
# held-out split.
knn_clf = KNeighborsClassifier().fit(X_train, y_train)
knn_accuracy = knn_clf.score(X_test, y_test)
print('KNN accuracy: %s' % knn_accuracy)

# Stack both baselines and score the ensemble on the same held-out split.
base_classifiers = [lr, knn_clf]
stacking_clf = StackingClassifier(base_classifiers).fit(X_train, y_train)
stacking_accuracy = stacking_clf.score(X_test, y_test)
print('Stacking classifier accuracy: %s' % stacking_accuracy)
# chapter10/e2-ch10-s1.ipynb
# Perceptron text classifier on three 20-newsgroups categories, using TF-IDF
# features.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score, classification_report

categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']

# Both subsets are fetched with identical options: same categories, and with
# headers, footers and quoted replies stripped from every post.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=subset, categories=categories, remove=('headers', 'footers', 'quotes'))
    for subset in ('train', 'test')
)

# The vocabulary and IDF weights are learned from the training posts only and
# then reused to encode the test posts.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

clf = Perceptron(random_state=11)
clf.fit(X_train, newsgroups_train.target)
predictions = clf.predict(X_test)
print(classification_report(newsgroups_test.target, predictions))
"metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD2CAYAAADGbHw0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XecVNX9//HXKqIoimJPUFCDvYAKqNhQsSTG2GLHLopG\nlEjQr2JFEAsYuxgb9h5FediIBY1YSIxGBWzBrqiAYKfs7w9/770zd2Z2Z3dn7tw5837+s8udcs/e\nnT187jmf8zl19fX1mJlZ9Vuk0g0wM7PScIduZhYId+hmZoFwh25mFgh36GZmgXCHbmYWCHfoZmaB\ncIduZhYId+hmZoFwh25mFog2SZ6srq6uJuoM1NfX1xX7XF+TXL4m+fm65PI1yeYI3cwsEO7QzcwC\n4Q7dzCwQ7tDNzALhDt3MLBDu0M3MApFo2mKa7L333gDcd999ANTV/ZIVtNdeewHw4IMPVqZhVlEH\nH3wwALfeeisAc+fOBeD4448H4B//+AcAn3/+eQVaZ9Y4R+hmZoGouQj9lFNOAeCkk04CIL6n6uWX\nXw7AZptt1nDs3HPPBWD+/PlJNLHsdthhByCKNgvtK3vllVcCMHDgwGQaliK6Ju3btwfglltuAWDy\n5MkA9OrVqzINS8gWW2wBQN++fYHob6AQ3eH++OOPAGy11VYNj7366qvlaGLZ7bLLLgD07NkTgJNP\nPhmA5ZZbDoju8tN0N+8I3cwsEHWForOynKyCy3QVmR999NEArL322lmPK8LIdz303Pfee6+oc6V9\n6bIipk022QQoHKHLAw88AMBtt93WcOyhhx5q1jnTek06duwIRFFWt27dAFhqqaXyPl93cIMGDWr1\nudO09H/DDTcE4LDDDgOiv5MOHTq06P2++OKLhu/79esHwIQJE4p6baU+KyussAIQfd433XRTAJZc\ncsm8z9fdyKxZs/I+fvrppwMwfvx4AL766qsWt81L/83MakxwY+iKuLbffnsAbrjhBiD6X7ZNm+wf\necaMGUAUoa+44opJNDNRXbt2BeCFF14AYOmll27W6/fZZx8A3njjjYZjzY3Q02qbbbYBYIMNNgCi\nMfNCdy2aT3j99dcBuOmmm8rdxLJadtllAXj44YcB6Ny5c0ned+WVV274XuPwxUbolaIx8a233rqo\n5y+xxBIArLrqqnkf12fj5ZdfBqBPnz4Nj/3www8tbmdjHKGbmQXCHbqZWSCCGXI58sgjATjxxBMB\n2HjjjYHGJzsBLrroIgAWWWSRrH+HZPHFFwei4SiL6Jq0bdu2Wa+74oorAFi4cGHDsbFjx5auYWWi\nn/fee+8FYMsttwSi4YNiKYVXE4MaqqpGu+66KwCXXXZZWd5faY/Dhg1rODZ48OCynMsRuplZIKo+\nQtcCoUsvvTTv44rQC4k/3tTzq9HVV19d1POGDBkCRJNCe+yxR9naVCmKTDXRG1fs719pjWussUZp\nGpYQpSNmTtC1xMyZMwEYPXo0ACNHjmxdwxKmyWCAv//970B0J1us//znPwAss8wyAKy55pqNPv+E\nE05o+F4LtVRaolQcoZuZBaJqI3RF5hdccAEQjZFrbO/bb78Fov9127Vrl/V6PW/OnDlAtIAiyYVW\n5RIvMFXI3/72NwCOPfbYrOO6Fn/4wx+AKGoN4e5Fv9+mfs9K8XzyyScB2HnnnYFoSbzsv//+Dd9f\nc801QPaimjRYbbXVGr5XhN6U66+/HoC3334biIrWacxdEXlz5x7SYqWVVmr4vtjIfMGCBUBUBuKo\no44CogVJukYai4/PS2SeZ9FFF21Js5vkCN3MLBBVF6Erm0Vj5vFIS8vz119/fSBa8h/PXlGEoUhE\nzwtJU1FoPDKPvy7++mq8e9FCmd
///vcA7L777lmPK0vju+++A2DKlCkA7LnnnlnHV1llFSA3Qs8s\nIaEMkrRF6LrbAPj1r3+d9Zh+p1rookydv/zlLwB8//33ANx8881AtIjmrbfeAqLxY0Wn1VK0THen\nLXmNSimLlvTr8Y8++giAnXbaCYhKH2QuWjziiCOAwnN/LeUI3cwsEFURoWdGz+eff37WYxoLV2Su\n/w3jPvnkEyBajpuZEwpw3XXXAdH/vl26dGllqytn+PDhjT5eaGZd46GZ44vVSuVb77//fiBaih6/\nyxg3bhwABx54YN73UdGq/v37l6Wd5aQl9xrjzUeReVN55IpC4wWmdFfS3HISlaKc88zyvoUoO2zo\n0KFANC/XlMceeyzrq8pVZ0boGjHQnc7jjz9e1Hs3xRG6mVkgqiJCP+OMMxq+j8+qX3XVVQD8+c9/\nzvvaRx99FIA777wTgE8//TTv8xS1/vzzz61rbAq89tprQHZ2Q6YBAwbkPX7eeecB1RmNxmlzgtbe\nbUyfPh2IIvnGcvOvvfZaALbbbrtWnbNURowYATSexdHa1a3K59ecVdrpM54vy+Snn34Cot/5JZdc\nAsDs2bNL3g6dX/nojtDNzCxLqiP03r17A9kRhnKhVXulKRqjKlYIOdfK6IiPF7/yyisAPPXUU3lf\np7zaapU5DqyNKuK/T2WgqL6GMhKaos9bofcDOOSQQ1rc9qQpu0vZLLUinuWTSX1F5vaTpaC7IH0m\nMxW6i24pR+hmZoFIZYSu6ElF9zNXXJUrF1qz9IsttlhZz1NOqi0Rv7tQZkI8hzqu0N2JNnNoaqPg\nShszZkzD98o3j/8eNc5dbGSubKf4+ykyV/51c96z3DT2my/SVF75c889l/XvllK2hjJqdtxxx5zn\npOlut7E7+3K1UytHMyP0ww8/vCzncoRuZhaIVEboGnNq6Qa1LaHMjmrMP9eqxtVXXx0ovl6J6GfX\nNn3x15V6NVu5aFPfxigfXXdi8+bNa9G5lLf+4osvtuj15dTY733ixIlA03V+mkt1TprbnqRl1q+P\nK/d2gpnnLtc1cYRuZhaIVEbojbnwwgtL+n4a11J+qmTmnpZrQ9dS6d69O9D8OxplhahiYDxfWfXR\nq2EnHsjeuLpQ9oY2Dy9Ud0UbaitjJZ7HrkwhZWA988wzrWu0pYbGtbUbValo3F53wOXkCN3MLBBV\nF6F/+eWXJXkfReaqd6166aqup1xuKLy6tFpotWzc7bffDkRRa1y1/dyN5dF/+OGHQJStUqgiojKs\nVL8jThkLWnmcJspiKrbmeWso80yZT6pXIlqtnPmctOvUqRMA6623HhBV3mwtzctl1s4vF0foZmaB\nSGWE3thqzVGjRgHNz7xQnvmECRMA6NGjR9bjs2bNAqJIIzPCqHbx/R6Vr93SFaVplRlRaV9UUV10\n7YGpvUX3228/ILoW+szFr8k999wDpDMyF2XcJFHjX1F3obmKzJpIrc11L6V3330XiOq6Z1I1xGef\nfRaI7lybu9o87sYbb2yyPaXiCN3MLBCpjNDPPPNMIMquyFctburUqUAUSd13331AFKXpPRRxKe9Y\n41nKP1a1Rp2rGiPz+B1N/M5G48aK1OPRqGj1YFqqBTZX5krNQnm++tkz50jyPV//VnR5xx13lKyd\n1UzZYIMGDcr7uCoWarVq2qg65COPPNJwTPMmovrxTz/9NBD1Lbrj0S5XTTnooIMA2HzzzXMee+ml\nlwDYe++9i257MRyhm5kFoi7JVVx1dXXNOplWQGZGR4rWC4115jln1vOmTZsGRPv/lWMVZH19fdFF\nIZp7TfLReKYyM1p6bbQ7T6lm9zMlcU0y25251+f/f0+1o9H30J3bnDlzADj44IOBKBuqlJpzTaD4\n63LxxRcD+cfQFUGr/r8i1rfffjvve22zzTZAtHuP9hDVvgTaMUxj5oceeigADzzwQDFNzSuJz4qy
\n2gAmTZoEwMYbb9zoazS2rjvdQjXMFXUriyzfCIPWQRRba73Ya+II3cwsEO7QzcwCkcpJUXnwwQeB\nKNEf4LTTTgPg2GOPLeo9tLGrSsBqMkxpiiHQz6Lhgvg2fYWorK42yC51ClXS+vTp0/C9JrTiQy+F\nKFVTJZub2mg7zTSsqM9+5sYfuv3XVw0jNJeGbjT8UC2LhySznIcWZKmMQ3ySVJQsoLIPKrYVH8bT\n9nJt2vzSvWr4LrPw3zfffNOa5hfkCN3MLBCpnhRtjDaF1sIGFVGaMWMGEBXe12TP+PHjS3XqJiU9\nKSraTHvYsGFqR6PPz7dRbrkkfU3WWmstIFpIpoVFuiYqPKbyBlpEVagkQDmUa1JUdNdR6i3VIEpf\nPOecc0r+3pX6+1G6Yq9evYBoqb42BteEcLF0F6NkBS2KbAlPipqZ1ZiqjdDTrFIRhlx++eUAnHDC\nCUCUojZgwICs5yW5jL3S1ySNyh2hqwCd0vIgfwpdMTS+rDtf3fkobbGU0vZZUTqv0qj79esHRPMz\n8T5U83VKH1X6Yms4QjczqzGO0MsgbRFGGvia5Cp3hF6t/FnJ5QjdzKzGuEM3MwuEO3Qzs0C4Qzcz\nC4Q7dDOzQCSa5WJmZuXjCN3MLBDu0M3MAuEO3cwsEO7QzcwC4Q7dzCwQ7tDNzALhDt3MLBDu0M3M\nAuEO3cwsEO7QzcwC4Q7dzCwQ7tDNzALhDt3MLBDu0M3MAuEO3cwsEO7QzcwC4Q7dzCwQ7tDNzALR\nJsmT1dXV1cR+d/X19XXFPtfXJJevSX6+Lrl8TbI5QjczC4Q7dDOzQLhDNzMLhDt0M7NAuEM3MwuE\nO3Qzs0C4QzczC0SieeiWLnfccQcABx54IAC/+c1vAHjvvfcq1qZK+9///gdAly5dAOjRowcAkydP\nrlSTrMK23357AG666SYAbrnlFgDOPvvsSjWpIEfoZmaBCDZCHz16NACDBg0C4JprrgHgT3/6EwAL\nFy6sTMNSRNFnff0vi+2GDRsGwEEHHVSxNlVKmza//CnU1f2yIE/XxPKbNGkSAK+++ioAo0aNAsK8\nu+vQoQMAnTp1AmDAgAEAXHbZZQDMnDmzMg3LwxG6mVkggovQV1ttNQD69esHRJHWcccdB8Bjjz0G\nwLhx4yrQOkurffbZB4DVV1+9wi1JtyuuuAKAXr16AdCzZ08A+vbtC0DXrl0r07AyeuihhwD4+eef\nAVhhhRUA2HTTTQGYMGFCZRqWhyN0M7NABBehf/TRRwB8/PHHACy//PJZj48dOxaA7t27AzB9+vTk\nGpcSuibt27evcEvSa/78+QDMmzevwi1Jl5VXXrnSTbBGOEI3MwuEO3Qzs0AEN+TSFKUgafK0Fodc\nttxyS8C3z5k0KSr//ve/AXjttdcq0ZzUWWWVVYDosxN31llnJdkcK8ARuplZIGouQrfCzj///Eo3\nIXHdunUDYLfddqtwS9JJaYgTJ04EYKWVVsp6fPbs2QBMnTo12YalwOKLL17pJuRwhG5mFghH6DVo\n6NChWf9+9tlnAZgyZUolmlNRKsK11FJLVbYhKXXUUUcBuZG5HHDAAUBUAqCWDB48GIDx48dXuCUR\nR+hmZoFwhF6DVIhKtIimlgpSaVHViSeemPfxW2+9NcnmpM6QIUMAOPnkk/M+/uOPPwJhFuOqZo7Q\nzcwCUXMRukoDvPnmmxVuSfI6d+4MRBtZ1LLevXsD0KdPn6zjH3zwARBt/lGrRo4cCRS+a9OmKO+/\n/35ibbKmOUI3MwtEzUXoKkafpqL0SVHZz2WWWSbruLZdqyX9+/fPe/ySSy4BYNasWUk2JzX23HNP\nINroI+68884DopKyli6O0M3MAlFzEXot0yYfcdqer5asueaaeY/X6gYXWhGquQONneurygjX4t1c\nNXGEbmYWiOAi9I022giAddZZp8ItSZ9CUalF0rSdWJLOOecc
oHB9EmWHaYMYSydH6GZmgQguQl9u\nueUAWGKJJfI+ftVVVyXZnFRTJscXX3xR4ZYkp2PHjllfRWPDkyZNSrxNabD++us3+rjrnVcHR+hm\nZoEILkJvilYC1hLt0rTuuutmHdcqv08//TTxNlXKhhtuCEQ7Vsn3338PwLfffpt4mypJ2S3akUj5\n5/qqHZvuvPPOCrTOmssRuplZIGouQq9Fqiy46qqrZh1PUx3npPTo0SPv8dGjRyfcknSI1zuP125R\n1UWDW265BYBjjz0WgHbt2gGwyCK/xMULFy6sTMMyOEI3MwtEcBF6oQhM48XPP/98ks1JhcMOOyzv\n8VdeeSXhllTefvvtl/VvZbfcddddlWhOxRxxxBFA4XrnkydPBuCFF15IrE1pt2DBAiCaX+jZsycA\nyy67LJCO+lCO0M3MAhFchL7//vvnPa4dVn744Yckm5MKWj0bp8i9FsbSu3XrBkD37t2zjqt2y7bb\nbgvA448/nmzDKkR7qS622GJ5Hx8+fDgA3333XVJNqhrxeYatttoKgEceeaQSzcniCN3MLBDBRehW\nvCeffLLSTUhM27Ztgdz9VJV//uGHHybepkqK55vHud55rqeffhqA448/Puv4iBEjAEfoZmZWQu7Q\nzcwC4SGXGlZLS/4L+eyzzwCYMmVKhVuSrPgGFlJLhdqaa9y4cQBMmzYNiEp0p6kstSN0M7NA1EyE\n/txzz1W6Calx9913A7WTopfP3LlzAdh3330r3JLKmD17NhAtV1da7957712xNqWdtuF7+OGHAfj6\n66+BaNu+NHCEbmYWiLr4GFpZT1ZXV/aTXXDBBQCceuqpAEycOBGAY445BoB33nmn3E2gvr4+fy5Y\nHklckzTwNcnVnGsC5bkuWvqvglMqCfDiiy+W+lRF82clV7HXxBG6mVkggovQ08ARRi5fk1xpiNDT\nyJ+VXI7QzcxqjDt0M7NAuEM3MwuEO3Qzs0C4QzczC0SiWS5mZlY+jtDNzALhDt3MLBDu0M3MAuEO\n3cwsEO7QzcwC4Q7dzCwQ7tDNzALhDt3MLBDu0M3MAuEO3cwsEO7QzcwC4Q7dzCwQ7tDNzALhDt3M\nLBDu0M3MAuEO3cwsEO7QzcwC4Q7dzCwQbZI8WV1dXU3sd1dfX19X7HN9TXL5muTn65LL1ySbI3Qz\ns0C4QzczC4Q7dDOzQLhDNzMLhDt0M7NAJJrlUk5rrLEGAMOHD886fvHFFwPw/vvvA/DNN98k27AA\nLL/88gAsscQSDcc+//xzABYsWFCRNlnLrL766gC88MILAPzqV78CoK7ulySK+vrspJEff/wRgL/+\n9a8AnH766Ym001rGEbqZWSDq4v8jl/VkZcwZnTFjBhBFk3GfffYZAKNGjQLgxhtvBMoTsVd7Hu1i\niy0GwMiRIwE45phjAFhqqaUanqPrOGTIkKLeM4lr0rVr14bvDznkEAD2339/ANZee20AvvjiCwCu\nu+66rNe++uqrALz55ptZr5ezzz67JU1qVCXy0DfaaCMANtlkEwDWXHNNAHbeeWe1CYDu3bsD0K5d\nu6zXf/vttwCMGDECgJtuugmI/v5Kodr/fsrBeehmZjWmaiN0jZkPHToUgCOOOALIHQPMOHfW4/fe\ney8ABxxwQKma1KDaIgyNq+qadOjQAciOeAtZdNFFizpHEtfkvffea/i+S5cuzXrtzz//nPW1ffv2\nWY/PmjULiD4/GlOOz9k0R5pXim6++eYALLfcckB0p3PkkUcC0XV4/vnnAdhuu+1Kdu6k/346duwI\nQP/+/QHo3bs3AL/73e90DrWr0fd55JFHALjiiisAePLJJ1vbtAaO0M3MakzVRug33HADAIcffrje\nG4j+F/3qq68AmDlzJgBLLrkkAJ06dQJg/vz5ABx99NEA3HrrraVqWuojdM0zPPbYY0AUiS+99NJq\nEwAvv/wyAOPGjQNg2LBh
Oe+Vhgh9zz33BOCOO+5oOLb44ovnfe7s2bOB3Owc/cxt27YFYJFFfol1\n9LmJf76+/vprAFZaaaXmNDVLmiP0QiZMmABAnz59gCh7rJi7uWIl8feTma1zwgknALDqqqvmfa7m\nDebMmZP3cc0z6G5Gz9ddzaOPPtqSJmZxhG5mVmOqLkLv1asXEI1PaaxTEZSyV4466qi8r3/iiScA\n2GmnnbKOKyIrhbRG6MrcGDNmDJCdV55Jdz333XcfEGU0DBw4sOE5Dz74IAD77LNPUedO4pqcfPLJ\nDd8vu+yyeZ9z2WWXAdGYeNxaa60FwIorrgjArrvuCsBZZ50FOELXZ+HUU08Fomh0t912A6L89tZI\n4rMyb968hu8L3WU+8MADAFx11VUAPP3003mfp3m4zDtEgLlz5wKw1157AfDUU0+1pKmAI3Qzs5rj\nDt3MLBBVt/R/9OjRQLTIRbfAWuCQb+Iuk4Ydnn32WSBacHLaaacB0WKakOywww5ANPGra6Zl3Wec\ncQYAl156adbr1l9/fSB34hngxBNPLF+DW0iphK2h1EcNydx8881Zj2tSdZdddmn1uaqRPgP6qon0\nDTfcECjNkEs5aag187MsH374IRAtqtKiw4ULF7boXLo2WszVmiGXYjlCNzMLRNVF6IUUmgSN0xJl\nTY5pwiPkCF2RQeakIUSTPh999FHWcS39V0S/zDLLAPDDDz80PEcLcEKlolXxdLx8kV0t6dGjB9D0\nIpu0+uSTT4D87b/rrruAwhPmhZx55pl5j7/++usA3HPPPc16v9ZwhG5mFoiqi9AnTZoEwBZbbAEU\n/t/RcumupCkqxtWtW7es42PHjm34Xgu3QqOIPL6kX/MNWoSiYl61QnNTmo+p1gh9ypQpQFSyAFpe\ntmC//fYDogJnMm3aNAB23HFHIEpxTYIjdDOzQFRdhD548GAgKoTzzDPPFPU6FZzSzHXfvn2B3Fn7\nWqaCTFdeeSUQRWEqEaBrH6J4+YB46QB93u68885kG5YS8Sg0rtBCrrTKzFLS513Re1NUIuC2224D\noE2b7G5UJTOSjMzFEbqZWSCqbul/IVp+Wyg/unPnzgB88MEHQDQ+rOXvWqZbikgjrUv/CznuuOOA\nKA9d0akKMe2+++5A6zJb0nZNVDJVedPrrLMOkJujr8hc46WlVA1L/3WdNGew2mqrAblj6PpsKHMq\n845X8w7FZo+k7bMSp7UJhx56aNZxXQPd6b7xxhslO6eX/puZ1ZiqG0OX+MYW2vCiqQ0uCpXIfOih\nh0rdxNRSmU9lvcS3W9NqyP/7v/8Dwso511i5VhYrx16fG2UonHfeeUDtjZlrPYaKb2nuSQrNNemu\n7sADD8x5nlZMxsvPakMVFXqrFloVGzdx4kSgtJF5czlCNzMLRNVF6NpsQWVNm9pgYfz48UA0DlyI\nIjfVXfjvf//bqnamkWqzPPfcc0A0X6AVoMoY0tZbIVCkuMEGGwDRZs+KzEWZPH/84x8B+O6775Jq\nYkWoFpL+PlS/RMeb2nZNxx9//HEABgwYUPBcH3/8MZC7qUi10UbzhTaiV7npSnKEbmYWiKqI0DVe\nDoUj7Z9++gmA66+/HsjNdtFY4JtvvglEtTpE43z//Oc/gSiSg9wqhGmQWWNks802a/S52hREVRPj\ntVmuvvpqAIYMGVLqZqbGa6+9lve4Vh7/9re/TbI5FXfNNdcAsM022wDFb4Qsisy1zZqyxEKkvkN9\nRDzvXPMupdhqrrUcoZuZBaIqInRlHEBuBKH8WGW7FBr71iy8tqzT+2gLLdV20Nh8ZoT+6aefAnD3\n3Xe34qcojUsuuQSA/v37NxyL/0yFKApTZK48WuUOh0iV7uLZGRozr7XIXPXA99hjj1a9j8bMQ47M\n5eKLLwaiDcPl3XffBaB3795AtCF9JTlCNzMLRFVE6PkoMtAqx6ayUlSHRONgctFFFwEwat
QoAP71\nr38BsO666zY8R/U93nrrraLOVQ4XXnghACeddBLQuk2t58+fD4SZySOKwLfddlsgunt5++23gSib\npRaoVglAv379gOiurrk0XpyGaLTcunTpAkTXTJStc+211wLpuhaO0M3MAlEVtVwy9/RTe7Uirdjd\nQFSbQ7vxqJ63InHVmejZsycQZX5AlKOr8fatt94aKBzhlqIWhfLrlTOu+u8F3kPnzTqusfKpU6cC\n0Qo3XYM4rZZVTr7eTyvfNt5440Z/lsYkUZ9DtbohWq/Qrl07IMqC0s5WaVgBmlQtF1UYhdw71Iz3\nVpvyPq7IfL311mtJE5olLbVcbrjhBiCanxOtUSh0LcvBtVzMzGpMVYyha68/iKoqDhw4ECgcoWvF\nm2ao27Ztm/W4VklqLFERumoZq2IaRPUtzjnnHCDKaVb0vNdeewGlHUsbNGgQEOWQF3MnpV3LlTev\nes2qy6zMBu2jusIKKwDRtdHjGiNUvZMnnniiNT9KYjLr9CgyF93RlbNmj2qTxH9Xt99+e9nPnY8+\nt/pbgKY/R/HH582bB+Tu4FQLdLce99JLLyXckuI5QjczC0RVROjK94RoXFjjwVrZGbfKKqsA0Uy1\nfPnll0CUgx3f8T6fkSNHAlEe6imnnAJEY+nnn38+AMcff3yT71WsM844o9HHla0BUUbH6NGjgcI/\nk8aV9VUZIPEKlBpvrrYqeMqxhty885ZGx5pP2HTTTYEogtU6gJVXXjnnnPEoV9lRSUfoygTLnINq\nqvaRqMKmVmnrbq8WaP/UtddeO+u4djTSHXkaOUI3MwtEVWS5ZDr66KMBGDNmTPy9gcJjhN9//z0A\nffr0AWDy5MktbsN1110HRBkTWmWq3cNLMUt/7rnnAtFdgGhlrMZHIcorT7MkMhc0/wG59W00v6Gx\nYN2piWrDxz8/Gn/WzlaN/b3oM6hI/PTTTwfg888/B3J37Ekqy0X17aFwZoYicmU06a70/vvvb8kp\nW6VSWS6q/6Q73fjdjO7Ax44dW9T7af5BWWUaXWgJZ7mYmdUYd+hmZoGouiEX3fpq0nP77bcHoG/f\nvkDuLbFSjDTBV8wkaLFt0LCISpFOnz5dbUjFwog0SeKa6DMB8OKLLwLRpsZFnBMoPKSiiWItLtOw\nm8oxZ76Hhlya2iQjqSEXbScH0ebo2qRBw1QagkzDRHil/n408XvQQQeV5P2U1qziXfvuu2/DY829\nzh5yMTOrMVUXoVcDR+i5kr4m2sxD0Wk8BS3POYEoQtckuoq2aTK6lKmHSUXo1aZSfz+6m3vnnXeA\n3MWIcUoL1SYpmliOb8Kjz5QWOUJ2UkMxHKGbmdUYR+hl4Ag9l69JLkfo+VX6s6KyG9qSMXPxGESp\nnCqhoc3paIMGAAAA0ElEQVTVNbemsiTalL1jx45A6zYVcYRuZlZjHKGXQaUjjDTyNcnlCD0/f1Zy\nOUI3M6sx7tDNzALhDt3MLBDu0M3MAuEO3cwsEIlmuZiZWfk4QjczC4Q7dDOzQLhDNzMLhDt0M7NA\nuEM3MwuEO3Qzs0C4QzczC4Q7dDOzQLhDNzMLhDt0M7NAuEM3MwuEO3Qzs0C4QzczC4Q7dDOzQLhD\nNzMLhDt0M7NAuEM3MwuEO3Qzs0C4QzczC4Q7dDOzQLhDNzMLhDt0M7NAuEM3MwvE/wPyFjJ7D14k\nIQAAAABJRU5ErkJggg==\n", 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "import matplotlib.pyplot as plt\n", 23 | "from sklearn.datasets import fetch_mldata\n", 24 | "import 
# chapter11/ed2-ch11-s1.ipynb
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
import matplotlib.cm as cm

# NOTE(review): fetch_mldata is no longer available in newer scikit-learn
# releases (fetch_openml is the documented replacement). It is left unchanged
# because the index arithmetic in the plot below depends on the sample order
# this loader returns -- confirm the ordering before migrating.
mnist = fetch_mldata('MNIST original', data_home='data/mnist')

# Show a 3 x 5 grid of digits: samples 1-5, 8001-8005 and 16001-16005.
counter = 1
for i in range(1, 4):
    for j in range(1, 6):
        plt.subplot(3, 5, counter)
        # Each sample is a flat vector of 784 values; reshape to 28 x 28.
        plt.imshow(mnist.data[(i - 1) * 8000 + j].reshape((28, 28)), cmap=cm.Greys_r)
        plt.axis('off')
        counter += 1
plt.show()


from sklearn.pipeline import Pipeline
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# GridSearchCV is imported from model_selection, the module this cell already
# uses for train_test_split; the old sklearn.grid_search module has been retired.
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

if __name__ == '__main__':
    X, y = mnist.data, mnist.target
    # Map pixel intensities from [0, 255] to [-1, 1].
    X = X/255.0*2 - 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11)

    # The gamma and C given here are placeholders; the grid below overrides both.
    pipeline = Pipeline([
        ('clf', SVC(kernel='rbf', gamma=0.01, C=100))
    ])

    # 5 gamma values x 6 C values = 30 candidates.
    parameters = {
        'clf__gamma': (0.01, 0.03, 0.1, 0.3, 1),
        'clf__C': (0.1, 0.3, 1, 3, 10, 30),
    }

    # cv=3 is explicit because the default fold count differs between
    # scikit-learn versions; the recorded run used 3 folds (90 fits).
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=2, verbose=1, scoring='accuracy', cv=3)
    # The search runs on the first 10000 training samples only, to bound the
    # run time (the recorded search still took about 100 minutes).
    grid_search.fit(X_train[:10000], y_train[:10000])
    print('Best score: %0.3f' % grid_search.best_score_)
    print('Best parameters set:')
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))

    # Evaluate the refitted best estimator on the full held-out split.
    predictions = grid_search.predict(X_test)
    print(classification_report(y_test, predictions))
# chapter11/ed2-ch11-s2.ipynb
import os
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
# GridSearchCV is imported from model_selection, the module this cell already
# uses for train_test_split; the old sklearn.grid_search module has been retired.
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from PIL import Image


# Build the design matrix: one flattened 30 x 30 greyscale image per row, with
# the class label parsed from the file name.
X = []
y = []
for path, subdirs, files in os.walk('data/English/Img/GoodImg/Bmp/'):
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sort
    # them so the sample order (and therefore any seeded train/test split)
    # is the same on every machine.
    subdirs.sort()
    for filename in sorted(files):
        f = os.path.join(path, filename)
        # The label is the text between the 3-character prefix and the first
        # '-' (presumably names look like 'img011-00012.png' -> '011'; the
        # recorded report shows labels '001'..'062').
        target = filename[3:filename.index('-')]
        # Close each file handle promptly instead of leaving thousands of
        # open files to the garbage collector.
        with Image.open(f) as raw:
            img = raw.convert('L').resize((30, 30), resample=Image.LANCZOS)
        X.append(np.array(img).reshape(900,))
        y.append(target)

# Map pixel intensities from [0, 255] to [-1, 1], as the MNIST notebook does.
# With raw 0-255 values the RBF kernel distances are so large that the SVM
# degenerates: the recorded run scored 0.073 and assigned almost every test
# sample to class '011'.
X = np.array(X) / 255.0 * 2 - 1
# Hold out 10% of the character images for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11, test_size=.1)

# Single-step pipeline; the SVC's initial gamma and C are overridden by the
# grid search below.
svc = SVC(kernel='rbf', gamma=0.01, C=100)
pipeline = Pipeline([('clf', svc)])

# 5 gamma values x 6 C values = 30 candidate settings.
parameters = dict(
    clf__gamma=(0.01, 0.03, 0.1, 0.3, 1),
    clf__C=(0.1, 0.3, 1, 3, 10, 30),
)

if __name__ == '__main__':
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=3, verbose=1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    print('Best score: %0.3f' % grid_search.best_score_)
    print('Best parameters set:')
    # Report only the parameters that were searched, in alphabetical order.
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))

    # Score the refitted best estimator on the held-out images.
    predictions = grid_search.predict(X_test)
    print(classification_report(y_test, predictions))
# chapter12/ed2-ch12-s1.ipynb
# Fit a tiny multi-layer perceptron to XOR and inspect the learned weights.
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# XOR truth table: the label is 1 exactly when the two inputs differ.
y = [0, 1, 1, 0]
X = [[0, 0], [0, 1], [1, 0], [1, 1]]

# One hidden layer with two logistic units.
clf = MLPClassifier(solver='lbfgs', activation='logistic', hidden_layer_sizes=(2,), max_iter=100, random_state=20)
clf.fit(X, y)

# The model is scored on the very samples it was trained on.
predictions = clf.predict(X)
print('Accuracy: %s' % clf.score(X, y))
for truth, p in zip(y, predictions[:10]):
    print('True: %s, Predicted: %s' % (truth, p))


# Dump the fitted parameters layer by layer: index 0 is input -> hidden,
# index 1 is hidden -> output.
for heading, values in (
    ('Weights connecting the input layer and the hidden layer: \n%s', clf.coefs_[0]),
    ('Hidden layer bias weights: \n%s', clf.intercepts_[0]),
    ('Weights connecting the hidden layer and the output layer: \n%s', clf.coefs_[1]),
    ('Output layer bias weight: \n%s', clf.intercepts_[1]),
):
    print(heading % values)
# chapter12/ed2-ch12-s2.ipynb
# Cross-validate a two-hidden-layer MLP on the scikit-learn digits dataset.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# MLPClassifier is imported from the public package path, as the sibling XOR
# notebook does; the `multilayer_perceptron` submodule is an internal location.
from sklearn.neural_network import MLPClassifier

if __name__ == '__main__':
    digits = load_digits()
    X = digits.data
    y = digits.target
    # Standardise the features inside the pipeline so the scaler is refitted
    # on each training fold.
    pipeline = Pipeline([
        ('ss', StandardScaler()),
        ('mlp', MLPClassifier(hidden_layer_sizes=(150, 100), alpha=0.1, max_iter=300, random_state=20))
    ])
    # cv=3 is explicit because the default fold count differs between
    # scikit-learn versions; the recorded output has three scores.
    print(cross_val_score(pipeline, X, y, n_jobs=-1, cv=3))

# The following text had been pasted into a later code cell as-is. It is not
# valid Python, so it is preserved here as a comment (output of an earlier run):
# [ 0.94850498 0.94991653 0.90771812]
# In [ ]:
"iVBORw0KGgoAAAANSUhEUgAAAYwAAAEWCAYAAAB1xKBvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXHWZ7/HPlyyQhSWQZpEkJCD7lkgTqMggEVlHibgC\ngsDIRL04V8e5OqP3KgrDvQ7qODoiCpgLjCwioGaEAeI1BDQE0mEPa4hAEpCsELIAWZ77x+8UXel0\ndZ9OuvpUdX/fr9d5VZ21nqpO6qnfehQRmJmZdWabogMwM7PG4IRhZma5OGGYmVkuThhmZpaLE4aZ\nmeXihGFmZrk4YRgAkkLSu2tw3VWS9q7Bdb8l6RfdfM3/knRuB/uvkfTPW3H98yT9cUvP7+JrvfP3\n3Nq460l3vpda/Bvq7ZwwehFJx0iaKel1Scsl/UnSkT34+vdIuqByW0QMjYj5PRXD1oiIUyLiWtj6\nL3dJo7Mv7f7dF+Em1/+WpHVZQi4vr9XitbZE9t4XV75/SQOybbkGf/VkgrV8nDB6CUk7AL8D/h3Y\nGdgT+DbwVpFxWU39MkvI5WWnogNqYwVwSsX6Kdk2a1BOGL3HfgARcWNEbIiItRFxd0Q8Vj5A0t9I\nekrSCkl3SdqrvQtJ2lbS9yS9JOlVST+VNKhi/yRJj0haKel5SSdLuhT4K+DH2a/dH2fHtq0auVzS\n7ZLekPSApH0qrnuipGeyEtJPJM1oW2KpEu8ASTdKulXSwDb7xkh6TdI22fpVkhZX7P8PSV/Knt8j\n6QJJBwI/BUrt/HIfVi3+Nu7NHl/LrlGqeM3vZX+DP0s6pWL7jpJ+LukVSYsk/bOkfp29/5yGS5qW\nxT2j8m8vaYKk2dnnPlvShGz7REmPVxw3TdLsivX7JH24g9f8D+DTFeufBq6rPKDae97Sv0G195Lt\nG5O99zckTQOG5/3wLBMRXnrBAuwALAOuJf2SG9Zm/yRgHnAg0B/4X8DMiv0BvDt7/gNgKqmksj3w\nn8D/yfaNB14HTiD94NgTOCDbdw9wQZvXrbzuNVmM47MYrgduyvYNB1YCH8n2fRFY1/Z6Fdf9FvAL\nYBBwe3btflWOfQk4Inv+DDAfOLBi37i28QPnAX9sc52q8bfzmqOz996/Ytt52Xv6W6Af8HngZUDZ\n/l8DPwOGALsCDwKf7ej9d/Dvoe3n/gZwLLAt8MPye8v+xiuAc7L3dGa2vkv22b6Z/W0GAK8Ci7J/\nE4OAtcAuHbz+Idk5OwHDsueHAFFxXNX33NW/QUfvJdt/P/Cv2WdwbPaZVP0MvWy+uITRS0TESuAY\n0n/Uq4AlkqZK2i075HOkL/2nImI98L+BsW1LGZIETAb+PiKWR8Qb2bFnZId8BpgSEdMiYmNELIqI\np7sQ6q8j4sEshuuBsdn2U4G5EXFbtu9HwF86udYOwJ3A88D5EbGhynEzgPdJ2j1bvyVbH5Nd49Fu\niD+vFyPiqizWa4E9gN2yv9OpwJciYnVELCYl7jM6uNYnstJTeZnewbG3R8S9EfEW8D9Jv9xHAn8N\nPBcR/xER6yPiRuBp4EMRsRaYTfpyPYL0Of0JeC9wdHbesg5e803Sj41PZsvUbBsAW/ieofrfoOp7\nkTQKOBL4RkS8FRH3ZrFZF9SkQc6KERFPkX6VIekA0i/wfyP90toL+KGk71ecIlIJ4cWKbU3AYGBO\nyh3vHFeuGhkJ3LEVYVYmgTXA0Oz5u4AFFe8lJC3s5FpHk375nhnZT8gqZgCnAQtJVUX3kH6Fvgnc\nFxEbuyH+Lp8fEWuyz3go6dfxAOCVis99Gyo+k3bcHBFn53zdys92laTlpM/8XWz69ydb3zN7PgM4\njvTZzSD9Yn8fqW1sRo7XvQ74P6R/Q//YZt9edP09Q8f/hqq9l3cBKyJidZt9Izt/C1bmhNFLRcTT\nkq4BPpttWgBcGhHXd3LqUlJVw8ERsaid/QuAavX2WzP18SvAi
PJKVtIZUf1wAO4GHgP+n6TjIuLV\nKsfNAL5L65feH0n1429S/Utva6dx7ur5C0hfwsOzX87d7Z0vRknlBPVytrRtyxpFKrlB+ny+T6q6\n+w4pYVyVxXp5jte9j1SKCtLnXvlvp7P33NXPsKP38gqp7WNIRdIYtQWv0ae5SqqXkHSApH+QNCJb\nH0kqWczKDvkp8DVJB2f7d5T08bbXyX5tXwX8QNKu2bF7SjopO+TnwPmSjpe0TbbvgGzfq8CWjrm4\nHThU0oeVumJeCOzeyTlExGXADaSk0W4jZkQ8R0qCZwMzsuq7V4GPUj1hvAqMaNuI3gVLgI3k/Dwi\n4hVSAvy+pB2yz3YfSe/bwtdv61SlbtcDgUuAWRGxgFRa3E/SWZL6S/okcBCpxx3ATGB/UpvBgxEx\nl/SlfBStDfsdva8APgSc1rYUmOM9d/VvUPW9RMSLQAvwbUkDJR2TxWVd4ITRe7xB+k/8gKTVpETx\nBPAPABHxa+BfgJskrcz2nVLlWv9IaiCflR37e9KXBhHxIHA+qa75ddIXbvlX3Q+Bj2U9gH7UleAj\nYinwceAyUqPmQaT/4J12C46IS4DfAL+XtHOVw2YAy7IvyfK6gIeqHP8HYC7wF0lL876PipjWAJcC\nf8raF47OcdqngYHAk6Rf8reQfp1X80ltOg5jVTnJt+MG4CJgOak94uwszmXAB0n/TpYBXwU+mP09\nyH6NP0RqX3o7u9b9pLaYxeQQEXOzRNPV99ylv0Fn7wU4i/R/ZDnps7iuvetYdYoOq37NiqHUDXYh\n8KmI6Kgx18x6iEsYVjcknSRpJ0nbAl8nlQBmdXKamfUQJwyrJyVSF9mlpPrlD2ddO82sDrhKyszM\ncnEJw8zMculV4zCGDx8eo0ePLjoMM7OGMWfOnKUR0ZTn2F6VMEaPHk1LS0vRYZiZNQxJbUfHV+Uq\nKTMzy8UJw8zMcnHCMDOzXJwwzMwsFycMMzPLpU8njMsug+ltZimaPj1tNzOzTfXphHHkkfCJT7Qm\njenT0/qRRxYbl5lZPepV4zC6auJEuP56+NCHYNIkuPtuuPnmtN3MzDbVp0sYACecABs3wg03wOc/\n72RhZlZNn08Y99wDGzbALrvAFVds3qZhZmZJn04Y5TaLc86BZcvg6qs3bdMwM7NWfTphzJ6d2iw+\n9am0PmBAWp89u9i4zMzqUZ9u9P7qV9PjqlWwzTZw//1wySVuxzAza0+fLmGUDR0Khx+eEoaZmbXP\nCSNTKsEDD6QGcDMz25wTRqZUSlVTc+cWHYmZWX2qWcKQNFLSdElPSpor6YvtHCNJP5I0T9Jjkt5T\nse9cSc9ly7m1irOsVEqPrpYyM2tfLUsY64F/iIiDgKOBCyUd1OaYU4B9s2UycAWApJ2Bi4CjgPHA\nRZKG1TBW9t4bmppg5sxavoqZWeOqWcKIiFci4qHs+RvAU8CebQ6bBFwXySxgJ0l7ACcB0yJieUSs\nAKYBJ9cqVgAplTJcwjAza1+PtGFIGg2MAx5os2tPYEHF+sJsW7Xt7V17sqQWSS1LlizZqjhLJXju\nOVi6dKsuY2bWK9U8YUgaCtwKfCkiVnb39SPiyohojojmpqamrbpWuR1j1qxuCMzMrJepacKQNICU\nLK6PiNvaOWQRMLJifUS2rdr2mmpuhn79XC1lZtaeWvaSEvBz4KmI+Ncqh00FPp31ljoaeD0iXgHu\nAk6UNCxr7D4x21ZTQ4Z4AJ+ZWTW1nBrkvcA5wOOSHsm2fR0YBRARPwXuAE4F5gFrgPOzfcslXQKU\nZ3W6OCKW1zDWd5RKcM01sH499O/TE6eYmW2qZl+JEfFHQJ0cE8CFVfZNAabUILQOlUpw+eXwxBMw\ndmxPv7qZWf3ySO82JkxIj66WMjPblBNGG6NHw267OWGYmbXlhNGGB/CZmbXPCaMdpRLMmwdbOQ7Q\nzKxXccJohyciNDPbnBNGO
5qbU5daJwwzs1ZOGO0YNCh1qXXCMDNr5YRRRakEs2enAXxmZuaEUVWp\nBGvWwGOPFR2JmVl9cMKowg3fZmabcsKoYq+9YPfdnTDMzMqcMKrwAD4zs005YXRgwgSYPx8WLy46\nEjOz4jlhdMDtGGZmrZwwOnDEETBggBOGmRnU8H4YkqYAHwQWR8Qh7ez/CvCpijgOBJqymye9ALwB\nbADWR0RzreLsyHbbwbhxMHNmEa9uZlZfalnCuAY4udrOiPhuRIyNiLHA14AZbe6qNzHbX0iyKCuV\noKUF1q0rMgozs+LVLGFExL1A3tuqngncWKtYtkapBGvXwqOPFh2JmVmxCm/DkDSYVBK5tWJzAHdL\nmiNpcjGRJW74NjNLCk8YwIeAP7WpjjomIt4DnAJcKOnYaidLmiypRVLLkhrcwGLkSHjXu5wwzMzq\nIWGcQZvqqIhYlD0uBn4NjK92ckRcGRHNEdHc1NTU7cF5AJ+ZWVJowpC0I/A+4LcV24ZI2r78HDgR\neKKYCJNSCV54Af7ylyKjMDMrVs0ShqQbgfuB/SUtlPQZSZ+T9LmKw04H7o6I1RXbdgP+KOlR4EHg\n9oi4s1Zx5uF2DDOzGo7DiIgzcxxzDan7beW2+cDhtYlqy7znPTBwYEoYp59edDRmZsWohzaMurfd\ndilpuIRhZn2ZE0ZO5QF8b79ddCRmZsVwwsipVII33/QAPjPru5wwcio3fHteKTPrq5wwchoxIi1u\nxzCzvsoJows8gM/M+jInjC4oleCll+Dll4uOxMys5zlhdIEH8JlZX+aE0QXjxrUO4DMz62ucMLpg\n223TbVudMMysL3LC6KJSCebM8QA+M+t7nDC6aMIEeOstePjhoiMxM+tZnU4+KGk/4CvAXpXHR8T7\naxhX3aps+D7qqGJjMTPrSXlmq/0V8FPgKmBDbcOpf+96F4walRLGl75UdDRmZj0nT8JYHxFX1DyS\nBlIqwZ/+VHQUZmY9K08bxn9K+m+S9pC0c3mpeWR1rFSChQvTYmbWV+RJGOeS2jBmAnOypaWzkyRN\nkbRYUru3V5V0nKTXJT2SLd+s2HeypGckzZP0T/neSs/xAD4z64s6rZKKiDFbeO1rgB8D13VwzH0R\n8cHKDZL6AZcDJwALgdmSpkbEk1sYR7cbOzbdVOn+++HjHy86GjOznpGnl9QA4PPAsdmme4CfRcS6\njs6LiHsljd6CmMYD87JbtSLpJmASUDcJY+BAD+Azs74nT5XUFcARwE+y5YhsW3coSXpU0n9JOjjb\ntiewoOKYhdm2dkmaLKlFUsuSJUu6KazOlUrw0ENpTIaZWV+QJ2EcGRHnRsQfsuV84MhueO2HgL0i\n4nDg34HfbMlFIuLKiGiOiOampqZuCCufUimN9n7ooR57STOzQuVJGBsk7VNekbQ33TAeIyJWRsSq\n7PkdwABJw4FFwMiKQ0dk2+qKG77NrK/JMw7jK8B0SfMBkUZ8n7+1Lyxpd+DViAhJ40nJaxnwGrCv\npDGkRHEGcNbWvl5322MP2GsvJwwz6zvy9JL6f5L2BfbPNj0TEZ3W3Eu6ETgOGC5pIXARMCC75k+B\njwGfl7QeWAucEREBrJf0BeAuoB8wJSLmdvmd9YAJE+Dee4uOwsysZ1RNGJLeHxF/kPSRNrveLYmI\nuK2jC0fEmZ3s/zGp2217++4A7ujo/HpQKsGNN8KCBTByZOfHm5k1so5KGO8D/gB8qJ19AXSYMPqC\nynYMJwwz6+2qJoyIuCh73Or2it7q8MNh0CCYORM+8YmiozEzq61Oe0lJ+qKkHZRcLekhSSf2RHD1\nbsAAaG52w7eZ9Q15utX+TUSsBE4EdgHOAb5T06gaSKmUbqb05ptFR2JmVlt5Eoayx1OB67IeS+rg\n+D6lVIJ169JtW83MerM8CWOOpLtJCeMuSdsDG2sbVuPwAD4z6ys6HIchScA3gSZgfkSskbQ
L3TBw\nr7fYbTcYM8YJw8x6vw4TRjYK+46IOLRi2zLSiGzLlEowfTpEgFxZZ2a9VJ4qqYckdcdkg71WqQSv\nvAIvvVR0JGZmtZMnYRwFzJL0vKTHJD0u6bFaB9ZI3I5hZn1BnskHT6p5FA3usMNg8OCUMM44o+ho\nzMxqo9MSRkS8SJpu/P3Z8zV5zutLBgyAI490CcPMerc8I70vAv4R+Fq2aQDwi1oG1YjKA/jWri06\nEjOz2shTUjgdOA1YDRARLwPb1zKoRlQqwfr10NJSdCRmZrWRJ2G8nd2nIgAkDaltSI3p6KPTo6ul\nzKy3ypMwbpb0M2AnSX8L/B64qrOTJE2RtFjSE1X2f6qi19VMSYdX7Hsh2/6IpIb4zb7rrrDPPk4Y\nZtZ75bnj3vcknQCsBPYDvhkR03Jc+xrSDZKuq7L/z8D7ImKFpFOAK0ldeMsmRsTSHK9TN0olmDbN\nA/jMrHfK29vpceA+4N7seaci4l5geQf7Z0bEimx1FjAiZyx1q1SCV1+FF14oOhIzs+6Xp5fUBcCD\nwEdI9+GeJelvujmOzwD/VbEewN2S5kia3M2vVTMewGdmvVmegXtfAcZlc0iRTT44E5jSHQFImkhK\nGMdUbD4mIhZJ2hWYJunprMTS3vmTgckAo0aN6o6Qttihh8KQISlhnHVWoaGYmXW7PFVSy4A3Ktbf\noJsmH5R0GHA1MKmckAAiYlH2uBj4NTC+2jUi4sqIaI6I5qampu4Ia4v17+8BfGbWe+VJGPOAByR9\nKxvENwt4VtKXJX15S19Y0ijgNuCciHi2YvuQ7J4b5S68JwLt9rSqRxMmwKOPwpo1RUdiZta98lRJ\nPZ8tZb/NHjscvCfpRuA4YLikhcBFpFHiRMRPSffZ2AX4SbrtBusjohnYDfh1tq0/cENE3Jnz/RSu\ncgDfsccWHY2ZWffJ06322+XnkrYBhmb3+O7svDM72X8BcEE72+cDh29+RmOoHMDnhGFmvUmeXlI3\nSNohqx56AnhS0ldqH1pjGj4c9t0XZs4sOhIzs+6Vpw3joKxE8WFS19cxwDk1jarBlUqphBFRdCRm\nZt0nT8IYIGkAKWFMjYh1ZPNKWftKJViyBObPLzoSM7Pukydh/Ax4ARgC3CtpL9I0IVaFB/CZWW+U\n5wZKP4qIPSPi1EheBCb2QGwN65BDYOhQJwwz612q9pKSdHZE/KKDsRb/WqOYGl6/fjB+vBOGmfUu\nHZUwyve92L7KYh0oleCxx2D16qIjMTPrHlVLGBHxs+zx29WOsepKJdiwAWbPhuOOKzoaM7Ot12Eb\nhqSJkm6VNDdbbpF0XA/F1tB8Bz4z622qJgxJf02akfZ3wFnAp4A7gCmSTu2Z8BrXLrvA/vs7YZhZ\n79HR1CBfAT4cEY9WbCvfMvXfScnDOlAqwe9+5zvwmVnv0FGV1O5tkgUAEfEYaYJA60SpBEuXwvPP\nd36smVm96yhhdNS/x31/cigP4PO8UmbWG3RUJbWPpKntbBewd43i6VUOOgi23z61Y3z600VHY2a2\ndTpKGJM62Pe97g6kN+rXD446yg3fZtY7dDQOY0ZPBtJblUpw6aXwxhuptGFm1qjyTD64xSRNkbRY\nUru3WFXyI0nzJD0m6T0V+86V9Fy2nFvLOGupVIKNG9MAPjOzRlbThAFcA5zcwf5TgH2zZTJwBYCk\nnUm3dD0KGA9cJGlYTSOtEQ/gM7PeInfCkDS4qxePiHuB5R0cMgm4LpsFdxawk6Q9gJOAaRGxPCJW\nANPoOPHUrWHD4IADnDDMrPHluUXrBElPAk9n64dL+kk3vf6ewIKK9YXZtmrb24tvsqQWSS1Llizp\nprC6V6kEs2b5Dnxm1tjylDB+QPrFvwwgG8x3bC2D6oqIuDIimiOiuampqehw2lUqwbJl8NxzRUdi\nZrblclVJRcSCNps2dNPrLwJGVqyPyLZV296QJkxIj66
WMrNGlidhLJA0AQhJAyT9D+Cpbnr9qcCn\ns95SRwOvR8QrwF3AiZKGZY3dJ2bbGtKBB8KOOzphmFlj62jgXtnngB+S2hAWAXcDF+a5uKQbgeOA\n4ZIWkno+DQCIiJ+SJjA8FZgHrAHOz/Ytl3QJUO6MenFEdNR4Xte22SYN4PMUIWbWyDpNGBGxlDS1\neZdFxJmd7A+qJJ+ImEKaXr1XKJXg4oth5UrYYYeiozEz67pOE4akH7Wz+XWgJSJ+2/0h9U6lUuol\n9eCD8IEPFB2NmVnX5WnD2A4YCzyXLYeRGqE/I+nfahhbr3LUUenR7Rhm1qjytGEcBrw3IjYASLoC\nuA84Bni8hrH1KjvtlGavdcIws0aVp4QxDBhasT4E2DlLIG/VJKpeqjyAb+PGoiMxM+u6PAnjMtKt\nWf+vpGuAh4HvShoC/L6WwfU2pRKsWAHPPlt0JGZmXZenl9TPJd1BmgQQ4OsR8XL2/Cs1i6wXKt+B\n7/770/xSZmaNJO/kg28CrwArgHdLqpupQRrJAQektgy3Y5hZI8rTrfYC4IuknlGPAEcD9wPvr21o\nvc8226Tpzp0wzKwR5SlhfBE4EngxIiYC44DXahpVL1Yqwdy58PrrRUdiZtY1eRLGmxHxJoCkbSPi\naWD/2obVe1UO4DMzayR5EsZCSTsBvwGmSfot8GJtw+q9jjoKJM8rZWaNJ08vqdOzp9+SNB3YEbiz\nplH1YjvsAAcf7HYMM2s8HZYwJPWT9HR5PSJmRMTUiHi79qH1Xh7AZ2aNqMOEkY3mfkbSqB6Kp08o\nlVKj99NPd36smVm9yDOX1DBgrqQHgdXljRFxWs2i6uUqB/AddFCxsZiZ5ZUnYXxjSy8u6WTSzZf6\nAVdHxHfa7P8BMDFbHQzsGhE7Zfs20Dq54Uu9KUHttx8MG5YSxmc+U3Q0Zmb55Gn0niFpL2DfiPi9\npMGkBNAhSf2Ay4ETgIXAbElTI+LJimv/fcXxf0ca41G2NiLG5n8rjcMD+MysEXXarVbS3wK3AD/L\nNu1J6mLbmfHAvIiYnzWS3wRM6uD4M4Ebc1y3VyiV4Mkn4TUPgTSzBpFnHMaFwHuBlQAR8Rywa47z\n9gQWVKwvzLZtJivBjAH+ULF5O0ktkmZJ+nC1F5E0OTuuZcmSJTnCqg/ldowHHig2DjOzvPIkjLcq\nu9FK6g9EN8dxBnBL+SZNmb0iohk4C/g3Sfu0d2JEXBkRzRHR3NTU1M1h1c748alqytVSZtYo8iSM\nGZK+DgySdALwK+A/c5y3CBhZsT4i29aeM2hTHRURi7LH+cA9bNq+0fB22AEOOcQJw8waR56E8U/A\nElKPpc8CdwD/K8d5s4F9JY2RNJCUFKa2PUjSAaSuu/dXbBsmadvs+XBSldiTbc9tdB7AZ2aNJE/C\n+DBwXUR8PCI+FhFXRUSnVVIRsR74AnAX8BRwc0TMlXSxpMousmcAN7W55oFAi6RHgenAdyp7V/UW\npRKsXJkav83M6l2ecRgfAn4g6V7gl8CdWTLoVETcQSqRVG77Zpv1b7Vz3kzg0Dyv0cgqB/Adckix\nsZiZdabTEkZEnA+8m9R2cSbwvKSrax1YX7DvvrDLLm7HMLPGkKeEQUSsk/RfpN5Rg0jVVBfUMrC+\nQPIAPjNrHHkG7p0i6RrgOeCjwNXA7jWOq88oldIkhMuXFx2JmVnH8jR6f5o0snv/iDgvIu7I24Zh\nnfMAPjNrFHnaMM6MiN9ExFsAko6RdHntQ+sbPIDPzBpFrjYMSeNII64/DvwZuK2WQfUlQ4fCoYc6\nYZhZ/auaMCTtR+oVdSawlNSlVhExsdo5tmVKJbj+etiwAfp1Og+wmVkxOqqSehp4P/DBiDgmIv4d\n2NDB8baFJkyAN97wAD4zq28dJYyPAK8A0yVdJel4QD0TVt9SOYDPzKxeVU0YWUP3GcABpOk5vgTs\nKukKSSf2VIB9wT7
7wPDhMHNm0ZGYmVWXp5fU6oi4ISI+RJpx9mHgH2seWR8ipVKGSxhmVs/yjMN4\nR0SsyO4/cXytAuqrSiV49llYtqzoSMzM2telhGG1U27HmDWr2DjMzKpxwqgTRx6ZutS6WsrM6pUT\nRp0YMgQOO8wJw8zqlxNGHSmV4MEH0wA+M7N6U9OEIelkSc9Imifpn9rZf56kJZIeyZYLKvadK+m5\nbDm3lnHWi1IJVq2CJ54oOhIzs83lmktqS0jqB1wOnAAsBGZLmtrOrVZ/GRFfaHPuzsBFQDPpHhxz\nsnNX1CreelA5gO/ww4uNxcysrVqWMMYD8yJifkS8DdwETMp57knAtIhYniWJacDJNYqzbuy9NzQ1\nuR3DzOpTLRPGnsCCivWF2ba2PirpMUm3SBrZxXORNFlSi6SWJUuWdEfchZHSvFJOGGZWj4pu9P5P\nYHREHEYqRVzb1QtkAwmbI6K5qamp2wPsaaUSPPccNHjuM7NeqJYJYxEwsmJ9RLbtHRGxrHxjJtKt\nX4/Ie25v5QF8ZlavapkwZgP7ShojaSBwBjC18gBJe1SsngY8lT2/CzhR0jBJw4ATs229XnMz9O/v\naikzqz816yUVEeslfYH0Rd8PmBIRcyVdDLRExFTgv0s6DVgPLAfOy85dLukSUtIBuDgiltcq1noy\neHDqIeWEYWb1RhFRdAzdprm5OVpaWooOY6v93d/BlCnw+uuptGFmViuS5kREc55ji270tnaUSrBm\nDTz+eNGRmJm1csKoQ74Dn5nVIyeMOjR6NOy2mxOGmdUXJ4w65DvwmVk9csKoQ5ddBrvuCs8/D4sX\np23Tp6ftZmZFccKoQ0ceCb/8ZXo+a1ZKFp/4RNpuZlYUJ4w6NHEi3HRTen7eeXDqqXD++bDzzrBu\nXaGhmVkf5nEYdeyUU+DOO2HgQHj77bRtu+3SwL7mZjjiiPR44IEer2FmW6Yr4zD8NVOnpk+Hlhb4\nxjfgiivg+99PSWHOnLT92mvh8svTsYMGbZ5EDjjAScTMupe/UupQuc3i5ptT9dTEia3r3/9+Ombj\nRnj22ZRAyknkmmvgxz9O+wcNgnHjWhPIEUekJNKvX2Fvy8wanKuk6tBll6UG7okTW7dNnw6zZ8NX\nv1r9vA0bNk8iDz8Mq1en/YMHb55E9t/fScSsL+tKlZQTRi+3YQM880xrApkzJyWRNWvS/iFDNk8i\n++3XfhLZ0kRmZvXLCcM6tGEDPP305klk7dq0f+jQlETKCaScRGbM2LSqrG3VmZk1HicM67L161MS\nKSeQchJIHnSoAAAMBklEQVR58820f/vtUxLZdVe46y44+2z41a+cLMwanROGdYv16+Gpp1qTSEsL\nPPpoaxLp3z+VQsaNS8vYsXDooanrr5k1hrpJGJJOBn5IuoHS1RHxnTb7vwxcQLqB0hLgbyLixWzf\nBqA8wfdLEXFaZ6/nhFF706bBJz8Jf/VX6fm++8ILL8DKlWl/v36pN1ZlEhk7Ng06NLP6UxfjMCT1\nAy4HTgAWArMlTY2IJysOexhojog1kj4PXAZ8Mtu3NiLG1io+67rp0+Gss+DWWzdtw7jtNhgzJlVh\nPfJIepw+HX7xi9ZzR41qTSDlZDJyZJpo0cwaQy3HYYwH5kXEfABJNwGTgHcSRkRMrzh+FnB2DeOx\nrTR79qZtFhMnpvXZs+H442HvveGjH209fvHilEDKSeSRR2DqVCgXanfeubUEUk4i++/vAYdm9apm\nVVKSPgacHBEXZOvnAEdFxBeqHP9j4C8R8c/Z+nrgEVJ11Xci4jdVzpsMTAYYNWrUES+++GK3vxfr\nPqtXw2OPbZpEHn+8tV1ku+1SO0g5iYwdC4cdlrr/mln3q4sqqa6QdDbQDLyvYvNeEbFI0t7AHyQ9\nHhHPtz03Iq4EroTUhtEjAdsWGzIk3eujfFdBSI3rzzyTEkg5idxyC1x1VdovpZJHZ
RIZNw6amtJ+\njw8x6xm1TBiLgJEV6yOybZuQ9AHgfwLvi4i3ytsjYlH2OF/SPcA4YLOEYY2vf384+OC0nJ1VSkbA\nggWbJpGZM1tn8QXYc8+UPIYNg0svhZ/8BM48c9PxImbWfWpZJdUfeBY4npQoZgNnRcTcimPGAbeQ\nqq6eq9g+DFgTEW9JGg7cD0xq02C+GfeS6v2WL9+0Ouvhh9P4kQ0b0v5tsgn7Dz44VW2NGJEa1ysf\nm5pajzPr6+qiSioi1kv6AnAXqVvtlIiYK+lioCUipgLfBYYCv1LqLlPuPnsg8DNJG0n37PhOZ8nC\n+oadd4b3vz8tZWvXwhNPwLe/DbffDocckkod998Pixa1Tg1fNnBgKp20l0ycVMyq88A96xXKXXw/\n//k0HXy5N9fGjbB0aareWriw9bHt81okFbetWCOoixKGWU/paDr4iRPTdCa77prmxGpP26RSmUwW\nLEi3ye0oqZQTSNukss8+1efeMmtELmFYw+uJX/LlpNI2mXRWUunfP527++6wZEmKc8yYNDfX0KFp\nyfN86NDuG5/iko9VqpupQXqaE4YVKSIlhbbJ5PbbUxvLyJGpDWbVqrS88UbrNPN5bLdd1xNN+Xnl\ntsceg89+Fn75yzTg0rMO921OGGZ1olrbStmGDWkwY2USafu8vW0d7W9byunIwIFpHMy73w2jR6eE\ntssuHT/utJNvutWbuA3DrA501rYC6Yt3hx3S0l3efjsloc4SztSpcN99qfvxyJGwbBnMn5+6Lq9Y\n0TqFS1tSShqdJZa2jzvu2PHcYa4qq39OGGY10tHcW7Ws+hk4MC3DhlU/Zvp0+Jd/gW98I5V8fvjD\nzUs+r7+eksjy5R0/LlmSxsIsX57OqaZfvxRTtYSydClMmgTf/CYcdxzMnQtf/jJce21qByqqm7MT\nWStXSZn1MW1LPt3ZhrF+fSqd5Ek0lY+rVnV+7SFDUltMeSm3zWzJtiFD8iegWn5e9cBVUmZWVS1L\nPv37p/Ep5Xm+8nrrrdZE893vplLF6afDSSdtWrVWuaxaBa+8As8+27pt9er8r9mVBHTOOan0c8op\n6Y6Tl1ySSkvPPguDB7cu227bc1P2F1HycQnDzOpGZ50EOrNxY2v7TXsJJs+2yu1dSUCQkkVlAunq\nMmhQvuMGDOi+ko9LGGbWcPJ0EujMNtu0lg66Q7kX2513wuc+Bx/5SJpJ+etfT3ebXLNm82Xt2va3\nv/YavPzy5tvXret6XP37p8TRrx984ANpAOnatbWvJnPCMLO6UFQngY7065fuZ3/hha13mvzUp1oT\n2emnb/1rrFtXPcnkWe6/P43z+cY3av85uUrKzKwD9dxLamur8MAD94oOw8ys5opow/AEzmZmDaij\nKrxacQnDzKwPcwnDzMy6XU0ThqSTJT0jaZ6kf2pn/7aSfpntf0DS6Ip9X8u2PyPppFrGaWZmnatZ\nwpDUD7gcOAU4CDhT0kFtDvsMsCIi3g38APiX7NyDgDOAg4GTgZ9k1zMzs4LUsoQxHpgXEfMj4m3g\nJmBSm2MmAddmz28Bjle6ufck4KaIeCsi/gzMy65nZmYFqWXC2BNYULG+MNvW7jERsR54Hdgl57kA\nSJosqUVSy5IlS7opdDMza6vhR3pHxJXAlQCSlkh6cQsvNRxY2m2BdR/H1TWOq2scV9f0xrj2yntg\nLRPGImBkxfqIbFt7xyyU1B/YEViW89zNREQX58hsJaklb9eynuS4usZxdY3j6pq+Hlctq6RmA/tK\nGiNpIKkRe2qbY6YC52bPPwb8IdLAkKnAGVkvqjHAvsCDNYzVzMw6UbMSRkSsl/QF4C6gHzAlIuZK\nuhhoiYipwM+B/5A0D1hOSipkx90MPAmsBy6MiA21itXMzDpX0zaMiLgDuKPNtm9WPH8T+HiVcy8F\nLq1lfG1c2YOv1RWOq2scV9c4rq7p03H1qqlBz
Mysdjw1iJmZ5eKEYWZmufT5hCFpiqTFkp4oOpYy\nSSMlTZf0pKS5kr5YdEwAkraT9KCkR7O4vl10TJUk9ZP0sKTfFR1LJUkvSHpc0iOS6mY6ZUk7SbpF\n0tOSnpJUqoOY9s8+p/KyUtKXio4LQNLfZ//un5B0o6Ttio4JQNIXs5jm1vqz6vNtGJKOBVYB10XE\nIUXHAyBpD2CPiHhI0vbAHODDEfFkwXEJGBIRqyQNAP4IfDEiZhUZV5mkLwPNwA4R8cGi4ymT9ALQ\nHBF1NeBL0rXAfRFxddb1fXBEvFZ0XGXZ/HGLgKMiYksH5HZXLHuS/r0fFBFrs16cd0TENQXHdQhp\n2qXxwNvAncDnImJeLV6vz5cwIuJeUpfeuhERr0TEQ9nzN4CnqDI1Sk+KZFW2OiBb6uIXh6QRwF8D\nVxcdSyOQtCNwLKlrOxHxdj0li8zxwPNFJ4sK/YFB2SDjwcDLBccDcCDwQESsyaZXmgF8pFYv1ucT\nRr3LpnwfBzxQbCRJVu3zCLAYmBYRdREX8G/AV4GNRQfSjgDuljRH0uSig8mMAZYA/zerxrta0pCi\ng2rjDODGooMAiIhFwPeAl4BXgNcj4u5iowLgCeCvJO0iaTBwKpvOktGtnDDqmKShwK3AlyJiZdHx\nAETEhogYS5quZXxWJC6UpA8CiyNiTtGxVHFMRLyHNNX/hVk1aNH6A+8BroiIccBqYLN71hQlqyI7\nDfhV0bEASBpGmkV7DPAuYIiks4uNCiLiKdJtIe4mVUc9AtRskLMTRp3K2ghuBa6PiNuKjqetrPpi\nOul+JUV7L3Ba1lZwE/B+Sb8oNqRW2a9TImIx8GvqY6r+hcDCihLiLaQEUi9OAR6KiFeLDiTzAeDP\nEbEkItYBtwETCo4JgIj4eUQcERHHAiuAZ2v1Wk4YdShrXP458FRE/GvR8ZRJapK0U/Z8EHAC8HSx\nUUFEfC0iRkTEaFI1xh8iovBffwCShmQdF8iqfE4kVSMUKiL+AiyQtH+26XjSVDz14kzqpDoq8xJw\ntKTB2f/P40lti4WTtGv2OIrUfnFDrV6r4ac331qSbgSOA4ZLWghcFBE/LzYq3gucAzyetRcAfD2b\naqVIewDXZr1XtgFujoi66sJah3YDfp2+Y+gP3BARdxYb0jv+Drg+q/6ZD5xfcDzAO4n1BOCzRcdS\nFhEPSLoFeIg0v93D1M80IbdK2gVYR5p3r2adF/p8t1ozM8vHVVJmZpaLE4aZmeXihGFmZrk4YZiZ\nWS5OGGZmlosThlkNSRpdTzMhm20NJwwzM8vFCcOsh0jaO5vo78iiYzHbEn1+pLdZT8im4LgJOC8i\nHi06HrMt4YRhVntNwG+BjxR9EyyzreEqKbPae500ed0xRQditjVcwjCrvbeB04G7JK2KiJrNJmpW\nS04YZj0gIlZnN3qaliWNqUXHZNZVnq3WzMxycRuGmZnl4oRhZma5OGGYmVkuThhmZpaLE4aZmeXi\nhGFmZrk4YZiZWS7/H5rYADrNfZ5/AAAAAElFTkSuQmCC\n", 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np \n", 23 | "from sklearn.cluster import KMeans \n", 24 | "from scipy.spatial.distance import cdist \n", 25 | "import matplotlib.pyplot as plt \n", 26 | "\n", 27 | "c1x = np.random.uniform(0.5, 1.5, (1, 10)) \n", 28 | "c1y = np.random.uniform(0.5, 1.5, (1, 10)) \n", 29 | 
"c2x = np.random.uniform(3.5, 4.5, (1, 10)) \n", 30 | "c2y = np.random.uniform(3.5, 4.5, (1, 10)) \n", 31 | "x = np.hstack((c1x, c2x)) \n", 32 | "y = np.hstack((c1y, c2y)) \n", 33 | "X = np.vstack((x, y)).T \n", 34 | "\n", 35 | "K = range(1, 10) \n", 36 | "meanDispersions = [] \n", 37 | "for k in K: \n", 38 | " kmeans = KMeans(n_clusters=k) \n", 39 | " kmeans.fit(X) \n", 40 | " meanDispersions.append(sum(np.min(cdist(X, kmeans.cluster_centers_, 'euclidean'), axis=1)) / X.shape[0]) \n", 41 | "\n", 42 | "plt.plot(K, meanDispersions, 'bx-') \n", 43 | "plt.xlabel('k') \n", 44 | "plt.ylabel('Average Dispersion') \n", 45 | "plt.title('Selecting k with the Elbow Method') \n", 46 | "plt.show()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 2", 62 | "language": "python", 63 | "name": "python2" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 2 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython2", 75 | "version": "2.7.12" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /chapter13/ed2-ch13-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "from sklearn.cluster import KMeans\n", 13 | "from sklearn import metrics\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "\n", 16 | "plt.subplot(3, 2, 1)\n", 17 | "x1 = np.array([1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9])\n", 18 | "x2 = 
np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])\n", 19 | "X = np.array(zip(x1, x2)).reshape(len(x1), 2)\n", 20 | "\n", 21 | "plt.xlim([0, 10])\n", 22 | "plt.ylim([0, 10])\n", 23 | "plt.title('Instances')\n", 24 | "plt.scatter(x1, x2)\n", 25 | "colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'b']\n", 26 | "markers = ['o', 's', 'D', 'v', '^', 'p', '*', '+']\n", 27 | "tests = [2, 3, 4, 5, 8]\n", 28 | "subplot_counter = 1\n", 29 | "for t in tests:\n", 30 | " subplot_counter += 1\n", 31 | " plt.subplot(3, 2, subplot_counter)\n", 32 | " kmeans_model = KMeans(n_clusters=t).fit(X)\n", 33 | " for i, l in enumerate(kmeans_model.labels_):\n", 34 | " plt.plot(x1[i], x2[i], color=colors[l], marker=markers[l], ls='None')\n", 35 | " plt.xlim([0, 10])\n", 36 | " plt.ylim([0, 10])\n", 37 | " plt.title('K = %s, Silhouette Coefficient = %.03f' % (\n", 38 | " t, metrics.silhouette_score(X, kmeans_model.labels_, metric='euclidean')))\n", 39 | "plt.show()" 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 2", 46 | "language": "python", 47 | "name": "python2" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 2 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython2", 59 | "version": "2.7.12" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /chapter13/ed2-ch13-s4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import glob\n", 13 | "import numpy as np\n", 14 | "import mahotas as mh\n", 15 | "from mahotas.features import surf\n", 16 | "from sklearn.model_selection 
import train_test_split\n", 17 | "from sklearn.linear_model import LogisticRegression\n", 18 | "from sklearn.metrics import *\n", 19 | "from sklearn.cluster import MiniBatchKMeans" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 45, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "all_instance_filenames = []\n", 31 | "all_instance_targets = []\n", 32 | "\n", 33 | "for f in glob.glob('cats-and-dogs-img/*.jpg'):\n", 34 | " target = 1 if 'cat' in os.path.split(f)[1] else 0\n", 35 | " all_instance_filenames.append(f)\n", 36 | " all_instance_targets.append(target)\n", 37 | "\n", 38 | "surf_features = []\n", 39 | "for f in all_instance_filenames:\n", 40 | " image = mh.imread(f, as_grey=True)\n", 41 | " surf_features.append(surf.surf(image)[:, 5:])\n", 42 | "\n", 43 | "train_len = int(len(all_instance_filenames) * .60)\n", 44 | "X_train_surf_features = np.concatenate(surf_features[:train_len])\n", 45 | "X_test_surf_features = np.concatenate(surf_features[train_len:])\n", 46 | "y_train = all_instance_targets[:train_len]\n", 47 | "y_test = all_instance_targets[train_len:]" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 46, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "array([[ 0.6056733 , 2.70938102, 1.22470857, ..., 0.40240388,\n", 61 | " 1.36376676, 0.91444056],\n", 62 | " [ 1.17256268, 2.15959095, 1.80512123, ..., 1.25544983,\n", 63 | " 2.14938607, 0.92937648],\n", 64 | " [ 4.05884662, 1.87604644, 5.28951557, ..., 4.32944494,\n", 65 | " 5.41296044, 3.89081466],\n", 66 | " ..., \n", 67 | " [ 0.6193189 , 2.92864247, 1.1535589 , ..., 0.36941273,\n", 68 | " 1.18161751, 1.09170526],\n", 69 | " [ 1.68619226, 3.95702531, 0.93771461, ..., 1.37208184,\n", 70 | " 0.80844426, 2.08232525],\n", 71 | " [ 1.09366926, 1.87174791, 1.99117652, ..., 1.12510896,\n", 72 | " 2.15558684, 1.0511277 ]])" 73 | ] 74 | }, 75 | 
"execution_count": 46, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "n_clusters = 300\n", 82 | "estimator = MiniBatchKMeans(n_clusters=n_clusters)\n", 83 | "estimator.fit_transform(X_train_surf_features)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 47, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "X_train = []\n", 95 | "for instance in surf_features[:train_len]:\n", 96 | " clusters = estimator.predict(instance)\n", 97 | " features = np.bincount(clusters)\n", 98 | " if len(features) < n_clusters:\n", 99 | " features = np.append(features, np.zeros((1, n_clusters-len(features))))\n", 100 | " X_train.append(features)\n", 101 | "\n", 102 | "X_test = []\n", 103 | "for instance in surf_features[train_len:]:\n", 104 | " clusters = estimator.predict(instance)\n", 105 | " features = np.bincount(clusters)\n", 106 | " if len(features) < n_clusters:\n", 107 | " features = np.append(features, np.zeros((1, n_clusters-len(features))))\n", 108 | " X_test.append(features)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 50, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | " precision recall f1-score support\n", 123 | "\n", 124 | " 0 0.69 0.77 0.73 378\n", 125 | " 1 0.77 0.69 0.72 420\n", 126 | "\n", 127 | "avg / total 0.73 0.72 0.72 798\n", 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "clf = LogisticRegression(C=0.001, penalty='l2')\n", 134 | "clf.fit(X_train, y_train)\n", 135 | "predictions = clf.predict(X_test)\n", 136 | "\n", 137 | "print(classification_report(y_test, predictions))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | } 149 | ], 150 | "metadata": { 151 | 
"kernelspec": { 152 | "display_name": "Python 2", 153 | "language": "python", 154 | "name": "python2" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 2 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython2", 166 | "version": "2.7.12" 167 | } 168 | }, 169 | "nbformat": 4, 170 | "nbformat_minor": 2 171 | } 172 | -------------------------------------------------------------------------------- /chapter13/tree.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Mastering-Machine-Learning-with-scikit-learn-Second-Edition/db39c2f407fbf515f67b368e0af5a7f042c0f0f7/chapter13/tree.jpg -------------------------------------------------------------------------------- /chapter14/ed2-ch14-s1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "[-0.99999998 -1.00000002]\n", 15 | "[[ 0.70710678 0.70710678]\n", 16 | " [ 0.70710678 0.70710678]]\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "import numpy as np\n", 22 | "w, v = np.linalg.eig(np.array([[1, -2], [2, -3]]))\n", 23 | "print(w)\n", 24 | "print(v)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 2", 40 | "language": "python", 41 | "name": "python2" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 2 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | 
"name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython2", 53 | "version": "2.7.12" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 1 58 | } 59 | -------------------------------------------------------------------------------- /chapter14/ed2-ch14-s2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+QHdV1579n3swgxC4Sa7GBwGjHoqhs2GykXRTkKeMw\njojAbAJx4hRO7Vq4nBRLxXgGAbXFiCA9CTLjKn4KnHKESWwr643xZkMZJ2yBGWuCKQ2YUSy8YEzC\nTCkMxDFgI5EtrRnNvLN/3HeZfj3983X36+7X309V13v3vX7dVzZzvn3POfccUVUQQgipHj15T4AQ\nQkg+UAAIIaSiUAAIIaSiUAAIIaSiUAAIIaSiUAAIIaSiUAAIIaSiUAAIIaSiUAAIIaSi9OY9gSDW\nrVung4ODeU+DEEJKw+HDh99S1TOjnFtoARgcHMTMzEze0yCEkNIgIv8Q9Vy6gAghpKJQAAghpKJQ\nAAghpKJQAAghpKJQAAghpKJQAAghpKJQAAghpKJQAEhpmJvLewaEdBcUAFIKJiaA884zr4SQdKAA\nkMIzMQHccYd5f8cdFAFC0oICQAqNNf4nTpjxiRMUAULSggJACovb+FsoAoSkAwWAFJK5OWDnzpXG\n33LihPmegWFC2ocCQArJhg3A+DiwerX396tXm+83bOjsvAjpJigApLCMjQF/8AcrRWD1avP52Fg+\n8yKkW6AAkELjFoEyGf/p+WlMfHsC0/PTeU+FEE8K3RCGEGDZ2O/cWS7jv/XAViwsLaC/1o/J7ZMY\nGhjKe1qEtMAVACkFY2PA7Gw5jD8ATB2dwsLSApZ0CQtLC5g6OpX3lAhZAQWAlIYyBXyHB4fRX+tH\nTWror/VjeHA47ykRsgK6gAjJgKGBIUxun8TU0SkMDw7T/UMKCQWAkIwYGhii4SeFhi4gQgipKBQA\nQgipKBQAQgipKBQAQgipKBQAUhhY2I2QzkIBILkzN8eOX4TkQSoCICJ/KiJviMgLPt+LiNwvIq+I\nyPdE5D+mcV9Sfqzh373bjFnnn5DOkdYK4EsALg/4/iMAzm8e1wL4fEr3JSVmYgLYtcu8P3nSvLLZ\nCyGdIxUBUNWnAPwk4JSrABxQwzMA1orI2Wncm5QTa/wXF1d+RxEgpDN0KgZwDoB5x/i15mekgkxM\nAHv3eht/Czt+EZI9hQsCi8i1IjIjIjNvvvlm3tMhKWNbPf70p8HnrVrFjl+EZE2nBOB1AAOO8bnN\nz1agqg+q6mZV3XzmmWd2ZHKuCQSPSSLCWj0CQF+fcQ+VpfQzIWWlUwLwKIDtzWygDwA4rqo/7NC9\no1OvAzt2LBt9VTOu1/OcVdfh1+oRMMZ/zx4af0I6QVppoH8OYBrAz4nIayLyuyJynYhc1zzlMQBz\nAF4B8AUAv5/GfVNFFTh2DNi3b1kEduww4
2PHuBJIGbcI9Dbr0tL4E9I5RAts2DZv3qwzMzOdu6HT\n6FtGR4F77wVEOjePCjExYWIC4+PA1VfT509IUkTksKpujnQuBcCFKtDjWBg1GjT+GTM3R8NPSFrE\nEYDCZQHlil0BOHHGBEgm0PgTkg8UAIvT/TM6ap78R0dbYwKEENJFsCWkRQRYu7bV53/vvea7tWvp\nBqowdFGRboUxADeqrcbePSaVwhmkZnYSKQOMASTBbexp/CvLxISpSQSwNhHpTigAhHhgjf+JE2bM\nAnWkG6EAEOLCbfwtFAHSbVAASEcoS1VPW6zObfwtrFJKugkKAMmcMrV7DCtWt3p1OauUTs9PY+Lb\nE5ien857KqRAMA2UZIo7kAoUL5vGneZp5+d2A61ebeoXFW3+YUzPT2Prga1YWFpAf60fk9snMTQw\nlPe0SAHgCoBkRhkCqX6rE3exujIb//pUHe8uvoslXcLC0gKmjk7lPS1SELgCIJkQFkgF8jWmc3PA\nww8Hr07s+507y2v8tx7YineX3kUDDfSgB/21fgwPDuc9NVIQuAIgqVP0QKp96t+1K3x1cvXVwOxs\n+Yw/AEwdncLC0gIa2kCP9ODSDZfS/UNaoACQ1ClaINUpNBMTwO7d5r27J7FbBKxQPPxw+/fLk+HB\nYfTX+lGTGk6pnYL6cJ3Gn7SiqoU9LrzwQiXlZXxcdfVqVVNPwxyrV5vPOzkHwLyOj6uuWtU6H7/j\nppuW5x5nzs77FYFDrx7S8afG9dCrh/KeCukQAGY0oo3N3cgHHRSA8uMUgTyMv713X59qb2+44V+9\nWnXbtvaEK89/KyGWOALAIDDJlLwCqe4g9MmT4b9ZtQq4+GLg6afjB6/9Mp78ziekCLAaKOkInSyp\n7JeBFERvL3DDDcBdd4WfOzvb+m8Jul9Z00dJeWE1UFI4OhnwDcpActPXZ1737gXuvDN+8LroGU+E\nBEEBIF1FWAZSX5952gfMOXv2tKZ5ujeAWfye5IuW8URIHBgDIF1HWCkHwDsmYd1U7t+HuXG6rXQE\nqRBRo8V5HB3PAmo0gsekVARl5czOrjzXnb4ZN6WzCFlATPskYBpoG+zerTo6umz0Gw0z3r27c3Mg\nqRPFiMcRCr/P4twvKw69ekhPveNUre2p6al3nEoRqChxBIAxAMCkeh87BuzbB+zYYcY7dpjxsWNm\nTErJ2FhwKYewgnVu331Yaeuw+7VLlCCyLf0QVvTtwcMP4rI/uwwPHn4w3UmS8hFVKfI4OroCsE/8\nzt0/zhUB6Tq8dioHbfzKy8UTdVURZQWwf2a/oo73jv0z+zOaNckLxFgBcB+AE1Wgx7EoajTYFL5L\nmZszT/Jh2Jx/r1x/G+S9+urssnyc940SVJ6en8bU0SkMDw571v257M8uwxNzT7w33rZhGx7/xONZ\nTJ3kRLX3AbgFLarAWbePE+sOIl1HnPTNoNLWu3Zl1+2snX4KQwNDGPvQmG/Rt9+64LcCxwC7h1WK\nqEuFoAPA5QBeBvAKgFs8vv8kgDcBHGkevxflurFdQO0Gcp3uH/t795h0JWEF62ZnvV1E7mPVqnAX\nTVDwOMq80iqot39mv247sM3T/cNAcvlBJ7OAANQAzALYAKAfwPMALnCd80kAn4t77VgC0I4Rd362\ne7fqyAizgCpImG8/yBhHNcxxsoOiik4cQYnK+FPjWttTU9ShtT01HX+KFe3KRqcFYAjA447xGIAx\n1znZC4BqvECu12phZKTV4PPJvzKEGegkItBO8DjLFUAQUVYA3GtQbDotAB8D8JBj/Am3sW8KwA8B\nfA/AXwAYiHLttrKAGo3Wvxa/J3+6fIiLsCfqOP0E7LWS9ETIq59CkIGni6j4FFEA3gfglOb7/wrg\nWwHXuxbADICZ9evXx/uXx1kBMO2TtIFdKfT1hT+dp/EUn0XqaRLXEV1ExadwLiDX+TUAx6NcuyMx\ngLDVA
iEuZmfTCx5HMcZp7i5Oei2uAIpPpwWgF8AcgPc7gsD/znXO2Y73HwXwTJRrZ5oFxBUAiYlX\n/aB2g8d9ffGMcBoB37RWE4wBFJuOCoC5H64A8HfNbKBbm5/tBXBl8/0EgBeb4nAQwL+Nct22YwBB\nY/sZYwAkBn5Pzu0Gj3t782uP2cl4Auk8HReArI5MS0Gw+FtXkklqZMiTc5TgsVfMoJ1rtUNeGUUk\nHygAUWH5564ii0qcaTw5j4/7N6R3B43Tnn+eewpIPlAASOXIIlsm6Mk5ys5f1egG+Kabsis0xxVA\ntaAAkEqRhX877SwePwPc26u6bVu2/vkoWUsM7HYPcQSg+4rBRUU1eExKQVChtrDCaUFs2ADcdJN/\nsTjA9Bd++OHwa9k+w70eDVgbDeDgwfTnb7H9C4DWXsfOyqLT89PYemArbjt4G7Ye2MoicBWimgJQ\nr7dW+lQ143rd+3yKRSGZmzO9fd3G03LihPk+SjMVNxMTwN13AxdfbAy9FydPxjPSXpXFGw1zHS+S\nzB9YFkdg+dX2RHaWlY7aSIZ0H9UTANV43b/iigXpGHFKOsfBaTgPHgQWF/3PtU/qN9/sf44VKj9D\n70e78wf8S0kDKzuWDQ8Oo7/Wj5rU0F/rx/DgcPwbknIS1VeUx5FZDCDqJjDuFygFacYAohZ98zpu\nvrm96/b2rkwTTRIDaCfoyxhA9wAGgSMQtQwEdwyXgjSygJIY/yj3DRKqtLKYmPZJKABhxDXqrBlU\nCoLy6MMMXlTDmaYIuM9Nax8A0z6rDQUgiLhuHT+xWFpKf24kMV6GPqphTboCiCMC7QpVVPxWG9fd\nQVdPt0MBCCNqGQin8d+0yRh953jXrmzmR1IjrmvFy3D29LQnBEHGvBMuGPe//bo7WMmzCsQRAI/M\n5ApQr5u/UZuXJwLce+/y2PndmjXAxo3AkSMm++fee4G/+RszvuSS1nNJoQjKhHFmwTixn9vf9faa\n/3sbjej3tTn2Qdk77WT2xMX+W3bubKZ/XjyFhYOt6Z5+zePzYnp+GlNHpzA8OFy4uXUj1RQAYKXR\ntuN63aSDrlkDHD8O3HOPGZ9yCnD//eYAgNHRVtEghSJsgxgQLgI7dwangHrh3GBVBMbGgKuvNoIz\nPW/SPReWFgqZ7mk3pNn5TW6fpAhkTPX2AQShurxH4NFHzeuFFxqjv7DQeu499xjjr7r8W+cryY00\nNoiNjZl8+aB9Bm6yMv7tbgSz2NXG0MAQJrdP4vYP357YuE7PT2Pi2xNt7xr2+j03pHWe6q4AvLCu\nIMAYf8C4epyvlgsvBH7914F33jFicOONy6uGtWu5USxH7AYxrxUAEM1FY6/jdgkFcfHFyY3/3Fzr\nvCYmjFiNjxdnVZHkSX16fhoHnj+ALx75IhYbiy2/txvSirpC6UqiBgvyOHIrBudO+7THyIgJBG/a\nZMbr1i0HhJ2v3CdQCDq9QSxpiqU7OyjtCqdptXNsty+wvb/URVGH5++5IS05YBA4Aaom2Ov3nQgw\nMwOcdRbw1lvmc+cqgbGBwuB+ek/iohkbA37yE+Cuu/zPsa4l63OPg7tuz9QU8PTT8QLYbtwBVS8X\nSztuoLAndb9Arr2/wrhJBbLi90MDQ/T7dxAKgJNGw7hy9u0DNm0yBt2+AsADDywbdmv83fhlE5Fc\ncGfCJHGj3Hmn+b/0859P5lpy45Wt9MQTK887cQLYu9e89/p3OA0vgBVumrRcLDaW4GXkg9xDzvvX\nemr41KZPYfvG7TT4OSJa4KDl5s2bdWZmpjM3c2f/nH468I1vGD//8ePAM88A3/nO8vlOYXAyMgLc\nd595v2MH4wEFwe1bT4JXhlG7qwu/bKUwZmdb/z1uw3vNxmvwhb/9ApZ0CTWp4fYP346xD41lnmY5\n8e0J3HbwthX3dc6TaZ7ZIiKHVXVzlHOZBQS0Zv/Y1M933jEG/p13zFP
9M8+0/sauDoDlV8BkDN1w\ngzn8KoySjpNm3r2t7+9VW99JWPZOWLaSH729K/sQuN07ADwrfA4NDGHsQ2MYGhhKnMnjRVhlUef9\nSQGIGizI4+hoEDioPpDXd5s2qd56q+kNuH69CRBfdJH370lXElTSIevyE+6g8P98ZmWANyigmlZA\n2AsGcvMFLAXRJl5F35zGf9Uq79d161RPnmTRuAoyO7uyrEMa5SdWrzatIt1lor1EwIqNV50fP2Pc\nbiYPKT4UgHbwesofGTGf79q1/MRvUz/tsW6d6uIiy0ZXlKDUzTjpoV6iEbVCqX0Ocd8n6Ck/yxUA\nyRcKQFycxntkxBxOEbDj0VHzpO/863MbfzaOqQxuo+3V3D2uCLjdRuPjywbefYQ1kgl7yqerpjuh\nALSDs0Joo9EqAk7j77cCiFphlHQF7fruo/QlcI+97uVl/N0iwKf8ahJHAJgG6kS1NYe/x5EkdfIk\ncPbZJv9/3Trgn/5peTOYHff0tOb9u6/n9x0pFe2kbraTJuosAwEs33PVKuCnPw3//ews8KM+pl1W\nDaaBtovTWLt3A998s/krtsa+VjOv69YBp51mxkEVRtlYvitoJ3WzXePv3BkMNEs6A9i1K7hInbOZ\nfFnTLrNIUSUeRF0q5HHkUgsozJ9/8mTr+YuLya7H+EDpCGu56IwFtFPDJyiQ7HQRpVnrqEjQdZUM\ndDoGAOByAC8DeAXALR7fnwLg4eb3zwIYjHLd3IrBpe3PZ4ZQ1xFmfNtt+xi3n2/aBeOKQJQUVQaw\n/emoAACoAZgFsAFAP4DnAVzgOuf3Afxx8/3HATwc5dq5CYCqd2/gds5xfuf8a6bxLz1hxjduf+Ko\naZ9e+w78rllGwlYAXCEEE0cA0ogBXATgFVWdU9UFAF8FcJXrnKsAfLn5/i8AbBUpeATUz59viePX\nt985cf6WlBJbEgLw9vG7y0+4/foTEyvPj+rbd89jdrY4/QKSEta4ho1jUiSqUvgdAD4G4CHH+BMA\nPuc65wUA5zrGswDW+VzvWgAzAGbWr1+fnUwmIY5fnzGAridKg/c4/vpu9e2nBVcAwaDDLqBUBcB5\n5OoCCiOOX597BCpNXL+++zdBxj+K+HSSTvnmGQPwp9MCMATgccd4DMCY65zHAQw13/cCeAvNUtRB\nR25ZQEFj93dR/fpxrku6hnb9+qrhvv2i+f73z+zX3r292lPv4ZN5jsQRgDRiAM8BOF9E3i8i/TBB\n3kdd5zwK4Jrm+48B+FZzosUiS79+WEyBdCXt+vWBYN9+WDyh00zPT+P6x67HYmMRDTTw7uK79M2X\ngMQCoKqLAK6Hecp/CcDXVPVFEdkrIlc2T/sTAO8TkVcA3AjglqT3TR3V5Z4A1pDv2OFd09/53eio\n6SQ2Otr626D7BI1J1+HuH2CJskHMSxi8Ooh1SgT8NmhNHZ3Cki69N+7p6WFT9zIQdamQx5FLOegg\nv77TbbN793K1UOdvg/z6jAVUmjRy9tuJJ6RFlOqiPXt6tG9vn+6f2Z/dREggYDG4BPj59b2M98hI\nq/H2Ego7ZjYQ0ZV++zhB3CTxhFTmzuqipSCOAHRnLaB23Szq49dvNLzdQ/ff3+oeEvGPI+zZY1pL\nWldRT8+yC8nZSJ50NU6//sQEcN550V03SeIJaRDU7nF6fhoHnj+AV4+/ms3NSTZEVYo8jrZWAO26\nWcKe0JeWwtM+ozzlc0cw0WTuoDz3CXg95R969ZCecvspijoUdWj/7f2BqwCuFLIFlXUBJXWzhIlH\nkPF2/8ZLKFgTiGj0Ym9Rr5H3JrHxp8ZV6vKeAEhdfFtMchNX9lRXAFSTG1m/fP2glpFRhIIxAKLB\nQdzeXo2V11+UfQBxVgDsRZw91RYA1fTdLHFaRga5itrJHCJdQ5wev1GNelF2Ah969ZBe943r9Lpv\nXBfq/uEKIFuqLQBZuVmitIx0Gv/
R0eVm8s7v/TKHSCWI2koyzK1TFMPvJop/P+gcxgeSU10ByNrN\n4vb5e60ydu1aKQabNi1/TpdP5UkqAu26frIWjaRP91wdpEMcAeiuNFARYO3a1tRKm3q5dm3yVMug\nlpE2XfT4cWDNGpPmae995Aiwdy/TPgmA5Z3Bq1YFn3fihGk/OTe3/Fm7JSDippy2Q5wyzV47ilnm\nOQeiKkUeR6IYQNA4Cc5VhXXvuF/dmURM+yQe2Cf5vr5oK4B20z/jZgy1u1KI+gTvdx5XAOmAyrqA\nssBLTGw8IGxvANM+SQizs9EMe5DbKChoHFc03O6luD75KOcHZQIxBpAcCkBaBO0LCHvCZ9oniUHQ\nU3qSVpFJ+gtfd0c2T+R80vcnDQGkAKRB1F29QU/4LP5GYhAU3A0LHPf2JhMNr+v3fXhcpZ5Nzn4a\nhq7bVgtpCSMFIC2i7uoNesJnIxgSA/dTvB37uYradRs5z/U979xDiltPVakX70m9G1cRaW2SiyMA\n3ZUFlDY2i8jJPfeYvw+bcTQyAtx9t3/GERvBkBg4C7nZzJ3LLjOvgMke6uvz/q27L0CUPgRzcybT\nyPYWaOG1IeDLk9Anb8eBS1c2Z+8UVckYCiq2lxlRlSKPo5ArgNNOU924UXVx0Xx//fWqZ52leskl\ny78hJCF+wdubb/ZfAfjFA8KygPLsMRBG1TKGGAMoigA4jf+WLaqf+Ywx/PYv433vU/2lX1oeb9pk\nsoIISUiYQd62zWT+xDHYUfoL51VhNAhmDMUnjgDQBeSH08WzZQvwwAPAL/8ysHGj+f7HPwaee868\n37QJOHzYbP4iJAHudo9uTpwAnn7a/KcYp8VkUH9h+73TXRSlXWXaeLl6gtwiQwNDGPuQmaBXm0oS\ngahKkceRuwtI1T/bx3ksLq78DSExiZq5Y4+bbkq/JHReFUbD2k0G1Q5q1xXUrSsIcAWQIiLewWAn\nZ50FLDUbYquashD1uve5qsFjUlnCOn5ZbOevu+4yT+nAyqd1Z/mIOIStFJLg11AeCA7q2id9ryB0\nu+UnpuensfXAVtx28DZsPbC1sqsHCkAUVIEbbmj9bN064NOfNu/femtZBHbsMDV/nK0iLX7tIv3E\nglQOv8wdi9s142Wwk9b9yaKlZJjBDWs36SccUTNn3Pc/8PyBrssiaofevCdQeKyRvv9+EwuYnwcW\nFozRr9WMCHzxi2bc2/yf0xZ8c1/n7bfNdQDzvRWL0dHl1FJSeawxd8cC/Pzy7tRRZ7E45/XyxOtJ\n3flEPzQwhMntk5g6OoXhweH3vrOGe2FpAf21fkxun4z0u7D7A0B/rf+963Yk5bKIRPUV5XEUIgag\nurIXwOJia0mIxcVW52yjsVz+2bkhbGTEZBT57RwmxIH1x2/bFs0vn3cmTxa++rQ2R3ndnzEApoFG\nJ6hVpNuof+YzKyuDOjuKOc91p45SDIiD2dnlI4i8c/mjGPh2DG6a+f7davDdUAA6hbszmLtdpBUB\nv+/d+wdYK4i4iJKV026xuFTnmWGv36oY7rSIIwAMAidBBDjjDLNXYGTE+Petj3/LFrM3wM399xuf\n/9KS2T9w5Ahw4YWmmUxQAJlUjqjNX8Kyh2zWUBbBXUuWZQyCsoBIQqIqRR5H4VcAFhsbcLt23HsH\ntmxpbQq/tLRylcCYANH2/PntxgDSWhmk/aRe9if/vOaPTrmAAPwrAN8E8PfN1zN8zlsCcKR5PBr1\n+qUSALexD4oBOA380lLr72j8K08Sf37c7l95bfwKo+y1fvKcfxwBSOoCugXApKqeD2CyOfbi/6nq\npuZxZcJ7Fgun68a6dmwf4E2bTPVQZ6XQM85o7S18442t13PuEyCVI7A6J7z7BDux+wiA8FIO7fYX\n7gRlr/ZZlvknFYCrAHy5+f7LAH4j4fXKRb1uDPh3v2uM/d13m/Hpp5vx6acv1weyImA3fam2Ckej\
nYV737aMIVJg0/PlRdvO6aw65S0nnTS6lkVOkNPOPulTwOgAcc7wX59h13iKAGQDPAPiNqNcvtAso\nbnN4L9gxjPiQZU5/3imjUWEMoD2QZgwAwJMAXvA4rnIbfABv+1zjnObrBgBHAZwXcL9rm2Ixs379\n+sz/x0pEUJG4qMFcdgwjPsT150ehCCmjJFtSFYDAHwMvAzi7+f5sAC9H+M2XAHwsyvULvQKwuLN/\nGMwlKZJFkLYsK4CyUZQVSxwBSBoDeBTANc331wD4uvsEETlDRE5pvl8H4IMAvp/wvsVAm358L264\ngX58kpgsqnNGaRVZJoKKxXVyDmWsLppUAD4L4FdF5O8BXNocQ0Q2i8hDzXN+HsCMiDwP4CCAz6pq\n+QXAGv99+4Cf+RnzmW0Ws3Gj2fA1NEQRIInJYgNXERrApEFRDG9Zsn7cJKoGqqo/BrDV4/MZAL/X\nfH8IwL9Pcp/CYjuGPfss8KMfAZdcYo5nn817ZoSEYo39zp3lNP5AeJXRTmGzfspWXZTloNuhXjfl\nGmzJZ1VTzsGWgQCMMNx3H0s8k0IzNgZcfXW2ZSKypCiGN2pZ6qJBAYiLqjH++/aZ8b33mtz/I0da\nz6PxJyWhrMYfKJbhHRoYKo3ht4gW2Ee9efNmnZmZyXsaK3H6/y22sJtldNTsAmajeEJIBxGRw6q6\nOcq5tE7t4NUj2JZ+sKUg9u0zbqHdu/OZIyFN2u0PTLofCkA7eKV/rltnRODGG82Tv10RHD/OTCCS\nG0n7A5PuhgIQF6f7x1nD5623jNHft8/0Cj5yZLk3MGMBJAeKXOyNFAMGgeMiYtI/ncbduoPWrGmN\nA9D4k5zwK/YGlDPds5NMz08XIqjcCSgA7VCvm5WANe4ixu3jVdqZIkA6jNv4WygC4diNZTatdHL7\nZFeLAF1A7eI06tqs68/SziRnkvYTqDpl3dHbLhSANPBzC42Oms/dKwC3IFAgSEoUoT9wmSlNHf+U\n4D6ANHG6hbzGQOsuYpHloPLatcvNYghJiJcbqKz1fjpN2WMAcfYBMAaQJm5j7/Xk795F7Mwo8hIM\nQtrAGnkrAjT+0Snjjt52oQB0EmfG0L59y0LAdFGSAd1Q7I1kC11AeaDaWiKi0aDxJ5kxN0efv5uy\nu3mCoAsoa6xoWqPtHof91r2LmOmiJENo/FupWqpnEMwCiku9bhq92I5fqub90FB4ENdvFzHTRQnp\nGFVL9QyCK4A4qAJvv20avjibvtg+AFu2BAdyg3YRe6WLEkJSpyg9BIoAYwBxsU/8zuYvQLwGMFHS\nRQkhmcEYQPNcCkAbuIO4AAO5hJBCwH4AWWJXAG5sTKCd6wWNCSEkIygAcXC7f0ZGzAGYz+KKQL3e\nGvy1QWLuCCaEdAAGgeMgApxxhgn2btlifP6WZ58130V1A3FXMCEkZxgDaIck+wDc13H3FuauYEJI\nAhgELhPcFUwISREGgcuC367gAosyIaR7oADkBXcFE0JyhkHgvOCuYEJIzjAGkDfcFUwISZGOxQBE\n5LdF5EURaYiI7w1F5HIReVlEXhGRW5Lcs+sIayJDCCEZkTQG8AKA3wTwlN8JIlID8EcAPgLgAgC/\nIyIXJLwvIYSQhCSKAajqSwAgwU+tFwF4RVXnmud+FcBVAL6f5N6EEEKS0YksoHMAzDvGrzU/80RE\nrhWRGRGZefPNNzOfHCGEVJXQFYCIPAngLI+vblXVr6c9IVV9EMCDgAkCp319QgghhlABUNVLE97j\ndQADjvG5zc8IIYTkSCdcQM8BOF9E3i8i/QA+DuDRDtyXEEJIAEnTQD8qIq8BGALw1yLyePPznxWR\nxwBAVRdvHO1HAAAGqUlEQVQBXA/gcQAvAfiaqr6YbNoFgbX8CSElJmkW0CMAHvH4/B8BXOEYPwbg\nsST3Khz1uinnbHfx2tIOa9eynj8hpBSwFlA7OGv527o9tq7Ps
WNcCRBCSgFrAbWDs27Pvn3L9fxZ\ny58QUiJYCygJrOVPCCkY7AfQCVjLnxBScigA7dBotNbyX1piLX9CSOlgDCAuNvtnzRpj9O+5B7jx\nxuUxa/kTQgBMz09j6ugUhgeHMTQwlPd0PKEAxMGZ/eM0/s5xDxdVhFSd6flpbD2wFQtLC+iv9WNy\n+2QhRYACEAdm/xBCIjB1dAoLSwtY0iUsLC1g6uhUIQWAj6txcYqAhcafEOJgeHAY/bV+1KSG/lo/\nhgeH856SJxSAuDD7hxASwtDAECa3T+L2D99eWPcPQBdQPJw7fq3bx44BrgQIIe8xNDBUWMNvoQDE\nQcRk+Th9/tYdxOwfQkjJ4E7gdlBtNfbuMSGE5AR3AmeN29jT+BNCSggFgBBCKgoFgBBCKgoFIGvY\nNYwQUlAoAFlSr7fuEbBppOwYRggpABSArGDXMEJIweE+gKxg3SBCuo4yVPiMA/cBZA27hhHSFZSl\nwif3ARQF1g0ipGvwqvBZdigAWeGuG9RosGsYISWmLBU+48AYQFawbhAhXYWt8MkYQIfomhgA6wYR\nQjoEYwBFgnWDCCEFhQJACCEVJZEAiMhvi8iLItIQEd8lh4gcFZH/IyJHRKTkPh1CCOkOkgaBXwDw\nmwD2Rzj3w6r6VsL7EUIISYlEAqCqLwGA0K9NCCGlo1MxAAXwhIgcFpFrO3RPQgghAYSuAETkSQBn\neXx1q6p+PeJ9LlbV10XkXwP4poj8QFWf8rnftQCuBYD169dHvDwhhJC4hAqAql6a9Caq+nrz9Q0R\neQTARQA8BUBVHwTwIGD2ASS9NyGEEG8y3wksIqcB6FHVf26+3wZgb5TfHj58+C0R+YdMJwisA1CW\n4DTnmg1lmitQrvlyrtkQNNd/E/UiiXYCi8hHATwA4EwAxwAcUdXLRORnATykqleIyAYAjzR/0gvg\nf6jqH7Z905QRkZmou+byhnPNhjLNFSjXfDnXbEhrrkmzgB7BsnF3fv6PAK5ovp8DsDHJfQghhKQP\ndwITQkhFoQA0A84lgXPNhjLNFSjXfDnXbEhlroWuBkoIISQ7uAIghJCKQgEAICK3i8j3msXqnmhm\nMRUSEblTRH7QnO8jIrI27zn5EbVYYJ6IyOUi8rKIvCIit+Q9Hz9E5E9F5A0ReSHvuYQhIgMiclBE\nvt/8/3807zn5ISKrROQ7IvJ8c6578p5TGCJSE5HvishfJb0WBcBwp6r+oqpuAvBXAHblPaEAvgng\nF1T1FwH8HYCxnOcThC0W6LnpL29EpAbgjwB8BMAFAH5HRC7Id1a+fAnA5XlPIiKLAG5S1QsAfADA\npwv8v+u7AH5FVTcC2ATgchH5QM5zCmMUwEtpXIgCAEBV33EMT4OpXVRIVPUJVV1sDp8BcG6e8wlC\nVV9S1ZfznkcAFwF4RVXnVHUBwFcBXJXznDxplk75Sd7ziIKq/lBV/7b5/p9hjNU5+c7KGzX83+aw\nr3kU9u9fRM4F8J8APJTG9SgATUTkD0VkHsB/RrFXAE4+BeB/5z2JEnMOgHnH+DUU1FCVFREZBPAf\nADyb70z8abpUjgB4A8A3VbWwcwVwH4D/BqCRxsUqIwAi8qSIvOBxXAUAqnqrqg4A+AqA64s81+Y5\nt8Istb+S30yjzZVUExH5FwD+F4AbXKvsQqGqS03377kALhKRX8h7Tl6IyK8BeENVD6d1zcxrARWF\nGEXtvgLgMQC7M5xOIGFzFZFPAvg1AFs15zzeNIoF5sjrAAYc43Obn5GEiEgfjPH/iqr+Zd7ziYKq\nHhORgzCxliIG2z8I4EoRuQLAKgCni8h/V9X/0u4FK7MCCEJEzncMrwLwg7zmEoaIXA6zBLxSVU/k\nPZ+S8xyA80Xk/SLSD+DjAB7NeU6lR0yHqD8B8JKq3pP3fIIQkTNtJp2InArgV1HQv39VHVPVc1V1\nEOa/1W8lMf4ABcDy2abb4
nsw1UoLm7YG4HMA/iVMX4UjIvLHeU/IDxH5qIi8BmAIwF+LyON5z8lJ\nM5h+PYDHYQKVX1PVF/OdlTci8ucApgH8nIi8JiK/m/ecAvgggE8A+JXmf6NHmk+tReRsAAebf/vP\nwcQAEqdXlgXuBCaEkIrCFQAhhFQUCgAhhFQUCgAhhFQUCgAhhFQUCgAhhFQUCgAhhFQUCgAhhFQU\nCgAhhFSU/w948Nj/SOOrKgAAAABJRU5ErkJggg==\n", 13 | "text/plain": [ 14 | "" 15 | ] 16 | }, 17 | "metadata": {}, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "import matplotlib.pyplot as plt\n", 23 | "from sklearn.decomposition import PCA\n", 24 | "from sklearn.datasets import load_iris\n", 25 | "\n", 26 | "data = load_iris()\n", 27 | "y = data.target\n", 28 | "X = data.data\n", 29 | "pca = PCA(n_components=2)\n", 30 | "reduced_X = pca.fit_transform(X)\n", 31 | "\n", 32 | "red_x, red_y = [], []\n", 33 | "blue_x, blue_y = [], []\n", 34 | "green_x, green_y = [], []\n", 35 | "for i in range(len(reduced_X)):\n", 36 | " if y[i] == 0:\n", 37 | " red_x.append(reduced_X[i][0])\n", 38 | " red_y.append(reduced_X[i][1])\n", 39 | " elif y[i] == 1:\n", 40 | " blue_x.append(reduced_X[i][0])\n", 41 | " blue_y.append(reduced_X[i][1])\n", 42 | " else:\n", 43 | " green_x.append(reduced_X[i][0])\n", 44 | " green_y.append(reduced_X[i][1])\n", 45 | "plt.scatter(red_x, red_y, c='r', marker='x')\n", 46 | "plt.scatter(blue_x, blue_y, c='b', marker='D')\n", 47 | "plt.scatter(green_x, green_y, c='g', marker='.')\n", 48 | "plt.show()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [] 59 | } 60 | ], 61 | "metadata": { 62 | "kernelspec": { 63 | "display_name": "Python 2", 64 | "language": "python", 65 | "name": "python2" 66 | }, 67 | "language_info": { 68 | "codemirror_mode": { 69 | "name": "ipython", 70 | "version": 2 71 | }, 72 | "file_extension": ".py", 73 | "mimetype": "text/x-python", 74 | "name": "python", 75 | "nbconvert_exporter": "python", 76 | "pygments_lexer": "ipython2", 77 | "version": "2.7.12" 78 | } 79 | }, 80 | "nbformat": 4, 81 | 
"nbformat_minor": 1 82 | } 83 | -------------------------------------------------------------------------------- /chapter14/ed2-ch14-s3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import numpy as np\n", 13 | "from sklearn.cross_validation import train_test_split\n", 14 | "from sklearn.cross_validation import cross_val_score\n", 15 | "from sklearn.preprocessing import scale\n", 16 | "from sklearn.decomposition import PCA\n", 17 | "from sklearn.linear_model import LogisticRegression\n", 18 | "from sklearn.metrics import classification_report\n", 19 | "from PIL import Image\n", 20 | "\n", 21 | "X = []\n", 22 | "y = []\n", 23 | "\n", 24 | "for dirpath, _, filenames in os.walk('att-faces/orl_faces'):\n", 25 | " for filename in filenames:\n", 26 | " if filename[-3:] == 'pgm':\n", 27 | " img = Image.open(os.path.join(dirpath, filename)).convert('L')\n", 28 | " arr = np.array(img).reshape(10304).astype('float32') / 255.\n", 29 | " X.append(arr)\n", 30 | " y.append(dirpath)\n", 31 | "\n", 32 | "X = scale(X)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 7, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11)\n", 44 | "pca = PCA(n_components=150)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 8, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "(300, 10304)\n", 59 | "(300, 150)\n", 60 | "Cross validation accuracy: 0.807660834984\n", 61 | " precision recall f1-score support\n", 62 | "\n", 63 | "att-faces/orl_faces/s1 0.50 1.00 0.67 1\n", 64 | "att-faces/orl_faces/s10 1.00 1.00 1.00 
3\n", 65 | "att-faces/orl_faces/s11 1.00 0.67 0.80 3\n", 66 | "att-faces/orl_faces/s12 1.00 1.00 1.00 5\n", 67 | "att-faces/orl_faces/s13 0.00 0.00 0.00 0\n", 68 | "att-faces/orl_faces/s14 1.00 1.00 1.00 4\n", 69 | "att-faces/orl_faces/s16 1.00 1.00 1.00 2\n", 70 | "att-faces/orl_faces/s17 0.67 1.00 0.80 2\n", 71 | "att-faces/orl_faces/s18 1.00 1.00 1.00 2\n", 72 | "att-faces/orl_faces/s19 0.83 1.00 0.91 5\n", 73 | "att-faces/orl_faces/s2 0.33 1.00 0.50 1\n", 74 | "att-faces/orl_faces/s20 1.00 1.00 1.00 2\n", 75 | "att-faces/orl_faces/s21 1.00 1.00 1.00 2\n", 76 | "att-faces/orl_faces/s22 1.00 1.00 1.00 1\n", 77 | "att-faces/orl_faces/s23 0.67 1.00 0.80 2\n", 78 | "att-faces/orl_faces/s24 1.00 1.00 1.00 3\n", 79 | "att-faces/orl_faces/s25 1.00 1.00 1.00 2\n", 80 | "att-faces/orl_faces/s26 1.00 1.00 1.00 3\n", 81 | "att-faces/orl_faces/s27 1.00 1.00 1.00 1\n", 82 | "att-faces/orl_faces/s28 1.00 0.50 0.67 4\n", 83 | "att-faces/orl_faces/s29 1.00 1.00 1.00 5\n", 84 | "att-faces/orl_faces/s3 1.00 1.00 1.00 3\n", 85 | "att-faces/orl_faces/s30 1.00 0.67 0.80 3\n", 86 | "att-faces/orl_faces/s31 0.75 1.00 0.86 3\n", 87 | "att-faces/orl_faces/s32 1.00 1.00 1.00 3\n", 88 | "att-faces/orl_faces/s34 1.00 0.83 0.91 6\n", 89 | "att-faces/orl_faces/s35 0.50 0.33 0.40 3\n", 90 | "att-faces/orl_faces/s36 1.00 1.00 1.00 3\n", 91 | "att-faces/orl_faces/s37 1.00 0.75 0.86 4\n", 92 | "att-faces/orl_faces/s38 1.00 1.00 1.00 3\n", 93 | "att-faces/orl_faces/s39 1.00 1.00 1.00 2\n", 94 | "att-faces/orl_faces/s4 1.00 0.75 0.86 4\n", 95 | "att-faces/orl_faces/s40 0.00 0.00 0.00 0\n", 96 | "att-faces/orl_faces/s5 1.00 0.67 0.80 3\n", 97 | "att-faces/orl_faces/s6 1.00 1.00 1.00 1\n", 98 | "att-faces/orl_faces/s7 1.00 1.00 1.00 3\n", 99 | "att-faces/orl_faces/s8 1.00 1.00 1.00 2\n", 100 | "att-faces/orl_faces/s9 1.00 1.00 1.00 1\n", 101 | "\n", 102 | "avg / total 0.94 0.90 0.91 100\n", 103 | "\n" 104 | ] 105 | }, 106 | { 107 | "name": "stderr", 108 | "output_type": "stream", 109 | "text": [ 110 
| "/home/gavin/classpass-activity-tagger/venv/local/lib/python2.7/site-packages/sklearn/metrics/classification.py:1076: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n", 111 | " 'recall', 'true', average, warn_for)\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "X_train_reduced = pca.fit_transform(X_train)\n", 117 | "X_test_reduced = pca.transform(X_test)\n", 118 | "print(X_train.shape)\n", 119 | "print(X_train_reduced.shape)\n", 120 | "classifier = LogisticRegression()\n", 121 | "accuracies = cross_val_score(classifier, X_train_reduced, y_train)\n", 122 | "print('Cross validation accuracy: %s' % np.mean(accuracies))\n", 123 | "classifier.fit(X_train_reduced, y_train)\n", 124 | "predictions = classifier.predict(X_test_reduced)\n", 125 | "print(classification_report(y_test, predictions))" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": { 132 | "collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 2", 141 | "language": "python", 142 | "name": "python2" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 2 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython2", 154 | "version": "2.7.12" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 1 159 | } 160 | --------------------------------------------------------------------------------