├── Chapter01
    ├── 1 simple example.ipynb
    ├── 2 European Option Pricing.ipynb
    ├── 3 Monte Carlo Pricing.ipynb
    ├── 4 Coin Flips.ipynb
    ├── 5 Insurance Pricing.ipynb
    ├── 6 Marketing Effectiveness.ipynb
    └── grapeJuice.csv
├── Chapter02
    ├── 1 web scraping.ipynb
    ├── 2 numpy arrays.ipynb
    ├── 3 numpy histogram.ipynb
    ├── 4 Pandas Titanic.ipynb
    ├── 5 SciPy.ipynb
    ├── 6 SciPy FFT.ipynb
    ├── 7 scipy linear algebra.ipynb
    └── 8 pandas dataframes.ipynb
├── Chapter03
    ├── 1 SciKit Predict.ipynb
    ├── 2 R Prediction.ipynb
    ├── 3 Bokeh.ipynb
    ├── 4 Plotly.ipynb
    ├── 5 State Map.ipynb
    ├── 6 web scraping.ipynb
    ├── 7 3D Plot.ipynb
    └── files
    │   ├── from_counts.npy
    │   ├── st99_d00.dbf
    │   └── states.csv
├── Chapter04
    ├── 1 Spark Total Line Lengths.ipynb
    ├── 2 Spark File Words.ipynb
    ├── 3 Spark SQL.ipynb
    ├── 4 Join Dataframes.ipynb
    ├── 5 People JSON.ipynb
    ├── 6 Spark Pivot.ipynb
    └── files
    │   ├── order.csv
    │   ├── orderproduct.csv
    │   ├── people.json
    │   ├── pivot.csv
    │   ├── product.csv
    │   └── productsales.csv
├── Chapter05
    ├── 1 Elections Data using R.ipynb
    ├── 2 Voter Registration.ipynb
    ├── 2a Voter Registration Python .ipynb
    ├── 3 Admissions.ipynb
    ├── 4 Python Admissions.ipynb
    ├── 5 Airplane Statistics.ipynb
    └── files
    │   ├── 05_income.tsv
    │   ├── 05_religion.tsv
    │   ├── acceptance-rates.csv
    │   ├── age.tsv
    │   ├── education.tsv
    │   ├── gender.tsv
    │   ├── ideology.tsv
    │   ├── orientation.tsv
    │   ├── party.tsv
    │   ├── race.tsv
    │   ├── region.tsv
    │   └── registration.csv
├── Chapter06
    ├── 1 CSV.ipynb
    ├── 2 CSV2.ipynb
    ├── 3 dplyr.ipynb
    ├── 4 tidyr.ipynb
    └── files
    │   ├── baseball.csv
    │   ├── dow_jones_index.data
    │   └── heating.csv
├── Chapter07
    ├── 1 markdown.ipynb
    ├── 2 glyphs.ipynb
    └── files
    │   ├── app.R
    │   ├── ui.R
    │   └── volcanoes.csv
├── Chapter08
    ├── 1 convert json to csv.ipynb
    ├── 2 yelp.ipynb
    ├── 3 cuisines.ipynb
    └── files
    │   ├── reviews.csv
    │   └── reviews.json
├── Chapter09
    ├── 1 naive bayes r.ipynb
    ├── 2 naive bayes python.ipynb
    ├── 3 nearest neighbor.ipynb
    ├── 4 nearest neighbor py.ipynb
    ├── 5 decision trees.ipynb
    ├── 6 decision trees py.ipynb
    ├── 7 neural net.ipynb
    ├── 8 random forests.ipynb
    └── files
    │   ├── car-mpg.csv
    │   └── housing-knn-predicted.csv
├── Chapter10
    ├── 1 profiling.ipynb
    └── 2 R microbenchmark.ipynb
├── LICENSE
└── README.md
/Chapter01/1 simple example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Dan Toomey lives in MA\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "name = \"Dan Toomey\"\n", 20 | "state = \"MA\"\n", 21 | "print(name + \" lives in \" + state)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [] 32 | } 33 | ], 34 | "metadata": { 35 | "kernelspec": { 36 | "display_name": "Python 3", 37 | "language": "python", 38 | "name": "python3" 39 | }, 40 | "language_info": { 41 | "codemirror_mode": { 42 | "name": "ipython", 43 | "version": 3 44 | }, 45 | "file_extension": ".py", 46 | "mimetype": "text/x-python", 47 | "name": "python", 48 | "nbconvert_exporter": "python", 49 | "pygments_lexer": "ipython3", 50 | "version": "3.6.0" 51 | } 52 | }, 53 | "nbformat": 4, 54 | "nbformat_minor": 2 55 | } 56 | -------------------------------------------------------------------------------- /Chapter01/2 European Option Pricing.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Value of the European Call Option 8.071\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "#European Option Pricing\n", 20 | "from numpy import *\n", 21 | "\n", 22 | "# set parameters\n", 23 | "S0 = 100.\n", 24 | "K = 105.\n", 25 | "T = 1.0\n", 26 | "r = 0.05\n", 27 | "sigma = 0.2\n", 28 | "\n", 29 | "# how many samples we are using\n", 30 | "I = 100000\n", 31 | "\n", 32 | "random.seed(103)\n", 33 | "z = random.standard_normal(I)\n", 34 | "ST = S0 * exp((r - 0.5 * sigma ** 2) * T + sigma * sqrt(T) * z)\n", 35 | "hT = maximum(ST - K, 0)\n", 36 | "C0 = exp(-r * T) * sum(hT) / I\n", 37 | "\n", 38 | "# tell user results\n", 39 | "print(\"Value of the European Call Option %5.3f\" % C0)" 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.6.0" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /Chapter01/3 Monte Carlo Pricing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Price: 14.4452\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "#Monte Carlo Pricing\n", 20 | "import datetime\n", 21 | "import random # import gauss\n", 22 | "import math #import exp, sqrt\n", 23 | "\n", 24 | "random.seed(103)\n", 25 | "\n", 26 | "def generate_asset_price(S,v,r,T):\n", 27 | " return S * math.exp((r - 0.5 * v**2) * T + v * math.sqrt(T) * random.gauss(0,1.0))\n", 28 | "\n", 29 | "def call_payoff(S_T,K):\n", 30 | " return max(0.0,S_T-K)\n", 31 | "\n", 32 | "S = 857.29 # underlying price\n", 33 | "v = 0.2076 # vol of 20.76%\n", 34 | "r = 0.0014 # rate of 0.14%\n", 35 | "T = (datetime.date(2013,9,21) - datetime.date(2013,9,3)).days / 365.0\n", 36 | "K = 860.\n", 37 | "simulations = 90000\n", 38 | "payoffs = []\n", 39 | "discount_factor = math.exp(-r * T)\n", 40 | "\n", 41 | "for i in range(simulations):\n", 42 | " S_T = generate_asset_price(S,v,r,T)\n", 43 | " payoffs.append(\n", 44 | " call_payoff(S_T, K)\n", 45 | " )\n", 46 | "\n", 47 | "price = discount_factor * (sum(payoffs) / float(simulations))\n", 48 | "print('Price: %.4f' % price)\n" 49 | ] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.6.0" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 2 73 | } 74 | 
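The two notebooks above price a European call by simulation. As a cross-check on the 8.071 figure, the same contract has a closed-form Black-Scholes value; the sketch below is an editorial illustration only (not a file in this repository), reuses the first notebook's parameters, and assumes SciPy is installed.

# Closed-form Black-Scholes check for the simulated European call above
from math import log, sqrt, exp
from scipy.stats import norm

S0, K, T, r, sigma = 100., 105., 1.0, 0.05, 0.2   # same parameters as the notebook

d1 = (log(S0 / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * sqrt(T))
d2 = d1 - sigma * sqrt(T)
C0 = S0 * norm.cdf(d1) - K * exp(-r * T) * norm.cdf(d2)

# Should print a value of roughly 8.02, close to the Monte Carlo estimate of
# 8.071; the gap narrows as the sample count I in the notebook grows.
print("Analytic value of the European Call Option %5.3f" % C0)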
-------------------------------------------------------------------------------- /Chapter01/4 Coin Flips.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "0.907958984375" 14 | ] 15 | }, 16 | "execution_count": 11, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import math\n", 24 | "\n", 25 | "N = 14 # number of flips\n", 26 | "m = 3 # length of run (must be > 1 and <= N/2)\n", 27 | "p = 0.5 # P(heads)\n", 28 | "\n", 29 | "prob = np.repeat(0.0,N)\n", 30 | "h = np.repeat(0.0,N)\n", 31 | "t = np.repeat(0.0,N)\n", 32 | "\n", 33 | "h[m] = math.pow(p,m)\n", 34 | "t[m] = math.pow(1-p,m)\n", 35 | "prob[m] = h[m] + t[m]\n", 36 | "\n", 37 | "for n in range(m+1,2*m):\n", 38 | " h[n] = (1-p)*math.pow(p,m)\n", 39 | " t[n] = p*math.pow(1-p,m)\n", 40 | " prob[n] = prob[n-1] + h[n] + t[n]\n", 41 | "\n", 42 | "\n", 43 | "for n in range(2*m,N):\n", 44 | " h[n] = ((1-p) - t[n-m] - prob[n-m-1]*(1-p))*math.pow(p,m)\n", 45 | " t[n] = (p - h[n-m] - prob[n-m-1]*p)*math.pow(1-p,m)\n", 46 | " prob[n] = prob[n-1] + h[n] + t[n]\n", 47 | "\n", 48 | "prob[N-1]" 49 | ] 50 | } 51 | ], 52 | "metadata": { 53 | "kernelspec": { 54 | "display_name": "Python 3", 55 | "language": "python", 56 | "name": "python3" 57 | }, 58 | "language_info": { 59 | "codemirror_mode": { 60 | "name": "ipython", 61 | "version": 3 62 | }, 63 | "file_extension": ".py", 64 | "mimetype": "text/x-python", 65 | "name": "python", 66 | "nbconvert_exporter": "python", 67 | "pygments_lexer": "ipython3", 68 | "version": "3.6.0" 69 | } 70 | }, 71 | "nbformat": 4, 72 | "nbformat_minor": 2 73 | } 74 | -------------------------------------------------------------------------------- /Chapter01/5 Insurance Pricing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | " premiekl moptva zon dur medskad antskad riskpre helpre skadfre\n", 13 | "1 1 1 1 62.9 18256 17 4936 2049 0.27027027\n", 14 | "2 1 1 2 112.9 13632 7 845 1230 0.06200177\n", 15 | "3 1 1 3 133.1 20877 9 1411 762 0.06761833\n", 16 | "4 1 1 4 376.6 13045 7 242 396 0.01858736\n", 17 | "5 1 1 5 9.4 0 0 0 990 0.00000000\n", 18 | "6 1 1 6 70.8 15000 1 212 594 0.01412429\n", 19 | "7 1 1 7 4.4 8018 1 1829 396 0.22727273\n", 20 | "8 1 2 1 352.1 8232 52 1216 1229 0.14768532\n", 21 | "9 1 2 2 840.1 7418 69 609 738 0.08213308\n", 22 | "10 1 2 3 1378.3 7318 75 398 457 0.05441486\n", 23 | "11 1 2 4 5505.3 6922 136 171 238 0.02470347\n", 24 | "12 1 2 5 114.1 11131 2 195 594 0.01752848\n", 25 | "13 1 2 6 810.9 5970 14 103 356 0.01726477\n", 26 | "14 1 2 7 62.3 6500 1 104 238 0.01605136\n", 27 | "15 2 1 1 191.6 7754 43 1740 1024 0.22442589\n", 28 | "16 2 1 2 237.3 6933 34 993 615 0.14327855\n", 29 | "17 2 1 3 162.4 4402 11 298 381 0.06773399\n", 30 | "18 2 1 4 446.5 8214 8 147 198 0.01791713\n", 31 | "19 2 1 5 13.2 0 0 0 495 0.00000000\n", 32 | "20 2 1 6 82.8 5830 3 211 297 0.03623188\n", 33 | "21 2 1 7 14.5 0 0 0 198 0.00000000\n", 34 | "22 2 2 1 844.8 4728 94 526 614 0.11126894\n", 35 | "23 2 2 2 1296.0 4252 99 325 369 0.07638889\n", 36 | "24 2 2 3 1214.9 4212 37 128 229 0.03045518\n", 37 | 
"25 2 2 4 3740.7 3846 56 58 119 0.01497046\n", 38 | "26 2 2 5 109.4 3925 4 144 297 0.03656307\n", 39 | "27 2 2 6 404.7 5280 5 65 178 0.01235483\n", 40 | "28 2 2 7 66.3 7795 1 118 119 0.01508296\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "con <- url(\"http://www2.math.su.se/~esbj/GLMbook/moppe.sas\")\n", 46 | "data <- readLines(con, n = 200L, warn = FALSE, encoding = \"unknown\")\n", 47 | "close(con)\n", 48 | "## Find the data range\n", 49 | "data.start <- grep(\"^cards;\", data) + 1L\n", 50 | "data.end <- grep(\"^;\", data[data.start:999L]) + data.start - 2L\n", 51 | "table.1.2 <- read.table(text = data[data.start:data.end],\n", 52 | " header = FALSE, \n", 53 | "sep = \"\", \n", 54 | "quote = \"\",\n", 55 | "\n", 56 | "col.names = c(\"premiekl\", \"moptva\", \"zon\", \"dur\",\n", 57 | " \"medskad\", \"antskad\", \"riskpre\", \"helpre\", \"cell\"),\n", 58 | " na.strings = NULL,\n", 59 | " colClasses = c(rep(\"factor\", 3), \"numeric\",\n", 60 | " rep(\"integer\", 4), \"NULL\"),\n", 61 | " comment.char = \"\")\n", 62 | "rm(con, data, data.start, data.end) \n", 63 | "\n", 64 | "# Remainder of Script adds comments/descriptions\n", 65 | "comment(table.1.2) <-\n", 66 | " c(\"Title: Partial casco moped insurance from Wasa insurance, 1994--1999\",\n", 67 | " \"Source: http://www2.math.su.se/~esbj/GLMbook/moppe.sas\",\n", 68 | " \"Copyright: http://www2.math.su.se/~esbj/GLMbook/\")\n", 69 | "## See the SAS code for this derived field\n", 70 | "table.1.2$skadfre = with(table.1.2, antskad / dur)\n", 71 | "## English language column names as comments:\n", 72 | "comment(table.1.2$premiekl) <-\n", 73 | " c(\"Name: Class\",\n", 74 | " \"Code: 1=Weight over 60kg and more than 2 gears\",\n", 75 | " \"Code: 2=Other\")\n", 76 | "comment(table.1.2$moptva) <-\n", 77 | " c(\"Name: Age\",\n", 78 | " \"Code: 1=At most 1 year\",\n", 79 | " \"Code: 2=2 years or more\")\n", 80 | "comment(table.1.2$zon) <-\n", 81 | " c(\"Name: Zone\",\n", 82 | " \"Code: 1=Central and semi-central parts of Sweden's three largest cities\",\n", 83 | " \"Code: 2=suburbs and middle-sized towns\",\n", 84 | " \"Code: 3=Lesser towns, except those in 5 or 7\",\n", 85 | " \"Code: 4=Small towns and countryside, except 5--7\",\n", 86 | " \"Code: 5=Northern towns\",\n", 87 | " \"Code: 6=Northern countryside\",\n", 88 | " \"Code: 7=Gotland (Sweden's largest island)\")\n", 89 | "comment(table.1.2$dur) <-\n", 90 | " c(\"Name: Duration\",\n", 91 | " \"Unit: year\")\n", 92 | "comment(table.1.2$medskad) <-\n", 93 | " c(\"Name: Claim severity\",\n", 94 | " \"Unit: SEK\")\n", 95 | "comment(table.1.2$antskad) <- \"Name: No. 
claims\"\n", 96 | "comment(table.1.2$riskpre) <-\n", 97 | " c(\"Name: Pure premium\",\n", 98 | " \"Unit: SEK\")\n", 99 | "comment(table.1.2$helpre) <-\n", 100 | " c(\"Name: Actual premium\",\n", 101 | " \"Note: The premium for one year according to the tariff in force 1999\",\n", 102 | " \"Unit: SEK\")\n", 103 | "comment(table.1.2$skadfre) <-\n", 104 | " c(\"Name: Claim frequency\",\n", 105 | " \"Unit: /year\")\n", 106 | "## Save results for later\n", 107 | "save(table.1.2, file = \"table.1.2.RData\")\n", 108 | "## Print the table (not as pretty as the book)\n", 109 | "print(table.1.2)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stderr", 119 | "output_type": "stream", 120 | "text": [ 121 | "Warning message:\n", 122 | "\"dependency 'GenomicRanges' is not available\"also installing the dependencies 'numDeriv', 'bindr', 'KernSmooth', 'lava', 'kernlab', 'CVST', 'DEoptimR', 'minqa', 'nloptr', 'RcppEigen', 'assertthat', 'bindrcpp', 'glue', 'pkgconfig', 'plogr', 'class', 'prodlim', 'DRR', 'robustbase', 'sfsmisc', 'lme4', 'mime', 'dplyr', 'ipred', 'dimRed', 'lubridate', 'timeDate', 'ddalpha', 'purrr', 'gower', 'RcppRoll', 'tidyselect', 'pbkrtest', 'bitops', 'backports', 'colorspace', 'openssl', 'checkmate', 'htmlwidgets', 'bit', 'highr', 'markdown', 'yaml', 'RcppCCTZ', 'Rcpp', 'praise', 'gtools', 'ModelMetrics', 'recipes', 'withr', 'Formula', 'bdsmatrix', 'sandwich', 'car', 'lmtest', 'htmltools', 'caTools', 'base64enc', 'rprojroot', 'RColorBrewer', 'dichromat', 'munsell', 'labeling', 'viridisLite', 'rlang', 'rex', 'httr', 'survival', 'latticeExtra', 'cluster', 'nnet', 'acepack', 'foreign', 'gridExtra', 'htmlTable', 'viridis', 'sp', 'Matrix', 'mvtnorm', 'TH.data', 'SparseM', 'MatrixModels', 'gdtools', 'BH', 'bit64', 'knitr', 'nanotime', 'chron', 'plyr', 'reshape', 'reshape2', 'testthat', 'hexbin', 'fastmatch', 'nlme', 'xts', 'gdata', 'caret', 'curl', 'zoo', 'plm', 'rmarkdown', 'codetools', 'iterators', 'randomForest', 'gtable', 'MASS', 'scales', 'tibble', 'lazyeval', 'covr', 'ggplot2movies', 'Hmisc', 'lattice', 'mapproj', 'maps', 'maptools', 'mgcv', 'multcomp', 'quantreg', 'rpart', 'svglite'\n", 123 | "\n" 124 | ] 125 | }, 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "\n", 131 | " There is a binary version available but the source version is later:\n", 132 | " binary source needs_compilation\n", 133 | "data.table 1.10.4-1 1.10.4-2 TRUE\n", 134 | "\n", 135 | "package 'numDeriv' successfully unpacked and MD5 sums checked\n", 136 | "package 'bindr' successfully unpacked and MD5 sums checked\n", 137 | "package 'KernSmooth' successfully unpacked and MD5 sums checked\n", 138 | "package 'lava' successfully unpacked and MD5 sums checked\n", 139 | "package 'kernlab' successfully unpacked and MD5 sums checked\n", 140 | "package 'CVST' successfully unpacked and MD5 sums checked\n", 141 | "package 'DEoptimR' successfully unpacked and MD5 sums checked\n", 142 | "package 'minqa' successfully unpacked and MD5 sums checked\n", 143 | "package 'nloptr' successfully unpacked and MD5 sums checked\n", 144 | "package 'RcppEigen' successfully unpacked and MD5 sums checked\n", 145 | "package 'assertthat' successfully unpacked and MD5 sums checked\n", 146 | "package 'bindrcpp' successfully unpacked and MD5 sums checked\n", 147 | "package 'glue' successfully unpacked and MD5 sums checked\n", 148 | "package 'pkgconfig' successfully unpacked and MD5 sums checked\n", 149 | "package 
'plogr' successfully unpacked and MD5 sums checked\n", 150 | "[... ~100 similar 'package ... successfully unpacked and MD5 sums checked' messages omitted ...]\n", 248 | 
"package 'lattice' successfully unpacked and MD5 sums checked\n", 249 | "package 'mapproj' successfully unpacked and MD5 sums checked\n", 250 | "package 'maps' successfully unpacked and MD5 sums checked\n", 251 | "package 'maptools' successfully unpacked and MD5 sums checked\n", 252 | "package 'mgcv' successfully unpacked and MD5 sums checked\n", 253 | "package 'multcomp' successfully unpacked and MD5 sums checked\n", 254 | "package 'quantreg' successfully unpacked and MD5 sums checked\n", 255 | "package 'rpart' successfully unpacked and MD5 sums checked\n", 256 | "package 'svglite' successfully unpacked and MD5 sums checked\n", 257 | "package 'foreach' successfully unpacked and MD5 sums checked\n", 258 | "package 'ggplot2' successfully unpacked and MD5 sums checked\n", 259 | "\n", 260 | "The downloaded binary packages are in\n", 261 | "\tC:\\Users\\prasadr\\AppData\\Local\\Temp\\RtmpohUiwh\\downloaded_packages\n" 262 | ] 263 | }, 264 | { 265 | "name": "stderr", 266 | "output_type": "stream", 267 | "text": [ 268 | "installing the source package 'data.table'\n", 269 | "\n", 270 | "Warning message:\n", 271 | "\"running command '\"C:/Users/prasadr/AppData/Local/Continuum/Anaconda3/R/bin/x64/R\" CMD INSTALL -l \"C:\\Users\\prasadr\\AppData\\Local\\Continuum\\Anaconda3\\R\\library\" C:\\Users\\prasadr\\AppData\\Local\\Temp\\RtmpohUiwh/downloaded_packages/data.table_1.10.4-2.tar.gz' had status 1\"Warning message in install.packages(c(\"data.table\", \"foreach\", \"ggplot2\"), dependencies = TRUE, :\n", 272 | "\"installation of package 'data.table' had non-zero exit status\"Warning message:\n", 273 | "\"package 'foreach' was built under R version 3.4.2\"" 274 | ] 275 | }, 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | " rating.factor class duration n.claims rels.frequency rels.severity\n", 281 | "1 Vehicle class 1 9833 391 1.00 1.00\n", 282 | "2 Vehicle class 2 8825 395 0.78 0.55\n", 283 | "11 Vehicle age 1 1918 141 1.55 1.79\n", 284 | "21 Vehicle age 2 16740 645 1.00 1.00\n", 285 | "12 Zone 1 1451 206 7.10 1.21\n", 286 | "22 Zone 2 2486 209 4.17 1.07\n", 287 | "3 Zone 3 2889 132 2.23 1.07\n", 288 | "4 Zone 4 10069 207 1.00 1.00\n", 289 | "5 Zone 5 246 6 1.20 1.21\n", 290 | "6 Zone 6 1369 23 0.79 0.98\n", 291 | "7 Zone 7 148 3 1.00 1.20\n", 292 | " rels.pure.premium\n", 293 | "1 1.00\n", 294 | "2 0.42\n", 295 | "11 2.78\n", 296 | "21 1.00\n", 297 | "12 8.62\n", 298 | "22 4.48\n", 299 | "3 2.38\n", 300 | "4 1.00\n", 301 | "5 1.46\n", 302 | "6 0.78\n", 303 | "7 1.20\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "# make sure the packages we want to use are installed\n", 309 | "install.packages(c(\"data.table\", \"foreach\", \"ggplot2\"), dependencies = TRUE, repos = \"http://cran.us.r-project.org\")\n", 310 | "# load the data table we need\n", 311 | "if (!exists(\"table.1.2\"))\n", 312 | " load(\"table.1.2.RData\")\n", 313 | "\n", 314 | "library(\"foreach\")\n", 315 | "\n", 316 | "## We are looking to reproduce table 2.7 which we start building here,\n", 317 | "## add columns for our results.\n", 318 | "table27 <-\n", 319 | " data.frame(rating.factor =\n", 320 | " c(rep(\"Vehicle class\", nlevels(table.1.2$premiekl)),\n", 321 | " rep(\"Vehicle age\", nlevels(table.1.2$moptva)),\n", 322 | " rep(\"Zone\", nlevels(table.1.2$zon))),\n", 323 | " class =\n", 324 | " c(levels(table.1.2$premiekl),\n", 325 | " levels(table.1.2$moptva),\n", 326 | " levels(table.1.2$zon)),\n", 327 | " stringsAsFactors = FALSE)\n", 328 | "\n", 329 | "## Calculate duration per rating 
factor level and also set the\n", 330 | "## contrasts (using the same idiom as in the code for the previous\n", 331 | "## chapter). We use foreach here to execute the loop both for its\n", 332 | "## side-effect (setting the contrasts) and to accumulate the sums.\n", 333 | "# new.cols are set to claims, sums, levels\n", 334 | "new.cols <-\n", 335 | " foreach (rating.factor = c(\"premiekl\", \"moptva\", \"zon\"),\n", 336 | " .combine = rbind) %do%\n", 337 | "{\n", 338 | " nclaims <- tapply(table.1.2$antskad, table.1.2[[rating.factor]], sum)\n", 339 | " sums <- tapply(table.1.2$dur, table.1.2[[rating.factor]], sum)\n", 340 | " n.levels <- nlevels(table.1.2[[rating.factor]])\n", 341 | " contrasts(table.1.2[[rating.factor]]) <-\n", 342 | " contr.treatment(n.levels)[rank(-sums, ties.method = \"first\"), ]\n", 343 | " data.frame(duration = sums, n.claims = nclaims)\n", 344 | "}\n", 345 | "table27 <- cbind(table27, new.cols)\n", 346 | "rm(new.cols)\n", 347 | "\n", 348 | "#build frequency distribution\n", 349 | "model.frequency <-\n", 350 | " glm(antskad ~ premiekl + moptva + zon + offset(log(dur)),\n", 351 | " data = table.1.2, family = poisson)\n", 352 | "\n", 353 | "rels <- coef( model.frequency )\n", 354 | "rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )\n", 355 | "table27$rels.frequency <-\n", 356 | " c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = \"first\")],\n", 357 | " c(1, rels[2])[rank(-table27$duration[3:4], ties.method = \"first\")],\n", 358 | " c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = \"first\")])\n", 359 | "\n", 360 | "# note the severities involved\n", 361 | "model.severity <-\n", 362 | " glm(medskad ~ premiekl + moptva + zon,\n", 363 | " data = table.1.2[table.1.2$medskad > 0, ],\n", 364 | " family = Gamma(\"log\"), weights = antskad)\n", 365 | "\n", 366 | "rels <- coef( model.severity )\n", 367 | "rels <- exp( rels[1] + rels[-1] ) / exp( rels[1] )\n", 368 | "## Aside: For the canonical link function use\n", 369 | "## rels <- rels[1] / (rels[1] + rels[-1])\n", 370 | "\n", 371 | "table27$rels.severity <-\n", 372 | " c(c(1, rels[1])[rank(-table27$duration[1:2], ties.method = \"first\")],\n", 373 | " c(1, rels[2])[rank(-table27$duration[3:4], ties.method = \"first\")],\n", 374 | " c(1, rels[3:8])[rank(-table27$duration[5:11], ties.method = \"first\")])\n", 375 | "\n", 376 | "table27$rels.pure.premium <- with(table27, rels.frequency * rels.severity)\n", 377 | "print(table27, digits = 2)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": { 384 | "collapsed": true 385 | }, 386 | "outputs": [], 387 | "source": [] 388 | } 389 | ], 390 | "metadata": { 391 | "kernelspec": { 392 | "display_name": "R", 393 | "language": "R", 394 | "name": "ir" 395 | }, 396 | "language_info": { 397 | "codemirror_mode": "r", 398 | "file_extension": ".r", 399 | "mimetype": "text/x-r-source", 400 | "name": "R", 401 | "pygments_lexer": "r", 402 | "version": "3.4.1" 403 | } 404 | }, 405 | "nbformat": 4, 406 | "nbformat_minor": 2 407 | } 408 | -------------------------------------------------------------------------------- /Chapter01/grapeJuice.csv: -------------------------------------------------------------------------------- 1 | sales,price,ad_type,price_apple,price_cookies 2 | 222,9.83,0,7.36,8.8 3 | 201,9.72,1,7.43,9.62 4 | 247,10.15,1,7.66,8.9 5 | 169,10.04,0,7.57,10.26 6 | 317,8.38,1,7.33,9.54 7 | 227,9.74,0,7.51,9.49 8 | 214,9.81,1,7.57,9.26 9 | 187,9.51,0,7.66,9.96 10 | 188,10.44,1,7.39,9.27 11 | 275,8.2,0,8.29,8.96 12 | 
236,9.68,1,7.51,9.55 13 | 174,10.1,0,7.48,10.4 14 | 217,9.6,1,7.73,10.5 15 | 166,10.46,0,7.63,9.59 16 | 252,8.99,1,7.31,10.41 17 | 181,10.46,0,8.2,9.05 18 | 163,10.42,0,7.3,9.43 19 | 195,10.29,0,7.84,9.76 20 | 208,10.26,1,8.13,10.05 21 | 320,8.45,1,7.83,9.13 22 | 191,9.7,1,7.56,10.17 23 | 167,10.18,0,7.46,9.18 24 | 188,9.88,0,7.42,9.35 25 | 294,9.58,1,8.12,8.79 26 | 335,8.34,1,8.23,9.13 27 | 145,10.27,0,7.41,10.58 28 | 201,10.26,1,7.67,9.22 29 | 131,10.49,0,7.59,10.43 30 | 210,10.36,0,7.93,9.44 31 | 279,8.56,1,7.65,10.44 32 | -------------------------------------------------------------------------------- /Chapter02/1 web scraping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Retrieving prices for AMD from https://www.google.com/finance/historical?q=NASDAQ:AMD\n", 15 | "The last day of pricing information we have is:\n", 16 | "{'date': 'Oct 3, 2017', 'open': '12.73', 'high': '13.48', 'low': '12.70', 'close': '13.42', 'volume': '85,174,031'}\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "from lxml import html \n", 22 | "import requests\n", 23 | "from time import sleep\n", 24 | "\n", 25 | "# setup the URL for the symbol we are interested in\n", 26 | "exchange = \"NASDAQ\"\n", 27 | "ticker = \"AMD\"\n", 28 | "url = \"https://www.google.com/finance/historical?q=%s:%s\"%(exchange,ticker)\n", 29 | "\n", 30 | "# retrieve the web page\n", 31 | "response = requests.get(url)\n", 32 | "print(\"Retrieving prices for %s from %s\"%(ticker,url))\n", 33 | "# give it a few seconds in case there is some delay\n", 34 | "sleep(3)\n", 35 | "\n", 36 | "# convert the text into an HTML Document\n", 37 | "parser = html.fromstring(response.text)\n", 38 | "\n", 39 | "# find the HTML DIV tag that has id 'prices'\n", 40 | "price_store = parser.get_element_by_id(\"prices\")\n", 41 | "\n", 42 | "# we will store our price information in the price_data list\n", 43 | "price_data = []\n", 44 | "\n", 45 | "# find the HTML TABLE element within the prices DIV\n", 46 | "for table in price_store:\n", 47 | "\n", 48 | " #every row (skip first row headings) of table has\n", 49 | " # date, open, high, low, close, volume\n", 50 | " for row in table[1:]:\n", 51 | "\n", 52 | " #store tuples for a day together\n", 53 | " day = {\"date\":row[0].text.strip('\\n'), \\\n", 54 | " \"open\":row[1].text.strip('\\n'), \\\n", 55 | " \"high\":row[2].text.strip('\\n'), \\\n", 56 | " \"low\":row[3].text.strip('\\n'), \\\n", 57 | " \"close\":row[4].text.strip('\\n'), \\\n", 58 | " \"volume\":row[5].text.strip('\\n')}\n", 59 | " \n", 60 | " #add day's information to our set\n", 61 | " price_data.append(day)\n", 62 | "\n", 63 | "print(\"The last day of pricing information we have is:\")\n", 64 | "print(price_data[0])" 65 | ] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.6.0" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 2 89 | } 90 | 
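The notebook above walks the 'prices' div by hand with lxml. A shorter route to the same table, sketched below purely as an editorial illustration (it is not a file in this repository, and it assumes the Google Finance historical URL still returns an HTML table), is to let pandas parse every table on the page and keep the first one.

# Alternative scrape of the same historical-prices table using pandas
import pandas as pd

url = "https://www.google.com/finance/historical?q=NASDAQ:AMD"
try:
    tables = pd.read_html(url)   # one DataFrame per <table> element on the page
    prices = tables[0]           # assumed: the first table is the price history
    print("The last day of pricing information we have is:")
    print(prices.head(1))
except (ValueError, OSError) as err:   # no table found, or the request failed
    print("Could not retrieve prices:", err)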
-------------------------------------------------------------------------------- /Chapter02/2 numpy arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Array contents [[ 1 1 2]\n", 13 | " [ 3 5 8]\n", 14 | " [13 21 34]]\n", 15 | "max value = 34\n", 16 | "min value = 1\n", 17 | "sum across 2nd axis [ 4 16 68]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "# numpy arrays\n", 23 | "import numpy as np\n", 24 | "\n", 25 | "# create an array ‘a’ with 3 3-tuples\n", 26 | "a = np.array([[1, 1, 2], [3, 5, 8], [13, 21, 34]])\n", 27 | "print(\"Array contents\", a)\n", 28 | "\n", 29 | "# determine the minimum value in array\n", 30 | "print(\"max value = \", a.max())\n", 31 | "\n", 32 | "# max value in array\n", 33 | "print(\"min value = \", a.min())\n", 34 | "\n", 35 | "# sum across the 2nd axis\n", 36 | "print(\"sum across 2nd axis\", a.sum(axis = 1))" 37 | ] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.6.2" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /Chapter02/3 numpy histogram.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Histogram is [197 187 201 221 195 221 183 205 194 196]\n", 13 | "Correlation of the two rows is [161131]\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "# numpy histogram\n", 19 | "import numpy as np\n", 20 | "import random\n", 21 | "\n", 22 | "# build up 2 sets of random numbers\n", 23 | "\n", 24 | "# setup empty array 2 columns, 1000 rows\n", 25 | "numbers = np.empty([2,1000], int)\n", 26 | "\n", 27 | "# set seed so we can repeat results\n", 28 | "random.seed(137)\n", 29 | "\n", 30 | "# populate the array\n", 31 | "for num in range(0, 1000):\n", 32 | " numbers[0,num] = random.randint(0, 1000)\n", 33 | " numbers[1,num] = random.randint(0, 1000)\n", 34 | " \n", 35 | "# produce a histogram of the data\n", 36 | "(hist, bins) = np.histogram(numbers, bins = 10, range = (0,1000))\n", 37 | "print(\"Histogram is \",hist)\n", 38 | "\n", 39 | "# calculate correlation between the 2 columns\n", 40 | "\n", 41 | "corrs = np.correlate(numbers[:,1], numbers[:,2], mode='valid')\n", 42 | "print(\"Correlation of the two rows is \", corrs)" 43 | ] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Python 3", 49 | "language": "python", 50 | "name": "python3" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.6.2" 63 | } 64 | }, 65 | "nbformat": 4, 66 | "nbformat_minor": 2 67 | } 68 | 
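One detail in the histogram notebook above is worth flagging: the data is laid out as two rows of 1,000 draws (the array is created with np.empty([2,1000])), yet np.correlate(numbers[:,1], numbers[:,2], mode='valid') compares two 2-element column slices rather than the two rows the comment describes. A row-wise version, shown here only as an editorial sketch with the same seed and layout, could look like this.

# Correlation between the two 1000-element rows of the random array
import numpy as np
import random

numbers = np.empty([2, 1000], int)
random.seed(137)
for num in range(0, 1000):
    numbers[0, num] = random.randint(0, 1000)
    numbers[1, num] = random.randint(0, 1000)

# Pearson correlation coefficient between row 0 and row 1; for independent
# uniform draws it should come out close to zero.
corr = np.corrcoef(numbers[0, :], numbers[1, :])[0, 1]
print("Correlation of the two rows is", corr)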
-------------------------------------------------------------------------------- /Chapter02/6 SciPy FFT.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([ 8.0+0.j , 0.5+0.36327126j, 0.5+1.53884177j,\n", 12 | " 0.5-1.53884177j, 0.5-0.36327126j])" 13 | ] 14 | }, 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "output_type": "execute_result" 18 | } 19 | ], 20 | "source": [ 21 | "from scipy.fftpack import fft\n", 22 | "import numpy as np\n", 23 | "\n", 24 | "x = np.array([2.0, 1.0, 2.0, 1.0, 2.0])\n", 25 | "fft(x)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "image/png": "[base64-encoded PNG omitted: line plot of the FFT magnitude spectrum, 2.0/n * abs(yf[0:n//2]) against the frequencies in xf]", 36 | "text/plain": [ 37 | "" 38 | ] 39 | }, 40 | "metadata": {}, 41 | "output_type": "display_data" 42 | } 43 | ], 44 | "source": [ 45 | "from scipy.fftpack import fft\n", 46 | "import numpy as np\n", 47 | "\n", 48 | "# how many points\n", 49 | "n = 100\n", 50 | "spacing = 1.0 / 250.0\n", 51 | "x = np.linspace(0.0, n*spacing, n)\n", 52 | "y = np.sin(30.0 * np.pi * x) + 0.5 * np.sin(7.0 * np.pi * x)\n", 53 | "yf = fft(y)\n", 54 | "xf = np.linspace(0.0, 1.0/(2.0*spacing), n//2)\n", 55 | "\n", 56 | "#plot the data to get a visual\n", 57 | "import matplotlib.pyplot as plt\n", 58 | "plt.plot(xf, 2.0/n * np.abs(yf[0:n//2]))\n", 59 | "plt.grid()\n", 60 | "plt.show()" 61 | ] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | 
"pygments_lexer": "ipython3", 80 | "version": "3.6.2" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 1 85 | } 86 | -------------------------------------------------------------------------------- /Chapter02/7 scipy linear algebra.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "A array\n", 13 | "[[1 1]\n", 14 | " [2 3]]\n", 15 | "b array\n", 16 | "[[1]\n", 17 | " [2]]\n", 18 | "solution \n", 19 | "[[ 1.]\n", 20 | " [ 0.]]\n", 21 | "validation of solution (should be a 0 matrix)\n", 22 | "[[ 0.]\n", 23 | " [ 0.]]\n" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "# scipi liner algebra\n", 29 | "import numpy as np\n", 30 | "from scipy import linalg\n", 31 | "\n", 32 | "A = np.array([[1, 1], [2, 3]])\n", 33 | "print(\"A array\")\n", 34 | "print(A)\n", 35 | "\n", 36 | "b = np.array([[1], [2]])\n", 37 | "print(\"b array\")\n", 38 | "print(b)\n", 39 | "\n", 40 | "solution = np.linalg.solve(A, b) \n", 41 | "print(\"solution \")\n", 42 | "print(solution)\n", 43 | "\n", 44 | "# validate results\n", 45 | "print(\"validation of solution (should be a 0 matrix)\")\n", 46 | "print(A.dot(solution) - b)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3", 62 | "language": "python", 63 | "name": "python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython3", 75 | "version": "3.6.0" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /Chapter03/4 Plotly.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import plotly\n", 12 | "import plotly.graph_objs as go\n", 13 | "import plotly.plotly as py\n", 14 | "import pandas as pd\n", 15 | "import numpy as np\n", 16 | "\n", 17 | "#plotly.tools.set_credentials_file(username='DemoAccount', api_key='lr1c37zw81')\n", 18 | "plotly.tools.set_config_file(world_readable=True, sharing='public')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "(6110,)\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "# load voting summary from other project\n", 36 | "from_counts = np.load(\"Documents/from_counts.npy\")\n", 37 | "print(from_counts.shape)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 8, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "#plotly expects a list\n", 49 | "from_count_list = []\n", 50 | "for from_count in from_counts:\n", 51 | " from_count_list.append(from_count)\n", 52 | " \n", 53 | "data = [go.Histogram(x=from_count_list)]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 9, 59 | 
"metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~prasadrpackt/0 or inside your plot.ly account where it is named 'basic histogram'\n" 66 | ] 67 | }, 68 | { 69 | "data": { 70 | "text/html": [ 71 | "" 72 | ], 73 | "text/plain": [ 74 | "" 75 | ] 76 | }, 77 | "execution_count": 9, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "# plot on plot.ly site\n", 84 | "py.iplot(data, filename='basic histogram')" 85 | ] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.6.0" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 1 109 | } 110 | -------------------------------------------------------------------------------- /Chapter03/5 State Map.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "ModuleNotFoundError", 10 | "evalue": "No module named 'mpl_toolkits.basemap'", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[1;32mfrom\u001b[0m \u001b[0mmpl_toolkits\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbasemap\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mBasemap\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpatches\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mPolygon\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 16 | "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mpl_toolkits.basemap'" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "%matplotlib inline\n", 22 | "\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "from mpl_toolkits.basemap import Basemap\n", 25 | "from matplotlib.patches import Polygon\n", 26 | "import pandas as pd\n", 27 | "import numpy as np\n", 28 | "import matplotlib\n", 29 | "\n", 30 | "# create the map\n", 31 | "map = Basemap(llcrnrlon=-119,llcrnrlat=22,urcrnrlon=-64,urcrnrlat=49,\n", 32 | " projection='lcc',lat_1=33,lat_2=45,lon_0=-95)\n", 33 | "\n", 34 | "# load the shapefile, use the name 'states'\n", 35 | "# download from https://github.com/matplotlib/basemap/tree/master/examples/st99_d00.dbf,shx,shp\n", 36 | "map.readshapefile('st99_d00', name='states', drawbounds=True)\n", 37 | "\n", 38 | 
"# collect the state names from the shapefile attributes so we can\n", 39 | "# look up the shape obect for a state by it's name\n", 40 | "state_names = []\n", 41 | "for shape_dict in map.states_info:\n", 42 | " state_name = shape_dict['NAME']\n", 43 | " state_names.append(state_name)\n", 44 | " #print(state_name)\n", 45 | "#print(state_names)\n", 46 | "\n", 47 | "# get axes\n", 48 | "ax = plt.gca()\n", 49 | "\n", 50 | "# load density data drawn from \n", 51 | "# https://en.wikipedia.org/wiki/List_of_U.S._states_by_population_density\n", 52 | "df = pd.read_csv('states.csv')\n", 53 | "print(df.head())\n", 54 | "\n", 55 | "# determine the range of density values\n", 56 | "max_density = -1.0\n", 57 | "min_density = -1.0\n", 58 | "for index, row in df.iterrows():\n", 59 | " d = row['density/mi2']\n", 60 | " density = float(d.replace(',' , ''))\n", 61 | " if (max_density==-1.0) or (max_densitydensity):\n", 64 | " min_density = density\n", 65 | "print('max',max_density)\n", 66 | "print('min',min_density)\n", 67 | "range_density = max_density - min_density\n", 68 | "print('range',range_density)\n", 69 | "\n", 70 | "# we pick a color for the state density out of red spectrum\n", 71 | "cmap = matplotlib.cm.get_cmap('Spectral')\n", 72 | "\n", 73 | "# for each state get the color for it's density\n", 74 | "for index, row in df.iterrows():\n", 75 | " state_name = row['State']\n", 76 | " #print('Looking for state ', state_name)\n", 77 | " d = row['density/mi2']\n", 78 | " density = float(d.replace(',' , ''))\n", 79 | " color = cmap((density - min_density)/range_density)\n", 80 | " #print state_name, d, density, color\n", 81 | " seg = map.states[state_names.index(state_name)]\n", 82 | " poly = Polygon(seg, facecolor=color, edgecolor=color)\n", 83 | " ax.add_patch(poly)\n", 84 | "\n", 85 | "plt.show()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.6.0" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 1 119 | } 120 | -------------------------------------------------------------------------------- /Chapter03/6 web scraping.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Retrieving prices for AMD from https://www.google.com/finance/historical?q=NASDAQ:AMD\n", 13 | "The last day of pricing information we have is: {'date': 'Sep 28, 2017', 'open': '12.76', 'high': '12.84', 'low': '12.55', 'close': '12.74', 'volume': '35,812,311'}\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "# web scraping\n", 19 | "from lxml import html \n", 20 | "import requests\n", 21 | "from time import sleep\n", 22 | "\n", 23 | "# setup the URL for the symbol we are interested in\n", 24 | "exchange = \"NASDAQ\"\n", 25 | "ticker = \"AMD\"\n", 26 | "url = \"https://www.google.com/finance/historical?q=%s:%s\"%(exchange,ticker)\n", 27 | "\n", 28 | "# 
retrieve the web page\n", 29 | "response = requests.get(url)\n", 30 | "print(\"Retrieving prices for %s from %s\"%(ticker,url))\n", 31 | "# give it a few seconds in case there is some delay\n", 32 | "sleep(3)\n", 33 | "\n", 34 | "# convert the text into an HTML Document\n", 35 | "parser = html.fromstring(response.text)\n", 36 | "\n", 37 | "# find the HTML DIV tag that has id 'prices'\n", 38 | "price_store = parser.get_element_by_id(\"prices\")\n", 39 | "\n", 40 | "# we will store our price information in the price_data list\n", 41 | "price_data = []\n", 42 | "\n", 43 | "# find the HTML TABLE element within the prices DIV\n", 44 | "for table in price_store:\n", 45 | "\n", 46 | " #every row (skip first row headings) of table has\n", 47 | " # date, open, high, low, close, volume\n", 48 | " for row in table[1:]:\n", 49 | "\n", 50 | " #store tuples for a day together\n", 51 | " day = {\"date\":row[0].text.strip('\\n'), \\\n", 52 | " \"open\":row[1].text.strip('\\n'), \\\n", 53 | " \"high\":row[2].text.strip('\\n'), \\\n", 54 | " \"low\":row[3].text.strip('\\n'), \\\n", 55 | " \"close\":row[4].text.strip('\\n'), \\\n", 56 | " \"volume\":row[5].text.strip('\\n')}\n", 57 | " \n", 58 | " #add day's information to our set\n", 59 | " price_data.append(day)\n", 60 | "\n", 61 | "print(\"The last day of pricing information we have is:\", price_data[0])" 62 | ] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | "display_name": "Python 3", 68 | "language": "python", 69 | "name": "python3" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.6.2" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 2 86 | } 87 | -------------------------------------------------------------------------------- /Chapter03/files/from_counts.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Jupyter-for-Data-Science/1e96065e582cacf8a77db60ed15eecbf047e55cf/Chapter03/files/from_counts.npy -------------------------------------------------------------------------------- /Chapter03/files/st99_d00.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Jupyter-for-Data-Science/1e96065e582cacf8a77db60ed15eecbf047e55cf/Chapter03/files/st99_d00.dbf -------------------------------------------------------------------------------- /Chapter03/files/states.csv: -------------------------------------------------------------------------------- 1 | State,rank,density/mi2,density/km2,pop_rank,2015_pop,land_rank,area_mi2,area_km2 2 | New Jersey,1,"1,218",470,11,"8,958,013",46,"7,354","19,046.80" 3 | Rhode Island,2,"1,021",394,43,"1,056,298",50,"1,034","2,678.00" 4 | Massachusetts,3,871,336,15,"6,794,422",45,"7,800","20,201.90" 5 | Connecticut,4,741,286,29,"3,590,886",48,"4,842","12,540.70" 6 | Maryland,5,618,238,19,"6,006,401",42,"9,707","25,141.00" 7 | Delaware,6,485,187,45,"945,934",49,"1,949","5,047.90" 8 | New York,7,420,162,4,"19,795,791",30,"47,126","122,055.80" 9 | Florida,8,378,145,3,"20,271,272",26,"53,625","138,888.10" 10 | Pennsylvania,9,286,110,6,"12,802,503",32,"44,743","115,883.80" 11 | Ohio,10,284,109,7,"11,613,423",35,"40,861","105,829.50" 12 | North Carolina,11,254,98,9,"10,042,802",36,"39,490","102,278.60" 13 | 
California,12,251,97,1,"39,144,818",3,"163,696","423,970.70" 14 | Illinois,13,231,89,5,"12,859,995",24,"55,519","143,793.50" 15 | Hawaii,14,222,86,40,"1,431,603",47,"6,423","16,635.50" 16 | Indiana,15,184,71,16,"6,619,680",38,"35,826","92,788.90" 17 | Georgia,16,177,68,8,"10,214,860",21,"57,513","148,958.00" 18 | Michigan,17,175,67,10,"9,922,576",22,"56,539","146,435.30" 19 | Virginia,18,172,66,12,"8,382,993",29,"48,618","125,920.00" 20 | South Carolina,19,162,62,23,"4,896,146",40,"30,061","77,857.60" 21 | Tennessee,20,160,61,17,"6,600,299",34,"41,235","106,798.20" 22 | New Hampshire,21,148,57,41,"1,330,608",44,"8,953","23,188.20" 23 | Kentucky,22,112,43,26,"4,425,092",37,"39,486","102,268.30" 24 | Louisiana,23,108,41,25,"4,670,724",33,"43,204","111,897.80" 25 | Washington,24,107,41,13,"7,170,351",20,"66,456","172,120.20" 26 | Wisconsin,25,106,41,20,"5,771,337",25,"54,158","140,268.60" 27 | Texas,26,105,40,2,"27,469,114",2,"261,232","676,587.80" 28 | Alabama,27,95,37,24,"4,858,979",28,"50,645","131,169.90" 29 | Missouri,28,88,34,18,"6,083,672",18,"68,742","178,041.00" 30 | West Virginia,29,76,29,38,"1,844,128",41,"24,038","62,258.10" 31 | Minnesota,30,68,26,21,"5,489,594",14,"79,627","206,233.00" 32 | Vermont,31,67,26,49,"626,042",43,"9,217","23,871.90" 33 | Mississippi,32,63,24,32,"2,992,333",31,"46,923","121,530.00" 34 | Arizona,33,60,23,14,"6,828,065",6,"113,594","294,207.10" 35 | Arkansas,34,57,22,33,"2,978,204",27,"52,035","134,770.00" 36 | Oklahoma,35,57,22,28,"3,911,338",19,"68,595","177,660.20" 37 | Iowa,36,55,21,30,"3,123,899",23,"55,857","144,669.00" 38 | Colorado,37,52,20,22,"5,456,574",8,"103,642","268,431.50" 39 | Maine,38,43,16,42,"1,329,328",39,"30,843","79,883.00" 40 | Oregon,39,41,16,27,"4,028,977",10,"95,988","248,607.80" 41 | Utah,40,36,14,31,"2,995,919",12,"82,170","212,819.30" 42 | Kansas,41,36,14,34,"2,911,641",13,"81,759","211,754.80" 43 | Nevada,42,26,10,35,"2,890,845",7,"109,781","284,331.50" 44 | Nebraska,43,24,9,37,"1,896,190",15,"76,824","198,973.20" 45 | Idaho,44,20,7,39,"1,654,930",11,"82,643","214,044.40" 46 | New Mexico,45,17,6,36,"2,085,109",5,"121,298","314,160.40" 47 | South Dakota,46,11,4,46,"858,469",16,"75,811","196,349.60" 48 | North Dakota,47,10,4,47,"756,927",17,"69,001","178,711.80" 49 | Montana,48,7,2,44,"1,032,949",4,"145,546","376,962.40" 50 | Wyoming,49,6,2,50,"586,107",9,"97,093","251,469.70" 51 | Alaska,50,1,0,48,"738,432",1,"570,641","1,477,953.40" 52 | -------------------------------------------------------------------------------- /Chapter04/1 Spark Total Line Lengths.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# using spark\n", 10 | "from pyspark import SparkContext\n", 11 | "sc = SparkContext.getOrCreate()\n", 12 | "\n", 13 | "lines = sc.textFile(\"Documents/1 Spark Total Line Lengths.ipynb\")\n", 14 | "lineLengths = lines.map(lambda s: len(s))\n", 15 | "totalLength = lineLengths.reduce(lambda a, b: a + b)\n", 16 | "print(totalLength)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python 3", 32 | "language": "python", 33 | "name": "python3" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | 
"file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.6.0" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } 51 | -------------------------------------------------------------------------------- /Chapter04/2 Spark File Words.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pyspark\n", 12 | "if not 'sc' in globals():\n", 13 | " sc = pyspark.SparkContext()\n", 14 | "\n", 15 | "text_file = sc.textFile(\"2 Spark File Words.ipynb\")\n", 16 | "counts = text_file.flatMap(lambda line: line.split(\" \")) \\\n", 17 | " .map(lambda word: (word, 1)) \\\n", 18 | " .reduceByKey(lambda a, b: a + b)\n", 19 | "for x in counts.collect():\n", 20 | " print x " 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.6.0" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 2 45 | } 46 | -------------------------------------------------------------------------------- /Chapter04/3 Spark SQL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 13 | "|ACTUAL|PREDICT|COUNTRY|REGION| DIVISION| PRODTYPE|PRODUCT|QUARTER|YEAR|MONTH|\n", 14 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 15 | "| 925| 850| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1993|12054|\n", 16 | "| 999| 297| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1993|12085|\n", 17 | "| 608| 846| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1993|12113|\n", 18 | "| 642| 533| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12144|\n", 19 | "| 656| 646| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12174|\n", 20 | "| 948| 486| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12205|\n", 21 | "| 612| 717| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1993|12235|\n", 22 | "| 114| 564| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1993|12266|\n", 23 | "| 685| 230| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1993|12297|\n", 24 | "| 657| 494| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1993|12327|\n", 25 | "| 608| 903| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1993|12358|\n", 26 | "| 353| 266| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1993|12388|\n", 27 | "| 107| 190| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1994|12419|\n", 28 | "| 354| 139| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1994|12450|\n", 29 | "| 101| 217| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1994|12478|\n", 30 | "| 553| 560| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1994|12509|\n", 31 | "| 877| 148| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1994|12539|\n", 32 | "| 431| 762| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1994|12570|\n", 33 | "| 511| 457| CANADA| 
EAST|EDUCATION|FURNITURE| SOFA| 3|1994|12600|\n", 34 | "| 157| 532| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1994|12631|\n", 35 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 36 | "only showing top 20 rows\n", 37 | "\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "from pyspark import SparkContext\n", 43 | "from pyspark.sql import SparkSession \n", 44 | "\n", 45 | "sc = SparkContext.getOrCreate()\n", 46 | "spark = SparkSession(sc)\n", 47 | "\n", 48 | "df = spark.read.format(\"csv\") \\\n", 49 | " .option(\"header\", \"true\") \\\n", 50 | " .load(\"productsales.csv\");\n", 51 | "df.show()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "+-------+-----+\n", 64 | "|PRODUCT|count|\n", 65 | "+-------+-----+\n", 66 | "| CHAIR| 288|\n", 67 | "| BED| 288|\n", 68 | "| TABLE| 288|\n", 69 | "| SOFA| 288|\n", 70 | "| DESK| 288|\n", 71 | "+-------+-----+\n", 72 | "\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "df.groupBy(\"PRODUCT\").count().show()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 90 | "|ACTUAL|PREDICT|COUNTRY|REGION| DIVISION| PRODTYPE|PRODUCT|QUARTER|YEAR|MONTH|\n", 91 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 92 | "| 925| 850| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1993|12054|\n", 93 | "| 999| 297| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1993|12085|\n", 94 | "| 642| 533| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12144|\n", 95 | "| 656| 646| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12174|\n", 96 | "| 948| 486| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1993|12205|\n", 97 | "| 685| 230| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1993|12297|\n", 98 | "| 657| 494| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1993|12327|\n", 99 | "| 353| 266| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1993|12388|\n", 100 | "| 354| 139| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 1|1994|12450|\n", 101 | "| 877| 148| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 2|1994|12539|\n", 102 | "| 511| 457| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 3|1994|12600|\n", 103 | "| 277| 0| CANADA| EAST|EDUCATION|FURNITURE| SOFA| 4|1994|12723|\n", 104 | "| 444| 267| CANADA| EAST|EDUCATION|FURNITURE| BED| 1|1993|12085|\n", 105 | "| 329| 312| CANADA| EAST|EDUCATION|FURNITURE| BED| 2|1993|12174|\n", 106 | "| 910| 531| CANADA| EAST|EDUCATION|FURNITURE| BED| 2|1993|12205|\n", 107 | "| 515| 143| CANADA| EAST|EDUCATION|FURNITURE| BED| 3|1993|12297|\n", 108 | "| 730| 126| CANADA| EAST|EDUCATION|FURNITURE| BED| 4|1993|12327|\n", 109 | "| 993| 862| CANADA| EAST|EDUCATION|FURNITURE| BED| 4|1993|12358|\n", 110 | "| 954| 754| CANADA| EAST|EDUCATION|FURNITURE| BED| 4|1993|12388|\n", 111 | "| 991| 204| CANADA| EAST|EDUCATION|FURNITURE| BED| 1|1994|12478|\n", 112 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+\n", 113 | "only showing top 20 rows\n", 114 | "\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "df.filter(df['ACTUAL'] > df['PREDICT']).show()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 4, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 
131 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+-----+\n", 132 | "|ACTUAL|PREDICT|COUNTRY|REGION| DIVISION| PRODTYPE|PRODUCT|QUARTER|YEAR|MONTH| DIFF|\n", 133 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+-----+\n", 134 | "| 996| 50|GERMANY| EAST|EDUCATION| OFFICE| DESK| 3|1993|12266|946.0|\n", 135 | "| 972| 39| U.S.A.| WEST|EDUCATION| OFFICE| TABLE| 2|1994|12570|933.0|\n", 136 | "| 984| 65| U.S.A.| EAST|EDUCATION| OFFICE| DESK| 1|1994|12419|919.0|\n", 137 | "| 948| 50|GERMANY| WEST|EDUCATION|FURNITURE| SOFA| 4|1994|12723|898.0|\n", 138 | "| 916| 18|GERMANY| WEST| CONSUMER|FURNITURE| BED| 4|1993|12327|898.0|\n", 139 | "| 909| 15| CANADA| EAST| CONSUMER| OFFICE| DESK| 3|1993|12235|894.0|\n", 140 | "| 912| 23| CANADA| EAST|EDUCATION| OFFICE| DESK| 1|1994|12450|889.0|\n", 141 | "| 890| 7| CANADA| EAST| CONSUMER|FURNITURE| BED| 4|1993|12388|883.0|\n", 142 | "| 900| 29|GERMANY| WEST| CONSUMER|FURNITURE| SOFA| 2|1993|12205|871.0|\n", 143 | "| 991| 143|GERMANY| WEST|EDUCATION| OFFICE| DESK| 2|1993|12144|848.0|\n", 144 | "| 956| 111| CANADA| EAST|EDUCATION| OFFICE| CHAIR| 4|1993|12327|845.0|\n", 145 | "| 862| 18| CANADA| WEST|EDUCATION|FURNITURE| SOFA| 1|1994|12419|844.0|\n", 146 | "| 862| 21| U.S.A.| EAST|EDUCATION| OFFICE| CHAIR| 2|1994|12570|841.0|\n", 147 | "| 847| 10| CANADA| EAST| CONSUMER|FURNITURE| BED| 2|1994|12570|837.0|\n", 148 | "| 902| 72|GERMANY| WEST| CONSUMER|FURNITURE| BED| 1|1994|12450|830.0|\n", 149 | "| 916| 88| U.S.A.| EAST| CONSUMER| OFFICE| TABLE| 1|1994|12419|828.0|\n", 150 | "| 943| 116|GERMANY| EAST|EDUCATION| OFFICE| DESK| 1|1994|12478|827.0|\n", 151 | "| 971| 147| CANADA| WEST|EDUCATION| OFFICE| TABLE| 3|1994|12600|824.0|\n", 152 | "| 896| 74|GERMANY| EAST| CONSUMER| OFFICE| TABLE| 2|1994|12539|822.0|\n", 153 | "| 906| 86| U.S.A.| WEST|EDUCATION| OFFICE| DESK| 3|1993|12266|820.0|\n", 154 | "+------+-------+-------+------+---------+---------+-------+-------+----+-----+-----+\n", 155 | "only showing top 20 rows\n", 156 | "\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "#register dataframe as temp SQL table\n", 162 | "df.createOrReplaceTempView(\"sales\")\n", 163 | "\n", 164 | "# pull the values by the difference calculated\n", 165 | "sqlDF = spark.sql(\"SELECT *, ACTUAL-PREDICT as DIFF FROM sales ORDER BY DIFF desc\")\n", 166 | "sqlDF.show()" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.6.0" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /Chapter04/4 Join Dataframes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pyspark import SparkContext\n", 12 | "from pyspark.sql import SparkSession\n", 13 | "\n", 14 | "sc = SparkContext.getOrCreate()\n", 15 | "spark = SparkSession(sc) " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 14, 21 | "metadata": {}, 22 | 
"outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "+---------+------------------+-------+\n", 28 | "|productid| description| price|\n", 29 | "+---------+------------------+-------+\n", 30 | "| 1001| \"Baby High Chair\"| 35.00|\n", 31 | "| 1002| \"Kitchen Table\"| 120.00|\n", 32 | "| 1003| \"Phone Desk\"| 20.00|\n", 33 | "+---------+------------------+-------+\n", 34 | "\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "# load product set\n", 40 | "productDF = spark.read.format(\"csv\") \\\n", 41 | " .option(\"header\", \"true\") \\\n", 42 | " .load(\"product.csv\");\n", 43 | "productDF.show()\n", 44 | "productDF.createOrReplaceTempView(\"product\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 15, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "+-------+-------------+\n", 57 | "|orderid| \"orderdate\"|\n", 58 | "+-------+-------------+\n", 59 | "| 2001| '2017-07-04'|\n", 60 | "| 2002| '2017-07-08'|\n", 61 | "| 2003| '2017-07-09'|\n", 62 | "+-------+-------------+\n", 63 | "\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "# load order set\n", 69 | "orderDF = spark.read.format(\"csv\") \\\n", 70 | " .option(\"header\", \"true\") \\\n", 71 | " .load(\"order.csv\");\n", 72 | "orderDF.show()\n", 73 | "orderDF.createOrReplaceTempView(\"order\")" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 16, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "+-------+------------+-----------+\n", 86 | "|orderid| \"productid\"| \"quantity\"|\n", 87 | "+-------+------------+-----------+\n", 88 | "| 2001| 1001| 1|\n", 89 | "| 2001| 1002| 1|\n", 90 | "| 2002| 1003| 3|\n", 91 | "| 2003| 1002| 1|\n", 92 | "+-------+------------+-----------+\n", 93 | "\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "# load order/product set\n", 99 | "orderproductDF = spark.read.format(\"csv\") \\\n", 100 | " .option(\"header\", \"true\") \\\n", 101 | " .load(\"orderproduct.csv\");\n", 102 | "orderproductDF.show()\n", 103 | "orderproductDF.createOrReplaceTempView(\"orderproduct\")" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 17, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "+-------+------------+-----------+-------+-------------+\n", 116 | "|orderid| \"productid\"| \"quantity\"|orderid| \"orderdate\"|\n", 117 | "+-------+------------+-----------+-------+-------------+\n", 118 | "| 2001| 1001| 1| 2001| '2017-07-04'|\n", 119 | "| 2001| 1002| 1| 2001| '2017-07-04'|\n", 120 | "| 2002| 1003| 3| 2002| '2017-07-08'|\n", 121 | "| 2003| 1002| 1| 2003| '2017-07-09'|\n", 122 | "+-------+------------+-----------+-------+-------------+\n", 123 | "\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "# join the tables\n", 129 | "joinedDF = spark.sql(\"SELECT * \" \\\n", 130 | " \"FROM orderproduct \" \\\n", 131 | " \"JOIN order ON order.orderid = orderproduct.orderid \" \\\n", 132 | " \"ORDER BY order.orderid\")\n", 133 | "joinedDF.show()" 134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python 3", 140 | "language": "python", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | 
"nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.6.0" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 2 158 | } 159 | -------------------------------------------------------------------------------- /Chapter04/5 People JSON.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": { 7 | "collapsed": true, 8 | "scrolled": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from pyspark import SparkContext\n", 13 | "from pyspark.sql import SparkSession \n", 14 | "\n", 15 | "sc = SparkContext.getOrCreate()\n", 16 | "spark = SparkSession(sc)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 12, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "+----+----+--------------------+---------------+\n", 29 | "| age|born| fame| name|\n", 30 | "+----+----+--------------------+---------------+\n", 31 | "|null|null| null| Michael|\n", 32 | "| 30|null| null| Andy|\n", 33 | "| 19|null| null| Justin|\n", 34 | "|null|1955|co-founder of App...| Steve Jobs|\n", 35 | "|null|1955| null|Tim Berners-Lee|\n", 36 | "|null|1815| null| George Boole|\n", 37 | "+----+----+--------------------+---------------+\n", 38 | "\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "#using some data from file from https://gist.github.com/marktyers/678711152b8dd33f6346\n", 44 | "df = spark.read.json(\"people.json\")\n", 45 | "df.show()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 13, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "root\n", 58 | " |-- age: long (nullable = true)\n", 59 | " |-- born: long (nullable = true)\n", 60 | " |-- fame: string (nullable = true)\n", 61 | " |-- name: string (nullable = true)\n", 62 | "\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "df.printSchema()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 14, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "name": "stdout", 77 | "output_type": "stream", 78 | "text": [ 79 | "+---------------+\n", 80 | "| name|\n", 81 | "+---------------+\n", 82 | "| Michael|\n", 83 | "| Andy|\n", 84 | "| Justin|\n", 85 | "| Steve Jobs|\n", 86 | "|Tim Berners-Lee|\n", 87 | "| George Boole|\n", 88 | "+---------------+\n", 89 | "\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "df.registerTempTable(\"people\")\n", 95 | "spark.sql(\"select name from people\").show()" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.6.0" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /Chapter04/6 Spark Pivot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 19, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pyspark import SparkContext\n", 12 | "from 
pyspark.sql import SparkSession\n", 13 | "from pyspark.sql import functions as func\n", 14 | "\n", 15 | "sc = SparkContext.getOrCreate()\n", 16 | "spark = SparkSession(sc)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 20, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "+-----+----+-----+\n", 29 | "|stock|year|price|\n", 30 | "+-----+----+-----+\n", 31 | "| IBM|2012| 100|\n", 32 | "| MSFT|2012| 45|\n", 33 | "| IBM|2012| 105|\n", 34 | "| IBM|2013| 144|\n", 35 | "| MSFT|2013| 47|\n", 36 | "+-----+----+-----+\n", 37 | "\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "# load product set\n", 43 | "pivotDF = spark.read.format(\"csv\") \\\n", 44 | " .option(\"header\", \"true\") \\\n", 45 | " .load(\"pivot.csv\");\n", 46 | "pivotDF.show()\n", 47 | "pivotDF.createOrReplaceTempView(\"pivot\")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 21, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "+-----+-----+-----+\n", 60 | "|stock| 2012| 2013|\n", 61 | "+-----+-----+-----+\n", 62 | "| IBM|102.5|144.0|\n", 63 | "| MSFT| 45.0| 47.0|\n", 64 | "+-----+-----+-----+\n", 65 | "\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "# pivot data per the year to get average prices per stock per year\n", 71 | "pivotDF \\\n", 72 | " .groupBy(\"stock\") \\\n", 73 | " .pivot(\"year\",[2012,2013]) \\\n", 74 | " .agg(func.avg(\"price\")) \\\n", 75 | " .show()" 76 | ] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python 3", 82 | "language": "python", 83 | "name": "python3" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.6.0" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /Chapter04/files/order.csv: -------------------------------------------------------------------------------- 1 | "orderid", "orderdate" 2 | 2001, '2017-07-04' 3 | 2002, '2017-07-08' 4 | 2003, '2017-07-09' 5 | -------------------------------------------------------------------------------- /Chapter04/files/orderproduct.csv: -------------------------------------------------------------------------------- 1 | "orderid", "productid", "quantity" 2 | 2001, 1001, 1 3 | 2001, 1002, 1 4 | 2002, 1003, 3 5 | 2003, 1002, 1 6 | -------------------------------------------------------------------------------- /Chapter04/files/people.json: -------------------------------------------------------------------------------- 1 | {"name":"Michael"} 2 | {"name":"Andy", "age":30} 3 | {"name":"Justin", "age":19} 4 | {"born":1955,"fame":"co-founder of Apple Computers","name" : "Steve Jobs"} 5 | {"born":1955,"name":"Tim Berners-Lee"} 6 | {"born":1815,"name":"George Boole"} -------------------------------------------------------------------------------- /Chapter04/files/pivot.csv: -------------------------------------------------------------------------------- 1 | stock,year,price 2 | IBM,2012,100 3 | MSFT,2012,45 4 | IBM,2012,105 5 | IBM,2013,144 6 | MSFT,2013,47 -------------------------------------------------------------------------------- /Chapter04/files/product.csv: 
-------------------------------------------------------------------------------- 1 | "productid","description","price" 2 | 1001, "Baby High Chair", 35.00 3 | 1002, "Kitchen Table", 120.00 4 | 1003, "Phone Desk", 20.00 5 | -------------------------------------------------------------------------------- /Chapter05/1 Elections Data using R.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "printf <- function(...)print(sprintf(...))" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 7, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "\n", 23 | "\n", 24 | "\n", 25 | "\t\n", 26 | "\t\n", 27 | "\t\n", 28 | "\t\n", 29 | "\t\n", 30 | "\t\n", 31 | "\n", 32 | "
age              Clinton  Trump
18-24 years old  56       35
25-29 years old  53       39
30-39 years old  51       40
40-49 years old  46       50
50-64 years old  44       53
65 and older     45       53
\n" 33 | ], 34 | "text/latex": [ 35 | "\\begin{tabular}{r|lll}\n", 36 | " age & Clinton & Trump\\\\\n", 37 | "\\hline\n", 38 | "\t 18-24 years old & 56 & 35 \\\\\n", 39 | "\t 25-29 years old & 53 & 39 \\\\\n", 40 | "\t 30-39 years old & 51 & 40 \\\\\n", 41 | "\t 40-49 years old & 46 & 50 \\\\\n", 42 | "\t 50-64 years old & 44 & 53 \\\\\n", 43 | "\t 65 and older & 45 & 53 \\\\\n", 44 | "\\end{tabular}\n" 45 | ], 46 | "text/markdown": [ 47 | "\n", 48 | "age | Clinton | Trump | \n", 49 | "|---|---|---|---|---|---|\n", 50 | "| 18-24 years old | 56 | 35 | \n", 51 | "| 25-29 years old | 53 | 39 | \n", 52 | "| 30-39 years old | 51 | 40 | \n", 53 | "| 40-49 years old | 46 | 50 | \n", 54 | "| 50-64 years old | 44 | 53 | \n", 55 | "| 65 and older | 45 | 53 | \n", 56 | "\n", 57 | "\n" 58 | ], 59 | "text/plain": [ 60 | " age Clinton Trump\n", 61 | "1 18-24 years old 56 35 \n", 62 | "2 25-29 years old 53 39 \n", 63 | "3 30-39 years old 51 40 \n", 64 | "4 40-49 years old 46 50 \n", 65 | "5 50-64 years old 44 53 \n", 66 | "6 65 and older 45 53 " 67 | ] 68 | }, 69 | "metadata": {}, 70 | "output_type": "display_data" 71 | } 72 | ], 73 | "source": [ 74 | "age <- read.csv(\"age.tsv\", sep=\"\\t\")\n", 75 | "head(age)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/html": [ 86 | "\n", 87 | "\n", 88 | "\n", 89 | "\t\n", 90 | "\t\n", 91 | "\t\n", 92 | "\t\n", 93 | "\n", 94 | "
education                Clinton  Trump
High school or less      45       51
Some college education   43       52
College graduate         49       45
Postgraduate education   58       37
\n" 95 | ], 96 | "text/latex": [ 97 | "\\begin{tabular}{r|lll}\n", 98 | " education & Clinton & Trump\\\\\n", 99 | "\\hline\n", 100 | "\t High school or less & 45 & 51 \\\\\n", 101 | "\t Some college education & 43 & 52 \\\\\n", 102 | "\t College graduate & 49 & 45 \\\\\n", 103 | "\t Postgraduate education & 58 & 37 \\\\\n", 104 | "\\end{tabular}\n" 105 | ], 106 | "text/markdown": [ 107 | "\n", 108 | "education | Clinton | Trump | \n", 109 | "|---|---|---|---|\n", 110 | "| High school or less | 45 | 51 | \n", 111 | "| Some college education | 43 | 52 | \n", 112 | "| College graduate | 49 | 45 | \n", 113 | "| Postgraduate education | 58 | 37 | \n", 114 | "\n", 115 | "\n" 116 | ], 117 | "text/plain": [ 118 | " education Clinton Trump\n", 119 | "1 High school or less 45 51 \n", 120 | "2 Some college education 43 52 \n", 121 | "3 College graduate 49 45 \n", 122 | "4 Postgraduate education 58 37 " 123 | ] 124 | }, 125 | "metadata": {}, 126 | "output_type": "display_data" 127 | } 128 | ], 129 | "source": [ 130 | "education <- read.csv(\"education.tsv\", sep=\"\\t\")\n", 131 | "head(education)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "gender <- read.csv(\"gender.tsv\", sep=\"\\t\")\n", 141 | "head(gender)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "ideology <- read.csv(\"ideology.tsv\", sep=\"\\t\")\n", 153 | "head(ideology)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "income <- read.csv(\"income.tsv\", sep=\"\\t\")\n", 165 | "head(income)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "orientation <- read.csv(\"orientation.tsv\", sep=\"\\t\")\n", 177 | "head(orientation)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "party <- read.csv(\"party.tsv\", sep=\"\\t\")\n", 189 | "head(party)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "race <- read.csv(\"race.tsv\", sep=\"\\t\")\n", 201 | "head(race)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "region <- read.csv(\"region.tsv\", sep=\"\\t\")\n", 213 | "head(region)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": true 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "religion <- read.csv(\"religion.tsv\", sep=\"\\t\")\n", 225 | "head(religion)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "outputs": [], 235 | "source": [ 236 | "printf(\"Most Clinton voters from %s\",age[which.max(age$Clinton),'age'])\n", 237 | "printf(\"Most Clinton voters from %s\",education[which.max(education$Clinton),'education'])\n", 238 | "printf(\"Most Clinton voters from 
%s\",gender[which.max(gender$Clinton),'gender'])\n", 239 | "printf(\"Most Clinton voters from %s\",ideology[which.max(ideology$Clinton),'ideology'])\n", 240 | "printf(\"Most Clinton voters from %s\",income[which.max(income$Clinton),'income'])\n", 241 | "printf(\"Most Clinton voters from %s\",orientation[which.max(orientation$Clinton),'orientation'])\n", 242 | "printf(\"Most Clinton voters from %s\",party[which.max(party$Clinton),'party'])\n", 243 | "printf(\"Most Clinton voters from %s\",race[which.max(race$Clinton),'race'])\n", 244 | "printf(\"Most Clinton voters from %s\",region[which.max(region$Clinton),'region'])\n", 245 | "printf(\"Most Clinton voters from %s\",religion[which.max(religion$Clinton),'religion'])" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "printf(\"Most Trump voters from %s\",age[which.max(age$Trump),'age'])\n", 257 | "printf(\"Most Trump voters from %s\",education[which.max(education$Trump),'education'])\n", 258 | "printf(\"Most Trump voters from %s\",gender[which.max(gender$Trump),'gender'])\n", 259 | "printf(\"Most Trump voters from %s\",ideology[which.max(ideology$Trump),'ideology'])\n", 260 | "printf(\"Most Trump voters from %s\",income[which.max(income$Trump),'income'])\n", 261 | "printf(\"Most Trump voters from %s\",orientation[which.max(orientation$Trump),'orientation'])\n", 262 | "printf(\"Most Trump voters from %s\",party[which.max(party$Trump),'party'])\n", 263 | "printf(\"Most Trump voters from %s\",race[which.max(race$Trump),'race'])\n", 264 | "printf(\"Most Trump voters from %s\",region[which.max(region$Trump),'region'])\n", 265 | "printf(\"Most Trump voters from %s\",religion[which.max(religion$Trump),'religion'])" 266 | ] 267 | } 268 | ], 269 | "metadata": { 270 | "kernelspec": { 271 | "display_name": "R", 272 | "language": "R", 273 | "name": "ir" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": "r", 277 | "file_extension": ".r", 278 | "mimetype": "text/x-r-source", 279 | "name": "R", 280 | "pygments_lexer": "r", 281 | "version": "3.4.1" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 2 286 | } 287 | -------------------------------------------------------------------------------- /Chapter05/2 Voter Registration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "df <- read.csv(\"registration.csv\")\n", 12 | "summary(df)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "plot(df)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "cor(df$voted,df$registered)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "fit <- lm(df$voted ~ df$registered)\n", 46 | "fit" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true, 54 | "scrolled": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "par(mfrow=c(2,2))\n", 59 | "plot(fit)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | 
"execution_count": null, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "residuals(fit)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "summary(fit)" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "R", 88 | "language": "R", 89 | "name": "ir" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": "r", 93 | "file_extension": ".r", 94 | "mimetype": "text/x-r-source", 95 | "name": "R", 96 | "pygments_lexer": "r", 97 | "version": "3.4.1" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } 103 | -------------------------------------------------------------------------------- /Chapter05/files/05_income.tsv: -------------------------------------------------------------------------------- 1 | income Clinton Trump 2 | Under $30,000 53 41 3 | $30,000-49,999 51 42 4 | $50,000-99,999 46 50 5 | $100,000-199,999 47 48 6 | $200,000-249,999 48 49 7 | Over $250,000 46 48 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /Chapter05/files/05_religion.tsv: -------------------------------------------------------------------------------- 1 | religion Clinton Trump 2 | Protestant 37 60 3 | Catholic 45 52 4 | Mormon 25 61 5 | Other Christian 43 55 6 | Jewish 71 24 7 | Other religion 58 33 8 | None 68 26 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Chapter05/files/acceptance-rates.csv: -------------------------------------------------------------------------------- 1 | School,2017,2016,2015,2007 2 | Amherst College,NA,0.14,0.14,0.18 3 | Boston College,0.32,0.32,0.28,0.27 4 | Brown University,0.08,0.09,0.09,0.15 5 | Columbia University,0.06,0.06,0.06,0.12 6 | Cornell University,0.13,0.14,0.15,0.21 7 | Dartmouth College,0.10,0.11,0.10,0.18 8 | Duke University,0.09,0.10,0.10,0.21 9 | George Washington,0.41,0.40,0.46,0.37 10 | Georgetown University,0.15,0.16,0.16,0.21 11 | Harvard University,0.05,0.05,0.05,0.10 12 | Massachusetts Institute of Technology,0.07,0.08,0.08,0.13 13 | Middlebury College,0.20,0.16,0.17,0.23 14 | New York University,0.27,0.32,0.31,0.37 15 | Northwestern University,0.09,0.11,0.13,0.27 16 | Pomona College,0.08,0.09,0.10,0.16 17 | Princeton University,0.06,0.06,0.07,0.10 18 | Stanford University,0.05,0.05,0.05,0.10 19 | Swarthmore College,0.10,0.13,0.12,0.24 20 | University of California – Berkeley,0.18,0.15,0.15,0.23 21 | University of Chicago,NA,0.08,0.08,0.35 22 | University of Michigan,0.24,0.29,0.26,0.50 23 | University of Notre Dame,0.18,0.18,0.20,0.24 24 | University of Pennsylvania,0.09,0.09,0.10,0.21 25 | University of Southern California,0.16,0.17,0.18,0.25 26 | University of Virginia,0.27,0.29,0.29,0.35 27 | Vanderbilt University,0.10,0.10,0.11,0.33 28 | Wesleyan University,0.15,0.18,0.22,0.27 29 | Williams College,0.15,0.17,0.17,0.17 30 | Yale University,0.07,0.06,0.07,0.11 31 | -------------------------------------------------------------------------------- /Chapter05/files/age.tsv: -------------------------------------------------------------------------------- 1 | age Clinton Trump 2 | 18-24 years old 56 35 3 | 25-29 years old 53 39 4 | 30-39 years old 51 40 5 | 40-49 years old 46 50 6 | 50-64 years old 44 53 7 | 65 and older 45 53 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /Chapter05/files/education.tsv: -------------------------------------------------------------------------------- 1 | education Clinton Trump 2 | High school or less 45 51 3 | Some college education 43 52 4 | College graduate 49 45 5 | Postgraduate education 58 37 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /Chapter05/files/gender.tsv: -------------------------------------------------------------------------------- 1 | gender Clinton Trump 2 | Men 41 52 3 | Women 54 41 4 | 5 | 6 | -------------------------------------------------------------------------------- /Chapter05/files/ideology.tsv: -------------------------------------------------------------------------------- 1 | ideology Clinton Trump 2 | Liberals 84 10 3 | Moderates 52 41 4 | Conservatives 15 81 5 | -------------------------------------------------------------------------------- /Chapter05/files/orientation.tsv: -------------------------------------------------------------------------------- 1 | orientation Clinton Trump 2 | LGBT 78 14 3 | Heterosexual 47 48 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /Chapter05/files/party.tsv: -------------------------------------------------------------------------------- 1 | party Clinton Trump 2 | Democrats 89 9 3 | Republicans 7 90 4 | Independents 42 48 5 | 6 | -------------------------------------------------------------------------------- /Chapter05/files/race.tsv: -------------------------------------------------------------------------------- 1 | race Clinton Trump 2 | White 37 58 3 | Black 88 8 4 | Asian 65 29 5 | Other 56 37 6 | Hispanic (of any race) 65 29 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /Chapter05/files/region.tsv: -------------------------------------------------------------------------------- 1 | region Clinton Trump 2 | Northeast 55 40 3 | Midwest 45 49 4 | South 44 52 5 | West 55 39 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Chapter05/files/registration.csv: -------------------------------------------------------------------------------- 1 | state,population,citizens,registered,voted 2 | ALABAMA,3717,3651,2526,2095 3 | ALASKA,518,502,358,308 4 | ARIZONA,5196,4585,3145,2769 5 | ARKANSAS,2216,2116,1456,1241 6 | CALIFORNIA,29894,24890,16096,14416 7 | COLORADO,4242,3895,2893,2707 8 | CONNECTICUT,2759,2483,1763,1586 9 | DELAWARE,729,669,487,417 10 | DISTRICT OF COLUMBIA,553,512,420,380 11 | FLORIDA,16202,14428,9604,8578 12 | GEORGIA,7626,7048,4892,4246 13 | HAWAII,1064,974,530,460 14 | IDAHO,1224,1150,790,714 15 | ILLINOIS,9723,8970,6665,5719 16 | INDIANA,4988,4795,3298,2795 17 | IOWA,2394,2292,1657,1454 18 | KANSAS,2142,2029,1438,1243 19 | KENTUCKY,3348,3246,2253,1850 20 | LOUISIANA,3463,3353,2446,2067 21 | MAINE,1058,1038,830,754 22 | MARYLAND,4623,4158,3114,2737 23 | MASSACHUSETTS,5374,4967,3660,3315 24 | MICHIGAN,7624,7332,5434,4713 25 | MINNESOTA,4190,3985,3055,2738 26 | MISSISSIPPI,2203,2170,1725,1470 27 | MISSOURI,4626,4486,3333,2906 28 | MONTANA,798,790,581,521 29 | NEBRASKA,1407,1336,1008,893 30 | NEVADA,2234,1975,1371,1195 31 | NEW HAMPSHIRE,1044,1012,763,698 32 | NEW JERSEY,6862,5958,4165,3665 33 | NEW MEXICO,1547,1396,916,765 34 | NEW YORK,15506,13751,9142,7869 35 | NORTH CAROLINA,7631,6960,5194,4700 36 | 
NORTH DAKOTA,583,564,424,362 37 | OHIO,8811,8499,6128,5408 38 | OKLAHOMA,2923,2746,1861,1555 39 | OREGON,3185,2929,2147,1942 40 | PENNSYLVANIA,9980,9596,6909,6008 41 | RHODE ISLAND,836,766,538,464 42 | SOUTH CAROLINA,3733,3598,2575,2233 43 | SOUTH DAKOTA,631,612,437,362 44 | TENNESSEE,5057,4872,3251,2630 45 | TEXAS,20172,17378,11724,9626 46 | UTAH,2096,1969,1398,1234 47 | VERMONT,500,488,351,305 48 | VIRGINIA,6343,5829,4399,3973 49 | WASHINGTON,5592,5104,3906,3382 50 | WEST VIRGINIA,1434,1425,913,723 51 | WISCONSIN,4465,4354,3323,3068 52 | WYOMING,436,427,304,277 53 | -------------------------------------------------------------------------------- /Chapter07/1 markdown.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#You can use markdown that has \n", 8 | "italic and bold text" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "
    \n", 16 | "
  • list item 1
\n", 17 | "
  • list item 2
\n", 18 | "
" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "# Heading 1\n", 26 | "# Heading 2\n", 27 | "## Heading 2.1\n", 28 | "## Heading 2.2" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "\n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | "
column 1  column 2
1         2
" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "```python\n", 52 | "print \"Hello World\"\n", 53 | "```" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "R", 60 | "language": "R", 61 | "name": "ir" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": "r", 65 | "file_extension": ".r", 66 | "mimetype": "text/x-r-source", 67 | "name": "R", 68 | "pygments_lexer": "r", 69 | "version": "3.4.1" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 2 74 | } 75 | -------------------------------------------------------------------------------- /Chapter07/files/app.R: -------------------------------------------------------------------------------- 1 | ## app.R ## 2 | library(shinydashboard) 3 | 4 | ui <- dashboardPage( 5 | dashboardHeader(title = "Shiny dashboard"), 6 | dashboardSidebar(), 7 | dashboardBody( 8 | fluidRow( 9 | box(plotOutput("plot1", height = 250)), 10 | 11 | box( 12 | title = "Controls", 13 | sliderInput("slider", "Number of observations:", 1, 100, 50) 14 | ) 15 | ) 16 | ) 17 | ) 18 | 19 | server <- function(input, output) { 20 | set.seed(122) 21 | histdata <- rnorm(500) 22 | 23 | output$plot1 <- renderPlot({ 24 | data <- histdata[seq_len(input$slider)] 25 | hist(data) 26 | }) 27 | } 28 | 29 | shinyApp(ui, server) -------------------------------------------------------------------------------- /Chapter07/files/ui.R: -------------------------------------------------------------------------------- 1 | ## ui.R ## 2 | library(shinydashboard) 3 | 4 | dashboardPage( 5 | dashboardHeader(), 6 | dashboardSidebar(), 7 | dashboardBody() 8 | ) -------------------------------------------------------------------------------- /Chapter07/files/volcanoes.csv: -------------------------------------------------------------------------------- 1 | Number,Volcano Name,Country,Region,Latitude,Longitude,Elev,Type,Status,Last Known Eruption 2 | 1402-08=,Acatenango,Guatemala,Guatemala,14.501,-90.876,3976,Stratovolcano,Historical,D1 3 | 0803-17=,Adatara,Japan,Honshu-Japan,37.62,140.28,1718,Stratovolcano,Historical,D1 4 | 0604-02=,Agung,Indonesia,Lesser Sunda Is,-8.342,115.508,3142,Stratovolcano,Historical,D1 5 | 1000-123,Akademia Nauk,Russia,Kamchatka,53.98,159.45,1180,Stratovolcano,Historical,D1 6 | 0805-07=,Akan,Japan,Hokkaido-Japan,43.38,144.02,1499,Caldera,Historical,D1 7 | 0803-23=,Akita-Komaga-take,Japan,Honshu-Japan,39.75,140.8,1637,Stratovolcano,Historical,D1 8 | 0803-26=,Akita-Yake-yama,Japan,Honshu-Japan,39.97,140.77,1366,Complex volcano,Historical,D1 9 | 1101-32-,Akutan,United States,Aleutian Is,54.13,-165.97,1303,Stratovolcano,Historical,D1 10 | 0900-39=,Alaid,Russia,Kuril Is,50.858,155.55,2339,Stratovolcano,Historical,D1 11 | 1503-04=,"Alcedo, Volcan",Ecuador,Galapagos,-0.43,-91.12,1130,Shield volcano,Historical,D1 12 | 0507-04=,Ambrym,Vanuatu,Vanuatu-SW Pacific,-16.25,168.12,1334,Pyroclastic shield,Historical,D1 13 | 1101-19-,Amukta,United States,Aleutian Is,52.5,-171.25,1066,Stratovolcano,Historical,D1 14 | 0507-03=,Aoba,Vanuatu,Vanuatu-SW Pacific,-15.4,167.83,1496,Shield volcano,Historical,D1 15 | 1505-107,Aracar,Argentina,Argentina,-24.27,-67.77,6082,Stratovolcano,Historical,D1 16 | 0201-126,Ardoukoba,Djibouti,Africa-NE,11.58,42.47,298,Fissure vent,Historical,D1 17 | 1405-033,Arenal,Costa Rica,Costa Rica,10.463,-84.703,1657,Stratovolcano,Historical,D1 18 | 0803-11=,Asama,Japan,Honshu-Japan,36.4,138.53,2560,Complex volcano,Historical,D1 19 | 
0802-11=,Aso,Japan,Kyushu-Japan,32.88,131.1,1592,Caldera,Historical,D1 20 | 1101-16-,Atka,United States,Aleutian Is,52.38,-174.15,1533,Stratovolcano,Historical,D1 21 | 1103-01-,Augustine,United States,Alaska-SW,59.37,-153.42,1252,Lava dome,Historical,D1 22 | 1000-10=,Avachinsky,Russia,Kamchatka,53.255,158.83,2741,Stratovolcano,Historical,D1 23 | 0607-04=,Awu,Indonesia,Sangihe Is-Indonesia,3.67,125.5,1320,Stratovolcano,Historical,D1 24 | 1301-011,Axial Seamount,Pacific Ocean,Pacific-NE,45.95,-130,-1500,Submarine volcano,Historical,D1 25 | 1503-06=,"Azul, Cerro",Ecuador,Galapagos,-0.9,-91.42,1690,Shield volcano,Historical,D1 26 | 1507-06=,"Azul, Cerro [Quizapu]",Chile,Chile-C,-35.653,-70.761,3788,Stratovolcano,Historical,D1 27 | 0803-18=,Azuma,Japan,Honshu-Japan,37.73,140.25,2024,Stratovolcano,Historical,D1 28 | 0505-02=,Bagana,Papua New Guinea,Bougainville-SW Paci,-6.14,155.195,1750,Lava cone,Historical,D1 29 | 0605-09=,Banda Api,Pacific Ocean,Banda Sea,-4.525,129.871,640,Caldera,Historical,D1 30 | 0600-01=,Barren Island,India,Andaman Is-Indian O,12.292,93.875,354,Stratovolcano,Historical,D1 31 | 0604-01=,Batur,Indonesia,Lesser Sunda Is,-8.242,115.375,1717,Caldera,Historical,D1 32 | 0804-07=,Bayonnaise Rocks,Japan,Izu Is-Japan,31.92,139.92,10,Submarine volcano,Historical,D1 33 | 1000-25=,Bezymianny,Russia,Kamchatka,55.978,160.587,2882,Stratovolcano,Historical,D1 34 | 1101-30-,Bogoslof,United States,Aleutian Is,53.93,-168.03,101,Submarine volcano,Historical,D1 35 | 0703-01=,Bulusan,Philippines,Luzon-Philippines,12.77,124.05,1565,Stratovolcano,Historical,D1 36 | 1507-091,Callaqui,Chile,Chile-C,-37.92,-71.45,3164,Stratovolcano,Historical,D1 37 | 0204-01=,"Cameroon, Mt.",Cameroon,Africa-W,4.203,9.17,4095,Stratovolcano,Historical,D1 38 | 1507-14=,Carran-Los Venados,Chile,Chile-C,-40.35,-72.07,1114,Pyroclastic cone,Historical,D1 39 | 1508-041,Chaiten,Chile,Chile-S,-42.833,-72.646,962,Caldera,Holocene,D1 40 | 1401-12=,"Chichon, El",Mexico,Mexico,17.36,-93.228,1150,Tuff cone,Historical,D1 41 | 1102-11-,Chiginagak,United States,Alaska Peninsula,57.13,-157,2075,Stratovolcano,Historical,D1 42 | 0900-36=,Chikurachki,Russia,Kuril Is,50.325,155.458,1816,Stratovolcano,Historical,D1 43 | 1507-07=,"Chillan, Nevados de",Chile,Chile-C,-36.863,-71.377,3212,Stratovolcano,Historical,D1 44 | 0900-26=,Chirinkotan,Russia,Kuril Is,48.98,153.48,724,Stratovolcano,Historical,D1 45 | 0900-15=,Chirpoi,Russia,Kuril Is,46.525,150.875,742,Caldera,Historical,D1 46 | 0803-22=,Chokai,Japan,Honshu-Japan,39.08,140.03,2237,Stratovolcano,Historical,D1 47 | 1301-02-,Cleft Segment,Pacific Ocean,Pacific-NE,44.83,-130.3,-2140,Submarine volcano,Historical,D1 48 | 1101-24-,Cleveland,United States,Aleutian Is,52.82,-169.95,1730,Stratovolcano,Historical,D1 49 | 1401-04=,Colima,Mexico,Mexico,19.514,-103.62,3850,Stratovolcano,Historical,D1 50 | 0606-01=,Colo [Una Una],Indonesia,Sulawesi-Indonesia,-0.17,121.608,507,Stratovolcano,Historical,D1 51 | 1404-12=,Concepcion,Nicaragua,Nicaragua,11.538,-85.622,1700,Stratovolcano,Historical,D1 52 | 1507-09=,Copahue,Chile,Chile-C,-37.85,-71.17,2965,Stratovolcano,Historical,D1 53 | 0403-101,Curacoa,Tonga,Tonga-SW Pacific,-15.62,-173.67,-33,Submarine volcano,Historical,D1 54 | 1900-03=,Deception Island,Antarctica,Antarctica,-62.97,-60.65,576,Caldera,Historical,D1 55 | 0601-23=,Dempo,Indonesia,Sumatra,-4.03,103.13,3173,Stratovolcano,Historical,D1 56 | 0704-02=,Didicas,Philippines,Luzon-N of,19.077,122.202,244,Compound volcano,Historical,D1 57 | 0603-20=,Dieng Volc 
Complex,Indonesia,Java,-7.2,109.92,2565,Complex volcano,Historical,D1 58 | 0608-01=,Dukono,Indonesia,Halmahera-Indonesia,1.68,127.88,1185,Complex volcano,Historical,D1 59 | 0508-001,Eastern Gemini Seamount,Pacific Ocean,SW Pacific,-20.98,170.28,-80,Submarine volcano,Historical,D1 60 | 0900-38=,Ebeko,Russia,Kuril Is,50.68,156.02,1156,Somma volcano,Historical,D1 61 | 0604-10=,Ebulobo,Indonesia,Lesser Sunda Is,-8.808,121.18,2124,Stratovolcano,Historical,D1 62 | 0900-27=,Ekarma,Russia,Kuril Is,48.958,153.93,1170,Stratovolcano,Historical,D1 63 | 1301-01-,Endeavour Ridge,Pacific Ocean,Pacific-NE,47.95,-129.1,-2400,Submarine volcano,Historical,D1 64 | 0507-06=,Epi,Vanuatu,Vanuatu-SW Pacific,-16.68,168.37,833,Stratovolcano,Historical,D1 65 | 1900-02=,Erebus,Antarctica,Antarctica,-77.53,167.17,3794,Stratovolcano,Historical,D1 66 | 0201-08=,Erta Ale,Ethiopia,Africa-NE,13.6,40.67,613,Shield volcano,Historical,D1 67 | 0101-06=,Etna,Italy,Italy,37.734,15.004,3350,Stratovolcano,Historical,D1 68 | 1702-02=,Eyjafjallajokull,Iceland,Iceland-S,63.63,-19.62,1666,Stratovolcano,Historical,D1 69 | 0804-14=,Farallon de Pajaros,United States,Mariana Is-C Pacific,20.53,144.9,360,Stratovolcano,Historical,D1 70 | 1503-01=,Fernandina,Ecuador,Galapagos,-0.37,-91.55,1495,Shield volcano,Historical,D1 71 | 1804-01=,Fogo,Cape Verde,Cape Verde Is,14.95,-24.35,2829,Stratovolcano,Historical,D1 72 | 0303-02=,"Fournaise, Piton de la",Reunion,Indian O-W,-21.229,55.713,2631,Shield volcano,Historical,D1 73 | 1402-09=,Fuego,Guatemala,Guatemala,14.473,-90.88,3763,Stratovolcano,Historical,D1 74 | 0804-133,Fukujin,Japan,Volcano Is-Japan,21.925,143.442,-217,Submarine volcano,Historical,D1 75 | 0804-13=,Fukutoku-okanoba,Japan,Volcano Is-Japan,24.28,141.52,-14,Submarine volcano,Historical,D1 76 | 1303-02-,Galapagos Rift,Pacific Ocean,Pacific-E,0.792,-86.15,-2430,Submarine volcano,Historical,D1 77 | 1501-08=,Galeras,Colombia,Colombia,1.22,-77.37,4276,Complex volcano,Historical,D1 78 | 0603-14=,Galunggung,Indonesia,Java,-7.25,108.05,2168,Stratovolcano,Historical,D1 79 | 0608-06=,Gamalama,Indonesia,Halmahera-Indonesia,0.8,127.325,1715,Stratovolcano,Historical,D1 80 | 0608-04=,Gamkonora,Indonesia,Halmahera-Indonesia,1.375,127.52,1635,Stratovolcano,Historical,D1 81 | 1101-07-,Gareloi,United States,Aleutian Is,51.78,-178.8,1573,Stratovolcano,Historical,D1 82 | 0507-02=,Gaua,Vanuatu,Vanuatu-SW Pacific,-14.27,167.5,797,Stratovolcano,Historical,D1 83 | 1000-07=,Gorely,Russia,Kamchatka,52.558,158.03,1829,Caldera,Historical,D1 84 | 1101-12-,Great Sitkin,United States,Aleutian Is,52.08,-176.13,1740,Stratovolcano,Historical,D1 85 | 1703-01=,Grimsvotn,Iceland,Iceland-NE,64.42,-17.33,1725,Caldera,Historical,D1 86 | 0900-07=,Grozny Group,Russia,Kuril Is,45.02,147.87,1211,Complex volcano,Historical,D1 87 | 1502-02=,Guagua Pichincha,Ecuador,Ecuador,-0.171,-78.598,4784,Stratovolcano,Historical,D1 88 | 0304-01=,Heard,Heard I. 
& McDonald Is.,Indian O-S,-53.106,73.513,2745,Stratovolcano,Historical,D1 89 | 1702-07=,Hekla,Iceland,Iceland-S,63.98,-19.7,1491,Stratovolcano,Historical,D1 90 | 0403-08=,Home Reef,Tonga,Tonga-SW Pacific,-18.992,-174.775,-2,Submarine volcano,Historical,D1 91 | 1508-057,"Hudson, Cerro",Chile,Chile-S,-45.9,-72.97,1905,Stratovolcano,Historical,D1 92 | 0403-04=,Hunga Tonga-Hunga Ha'apai,Tonga,Tonga-SW Pacific,-20.57,-175.38,0,Submarine volcano,Historical,D1 93 | 0608-03=,Ibu,Indonesia,Halmahera-Indonesia,1.475,127.642,1325,Stratovolcano,Historical,D1 94 | 0603-35=,Ijen,Indonesia,Java,-8.058,114.242,2799,Stratovolcano,Historical,D1 95 | 0604-22=,Iliboleng,Indonesia,Lesser Sunda Is,-8.342,123.258,1659,Stratovolcano,Historical,D1 96 | 0604-25=,Iliwerung,Indonesia,Lesser Sunda Is,-8.54,123.59,1018,Complex volcano,Historical,D1 97 | 1405-06=,Irazu,Costa Rica,Costa Rica,9.979,-83.852,3432,Stratovolcano,Historical,D1 98 | 1505-04=,Irruputuncu,Bolivia,Chile-N,-20.73,-68.55,5163,Stratovolcano,Historical,D1 99 | 0802-02=,Iwo-Tori-shima,Japan,Ryukyu Is,27.85,128.25,217,Complex volcano,Historical,D1 100 | 0804-12=,Iwo-jima,Japan,Volcano Is-Japan,24.75,141.33,161,Caldera,Historical,D1 101 | 0604-11=,Iya,Indonesia,Lesser Sunda Is,-8.88,121.63,637,Stratovolcano,Historical,D1 102 | 1403-03=,Izalco,El Salvador,El Salvador,13.813,-89.633,1950,Stratovolcano,Historical,D1 103 | 0803-01=,Izu-Tobu,Japan,Honshu-Japan,34.92,139.12,1406,Pyroclastic cone,Historical,D1 104 | 1706-01=,Jan Mayen,Jan Mayen,Atlantic-N-Jan Mayen,71.08,-8.17,2277,Stratovolcano,Historical,D1 105 | 0804-10=,Kaitoku Seamount,Japan,Volcano Is-Japan,26.122,141.102,-10,Submarine volcano,Historical,D1 106 | 1101-11-,Kanaga,United States,Aleutian Is,51.92,-177.17,1307,Stratovolcano,Historical,D1 107 | 0702-02=,Kanlaon,Philippines,Philippines-C,10.412,123.132,2435,Stratovolcano,Historical,D1 108 | 0607-02=,Karangetang [Api Siau],Indonesia,Sangihe Is-Indonesia,2.78,125.48,1784,Stratovolcano,Historical,D1 109 | 0501-03=,Karkar,Papua New Guinea,New Guinea-NE of,-4.649,145.964,1839,Stratovolcano,Historical,D1 110 | 0303-01=,Karthala,Comoros,Indian O-W,-11.75,43.38,2361,Shield volcano,Historical,D1 111 | 1000-13=,Karymsky,Russia,Kamchatka,54.05,159.43,1536,Stratovolcano,Historical,D1 112 | 1101-13-,Kasatochi,United States,Aleutian Is,52.18,-175.5,314,Stratovolcano,Historical,D1 113 | 0505-06=,Kavachi,Solomon Is.,Solomon Is-SW Pacifi,-9.02,157.95,-20,Submarine volcano,Historical,D1 114 | 0604-14=,Kelimutu,Indonesia,Lesser Sunda Is,-8.758,121.83,1640,Complex volcano,Historical,D1 115 | 0603-28=,Kelut,Indonesia,Java,-7.93,112.308,1731,Stratovolcano,Historical,D1 116 | 0601-17=,Kerinci,Indonesia,Sumatra,-1.814,101.264,3800,Stratovolcano,Historical,D1 117 | 1600-16=,Kick-'em-Jenny,Netherlands,W Indies,12.3,-61.63,-177,Submarine volcano,Historical,D1 118 | 0802-06=,Kikai,Japan,Ryukyu Is,30.78,130.28,717,Caldera,Historical,D1 119 | 1302-01-,Kilauea,United States,Hawaiian Is,19.425,-155.292,1222,Shield volcano,Historical,D1 120 | 0802-09=,Kirishima,Japan,Kyushu-Japan,31.93,130.87,1700,Shield volcano,Historical,D1 121 | 1101-02-,Kiska,United States,Aleutian Is,52.1,177.6,1220,Stratovolcano,Historical,D1 122 | 1000-26=,Kliuchevskoi,Russia,Kamchatka,56.057,160.638,4835,Stratovolcano,Historical,D1 123 | 0900-12=,Kolokol Group,Russia,Kuril Is,46.042,150.05,1328,Somma volcano,Historical,D1 124 | 0805-02=,Komaga-take,Japan,Hokkaido-Japan,42.07,140.68,1140,Stratovolcano,Historical,D1 125 | 1703-08=,Krafla,Iceland,Iceland-NE,65.73,-16.78,650,Caldera,Historical,D1 126 | 
1702-09-,Krakagiger,Iceland,Iceland-S,63.98,-19.7,1491,Stratovolcano,Historical,D1 127 | 0602-00=,Krakatau,Indonesia,Indonesia,-6.102,105.423,813,Caldera,Historical,D1 128 | 0802-05=,Kuchinoerabu-jima,Japan,Ryukyu Is,30.43,130.22,649,Stratovolcano,Historical,D1 129 | 0802-12=,Kuju Group,Japan,Kyushu-Japan,33.08,131.25,1788,Stratovolcano,Historical,D1 130 | 1102-06-,Kupreanof,United States,Alaska Peninsula,56.02,-159.8,1895,Stratovolcano,Historical,D1 131 | 0803-12=,Kusatsu-Shirane,Japan,Honshu-Japan,36.62,138.55,2176,Stratovolcano,Historical,D1 132 | 0507-07=,Kuwae,Vanuatu,Vanuatu-SW Pacific,-16.829,168.536,-2,Caldera,Historical,D1 133 | 1703-05=,Kverkfjoll,Iceland,Iceland-NE,64.65,-16.72,1920,Stratovolcano,Historical,D1 134 | 1803-01-,La Palma,Spain,Canary Is,28.58,-17.83,2426,Stratovolcano,Historical,D1 135 | 1702-15-,Lakagigar,Sweden,Iceland-NE,64.42,17.33,1725,Caldera,Historical,D1 136 | 1702-10-,Lambafit,Iceland,Iceland-S,64.08,-19.4,550,Crater rows,Historical,D1 137 | 0502-01=,Langila,Papua New Guinea,New Britain-SW Pac,-5.525,148.42,1330,Complex volcano,Historical,D1 138 | 1505-10=,Lascar,Chile,Chile-N,-23.37,-67.73,5592,Stratovolcano,Historical,D1 139 | 0202-12=,"Lengai, Ol Doinyo",Tanzania,Africa-E,-2.751,35.902,2890,Stratovolcano,Historical,D1 140 | 0604-18=,Lewotobi,Indonesia,Lesser Sunda Is,-8.53,122.775,1703,Stratovolcano,Historical,D1 141 | 0604-19=,Lewotobi Perempuan,Indonesia,Lesser Sunda Is,-8.575,122.78,1703,Stratovolcano,Historical,D1 142 | 1507-11=,Llaima,Chile,Chile-C,-38.692,-71.729,3125,Stratovolcano,Historical,D1 143 | 1302-00-,Loihi,United States,Hawaiian Is,18.92,-155.27,-975,Submarine volcano,Historical,D1 144 | 0606-10=,Lokon-Empung,Indonesia,Sulawesi-Indonesia,1.358,124.792,1580,Stratovolcano,Historical,D1 145 | 0501-05=,Long Island,Papua New Guinea,New Guinea-NE of,-5.358,147.12,1280,Complex volcano,Historical,D1 146 | 1507-10=,Lonquimay,Chile,Chile-C,-38.377,-71.58,2865,Stratovolcano,Historical,D1 147 | 0507-05=,Lopevi,Vanuatu,Vanuatu-SW Pacific,-16.507,168.346,1413,Stratovolcano,Historical,D1 148 | 1303-08-,Macdonald,Antarctica,Austral Is-C Pacific,-28.98,-140.25,-50,Submarine volcano,Historical,D1 149 | 0606-11=,Mahawu,Indonesia,Sulawesi-Indonesia,1.358,124.858,1324,Stratovolcano,Historical,D1 150 | 0608-07=,Makian,Indonesia,Halmahera-Indonesia,0.32,127.4,1357,Stratovolcano,Historical,D1 151 | 1101-31-,Makushin,United States,Aleutian Is,53.9,-166.93,2036,Stratovolcano,Historical,D1 152 | 0501-02=,Manam,Papua New Guinea,New Guinea-NE of,-4.1,145.061,1807,Stratovolcano,Historical,D1 153 | 0601-14=,Marapi,Indonesia,Sumatra,-0.381,100.473,2891,Complex volcano,Historical,D1 154 | 1503-08=,Marchena,Ecuador,Galapagos,0.33,-90.47,343,Shield volcano,Historical,D1 155 | 0304-07-,Marion Island,South Africa,Indian O-S,-46.9,37.75,1230,Shield volcano,Historical,D1 156 | 1404-10=,Masaya,Nicaragua,Nicaragua,11.984,-86.161,635,Caldera,Historical,D1 157 | 1302-02=,Mauna Loa,United States,Hawaiian Is,19.475,-155.608,4170,Shield volcano,Historical,D1 158 | 0703-03=,Mayon,Philippines,Luzon-Philippines,13.257,123.685,2462,Stratovolcano,Historical,D1 159 | 0304-011,McDonald Islands,Indian Ocean,Indian O-S,-53.03,72.6,186,Complex volcano,Historical,D1 160 | 0900-10=,Medvezhia,Russia,Kuril Is,45.38,148.83,1124,Somma volcano,Historical,D1 161 | 0603-25=,Merapi,Indonesia,Java,-7.542,110.442,2947,Stratovolcano,Historical,D1 162 | 0403-07=,Metis Shoal,Tonga,Tonga-SW Pacific,-19.18,-174.87,43,Submarine volcano,Historical,D1 163 | 0804-131,Minami-Hiyoshi,Japan,Volcano 
Is-Japan,23.507,141.905,-30,Submarine volcano,Historical,D1 164 | 0804-04=,Miyake-jima,Japan,Izu Is-Japan,34.08,139.53,815,Stratovolcano,Historical,D1 165 | 0402-05-,Monowai Seamount,New Zealand,Kermadec Is,-25.887,-177.188,-100,Submarine volcano,Historical,D1 166 | 1900-081,Montagu Island,Antarctica,Antarctica,-58.417,-26.333,1370,Shield volcano,Historical,D1 167 | 1702-08-,Mundafell,Iceland,Iceland-S,63.98,-19.7,1491,Stratovolcano,Historical,D1 168 | 0804-211,NW Rota-1,United States,Mariana Is-C Pacific,14.601,144.775,-517,Submarine volcano,Historical,D1 169 | 1503-05=,"Negra, Sierra",Ecuador,Galapagos,-0.83,-91.17,1490,Shield volcano,Historical,D1 170 | 1404-07=,"Negro, Cerro",Nicaragua,Nicaragua,12.506,-86.702,728,Cinder cone,Historical,D1 171 | 0401-09=,Ngauruhoe,New Zealand,New Zealand,-39.158,175.63,2291,Stratovolcano,Historical,D1 172 | 1806-011,Nightingale Island,United Kingdom,Atlantic-S,-37.417,-12.483,365,Stratovolcano,Historical,D1 173 | 0803-09=,Niigata-Yake-yama,Japan,Honshu-Japan,36.92,138.03,2400,Lava dome,Historical,D1 174 | 0605-06=,Nila,Pacific Ocean,Banda Sea,-6.73,129.5,781,Stratovolcano,Historical,D1 175 | 0403-11=,Niuafo'ou,Tonga,Tonga-SW Pacific,-15.6,-175.63,260,Shield volcano,Historical,D1 176 | 1301-021,North Gorda Ridge,Pacific Ocean,Pacific-NE,42.67,-126.78,-3000,Submarine volcano,Historical,D1 177 | 0203-02=,Nyamuragira,"Congo, DRC",Africa-C,-1.408,29.2,3058,Shield volcano,Historical,D1 178 | 0203-03=,Nyiragongo,"Congo, DRC",Africa-C,-1.52,29.25,3470,Stratovolcano,Historical,D1 179 | 0401-05=,Okataina,New Zealand,New Zealand,-38.12,176.5,1111,Lava dome,Historical,D1 180 | 1101-29-,Okmok,United States,Aleutian Is,53.42,-168.13,1073,Shield volcano,Historical,D1 181 | 0803-04=,On-take,Japan,Honshu-Japan,35.9,137.48,3063,Complex volcano,Historical,D1 182 | 0804-01=,Oshima,Japan,Izu Is-Japan,34.73,139.38,758,Stratovolcano,Historical,D1 183 | 1402-11=,Pacaya,Guatemala,Guatemala,14.381,-90.601,2552,Complex volcano,Historical,D1 184 | 0804-17=,Pagan,United States,Mariana Is-C Pacific,18.13,145.8,570,Stratovolcano,Historical,D1 185 | 0604-15=,Paluweh,Indonesia,Lesser Sunda Is,-8.32,121.708,875,Stratovolcano,Historical,D1 186 | 1102-03-,Pavlof,United States,Alaska Peninsula,55.42,-161.9,2519,Stratovolcano,Historical,D1 187 | 0601-03=,Peuet Sague,Indonesia,Sumatra,4.914,96.329,2801,Complex volcano,Historical,D1 188 | 0703-083,Pinatubo,Philippines,Luzon-Philippines,15.13,120.35,1486,Stratovolcano,Historical,D1 189 | 1600-21-,Piparo,Trinidad,Trinidad,10,-61,140,Mud volcano,Historical,D1 190 | 1507-04=,Planchon-Peteroa,Chile,Chile-C,-35.24,-70.57,4107,Caldera,Historical,D1 191 | 1405-04=,Poas,Costa Rica,Costa Rica,10.2,-84.233,2708,Stratovolcano,Historical,D1 192 | 1401-09=,Popocatepetl,Mexico,Mexico,19.023,-98.622,5426,Stratovolcano,Historical,D1 193 | 1501-06=,Purace,Colombia,Colombia,2.32,-76.4,4650,Stratovolcano,Historical,D1 194 | 1505-09=,Putana,Chile,Chile-N,-22.57,-67.87,5890,Stratovolcano,Historical,D1 195 | 0502-14=,Rabaul,Papua New Guinea,New Britain-SW Pac,-4.271,152.203,688,Pyroclastic shield,Historical,D1 196 | 0604-071,Ranakah,Indonesia,Lesser Sunda Is,-8.62,120.52,2100,Lava dome,Historical,D1 197 | 0402-03=,Raoul Island,New Zealand,Kermadec Is,-29.27,-177.92,516,Stratovolcano,Historical,D1 198 | 0603-34=,Raung,Indonesia,Java,-8.125,114.042,3332,Stratovolcano,Historical,D1 199 | 1103-03-,Redoubt,United States,Alaska-SW,60.48,-152.75,3108,Stratovolcano,Historical,D1 200 | 1502-01=,Reventador,Ecuador,Ecuador,-0.078,-77.656,3562,Stratovolcano,Historical,D1 201 
| 1701-01=,Reykjaneshryggur,Iceland,Iceland-SW,63.67,-23.33,80,Submarine volcanoes,Historical,D1 202 | 1405-02=,Rincon de la Vieja,Costa Rica,Costa Rica,10.83,-85.324,1916,Complex volcano,Historical,D1 203 | 0604-03=,Rinjani,Indonesia,Lesser Sunda Is,-8.42,116.47,3726,Stratovolcano,Historical,D1 204 | 0501-07=,Ritter Island,Papua New Guinea,New Guinea-NE of,-5.52,148.121,140,Stratovolcano,Historical,D1 205 | 0401-10=,Ruapehu,New Zealand,New Zealand,-39.28,175.57,2797,Stratovolcano,Historical,D1 206 | 0804-201,Ruby,United States,Mariana Is-C Pacific,15.62,145.57,-230,Submarine volcano,Historical,D1 207 | 1501-02=,Ruiz,Colombia,Colombia,4.895,-75.322,5321,Stratovolcano,Historical,D1 208 | 0401-13-,Rumble III,New Zealand,New Zealand,-35.745,178.478,-140,Submarine volcano,Hydrophonic,D1 209 | 1504-006,Sabancaya,Peru,Peru,-15.783,-71.85,5967,Stratovolcanoes,Historical,D1 210 | 1504-003,Sabancaya,Peru,Peru,-15.78,-71.85,5967,Stratovolcano,Historical,D1 211 | 0802-08=,Sakura-jima,Japan,Kyushu-Japan,31.58,130.67,1117,Stratovolcano,Historical,D1 212 | 1404-02=,San Cristobal,Nicaragua,Nicaragua,12.702,-87.004,1745,Stratovolcano,Historical,D1 213 | 1403-10=,San Miguel,El Salvador,El Salvador,13.431,-88.272,2130,Stratovolcano,Historical,D1 214 | 1502-09=,Sangay,Ecuador,Ecuador,-2.03,-78.33,5230,Stratovolcano,Historical,D1 215 | 0604-05=,Sangeang Api,Indonesia,Lesser Sunda Is,-8.18,119.058,1949,Complex volcano,Historical,D1 216 | 1402-03=,Santa Maria,Guatemala,Guatemala,14.756,-91.552,3772,Stratovolcano,Historical,D1 217 | 0900-24=,Sarychev Peak,Russia,Kuril Is,48.092,153.2,1496,Stratovolcano,Historical,D1 218 | 1101-18-,Seguam,United States,Aleutian Is,52.32,-172.52,1054,Stratovolcano,Historical,D1 219 | 0603-30=,Semeru,Indonesia,Java,-8.108,112.92,3676,Stratovolcano,Historical,D1 220 | 1101-06-,Semisopochnoi,United States,Aleutian Is,51.95,179.62,1221,Stratovolcano,Historical,D1 221 | 0805-04=,Shikotsu,Japan,Hokkaido-Japan,42.7,141.33,1320,Caldera,Historical,D1 222 | 1101-36-,Shishaldin,United States,Aleutian Is,54.75,-163.97,2857,Stratovolcano,Historical,D1 223 | 1000-27=,Shiveluch,Russia,Kamchatka,56.653,161.36,3283,Stratovolcano,Historical,D1 224 | 0604-27=,Sirung,Indonesia,Lesser Sunda Is,-8.51,124.148,862,Complex volcano,Historical,D1 225 | 0603-18=,Slamet,Indonesia,Java,-7.242,109.208,3432,Stratovolcano,Historical,D1 226 | 1401-021,Socorro,Mexico,Mexico-Is,18.78,-110.95,1050,Shield volcano,Historical,D1 227 | 0606-03=,Soputan,Indonesia,Sulawesi-Indonesia,1.108,124.725,1784,Stratovolcano,Historical,D1 228 | 0601-12=,Sorikmarapi,Indonesia,Sumatra,0.686,99.539,2145,Stratovolcano,Historical,D1 229 | 1600-06=,Soufriere Guadeloupe,Guadeloupe,W Indies,16.05,-61.67,1467,Stratovolcano,Historical,D1 230 | 1600-05=,Soufriere Hills,Montserrat,W Indies,16.72,-62.18,915,Stratovolcano,Historical,D1 231 | 1600-15=,Soufriere St. Vincent,St. Vincent & the Grenadines,W Indies,13.33,-61.18,1220,Stratovolcano,Historical,D1 232 | 1103-04-,Spurr,United States,Alaska-SW,61.3,-152.25,3374,Stratovolcano,Historical,D1 233 | 1201-05-,St. 
Helens,United States,US-Washington,46.2,-122.18,2549,Stratovolcano,Historical,D1 234 | 0101-04=,Stromboli,Italy,Italy,38.789,15.213,926,Stratovolcano,Historical,D1 235 | 0603-21=,Sundoro,Indonesia,Java,-7.3,109.992,3136,Stratovolcano,Historical,D1 236 | 0804-142,Supply Reef,United States,Mariana Is-C Pacific,20.13,145.1,-8,Submarine volcano,Hydrophonic,D1 237 | 0507-01=,Suretamatai,Vanuatu,Vanuatu-SW Pacific,-13.8,167.47,921,Complex volcano,Historical,D1 238 | 0802-03=,Suwanose-jima,Japan,Ryukyu Is,29.53,129.72,799,Stratovolcano,Historical,D1 239 | 0703-07=,Taal,Philippines,Luzon-Philippines,14.002,120.993,400,Stratovolcano,Historical,D1 240 | 1401-13=,Tacana,Mexico,Mexico,15.13,-92.112,4110,Stratovolcano,Historical,D1 241 | 0403-12-,Tafu-Maka,Tonga,Tonga-SW Pacific,-15.367,-174.233,-1400,Submarine volcano,Historical,D1 242 | 0601-16=,Talang,Indonesia,Sumatra,-0.978,100.679,2597,Stratovolcano,Historical,D1 243 | 0604-04=,Tambora,Indonesia,Lesser Sunda Is,-8.25,118,2850,Stratovolcano,Historical,D1 244 | 0603-09=,Tangkubanparahu,Indonesia,Java,-6.77,107.6,2084,Stratovolcano,Historical,D1 245 | 1404-04=,Telica,Nicaragua,Nicaragua,12.602,-86.845,1061,Stratovolcano,Historical,D1 246 | 0603-31=,Tengger Caldera,Indonesia,Java,-7.942,112.95,2329,Stratovolcano,Historical,D1 247 | 1802-05=,Terceira,Portugal,Azores,38.73,-27.32,1023,Stratovolcano,Historical,D1 248 | 1900-07=,Thule Islands,Antarctica,Antarctica,-59.45,-27.37,1075,Stratovolcano,Historical,D1 249 | 0900-03=,Tiatia,Russia,Kuril Is,44.358,146.27,1819,Stratovolcano,Historical,D1 250 | 0506-01=,Tinakula,Solomon Is.,Santa Cruz Is-SW Pac,-10.38,165.8,851,Stratovolcano,Historical,D1 251 | 0805-05=,Tokachi,Japan,Hokkaido-Japan,43.42,142.68,2077,Stratovolcano,Historical,D1 252 | 1000-24=,Tolbachik,Russia,Kamchatka,55.83,160.33,3682,Shield volcano,Historical,D1 253 | 0401-08=,Tongariro,New Zealand,New Zealand,-39.13,175.642,1978,Stratovolcano,Historical,D1 254 | 0804-09=,Tori-shima,Japan,Izu Is-Japan,30.48,140.32,403,Stratovolcano,Historical,D1 255 | 1102-16-,Trident,United States,Alaska Peninsula,58.23,-155.08,1864,Stratovolcano,Historical,D1 256 | 1502-08=,Tungurahua,Ecuador,Ecuador,-1.467,-78.442,5023,Stratovolcano,Historical,D1 257 | 1507-01=,Tupungatito,Chile,Chile-C,-33.4,-69.8,6000,Stratovolcano,Historical,D1 258 | 1504-02=,Ubinas,Peru,Peru,-16.355,-70.903,5672,Stratovolcano,Historical,D1 259 | 1102-131,Ukinrek Maars,United States,Alaska Peninsula,57.83,-156.52,91,Maar,Historical,D1 260 | 0502-12=,Ulawun,Papua New Guinea,New Britain-SW Pac,-5.05,151.33,2334,Stratovolcano,Historical,D1 261 | 0508-03-,Unnamed,Pacific Ocean,SW Pacific,-25.78,168.63,-2400,Submarine volcano,Hydrophonic,D1 262 | 1304-04-,Unnamed,Pacific Ocean,Pacific-E,10.733,-103.583,,Submarine volcano,Historical,D1 263 | 1101-17-,Unnamed,United States,Aleutian Is,52,-173.5,0,Submarine volcano,Hydrophonic,D1 264 | 1304-05-,Unnamed,Pacific Ocean,Pacific-E,9.833,-104.3,-2500,Submarine volcano,Historical,D1 265 | 0900-16-,Unnamed,Russia,Kuril Is,46.47,151.28,-502,Submarine volcano,Hydrophonic,D1 266 | 0500-03-,Unnamed,United States,Admiralty Is-SW Paci,-3.03,147.78,-1300,Submarine volcano,Hydrophonic,D1 267 | 1303-01-,Unnamed,Pacific Ocean,Pacific-E,9.82,-104.3,-2500,Submarine volcano,Historical,D1 268 | 0403-03=,Unnamed,Tonga,Tonga-SW Pacific,-20.85,-175.53,-13,Submarine volcano,Historical,D1 269 | 0403-091,Unnamed,Tonga,Tonga-SW Pacific,-18.325,-174.365,-40,Submarine volcano,Historical,D1 270 | 0802-10=,Unzen,Japan,Kyushu-Japan,32.75,130.3,1500,Complex volcano,Historical,D1 271 
| 0805-03=,Usu,Japan,Hokkaido-Japan,42.53,140.83,731,Stratovolcano,Historical,D1 272 | 1102-07-,Veniaminof,United States,Alaska Peninsula,56.17,-159.38,2507,Stratovolcano,Historical,D1 273 | 1702-01=,Vestmannaeyjar,Iceland,Iceland-S,63.43,-20.28,279,Submarine volcano,Historical,D1 274 | 1508-061,"Viedma, Volcan",Argentina,Argentina,-49.358,-73.28,1300,Subglacial volcano,Historical,D1 275 | 1507-12=,Villarrica,Chile,Chile-C,-39.42,-71.93,2847,Stratovolcano,Historical,D1 276 | 0403-13-,West Mata,Tonga,Tonga-SW Pacific,-15.1,-173.75,-1174,Submarine volcano,Historical,D1 277 | 1101-34-,Westdahl,United States,Aleutian Is,54.52,-164.65,1654,Stratovolcano,Historical,D1 278 | 0401-04=,White Island,New Zealand,New Zealand,-37.52,177.18,321,Stratovolcano,Historical,D1 279 | 1503-02=,"Wolf, Volcan",Ecuador,Galapagos,0.02,-91.35,1710,Shield volcano,Historical,D1 280 | 0803-07=,Yake-dake,Japan,Honshu-Japan,36.22,137.58,2455,Stratovolcano,Historical,D1 281 | 0507-10=,Yasur,Vanuatu,Vanuatu-SW Pacific,-19.52,169.425,361,Stratovolcano,Historical,D1 282 | 1000-04=,Zheltovsky,Russia,Kamchatka,51.57,157.323,1953,Stratovolcano,Historical,D1 283 | -------------------------------------------------------------------------------- /Chapter08/1 convert json to csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import time\n", 12 | "import datetime\n", 13 | "import json, csv\n", 14 | "\n", 15 | "print( datetime.datetime.now().time())\n", 16 | "\n", 17 | "headers = True\n", 18 | "filein = 'c:/Users/Dan/yelp_academic_dataset_business.json'\n", 19 | "fileout = 'c:/Users/Dan/yelp_academic_dataset_review.csv'\n", 20 | "with open(filein, encoding=\"cp866\") as jsonf, open(fileout, \"w\") as csvf:\n", 21 | " for line in jsonf:\n", 22 | " data = json.loads(line) \n", 23 | " #remove the review text\n", 24 | " data.pop('text')\n", 25 | " #print(data) \n", 26 | " if headers:\n", 27 | " keys = []\n", 28 | " for k, v in data.items():\n", 29 | " keys.append(k)\n", 30 | " #print(keys)\n", 31 | " writer = csv.DictWriter(csvf, fieldnames=keys)\n", 32 | " writer.writeheader()\n", 33 | " headers = False\n", 34 | " writer.writerow(data)\n", 35 | " \n", 36 | "print( datetime.datetime.now().time())" 37 | ] 38 | } 39 | ], 40 | "metadata": { 41 | "kernelspec": { 42 | "display_name": "Python 3", 43 | "language": "python", 44 | "name": "python3" 45 | }, 46 | "language_info": { 47 | "codemirror_mode": { 48 | "name": "ipython", 49 | "version": 3 50 | }, 51 | "file_extension": ".py", 52 | "mimetype": "text/x-python", 53 | "name": "python", 54 | "nbconvert_exporter": "python", 55 | "pygments_lexer": "ipython3", 56 | "version": "3.6.0" 57 | } 58 | }, 59 | "nbformat": 4, 60 | "nbformat_minor": 2 61 | } 62 | -------------------------------------------------------------------------------- /Chapter08/3 cuisines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json\n", 12 | "filein = 'business.json'\n", 13 | "filein = 'yelp_academic_dataset_business.json'\n", 14 | "lines = list(open(filein))\n", 15 | "\n", 16 | "ratings = {}\n", 17 | "for line in lines:\n", 18 | " line = unicode(line, errors='ignore')\n", 19 | " obj = 
json.loads(line)\n", 20 | " if obj['categories'] == None:\n", 21 | " continue\n", 22 | " if 'Restaurants' in obj['categories']:\n", 23 | " rating = obj['stars']\n", 24 | " for category in obj['categories']:\n", 25 | " if category not in ratings:\n", 26 | " ratings[category] = []\n", 27 | " clist = ratings.get(category)\n", 28 | " clist.append(rating)\n", 29 | "\n", 30 | "cuisines = {}\n", 31 | "total = 0\n", 32 | "cmax = ''\n", 33 | "maxc = 0\n", 34 | "for cuisine in ratings:\n", 35 | " clist = ratings[cuisine]\n", 36 | " if len(clist) < 10:\n", 37 | " continue\n", 38 | " avg = float(sum(clist))/len(clist)\n", 39 | " cuisines[cuisine] = avg\n", 40 | " total = total + avg\n", 41 | " if avg > maxc:\n", 42 | " maxc = avg\n", 43 | " cmax = cuisine\n", 44 | " \n", 45 | "print(\"Highest rated cuisine is \",cmax,\" at \",maxc)\n", 46 | "print(\"Average cuisine rating is \",total/len(ratings))\n", 47 | " \n", 48 | "print(cuisines)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "import pandas as pd\n", 60 | "import numpy as np\n", 61 | "df = pd.DataFrame(columns=['Cuisine', 'Rating'])\n", 62 | "for cuisine in cuisines:\n", 63 | " df.loc[len(df)]=[cuisine, cuisines[cuisine]]\n", 64 | "hist, bin_edges = np.histogram(df['Rating'], bins=range(5))\n", 65 | "\n", 66 | "import matplotlib.pyplot as plt\n", 67 | "plt.bar(bin_edges[:-1], hist, width = 1)\n", 68 | "plt.xlim(min(bin_edges), max(bin_edges))\n", 69 | "plt.show() " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "for line in lines:\n", 81 | " line = unicode(line, errors='ignore')\n", 82 | " obj = json.loads(line)\n", 83 | " if obj['categories'] == None:\n", 84 | " continue\n", 85 | " if 'Personal Chefs' in obj['categories']:\n", 86 | " if obj['attributes'] == None:\n", 87 | " continue\n", 88 | " for attr in obj['attributes']:\n", 89 | " print (attr)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "#determine relationship between number of reviews and star rating\n", 101 | "import pandas as pd\n", 102 | "from pandas import DataFrame as df \n", 103 | "import numpy as np \n", 104 | "\n", 105 | "dfr2 = pd.DataFrame(columns=['reviews', 'rating'])\n", 106 | "mynparray = dfr2.values\n", 107 | "\n", 108 | "for line in lines:\n", 109 | " line = unicode(line, errors='ignore')\n", 110 | " obj = json.loads(line)\n", 111 | " reviews = int(obj['review_count'])\n", 112 | " rating = float(obj['stars'])\n", 113 | " arow = [reviews,rating]\n", 114 | " mynparray = np.vstack((mynparray,arow)) \n", 115 | "\n", 116 | "dfr2 = df(mynparray)\n", 117 | "print len(dfr2)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "dfr2.columns = ['reviews', 'rating']\n", 129 | "dfr2.describe()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": true 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "#import matplotlib.pyplot as plt\n", 141 | "dfr2.plot(kind='scatter', x='rating', y='reviews')\n", 142 | "plt.show()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 
149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "#compute regression\n", 154 | "import statsmodels.formula.api as smf\n", 155 | "\n", 156 | "# create a fitted model in one line\n", 157 | "lm = smf.ols(formula='rating ~ reviews', data=dfr2).fit()\n", 158 | "\n", 159 | "# print the coefficients\n", 160 | "lm.params" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "#min, max observed values\n", 172 | "X_new = pd.DataFrame({'reviews': [dfr2.reviews.min(), dfr2.reviews.max()]})\n", 173 | "X_new.head()" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "#make corresponding predictions\n", 185 | "preds = lm.predict(X_new)\n", 186 | "preds" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "# first, plot the observed data\n", 198 | "dfr2.plot(kind='scatter', x='reviews', y='rating')\n", 199 | "\n", 200 | "# then, plot the least squares line\n", 201 | "plt.plot(X_new, preds, c='red', linewidth=2)\n", 202 | "plt.show()" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.0" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /Chapter08/files/reviews.csv: -------------------------------------------------------------------------------- 1 | funny,user_id,review_id,business_id,stars,date,useful,type,cool 2 | 0,KpkOkG6RIf4Ra25Lhhxf1A,NxL8SIC5yqOdnlXCg18IBg,2aFiy99vNLklCx3T_tGS9A,5,2011-10-10,0,review,0 3 | 0,bQ7fQq1otn9hKX-gXRsrgA,pXbbIgOXvLuTi_SPs1hQEQ,2aFiy99vNLklCx3T_tGS9A,5,2010-12-29,1,review,0 4 | 0,r1NUhdNmL6yU9Bn-Yx6FTw,wslW2Lu4NYylb1jEapAGsw,2aFiy99vNLklCx3T_tGS9A,5,2011-04-29,0,review,0 5 | 0,aW3ix1KNZAvoM8q-WghA3Q,GP6YEearUWrzPtQYSF1vVg,2LfIuF3_sX6uwe-IR-P0jQ,5,2014-07-14,0,review,1 6 | 0,YOo-Cip8HqvKp_p9nEGphw,25RlYGq2s5qShi-pn3ufVA,2LfIuF3_sX6uwe-IR-P0jQ,4,2014-01-15,0,review,0 7 | 0,bgl3j8yJcRO-00NkUYsXGQ,Uf1Ki1yyH_JDKhLvn2e4FQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2013-04-28,2,review,1 8 | 0,CWKF9de-nskLYEqDDCfubg,oFmVZh-La7SuvpHrH_Al4Q,2LfIuF3_sX6uwe-IR-P0jQ,4,2014-10-12,0,review,0 9 | 0,GJ7PTY7huYORFKKg3db3Gw,bRvdVt88MJ_YMTlLbjDLxQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2012-09-18,2,review,0 10 | 0,rxqp9eXZj1jYTn0UIsm3Hg,zNUSxqflZKgKD1NQH3jdFA,2LfIuF3_sX6uwe-IR-P0jQ,5,2015-10-11,0,review,0 11 | 0,UU0nHQtHPMAfLidk8tOHTg,LkP1l7sZIwOV6IKNLqQp_A,2LfIuF3_sX6uwe-IR-P0jQ,5,2015-04-05,0,review,0 12 | 0,A_Hyfk3FcwFVIk1CQC7z7w,MvvT0BtQH9wq7K-pPgkoEQ,2LfIuF3_sX6uwe-IR-P0jQ,1,2014-07-08,1,review,0 13 | 1,OvD92wp0-uuFoGLBymwfKQ,dJguzIfNKtGkKk8ryOPdfQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2014-08-23,1,review,1 14 | 0,5NDk-q5mv8PIDvz83HwMVg,l-yhe0IFJ7WTgEk0qD8yqA,2LfIuF3_sX6uwe-IR-P0jQ,4,2015-01-13,0,review,0 15 | 0,AziQIgYIAY6uVw1k8sbtTw,cNA3oGTmmeRvt10vrvlJOQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2014-08-05,1,review,0 16 | 
0,A65IYKs3FwsyyJH20XOk2w,S_Z5aqlpTVkR9RmYvcPpTQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2014-12-22,0,review,1 17 | 3,wnzfuir72IZFg5RAPOwWCQ,klVqdjXPM2aM8Y0RsDCPoA,2LfIuF3_sX6uwe-IR-P0jQ,1,2013-11-17,4,review,2 18 | 0,2N9wrn5A37aOXDrBPIYQdA,Lulhm9mWWcgndcuc_zsgPA,2LfIuF3_sX6uwe-IR-P0jQ,4,2013-07-10,0,review,0 19 | 0,oKZiDLm0D1PIm1-vRtvONA,k4xighfFxKv5H-Eic28MNQ,2LfIuF3_sX6uwe-IR-P0jQ,5,2014-08-23,0,review,0 20 | 0,XMRkVYpQAZpWOpia5-MoPw,U617X1icAyAC5hbjsnhcpA,0czfEgv9KAD4VlIa7ANPWQ,1,2010-02-16,1,review,0 21 | 2,jhhHm3Vk9ZlP21WdY_5R0w,_a7Zu2ZSEGO4bl2gvu7OtQ,0czfEgv9KAD4VlIa7ANPWQ,5,2009-04-10,2,review,1 22 | 0,9Cmmt_34PxuC84P1AgmetQ,5hU8Uxdem1j_PEr4HFs5HQ,0czfEgv9KAD4VlIa7ANPWQ,5,2009-01-30,0,review,0 23 | 5,B7ecAeAIrXg7sgmabS38pg,RAzbLYbgw3tNv8Tu0R7qmQ,0czfEgv9KAD4VlIa7ANPWQ,3,2007-12-15,5,review,4 24 | 1,mdVHZbSl97wUUoE6hdxeLg,0-emituHrxFvtYJZVhr2tA,0czfEgv9KAD4VlIa7ANPWQ,3,2009-09-07,3,review,2 25 | 4,je5k8a3qIOM0VJE5MaxsfQ,dfL5TyVib8uZgxaNMwx7Aw,0czfEgv9KAD4VlIa7ANPWQ,2,2009-02-01,3,review,1 26 | 1,TryfbZaaGtedncOMq9k86g,z5OD5c-nu2v1xwfG0mAIiA,0czfEgv9KAD4VlIa7ANPWQ,4,2007-03-04,2,review,0 27 | 0,yt1VnahG-MRgU6tswWaUxA,fA2NFb69SvDCsB7WSpY2rw,0czfEgv9KAD4VlIa7ANPWQ,2,2010-03-01,2,review,0 28 | 1,wJ0mbYw5WoiaKxiNEayZvg,OiB7X9UxftB0WAWgd2KdJQ,0czfEgv9KAD4VlIa7ANPWQ,3,2010-05-14,1,review,1 29 | 0,29rUYREQxeFE6LWGnirgew,ezuT8BpvhLITCfbQEWddsw,0czfEgv9KAD4VlIa7ANPWQ,2,2011-01-14,1,review,0 30 | 1,zOU38EUHgUtEI4ziZ8DB1w,IXQgVQJUQoJ6sKxV7-cREQ,7GI_V9oLCUGdn2ogqB0IBg,5,2011-10-19,1,review,0 31 | 0,hMZ3_yBErMi-pFcquyLJWg,ojtDaz6oAxS74VnAAjb4NQ,7GI_V9oLCUGdn2ogqB0IBg,5,2014-04-26,0,review,0 32 | 1,9sdni8QHrai8l7ikHsV5Jg,-6wjE08Pf8oWrZQQocEgHg,7GI_V9oLCUGdn2ogqB0IBg,1,2014-04-23,0,review,0 33 | 0,PmgqNO0-5Y3e3UoR61TD7w,6-hKBi-6RC3g7Mft0c-6qw,4P-vTvE6cncJyUyLh73pxw,4,2014-03-29,0,review,0 34 | 0,kXUySHSlRgVrcR4Aa0HtGQ,tRd0-mPa9O1TMJp_dw5khQ,4P-vTvE6cncJyUyLh73pxw,4,2014-08-14,0,review,0 35 | 0,SYKPwRhnlKrW6yTvm7oPWg,8Mu56iQ-MYEyivqUVss0XA,4P-vTvE6cncJyUyLh73pxw,4,2012-10-20,1,review,0 36 | 0,7LCG3o2KW2jgKgbKN0DQOg,ToC77cIEiMas9CPU7dt_fA,4P-vTvE6cncJyUyLh73pxw,5,2012-08-25,2,review,1 37 | 0,iSdSNh1hjdE33LOwrFnFrg,6YC4o9yLc25DK8c6soOlaw,4P-vTvE6cncJyUyLh73pxw,4,2015-07-27,0,review,0 38 | 0,catpxxLS6OF5cfjxLvpAbA,T5Xa-KKFqgXdbFnATZA4gg,4P-vTvE6cncJyUyLh73pxw,5,2014-02-24,0,review,1 39 | 0,Zmp1Q6Ul9VH3zL02Z5ls_A,JYyKOtLznozAlT8P1Foaow,4P-vTvE6cncJyUyLh73pxw,2,2013-03-16,0,review,0 40 | 0,vzKgTUCV5Pz0gldaeM5j3g,4FnZR30PtlEb1Ifi7S65bg,4P-vTvE6cncJyUyLh73pxw,3,2015-09-05,0,review,0 41 | 7,i8hCMZN-0bHENsHZKHpC-g,6GBjCBVtPGsnQ67neAAkjw,4P-vTvE6cncJyUyLh73pxw,4,2011-05-25,7,review,6 42 | 0,OdTvOw8NKzaCcsj_dnRZSQ,CdBHQT3WuYuQf-VQ1Um8tQ,4P-vTvE6cncJyUyLh73pxw,3,2013-02-12,0,review,0 43 | 0,W4cfK8SxPloWtIXl3JST2g,FFTN8Y1U9G2F_p1dQnxO7A,4P-vTvE6cncJyUyLh73pxw,4,2016-05-11,0,review,1 44 | 0,n5P4Cw7F1pCYwVutSTqUPg,VlDz03s9VyODcVi1S9-Yfw,4P-vTvE6cncJyUyLh73pxw,3,2016-05-04,0,review,0 45 | 0,ub50UE95-gu7yIoed1zssA,HbnWR7vaXD5FLCcrrMAGhg,4P-vTvE6cncJyUyLh73pxw,1,2016-01-23,0,review,0 46 | 0,57w8gLY3f7OwGmWNtUIFIA,YGfi1F_Fuc_OyemfqQFspg,4P-vTvE6cncJyUyLh73pxw,1,2016-03-17,1,review,0 47 | 0,RlpkcJqctkKXl-LO1IAtig,cTNeflcioISILyhQpt0HDA,4P-vTvE6cncJyUyLh73pxw,3,2016-08-01,0,review,0 48 | 0,m5iFZbW5hSNPNplx-vGkyA,FM3Uo0F_2BHp1JdxR5nqvQ,4P-vTvE6cncJyUyLh73pxw,4,2013-06-12,1,review,1 49 | 2,1-xPzJk_ijBvY2J8Re1DIQ,Pqe0NlNUcxzEnUXALZviuA,4P-vTvE6cncJyUyLh73pxw,3,2011-04-01,0,review,0 50 | 0,oPQAJEiJnRrD0Km5RlKmtw,vAuh2cggRJI1ZUqiZDEjHQ,4P-vTvE6cncJyUyLh73pxw,5,2016-08-14,0,review,0 51 | 
1,bZBPVRIGQZ0WXRYLtYXbYQ,sKStdKdvO5nIn1ZN0OC-cg,4P-vTvE6cncJyUyLh73pxw,2,2013-04-01,2,review,1 52 | 0,fflaErtSIUWvHe3q0I3JAA,uVdnYmlXio7BqtfmZZU-0w,4P-vTvE6cncJyUyLh73pxw,2,2013-01-23,0,review,0 53 | 0,BRO98wNeY2Q_BZX8G-iVvg,f0l_4MrqYjVCOZq_PowquQ,4P-vTvE6cncJyUyLh73pxw,4,2012-05-01,1,review,0 54 | 0,qn7SAaO9Jwl2ls5c9Og2yA,QfnIyQ5wYk2NDWSLYkSbgQ,4P-vTvE6cncJyUyLh73pxw,4,2012-03-01,0,review,1 55 | 0,7phnWalwWR0LcV-4xGGFgg,QVS_4D3lLpilcljTSzKtcw,4P-vTvE6cncJyUyLh73pxw,2,2015-11-21,0,review,0 56 | 0,C0jquh-km5UnawqDqSQpBw,-K5z7DzXHJgEC1tsTLfFeA,4uiijOUDzc-DeIb2XcKW_A,3,2009-09-15,2,review,0 57 | 1,cPifBB7Qbjs9PntPGOY9iQ,2tjghSImOPf4A9L4zhByRQ,4uiijOUDzc-DeIb2XcKW_A,3,2010-11-25,1,review,1 58 | 0,pgTz-Ds6WvS8qFOsRekG9A,fCVQlHk6x7-S2FWWMbOWpA,4uiijOUDzc-DeIb2XcKW_A,2,2011-01-13,1,review,0 59 | 1,GDeoUHALgyqK13ewN92Jnw,N42b2u6YSL5iEjN6NnrKeQ,4uiijOUDzc-DeIb2XcKW_A,4,2010-09-06,2,review,2 60 | -------------------------------------------------------------------------------- /Chapter09/1 naive bayes r.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#install.packages(\"e1071\", repos=\"http://cran.r-project.org\")\n", 10 | "library(e1071)\n", 11 | "library(caret)\n", 12 | "library(\"neuralnet\")\n", 13 | "set.seed(7317)\n", 14 | "data(iris)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 5, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/html": [ 25 | "114" 26 | ], 27 | "text/latex": [ 28 | "114" 29 | ], 30 | "text/markdown": [ 31 | "114" 32 | ], 33 | "text/plain": [ 34 | "[1] 114" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | }, 40 | { 41 | "data": { 42 | "text/html": [ 43 | "36" 44 | ], 45 | "text/latex": [ 46 | "36" 47 | ], 48 | "text/markdown": [ 49 | "36" 50 | ], 51 | "text/plain": [ 52 | "[1] 36" 53 | ] 54 | }, 55 | "metadata": {}, 56 | "output_type": "display_data" 57 | } 58 | ], 59 | "source": [ 60 | "trainingIndices <- createDataPartition(iris$Species, p=0.75, list=FALSE)\n", 61 | "training <- iris[trainingIndices,]\n", 62 | "testing <- iris[-trainingIndices,]\n", 63 | "nrow(training)\n", 64 | "nrow(testing)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 6, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "\n", 76 | "Naive Bayes Classifier for Discrete Predictors\n", 77 | "\n", 78 | "Call:\n", 79 | "naiveBayes.default(x = X, y = Y, laplace = laplace)\n", 80 | "\n", 81 | "A-priori probabilities:\n", 82 | "Y\n", 83 | " setosa versicolor virginica \n", 84 | " 0.3333333 0.3333333 0.3333333 \n", 85 | "\n", 86 | "Conditional probabilities:\n", 87 | " Sepal.Length\n", 88 | "Y [,1] [,2]\n", 89 | " setosa 4.942105 0.3293167\n", 90 | " versicolor 5.950000 0.5371345\n", 91 | " virginica 6.602632 0.6598472\n", 92 | "\n", 93 | " Sepal.Width\n", 94 | "Y [,1] [,2]\n", 95 | " setosa 3.371053 0.3805458\n", 96 | " versicolor 2.750000 0.2966024\n", 97 | " virginica 2.921053 0.2839470\n", 98 | "\n", 99 | " Petal.Length\n", 100 | "Y [,1] [,2]\n", 101 | " setosa 1.468421 0.1662061\n", 102 | " versicolor 4.278947 0.4899850\n", 103 | " virginica 5.544737 0.5597767\n", 104 | "\n", 105 | " Petal.Width\n", 106 | "Y [,1] [,2]\n", 107 | " setosa 0.2421053 0.1106042\n", 108 | " versicolor 1.3368421 0.1937134\n", 109 | " virginica 2.0000000 0.2438431\n" 110 | ] 111 | }, 112 | "metadata": {}, 113 | 
"output_type": "display_data" 114 | } 115 | ], 116 | "source": [ 117 | "model <- naiveBayes(Species ~ ., data=training)\n", 118 | "model" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/html": [ 129 | "
<ol class=list-inline>\n",
 130 | "\t<li>setosa</li>\n",
 131 | "\t<li>setosa</li>\n",
 132 | "\t<li>setosa</li>\n",
 133 | "\t<li>setosa</li>\n",
 134 | "\t<li>setosa</li>\n",
 135 | "\t<li>setosa</li>\n",
 136 | "\t<li>setosa</li>\n",
 137 | "\t<li>setosa</li>\n",
 138 | "\t<li>setosa</li>\n",
 139 | "\t<li>setosa</li>\n",
 140 | "\t<li>setosa</li>\n",
 141 | "\t<li>setosa</li>\n",
 142 | "\t<li>versicolor</li>\n",
 143 | "\t<li>versicolor</li>\n",
 144 | "\t<li>versicolor</li>\n",
 145 | "\t<li>versicolor</li>\n",
 146 | "\t<li>versicolor</li>\n",
 147 | "\t<li>versicolor</li>\n",
 148 | "\t<li>versicolor</li>\n",
 149 | "\t<li>versicolor</li>\n",
 150 | "\t<li>versicolor</li>\n",
 151 | "\t<li>versicolor</li>\n",
 152 | "\t<li>versicolor</li>\n",
 153 | "\t<li>versicolor</li>\n",
 154 | "\t<li>virginica</li>\n",
 155 | "\t<li>virginica</li>\n",
 156 | "\t<li>virginica</li>\n",
 157 | "\t<li>virginica</li>\n",
 158 | "\t<li>virginica</li>\n",
 159 | "\t<li>virginica</li>\n",
 160 | "\t<li>versicolor</li>\n",
 161 | "\t<li>virginica</li>\n",
 162 | "\t<li>virginica</li>\n",
 163 | "\t<li>virginica</li>\n",
 164 | "\t<li>virginica</li>\n",
 165 | "\t<li>virginica</li>\n",
 166 | "</ol>
\n" 167 | ], 168 | "text/latex": [ 169 | "\\begin{enumerate*}\n", 170 | "\\item setosa\n", 171 | "\\item setosa\n", 172 | "\\item setosa\n", 173 | "\\item setosa\n", 174 | "\\item setosa\n", 175 | "\\item setosa\n", 176 | "\\item setosa\n", 177 | "\\item setosa\n", 178 | "\\item setosa\n", 179 | "\\item setosa\n", 180 | "\\item setosa\n", 181 | "\\item setosa\n", 182 | "\\item versicolor\n", 183 | "\\item versicolor\n", 184 | "\\item versicolor\n", 185 | "\\item versicolor\n", 186 | "\\item versicolor\n", 187 | "\\item versicolor\n", 188 | "\\item versicolor\n", 189 | "\\item versicolor\n", 190 | "\\item versicolor\n", 191 | "\\item versicolor\n", 192 | "\\item versicolor\n", 193 | "\\item versicolor\n", 194 | "\\item virginica\n", 195 | "\\item virginica\n", 196 | "\\item virginica\n", 197 | "\\item virginica\n", 198 | "\\item virginica\n", 199 | "\\item virginica\n", 200 | "\\item versicolor\n", 201 | "\\item virginica\n", 202 | "\\item virginica\n", 203 | "\\item virginica\n", 204 | "\\item virginica\n", 205 | "\\item virginica\n", 206 | "\\end{enumerate*}\n" 207 | ], 208 | "text/markdown": [ 209 | "1. setosa\n", 210 | "2. setosa\n", 211 | "3. setosa\n", 212 | "4. setosa\n", 213 | "5. setosa\n", 214 | "6. setosa\n", 215 | "7. setosa\n", 216 | "8. setosa\n", 217 | "9. setosa\n", 218 | "10. setosa\n", 219 | "11. setosa\n", 220 | "12. setosa\n", 221 | "13. versicolor\n", 222 | "14. versicolor\n", 223 | "15. versicolor\n", 224 | "16. versicolor\n", 225 | "17. versicolor\n", 226 | "18. versicolor\n", 227 | "19. versicolor\n", 228 | "20. versicolor\n", 229 | "21. versicolor\n", 230 | "22. versicolor\n", 231 | "23. versicolor\n", 232 | "24. versicolor\n", 233 | "25. virginica\n", 234 | "26. virginica\n", 235 | "27. virginica\n", 236 | "28. virginica\n", 237 | "29. virginica\n", 238 | "30. virginica\n", 239 | "31. versicolor\n", 240 | "32. virginica\n", 241 | "33. virginica\n", 242 | "34. virginica\n", 243 | "35. virginica\n", 244 | "36. virginica\n", 245 | "\n", 246 | "\n" 247 | ], 248 | "text/plain": [ 249 | " [1] setosa setosa setosa setosa setosa setosa \n", 250 | " [7] setosa setosa setosa setosa setosa setosa \n", 251 | "[13] versicolor versicolor versicolor versicolor versicolor versicolor\n", 252 | "[19] versicolor versicolor versicolor versicolor versicolor versicolor\n", 253 | "[25] virginica virginica virginica virginica virginica virginica \n", 254 | "[31] versicolor virginica virginica virginica virginica virginica \n", 255 | "Levels: setosa versicolor virginica" 256 | ] 257 | }, 258 | "metadata": {}, 259 | "output_type": "display_data" 260 | } 261 | ], 262 | "source": [ 263 | "prediction <- predict(model, testing, type=\"class\")\n", 264 | "prediction" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 8, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/html": [ 275 | "\n", 276 | "\n", 277 | "\n", 278 | "\t\n", 279 | "\t\n", 280 | "\t\n", 281 | "\t\n", 282 | "\t\n", 283 | "\t\n", 284 | "\n", 285 | "
testing.Species  prediction
setosa           setosa
setosa           setosa
setosa           setosa
setosa           setosa
setosa           setosa
setosa           setosa
\n" 286 | ], 287 | "text/latex": [ 288 | "\\begin{tabular}{r|ll}\n", 289 | " testing.Species & prediction\\\\\n", 290 | "\\hline\n", 291 | "\t setosa & setosa\\\\\n", 292 | "\t setosa & setosa\\\\\n", 293 | "\t setosa & setosa\\\\\n", 294 | "\t setosa & setosa\\\\\n", 295 | "\t setosa & setosa\\\\\n", 296 | "\t setosa & setosa\\\\\n", 297 | "\\end{tabular}\n" 298 | ], 299 | "text/markdown": [ 300 | "\n", 301 | "testing.Species | prediction | \n", 302 | "|---|---|---|---|---|---|\n", 303 | "| setosa | setosa | \n", 304 | "| setosa | setosa | \n", 305 | "| setosa | setosa | \n", 306 | "| setosa | setosa | \n", 307 | "| setosa | setosa | \n", 308 | "| setosa | setosa | \n", 309 | "\n", 310 | "\n" 311 | ], 312 | "text/plain": [ 313 | " testing.Species prediction\n", 314 | "1 setosa setosa \n", 315 | "2 setosa setosa \n", 316 | "3 setosa setosa \n", 317 | "4 setosa setosa \n", 318 | "5 setosa setosa \n", 319 | "6 setosa setosa " 320 | ] 321 | }, 322 | "metadata": {}, 323 | "output_type": "display_data" 324 | } 325 | ], 326 | "source": [ 327 | "results <- data.frame(testing$Species, prediction)\n", 328 | "head(results)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 9, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "data": { 338 | "text/html": [ 339 | "\n", 340 | "\n", 341 | "\n", 342 | "\t\n", 343 | "\t\n", 344 | "\t\n", 345 | "\t\n", 346 | "\t\n", 347 | "\t\n", 348 | "\n", 349 | "
testing.Species  prediction  accurate
setosa           setosa      TRUE
setosa           setosa      TRUE
setosa           setosa      TRUE
setosa           setosa      TRUE
setosa           setosa      TRUE
setosa           setosa      TRUE
\n" 350 | ], 351 | "text/latex": [ 352 | "\\begin{tabular}{r|lll}\n", 353 | " testing.Species & prediction & accurate\\\\\n", 354 | "\\hline\n", 355 | "\t setosa & setosa & TRUE \\\\\n", 356 | "\t setosa & setosa & TRUE \\\\\n", 357 | "\t setosa & setosa & TRUE \\\\\n", 358 | "\t setosa & setosa & TRUE \\\\\n", 359 | "\t setosa & setosa & TRUE \\\\\n", 360 | "\t setosa & setosa & TRUE \\\\\n", 361 | "\\end{tabular}\n" 362 | ], 363 | "text/markdown": [ 364 | "\n", 365 | "testing.Species | prediction | accurate | \n", 366 | "|---|---|---|---|---|---|\n", 367 | "| setosa | setosa | TRUE | \n", 368 | "| setosa | setosa | TRUE | \n", 369 | "| setosa | setosa | TRUE | \n", 370 | "| setosa | setosa | TRUE | \n", 371 | "| setosa | setosa | TRUE | \n", 372 | "| setosa | setosa | TRUE | \n", 373 | "\n", 374 | "\n" 375 | ], 376 | "text/plain": [ 377 | " testing.Species prediction accurate\n", 378 | "1 setosa setosa TRUE \n", 379 | "2 setosa setosa TRUE \n", 380 | "3 setosa setosa TRUE \n", 381 | "4 setosa setosa TRUE \n", 382 | "5 setosa setosa TRUE \n", 383 | "6 setosa setosa TRUE " 384 | ] 385 | }, 386 | "metadata": {}, 387 | "output_type": "display_data" 388 | } 389 | ], 390 | "source": [ 391 | "results[\"accurate\"] <- results['testing.Species'] == results['prediction']\n", 392 | "head(results)" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 10, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/html": [ 403 | "36" 404 | ], 405 | "text/latex": [ 406 | "36" 407 | ], 408 | "text/markdown": [ 409 | "36" 410 | ], 411 | "text/plain": [ 412 | "[1] 36" 413 | ] 414 | }, 415 | "metadata": {}, 416 | "output_type": "display_data" 417 | }, 418 | { 419 | "data": { 420 | "text/html": [ 421 | "35" 422 | ], 423 | "text/latex": [ 424 | "35" 425 | ], 426 | "text/markdown": [ 427 | "35" 428 | ], 429 | "text/plain": [ 430 | "[1] 35" 431 | ] 432 | }, 433 | "metadata": {}, 434 | "output_type": "display_data" 435 | } 436 | ], 437 | "source": [ 438 | "nrow(results)\n", 439 | "nrow(results[results$accurate == TRUE,])" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "R", 446 | "language": "R", 447 | "name": "ir" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": "r", 451 | "file_extension": ".r", 452 | "mimetype": "text/x-r-source", 453 | "name": "R", 454 | "pygments_lexer": "r", 455 | "version": "3.4.1" 456 | } 457 | }, 458 | "nbformat": 4, 459 | "nbformat_minor": 2 460 | } 461 | -------------------------------------------------------------------------------- /Chapter09/2 naive bayes python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 19, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random\n", 12 | "random.seed(9001)" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 20, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "(150, 4)" 24 | ] 25 | }, 26 | "execution_count": 20, 27 | "metadata": {}, 28 | "output_type": "execute_result" 29 | } 30 | ], 31 | "source": [ 32 | "from sklearn import datasets\n", 33 | "irisb = datasets.load_iris() #numpy.ndarray\n", 34 | "iris = irisb['data']\n", 35 | "iris.shape" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 22, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | 
"Number of mislabeled points out of a total 150 points : 6\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "from sklearn.naive_bayes import GaussianNB\n", 53 | "gnb = GaussianNB()\n", 54 | "y_pred = gnb.fit(irisb.data, irisb.target).predict(irisb.data)\n", 55 | "print(\"Number of mislabeled points out of a total %d points : %d\" \n", 56 | " % (irisb.data.shape[0],(irisb.target != y_pred).sum()))" 57 | ] 58 | } 59 | ], 60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 3", 63 | "language": "python", 64 | "name": "python3" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 3 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython3", 76 | "version": "3.6.1" 77 | } 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 2 81 | } 82 | -------------------------------------------------------------------------------- /Chapter09/3 nearest neighbor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "housing <- read.table(\"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data\")\n", 12 | "colnames(housing) <- c(\"CRIM\", \"ZN\", \"INDUS\", \"CHAS\", \"NOX\", \"RM\", \"AGE\", \"DIS\", \"RAD\", \"TAX\", \"PRATIO\",\n", 13 | " \"B\", \"LSTAT\", \"MDEV\")\n", 14 | "summary(housing)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "housing <- housing[order(housing$MDEV),]\n", 24 | "#install.packages(\"caret\")\n", 25 | "library(caret)\n", 26 | "set.seed(5557)\n", 27 | "indices <- createDataPartition(housing$MDEV, p=0.75, list=FALSE)\n", 28 | "training <- housing[indices,]\n", 29 | "testing <- housing[-indices,]\n", 30 | "nrow(training)\n", 31 | "nrow(testing)\n", 32 | "testing$MDEV" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "scrolled": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "library(class)\n", 44 | "knnModel <- knn(train=training, test=testing, cl=training$MDEV)\n", 45 | "knnModel" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "plot(knnModel)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "plot(testing$MDEV)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "scrolled": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "predicted <- read.table(\"housing-knn-predicted.csv\")\n", 75 | "colnames(predicted) <- c(\"predicted\")\n", 76 | "predicted" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "results <- data.frame(testing$MDEV, predicted)\n", 86 | "head(results)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "results[\"accuracy\"] <- results['testing.MDEV'] / results['predicted']\n", 96 | "head(results)\n", 97 | "mean(results$accuracy)" 98 | ] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": 
"R", 104 | "language": "R", 105 | "name": "ir" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": "r", 109 | "file_extension": ".r", 110 | "mimetype": "text/x-r-source", 111 | "name": "R", 112 | "pygments_lexer": "r", 113 | "version": "3.4.1" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /Chapter09/4 nearest neighbor py.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 58, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.neighbors import NearestNeighbors\n", 12 | "import numpy as np\n", 13 | "import pandas as pd" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 59, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | "
   CRIM     ZN    INDUS  CHAS  NOX    RM     AGE   DIS     RAD  TAX    PRATIO  B       LSTAT  MDEV
0  0.00632  18.0  2.31   0     0.538  6.575  65.2  4.0900  1    296.0  15.3    396.90  4.98   24.0
1  0.02731   0.0  7.07   0     0.469  6.421  78.9  4.9671  2    242.0  17.8    396.90  9.14   21.6
2  0.02729   0.0  7.07   0     0.469  7.185  61.1  4.9671  2    242.0  17.8    392.83  4.03   34.7
3  0.03237   0.0  2.18   0     0.458  6.998  45.8  6.0622  3    222.0  18.7    394.63  2.94   33.4
4  0.06905   0.0  2.18   0     0.458  7.147  54.2  6.0622  3    222.0  18.7    396.90  5.33   36.2
\n", 146 | "
" 147 | ], 148 | "text/plain": [ 149 | " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PRATIO \\\n", 150 | "0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296.0 15.3 \n", 151 | "1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242.0 17.8 \n", 152 | "2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242.0 17.8 \n", 153 | "3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222.0 18.7 \n", 154 | "4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222.0 18.7 \n", 155 | "\n", 156 | " B LSTAT MDEV \n", 157 | "0 396.90 4.98 24.0 \n", 158 | "1 396.90 9.14 21.6 \n", 159 | "2 392.83 4.03 34.7 \n", 160 | "3 394.63 2.94 33.4 \n", 161 | "4 396.90 5.33 36.2 " 162 | ] 163 | }, 164 | "execution_count": 59, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "housing = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data\",\n", 171 | " header=None, sep='\\s+')\n", 172 | "housing.columns = [\"CRIM\", \"ZN\", \"INDUS\", \"CHAS\", \"NOX\", \"RM\", \"AGE\", \"DIS\", \"RAD\", \"TAX\", \"PRATIO\", \\\n", 173 | " \"B\", \"LSTAT\", \"MDEV\"]\n", 174 | "housing.head(5)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 60, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "506" 186 | ] 187 | }, 188 | "execution_count": 60, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "len(housing)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 61, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "417" 206 | ] 207 | }, 208 | "execution_count": 61, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "mask = np.random.rand(len(housing)) < 0.8\n", 215 | "training = housing[mask]\n", 216 | "testing = housing[~mask]\n", 217 | "len(training)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 62, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "89" 229 | ] 230 | }, 231 | "execution_count": 62, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "len(testing)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 63, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "nbrs = NearestNeighbors().fit(housing)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 64, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "array([[ 0, 241, 62, 81, 6],\n", 260 | " [ 1, 47, 49, 87, 2],\n", 261 | " [ 2, 85, 87, 84, 5],\n", 262 | " ..., \n", 263 | " [503, 504, 219, 88, 217],\n", 264 | " [504, 503, 219, 88, 217],\n", 265 | " [505, 502, 504, 503, 91]], dtype=int32)" 266 | ] 267 | }, 268 | "execution_count": 64, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "distances, indices = nbrs.kneighbors(housing)\n", 275 | "indices " 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 65, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "array([[ 0. , 16.5628085 , 17.09498324, 18.40127391,\n", 287 | " 19.10555821],\n", 288 | " [ 0. , 16.18433277, 20.59837827, 22.95753545,\n", 289 | " 23.05885288],\n", 290 | " [ 0. 
, 11.44014392, 15.34074743, 19.2322435 ,\n", 291 | " 21.73264817],\n", 292 | " ..., \n", 293 | " [ 0. , 4.38093898, 9.44318468, 10.79865973,\n", 294 | " 11.95458848],\n", 295 | " [ 0. , 4.38093898, 8.88725757, 10.88003717,\n", 296 | " 11.15236419],\n", 297 | " [ 0. , 9.69512304, 13.73766871, 15.93946676,\n", 298 | " 15.94577477]])" 299 | ] 300 | }, 301 | "execution_count": 65, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "distances" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 66, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',\n", 319 | " metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n", 320 | " weights='uniform')" 321 | ] 322 | }, 323 | "execution_count": 66, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "from sklearn.neighbors import KNeighborsRegressor\n", 330 | "knn = KNeighborsRegressor(n_neighbors=5)\n", 331 | "x_columns = [\"CRIM\", \"ZN\", \"INDUS\", \"CHAS\", \"NOX\", \"RM\", \"AGE\", \"DIS\", \"RAD\", \"TAX\", \"PRATIO\", \"B\", \"LSTAT\"]\n", 332 | "y_column = [\"MDEV\"]\n", 333 | "knn.fit(training[x_columns], training[y_column])" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 67, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "array([[ 20.62],\n", 345 | " [ 21.18],\n", 346 | " [ 23.96],\n", 347 | " [ 17.14],\n", 348 | " [ 17.24],\n", 349 | " [ 18.68],\n", 350 | " [ 28.88],\n", 351 | " [ 37.54],\n", 352 | " [ 26.68],\n", 353 | " [ 39.02],\n", 354 | " [ 25.96],\n", 355 | " [ 21.9 ],\n", 356 | " [ 21.9 ],\n", 357 | " [ 25.42],\n", 358 | " [ 23.7 ],\n", 359 | " [ 25.54],\n", 360 | " [ 20.84],\n", 361 | " [ 19.28],\n", 362 | " [ 18.04],\n", 363 | " [ 18.72],\n", 364 | " [ 15.5 ],\n", 365 | " [ 18.26],\n", 366 | " [ 20.86],\n", 367 | " [ 37.22],\n", 368 | " [ 32.8 ],\n", 369 | " [ 24.66],\n", 370 | " [ 24.84],\n", 371 | " [ 28.86],\n", 372 | " [ 37.92],\n", 373 | " [ 27.58],\n", 374 | " [ 27.58],\n", 375 | " [ 29. ],\n", 376 | " [ 27.28],\n", 377 | " [ 22.52],\n", 378 | " [ 23.86],\n", 379 | " [ 23.58],\n", 380 | " [ 28.46],\n", 381 | " [ 23.86],\n", 382 | " [ 27.82],\n", 383 | " [ 21.96],\n", 384 | " [ 19.68],\n", 385 | " [ 31.26],\n", 386 | " [ 42.88],\n", 387 | " [ 42.88],\n", 388 | " [ 36.16],\n", 389 | " [ 33.28],\n", 390 | " [ 32.76],\n", 391 | " [ 29.5 ],\n", 392 | " [ 29.86],\n", 393 | " [ 34.82],\n", 394 | " [ 41.6 ],\n", 395 | " [ 29.96],\n", 396 | " [ 20.18],\n", 397 | " [ 22.64],\n", 398 | " [ 22.38],\n", 399 | " [ 20.66],\n", 400 | " [ 26.08],\n", 401 | " [ 19.58],\n", 402 | " [ 26.1 ],\n", 403 | " [ 19.66],\n", 404 | " [ 19.66],\n", 405 | " [ 10.76],\n", 406 | " [ 10.54],\n", 407 | " [ 10.2 ],\n", 408 | " [ 11.32],\n", 409 | " [ 15.84],\n", 410 | " [ 12.6 ],\n", 411 | " [ 10.88],\n", 412 | " [ 13.72],\n", 413 | " [ 17.36],\n", 414 | " [ 10.32],\n", 415 | " [ 11.76],\n", 416 | " [ 14.7 ],\n", 417 | " [ 14. 
],\n", 418 | " [ 9.36],\n", 419 | " [ 11.52],\n", 420 | " [ 12.56],\n", 421 | " [ 16.48],\n", 422 | " [ 14.56],\n", 423 | " [ 22.26],\n", 424 | " [ 21.22],\n", 425 | " [ 22.66],\n", 426 | " [ 23.7 ],\n", 427 | " [ 22.26],\n", 428 | " [ 22.34],\n", 429 | " [ 23.18],\n", 430 | " [ 21.58],\n", 431 | " [ 14.48],\n", 432 | " [ 20.46]])" 433 | ] 434 | }, 435 | "execution_count": 67, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "predictions = knn.predict(testing[x_columns])\n", 442 | "predictions" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 68, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "data": { 452 | "text/plain": [ 453 | "testing 22.159551\n", 454 | "prediction 22.931011\n", 455 | "diff -0.771461\n", 456 | "pct -0.099104\n", 457 | "dtype: float64" 458 | ] 459 | }, 460 | "execution_count": 68, 461 | "metadata": {}, 462 | "output_type": "execute_result" 463 | } 464 | ], 465 | "source": [ 466 | "columns = [\"testing\",\"prediction\",\"diff\"]\n", 467 | "index = range(len(testing))\n", 468 | "results = pd.DataFrame(index=index, columns=columns)\n", 469 | "\n", 470 | "results['prediction'] = predictions\n", 471 | "\n", 472 | "results = results.reset_index(drop=True)\n", 473 | "testing = testing.reset_index(drop=True)\n", 474 | "results['testing'] = testing[\"MDEV\"]\n", 475 | "\n", 476 | "results['diff'] = results['testing'] - results['prediction']\n", 477 | "results['pct'] = results['diff'] / results['testing']\n", 478 | "results.mean()" 479 | ] 480 | } 481 | ], 482 | "metadata": { 483 | "kernelspec": { 484 | "display_name": "Python 3", 485 | "language": "python", 486 | "name": "python3" 487 | }, 488 | "language_info": { 489 | "codemirror_mode": { 490 | "name": "ipython", 491 | "version": 3 492 | }, 493 | "file_extension": ".py", 494 | "mimetype": "text/x-python", 495 | "name": "python", 496 | "nbconvert_exporter": "python", 497 | "pygments_lexer": "ipython3", 498 | "version": "3.6.1" 499 | } 500 | }, 501 | "nbformat": 4, 502 | "nbformat_minor": 2 503 | } 504 | -------------------------------------------------------------------------------- /Chapter09/5 decision trees.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "library(rpart)\n", 10 | "library(caret)\n", 11 | "set.seed(3277)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 16, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "33" 23 | ], 24 | "text/latex": [ 25 | "33" 26 | ], 27 | "text/markdown": [ 28 | "33" 29 | ], 30 | "text/plain": [ 31 | "[1] 33" 32 | ] 33 | }, 34 | "metadata": {}, 35 | "output_type": "display_data" 36 | }, 37 | { 38 | "data": { 39 | "text/html": [ 40 | "9" 41 | ], 42 | "text/latex": [ 43 | "9" 44 | ], 45 | "text/markdown": [ 46 | "9" 47 | ], 48 | "text/plain": [ 49 | "[1] 9" 50 | ] 51 | }, 52 | "metadata": {}, 53 | "output_type": "display_data" 54 | } 55 | ], 56 | "source": [ 57 | "carmpg <- read.csv(\"car-mpg.csv\")\n", 58 | "indices <- createDataPartition(carmpg$mpg, p=0.75, list=FALSE)\n", 59 | "training <- carmpg[indices,]\n", 60 | "testing <- carmpg[-indices,]\n", 61 | "nrow(training)\n", 62 | "nrow(testing)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 17, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "n= 33 \n", 
74 | "\n", 75 | "node), split, n, deviance, yval\n", 76 | " * denotes terminal node\n", 77 | "\n", 78 | "1) root 33 26.727270 1.909091 \n", 79 | " 2) weight>=3121.5 10 0.000000 1.000000 *\n", 80 | " 3) weight< 3121.5 23 14.869570 2.304348 \n", 81 | " 6) modelyear>=78.5 9 4.888889 1.888889 *\n", 82 | " 7) modelyear< 78.5 14 7.428571 2.571429 *" 83 | ] 84 | }, 85 | "metadata": {}, 86 | "output_type": "display_data" 87 | } 88 | ], 89 | "source": [ 90 | "fit <- rpart(mpg ~ cylinders + displacement + horsepower + weight + acceleration + modelyear + maker,\n", 91 | " method=\"anova\", data=training)\n", 92 | "fit" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 18, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0gAAANICAMAAADKOT/pAAAABlBMVEUAAAD///+l2Z/dAAAA\nCXBIWXMAABJ0AAASdAHeZh94AAARLklEQVR4nO3djVYb2RWE0eb9Xzor1xI04FkTck+Jlmrv\ncbBQbA108dFI/MzxBmw7fvsFgFcgJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBggJBgg\nJBggJBggpIzj84Uvh/k4Pq7+c/l+DU/KfGHvxRyfr/tTz/3y8WaK52a9Kcf7r49Ojs9P1oXj\nlNbx/lffTPHcrDfleP/n77/ezgf7OF+hoxdgvinvIR3H30P6dkY6PzHEk7PflNMZ6e2fT0r3\n//OjIR29BAOO+XzX6HThfr/p9EdPd5TWOerjTMVzsl7M4egWMXXOt9PQL70cPIBxH0ZIr8y4\nDyOkV2bchxHSKzPuwwjplRn3YYT0yoz7MEJ6ZcZ9GCG9st8c9+C1/eLb1sP9aki/+O/+BV1v\nWG9l+wrpYYT0yoT0MEJ6ZUJ6GCG9MiGF3b+bfN35Pqp+0EnHa3kjpKzvPy6o5wedVLySd0KK\nOn3DrJBempDChNRBSGHn+0i3hFo+V1nxSt4JKcwZqYOQwj7OQm9vHrV7XUIipWpfIZFSta+Q\nSKnaV0ikVO0rJFKq9hXSA3U9aNe1r5Aep+zTSCWv5Y2Qwo6//NdcOl7zktfyRkhhn36U/p+a\nfGj3goQUtkJyRnp5Qgo7nZHcR3phQgo7f2jnUbvXJSRSqvYVEilV+wqJlKp9hURK1b5CIqVq\nXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTt\nKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9\nhURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWv\nkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYV\nEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5C\nIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldI\npFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJ\nlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGR\nUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRS\nqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK\n1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip\n2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV\n+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVq\nXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTt\nKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9\nhURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWv\nkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYV\nEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5C\nIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldI\npFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJ\nlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGR\nUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b
5CIqVqXyGRUrWvkEip2ldIpFTtKyRS\nqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK\n1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip\n2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV\n+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVq\nXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTt\nKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9\nhURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWv\nkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYV\nEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5C\nIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldI\npFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJ\nlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGR\nUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRS\nqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK\n1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip\n2ldIpFTtKyRSqvYVEilV+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyRSqvYVEilV\n+wqJlKp9hURK1b5CIqVqXyGRUrWvkEip2ldIpFTtKyS2Hd8u/OW523X/tX77eD75oj2MkNj2\nbyF9u/p4v/Jl3gaExA8dx/00cn963C8f90RuVx23i7eT0O3Pn0J6mROSkPip419+fb/u7bz1\nKb7zqenZCYkf+ns/f84+95Dud4TuZ5/TGen8v/vtvQAh8UP/dC56O4X07Q+e//b5Q7u3V3kz\nEBI/9BHH5/tIX+77nM9In//2x985PXz37ITErOMvlwoIiVHHXy++PiGRUrWvkEip2ldIpFTt\nKyRSqvYVEilV+wqJlKp9hURK1b5Cel4He0bHmLwxHsp2e4TEYrs9QmKx3Z6XCckbwp4LHr/j\n9LMYTs9e80u8XyWkax7dJ3K94/fl+4yOT5cu50VCuujRfSLXPH6n73y9f8/RRV/UFwnpokf3\niVzy+N1fqI/v87vsd8EKieWKx+/TtyPdQhr/nM0QIbFc8Pgdp9/P56ILvqhC4uZ6x+927nn/\nWQwfD+L96ov1D14mJPbYbo+QWGy3R0gsttsjJBbb7RESi+32vFJInx7a4Wcuf9i+PHR3NS8U\n0qcfc8sPXf2oXfwLG548pOP0XyZ4/4Hr1zzQV3fJo3be9+3Sn4999pC+fBmJkP5vlzxq3/a9\n7pf5v0BI7++xhLThkkft077nrxi6nhcI6etz1zzQV3fJo3be9/StSVf0YiFd91Gdq7vkYTvt\ne78nfNV9nzskxthuj5BYbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BY\nbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy22yMkFtvtERKL\n7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx\n3R4hsdhuj5BYbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy2\n2yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy22yMkFtvtERKL7fYIicV2\ne4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhu\nj5BYbLdHSCy22yMkFtvtERKL7fYIicV2e4TEYrs9QmKx3R4hsdhuj5BYbLdHSCy22yMkFtvt\nERKL7fYIicV2e4TEYrs9QoKrERIMEBIMEBIMENKTuQ12HMeXp/wP3g/V8d9Lx3F7fuD4meC5\n3N4QVj+fnvI/+DhUx+mat4l3RBZ4KsebkDbd0vn2HmniZnke3z+0E9JPnPv5OJpTt8vT+H5G\n+nj3yr86Pl8UUi0f2u043zU6vtxTmrllnsTHWejNo3Y/dXuY7uP43a8fuOn9mwCEBAOEBAOE\nBAOEBAOE9Nw8Yrfl9skEj9q18zmkLfcvWRVSpT+fC/nz9ctvJvyZ08E7fW3IwO3u3wQP9uVT\n8ib8ic/ZzH2pohWez5/3pcf7+9bffnmeyvngvX+l4sAxtMLzcUba8O2MdPpt83Z5Mue3BSek\nHxISXJiQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQ\nYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYICQYMB/AFH5uhCXKiy1\nAAAAAElFTkSuQmCC", 103 | "text/plain": [ 104 | "plot without title" 105 | ] 106 | }, 107 | "metadata": {}, 108 | "output_type": "display_data" 109 | } 110 | ], 111 | "source": [ 112 | 
"plot(fit)\n", 113 | "text(fit, use.n=TRUE, all=TRUE, cex=.5)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 19, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "text/html": [ 124 | "
\n", 125 | "\t
1
\n", 126 | "\t\t
1
\n", 127 | "\t
3
\n", 128 | "\t\t
1
\n", 129 | "\t
7
\n", 130 | "\t\t
1
\n", 131 | "\t
8
\n", 132 | "\t\t
1
\n", 133 | "\t
24
\n", 134 | "\t\t
1.88888888888889
\n", 135 | "\t
28
\n", 136 | "\t\t
2.57142857142857
\n", 137 | "\t
32
\n", 138 | "\t\t
2.57142857142857
\n", 139 | "\t
39
\n", 140 | "\t\t
1.88888888888889
\n", 141 | "\t
40
\n", 142 | "\t\t
1.88888888888889
\n", 143 | "
\n" 144 | ], 145 | "text/latex": [ 146 | "\\begin{description*}\n", 147 | "\\item[1] 1\n", 148 | "\\item[3] 1\n", 149 | "\\item[7] 1\n", 150 | "\\item[8] 1\n", 151 | "\\item[24] 1.88888888888889\n", 152 | "\\item[28] 2.57142857142857\n", 153 | "\\item[32] 2.57142857142857\n", 154 | "\\item[39] 1.88888888888889\n", 155 | "\\item[40] 1.88888888888889\n", 156 | "\\end{description*}\n" 157 | ], 158 | "text/markdown": [ 159 | "1\n", 160 | ": 13\n", 161 | ": 17\n", 162 | ": 18\n", 163 | ": 124\n", 164 | ": 1.8888888888888928\n", 165 | ": 2.5714285714285732\n", 166 | ": 2.5714285714285739\n", 167 | ": 1.8888888888888940\n", 168 | ": 1.88888888888889\n", 169 | "\n" 170 | ], 171 | "text/plain": [ 172 | " 1 3 7 8 24 28 32 39 \n", 173 | "1.000000 1.000000 1.000000 1.000000 1.888889 2.571429 2.571429 1.888889 \n", 174 | " 40 \n", 175 | "1.888889 " 176 | ] 177 | }, 178 | "metadata": {}, 179 | "output_type": "display_data" 180 | }, 181 | { 182 | "data": { 183 | "text/html": [ 184 | "\n", 185 | "\n", 186 | "\n", 187 | "\t\n", 188 | "\t\n", 189 | "\t\n", 190 | "\t\n", 191 | "\t\n", 192 | "\t\n", 193 | "\t\n", 194 | "\t\n", 195 | "\t\n", 196 | "\n", 197 | "
    mpg  cylinders  displacement  horsepower  weight  acceleration  modelyear  maker
1   Bad  8  350  150  4699  14.5  74  America
3   Bad  8  400  175  4385  12.0  72  America
7   Bad  6  250  105  3897  18.5  75  America
8   Bad  6  163  133  3410  15.8  78  Asia
24  OK   6  146  120  2930  13.8  81  Europe
28  OK   4  97   60   1834  19.0  71  Asia
32  OK   4  98   83   2219  16.5  74  Asia
39  Good 4  135  84   2370  13.0  82  America
40  Good 4  105  63   2125  14.7  82  America
\n" 198 | ], 199 | "text/latex": [ 200 | "\\begin{tabular}{r|llllllll}\n", 201 | " & mpg & cylinders & displacement & horsepower & weight & acceleration & modelyear & maker\\\\\n", 202 | "\\hline\n", 203 | "\t1 & Bad & 8 & 350 & 150 & 4699 & 14.5 & 74 & America\\\\\n", 204 | "\t3 & Bad & 8 & 400 & 175 & 4385 & 12.0 & 72 & America\\\\\n", 205 | "\t7 & Bad & 6 & 250 & 105 & 3897 & 18.5 & 75 & America\\\\\n", 206 | "\t8 & Bad & 6 & 163 & 133 & 3410 & 15.8 & 78 & Asia \\\\\n", 207 | "\t24 & OK & 6 & 146 & 120 & 2930 & 13.8 & 81 & Europe \\\\\n", 208 | "\t28 & OK & 4 & 97 & 60 & 1834 & 19.0 & 71 & Asia \\\\\n", 209 | "\t32 & OK & 4 & 98 & 83 & 2219 & 16.5 & 74 & Asia \\\\\n", 210 | "\t39 & Good & 4 & 135 & 84 & 2370 & 13.0 & 82 & America\\\\\n", 211 | "\t40 & Good & 4 & 105 & 63 & 2125 & 14.7 & 82 & America\\\\\n", 212 | "\\end{tabular}\n" 213 | ], 214 | "text/markdown": [ 215 | "\n", 216 | "| | mpg | cylinders | displacement | horsepower | weight | acceleration | modelyear | maker | \n", 217 | "|---|---|---|---|---|---|---|---|---|\n", 218 | "| 1 | Bad | 8 | 350 | 150 | 4699 | 14.5 | 74 | America | \n", 219 | "| 3 | Bad | 8 | 400 | 175 | 4385 | 12.0 | 72 | America | \n", 220 | "| 7 | Bad | 6 | 250 | 105 | 3897 | 18.5 | 75 | America | \n", 221 | "| 8 | Bad | 6 | 163 | 133 | 3410 | 15.8 | 78 | Asia | \n", 222 | "| 24 | OK | 6 | 146 | 120 | 2930 | 13.8 | 81 | Europe | \n", 223 | "| 28 | OK | 4 | 97 | 60 | 1834 | 19.0 | 71 | Asia | \n", 224 | "| 32 | OK | 4 | 98 | 83 | 2219 | 16.5 | 74 | Asia | \n", 225 | "| 39 | Good | 4 | 135 | 84 | 2370 | 13.0 | 82 | America | \n", 226 | "| 40 | Good | 4 | 105 | 63 | 2125 | 14.7 | 82 | America | \n", 227 | "\n", 228 | "\n" 229 | ], 230 | "text/plain": [ 231 | " mpg cylinders displacement horsepower weight acceleration modelyear maker \n", 232 | "1 Bad 8 350 150 4699 14.5 74 America\n", 233 | "3 Bad 8 400 175 4385 12.0 72 America\n", 234 | "7 Bad 6 250 105 3897 18.5 75 America\n", 235 | "8 Bad 6 163 133 3410 15.8 78 Asia \n", 236 | "24 OK 6 146 120 2930 13.8 81 Europe \n", 237 | "28 OK 4 97 60 1834 19.0 71 Asia \n", 238 | "32 OK 4 98 83 2219 16.5 74 Asia \n", 239 | "39 Good 4 135 84 2370 13.0 82 America\n", 240 | "40 Good 4 105 63 2125 14.7 82 America" 241 | ] 242 | }, 243 | "metadata": {}, 244 | "output_type": "display_data" 245 | } 246 | ], 247 | "source": [ 248 | "predicted <- predict(fit, newdata=testing)\n", 249 | "predicted\n", 250 | "testing" 251 | ] 252 | } 253 | ], 254 | "metadata": { 255 | "kernelspec": { 256 | "display_name": "R", 257 | "language": "R", 258 | "name": "ir" 259 | }, 260 | "language_info": { 261 | "codemirror_mode": "r", 262 | "file_extension": ".r", 263 | "mimetype": "text/x-r-source", 264 | "name": "R", 265 | "pygments_lexer": "r", 266 | "version": "3.4.1" 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 2 271 | } 272 | -------------------------------------------------------------------------------- /Chapter09/6 decision trees py.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from os import system\n", 14 | "import graphviz #pip install graphviz \n", 15 | "from sklearn.cross_validation import train_test_split\n", 16 | "from sklearn.tree import DecisionTreeClassifier\n", 17 | "from sklearn.metrics import accuracy_score\n", 18 | "from sklearn import tree" 19 | ] 
20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 9, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
   mpg  cylinders  displacement  horsepower  weight  acceleration  modelyear  maker
0  Bad  8  350  150  4699  14.5  74  America
1  Bad  8  400  170  4746  12.0  71  America
2  Bad  8  400  175  4385  12.0  72  America
3  Bad  6  250  72   3158  19.5  75  America
4  Bad  8  304  150  3892  12.5  72  America
\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " mpg cylinders displacement horsepower weight acceleration modelyear \\\n", 106 | "0 Bad 8 350 150 4699 14.5 74 \n", 107 | "1 Bad 8 400 170 4746 12.0 71 \n", 108 | "2 Bad 8 400 175 4385 12.0 72 \n", 109 | "3 Bad 6 250 72 3158 19.5 75 \n", 110 | "4 Bad 8 304 150 3892 12.5 72 \n", 111 | "\n", 112 | " maker \n", 113 | "0 America \n", 114 | "1 America \n", 115 | "2 America \n", 116 | "3 America \n", 117 | "4 America " 118 | ] 119 | }, 120 | "execution_count": 9, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "carmpg = pd.read_csv(\"car-mpg.csv\")\n", 127 | "carmpg.head(5)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 10, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "columns = carmpg.columns\n", 139 | "mask = np.ones(columns.shape, dtype=bool)\n", 140 | "i = 0 #The specified column that you don't want to show\n", 141 | "mask[i] = 0\n", 142 | "mask[7] = 0 #maker is a string\n", 143 | "X = carmpg[columns[mask]]\n", 144 | "Y = carmpg[\"mpg\"]" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 11, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "X_train, X_test, y_train, y_test = train_test_split( X, Y, test_size = 0.3, random_state = 100)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 12, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "clf_gini = tree.DecisionTreeClassifier(criterion = \"gini\", random_state = 100, max_depth=3, min_samples_leaf=5)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 13, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,\n", 178 | " max_features=None, max_leaf_nodes=None,\n", 179 | " min_impurity_split=1e-07, min_samples_leaf=5,\n", 180 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 181 | " presort=False, random_state=100, splitter='best')" 182 | ] 183 | }, 184 | "execution_count": 13, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "clf_gini.fit(X_train, y_train)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 14, 196 | "metadata": { 197 | "collapsed": true, 198 | "scrolled": true 199 | }, 200 | "outputs": [], 201 | "source": [ 202 | "#dot_data = tree.export_graphviz(clf_gini, out_file=None, \n", 203 | "# filled=True, rounded=True, \n", 204 | "# special_characters=True) \n", 205 | "#graph = graphviz.Source(dot_data) \n", 206 | "#graph" 207 | ] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 3", 213 | "language": "python", 214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.6.0" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 2 231 | } 232 | -------------------------------------------------------------------------------- /Chapter09/8 random forests.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "install.packages(\"randomForest\", repos=\"http://cran.r-project.org\")\n", 10 | "library(randomForest)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": { 17 | "collapsed": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "filename = \"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data\"\n", 22 | "housing <- read.table(filename)\n", 23 | "colnames(housing) <- c(\"CRIM\", \"ZN\", \"INDUS\", \"CHAS\", \"NOX\", \n", 24 | " \"RM\", \"AGE\", \"DIS\", \"RAD\", \"TAX\", \"PRATIO\",\n", 25 | " \"B\", \"LSTAT\", \"MDEV\")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stderr", 35 | "output_type": "stream", 36 | "text": [ 37 | "Warning message:\n", 38 | "\"package 'caret' was built under R version 3.4.2\"Loading required package: lattice\n", 39 | "Warning message:\n", 40 | "\"package 'lattice' was built under R version 3.4.2\"Loading required package: ggplot2\n", 41 | "Warning message:\n", 42 | "\"package 'ggplot2' was built under R version 3.4.2\"\n", 43 | "Attaching package: 'ggplot2'\n", 44 | "\n", 45 | "The following object is masked from 'package:randomForest':\n", 46 | "\n", 47 | " margin\n", 48 | "\n" 49 | ] 50 | }, 51 | { 52 | "data": { 53 | "text/html": [ 54 | "381" 55 | ], 56 | "text/latex": [ 57 | "381" 58 | ], 59 | "text/markdown": [ 60 | "381" 61 | ], 62 | "text/plain": [ 63 | "[1] 381" 64 | ] 65 | }, 66 | "metadata": {}, 67 | "output_type": "display_data" 68 | }, 69 | { 70 | "data": { 71 | "text/html": [ 72 | "125" 73 | ], 74 | "text/latex": [ 75 | "125" 76 | ], 77 | "text/markdown": [ 78 | "125" 79 | ], 80 | "text/plain": [ 81 | "[1] 125" 82 | ] 83 | }, 84 | "metadata": {}, 85 | "output_type": "display_data" 86 | } 87 | ], 88 | "source": [ 89 | "housing <- housing[order(housing$MDEV),]\n", 90 | "#install.packages(\"caret\")\n", 91 | "library(caret)\n", 92 | "set.seed(5557)\n", 93 | "indices <- createDataPartition(housing$MDEV, p=0.75, list=FALSE)\n", 94 | "training <- housing[indices,]\n", 95 | "testing <- housing[-indices,]\n", 96 | "nrow(training)\n", 97 | "nrow(testing)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "data": { 107 | "text/plain": [ 108 | "\n", 109 | "Call:\n", 110 | " randomForest(formula = MDEV ~ CRIM + ZN + INDUS + CHAS + NOX + RM + AGE + DIS + RAD + TAX + PRATIO + B + LSTAT, data = training) \n", 111 | " Type of random forest: regression\n", 112 | " Number of trees: 500\n", 113 | "No. 
of variables tried at each split: 4\n", 114 | "\n", 115 | " Mean of squared residuals: 11.17358\n", 116 | " % Var explained: 87.27" 117 | ] 118 | }, 119 | "metadata": {}, 120 | "output_type": "display_data" 121 | } 122 | ], 123 | "source": [ 124 | "forestFit <- randomForest(MDEV ~ CRIM + ZN + INDUS + CHAS + NOX \n", 125 | " + RM + AGE + DIS + RAD + TAX + PRATIO \n", 126 | " + B + LSTAT, data=training)\n", 127 | "forestFit" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 5, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "forestPredict <- predict(forestFit, newdata=testing)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/html": [ 149 | "1441.30069227016" 150 | ], 151 | "text/latex": [ 152 | "1441.30069227016" 153 | ], 154 | "text/markdown": [ 155 | "1441.30069227016" 156 | ], 157 | "text/plain": [ 158 | "[1] 1441.301" 159 | ] 160 | }, 161 | "metadata": {}, 162 | "output_type": "display_data" 163 | } 164 | ], 165 | "source": [ 166 | "diff <- forestPredict - testing$MDEV\n", 167 | "sum( (diff - mean(diff) )^2 ) #sum of squares" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "R", 174 | "language": "R", 175 | "name": "ir" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": "r", 179 | "file_extension": ".r", 180 | "mimetype": "text/x-r-source", 181 | "name": "R", 182 | "pygments_lexer": "r", 183 | "version": "3.4.1" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /Chapter09/files/car-mpg.csv: -------------------------------------------------------------------------------- 1 | mpg,cylinders,displacement,horsepower,weight,acceleration,modelyear,maker 2 | Bad,8,350,150,4699,14.5,74,America 3 | Bad,8,400,170,4746,12,71,America 4 | Bad,8,400,175,4385,12,72,America 5 | Bad,6,250,72,3158,19.5,75,America 6 | Bad,8,304,150,3892,12.5,72,America 7 | Bad,8,350,145,4440,14,75,America 8 | Bad,6,250,105,3897,18.5,75,America 9 | Bad,6,163,133,3410,15.8,78,Asia 10 | Bad,8,260,110,4060,19,77,America 11 | Bad,8,305,130,3840,15.4,79,America 12 | Bad,6,250,110,3520,16.4,77,America 13 | Bad,6,258,95,3193,17.8,76,America 14 | Bad,4,121,112,2933,14.5,72,Asia 15 | Bad,6,225,105,3613,16.5,74,America 16 | Bad,4,121,112,2868,15.5,73,Asia 17 | Bad,6,225,95,3264,16,75,America 18 | Bad,6,200,85,2990,18.2,79,America 19 | OK,4,121,98,2945,14.5,75,Asia 20 | OK,6,232,90,3085,17.6,76,America 21 | OK,4,120,97,2506,14.5,72,Europe 22 | OK,4,151,85,2855,17.6,78,America 23 | OK,4,116,75,2158,15.5,73,Asia 24 | OK,4,119,97,2545,17,75,Europe 25 | OK,6,146,120,2930,13.8,81,Europe 26 | OK,4,116,81,2220,16.9,76,Asia 27 | OK,4,156,92,2620,14.4,81,America 28 | OK,4,140,88,2870,18.1,80,America 29 | OK,4,97,60,1834,19,71,Asia 30 | OK,4,134,95,2560,14.2,78,Europe 31 | OK,4,97,75,2171,16,75,Europe 32 | OK,4,97,78,1940,14.5,77,Asia 33 | OK,4,98,83,2219,16.5,74,Asia 34 | Good,4,79,70,2074,19.5,71,Asia 35 | Good,4,91,68,1970,17.6,82,Europe 36 | Good,4,89,71,1925,14,79,Asia 37 | Good,4,83,61,2003,19,74,Europe 38 | Good,4,112,88,2395,18,82,America 39 | Good,4,81,60,1760,16.1,81,Europe 40 | Good,4,135,84,2370,13,82,America 41 | Good,4,105,63,2125,14.7,82,America 42 | Bad,4,135,84,2370,13,82,America 43 | Bad,4,105,63,2125,14.7,82,America 44 | 
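A quick way to sanity-check car-mpg.csv before fitting the Chapter09 tree models above is to look at the balance of the mpg label and the spread of the numeric predictors. A minimal sketch, assuming pandas is installed and the working directory is Chapter09/files (the snippet itself is not part of the book's notebooks):

```python
# A sketch only, not part of the original notebooks; assumes pandas is
# installed and the working directory is Chapter09/files.
import pandas as pd

carmpg = pd.read_csv("car-mpg.csv")

# class balance of the mpg label (Bad / OK / Good)
print(carmpg["mpg"].value_counts())

# summary of the numeric predictors used by the tree models
numeric_cols = ["cylinders", "displacement", "horsepower",
                "weight", "acceleration", "modelyear"]
print(carmpg[numeric_cols].describe())
```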
-------------------------------------------------------------------------------- /Chapter09/files/housing-knn-predicted.csv: -------------------------------------------------------------------------------- 1 | 10.5 2 | 9.7 3 | 7 4 | 6.3 5 | 13.1 6 | 16.3 7 | 16.1 8 | 13.3 9 | 13.3 10 | 13.4 11 | 17.2 12 | 20.6 13 | 13.8 14 | 13.3 15 | 13.1 16 | 15.6 17 | 16.2 18 | 13.4 19 | 20.1 20 | 13.3 21 | 21.9 22 | 17.8 23 | 13.6 24 | 13.4 25 | 13.9 26 | 17.2 27 | 20.1 28 | 17.1 29 | 17.1 30 | 14.9 31 | 17.2 32 | 23 33 | 17.5 34 | 7.2 35 | 23.1 36 | 15.6 37 | 16.2 38 | 19.2 39 | 18.7 40 | 17.8 41 | 19.2 42 | 18.9 43 | 21.4 44 | 19.6 45 | 20.1 46 | 23.8 47 | 15.6 48 | 20.5 49 | 19.5 50 | 19.3 51 | 20.6 52 | 24.4 53 | 21.4 54 | 21.4 55 | 23.1 56 | 19.9 57 | 17.6 58 | 18.2 59 | 33 60 | 17.8 61 | 13 62 | 23.5 63 | 18.2 64 | 16.6 65 | 22.8 66 | 21.2 67 | 19.6 68 | 21.2 69 | 33.3 70 | 20.9 71 | 21.4 72 | 20.1 73 | 23.9 74 | 19.3 75 | 15.6 76 | 19.5 77 | 21.4 78 | 29.4 79 | 23.6 80 | 22 81 | 19.1 82 | 22.4 83 | 23.9 84 | 23.7 85 | 24.6 86 | 24 87 | 22 88 | 22.4 89 | 35.2 90 | 33.3 91 | 24.1 92 | 23.4 93 | 23.8 94 | 21 95 | 27.5 96 | 22.3 97 | 23.8 98 | 21.9 99 | 24.8 100 | 28.2 101 | 23 102 | 22.5 103 | 31.1 104 | 37.2 105 | 30.5 106 | 50 107 | 30.5 108 | 21.7 109 | 34.9 110 | 24.5 111 | 29.1 112 | 22.9 113 | 29.1 114 | 35.4 115 | 48.5 116 | 28.7 117 | 37.2 118 | 28.5 119 | 44.8 120 | 44.8 121 | 29.6 122 | 45.4 123 | 24.2 124 | 44.8 125 | 50 126 | -------------------------------------------------------------------------------- /Chapter10/1 profiling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import timeit\n", 10 | "import myfunction\n", 11 | "t = timeit.Timer(myfunction('Hello World')) \n", 12 | "t.timeit() \n", 13 | "3.32132323232\n", 14 | "t.repeat(2, 2000000) " 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import hotshot\n", 24 | "import myfunction\n", 25 | "prof = hotshot.Profile('my_hotshot _stats')\n", 26 | "prof.run('myfunction').close()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import hotshot.stats\n", 36 | "hotshot.stats.load('my_hotshot_stats').strip_dirs().sort_stats('time').print_stats() " 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [] 47 | } 48 | ], 49 | "metadata": { 50 | "kernelspec": { 51 | "display_name": "Python 3", 52 | "language": "python", 53 | "name": "python3" 54 | }, 55 | "language_info": { 56 | "codemirror_mode": { 57 | "name": "ipython", 58 | "version": 3 59 | }, 60 | "file_extension": ".py", 61 | "mimetype": "text/x-python", 62 | "name": "python", 63 | "nbconvert_exporter": "python", 64 | "pygments_lexer": "ipython3", 65 | "version": "3.6.0" 66 | } 67 | }, 68 | "nbformat": 4, 69 | "nbformat_minor": 2 70 | } 71 | -------------------------------------------------------------------------------- /Chapter10/2 R microbenchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "install.packages(\"microbenchmark\", 
repos=\"http://cran.us.r-project.org\")\n", 10 | "library(microbenchmark)\n", 11 | "x <- runif(125) \n", 12 | "microbenchmark( mean(x) )" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "R", 28 | "language": "R", 29 | "name": "ir" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": "r", 33 | "file_extension": ".r", 34 | "mimetype": "text/x-r-source", 35 | "name": "R", 36 | "pygments_lexer": "r", 37 | "version": "3.4.1" 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Jupyter for Data Science 5 | This is the code repository for [Jupyter for Data Science](https://www.packtpub.com/big-data-and-business-intelligence/jupyter-data-science?utm_source=github&utm_medium=repository&utm_campaign=9781785880070), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish. 6 | ## About the Book 7 | If you are familiar with Jupyter notebook and want to learn how to use its capabilities to perform various data science tasks, this is the book for you! From data exploration to visualization, this book will take you through every step of the way in implementing an effective data science pipeline using Jupyter. You will also see how you can utilize Jupyter's features to share your documents and codes with your colleagues. The book also explains how Python 3, R, and Julia can be integrated with Jupyter for various data science tasks. 8 | By the end of this book, you will comfortably leverage the power of Jupyter to perform various tasks in data science successfully. 9 | ## Instructions and Navigation 10 | All of the code is organized into folders. Each folder starts with a number followed by the application name. For example, Chapter02. 11 | 12 | All chapters 1 to 10 have codes and data files. 
13 | Some of the larger CSV files are not provided in this bundle; they can be downloaded with the links provided in the chapter. 14 | The hotshot and timeit examples in Chapter 10 are dummy examples and do not produce any output. 15 | 16 | The code will look like the following: 17 | ``` 18 | plt.xlabel("Actual Price") 19 | plt.ylabel("Predicted Price") 20 | plt.title("Actual Price vs Predicted Price") 21 | ``` 22 | 23 | This book is focused on using Jupyter as the platform for data science. It assumes that you have a good understanding of the data science concepts and are looking to use Jupyter as your presentation platform. 24 | 25 | ## Related Products 26 | * [Statistics for Data Science](https://www.packtpub.com/big-data-and-business-intelligence/statistics-data-science?utm_source=github&utm_medium=repository&utm_campaign=9781788290678) 27 | 28 | * [Mastering Spark for Data Science](https://www.packtpub.com/big-data-and-business-intelligence/mastering-spark-data-science?utm_source=github&utm_medium=repository&utm_campaign=9781785882142) 29 | 30 | * [Learning Jupyter](https://www.packtpub.com/big-data-and-business-intelligence/learning-jupyter?utm_source=github&utm_medium=repository&utm_campaign=9781785884870) 31 | ### Download a free PDF 32 | 33 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
34 |

https://packt.link/free-ebook/9781785880070

--------------------------------------------------------------------------------
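The README notes that the hotshot and timeit examples in Chapter 10 are dummy examples that produce no output. A runnable Python 3 variant might look like the sketch below; it assumes a hypothetical myfunction module exposing a myfunction() callable (as in the original cells), and it substitutes cProfile/pstats for hotshot, which is Python 2-only and not available under the Python 3 kernels used throughout this repository.

```python
# A sketch only, not from the book's code bundle; 'myfunction' is a
# hypothetical module that defines a myfunction() callable.
import timeit
import cProfile
import pstats
from myfunction import myfunction

# timeit needs a callable (or a statement string), not the result of a call
t = timeit.Timer(lambda: myfunction('Hello World'))
print(t.timeit(number=100000))           # total seconds for 100000 calls
print(t.repeat(repeat=2, number=2000000))

# cProfile/pstats replace the Python 2-only hotshot workflow
prof = cProfile.Profile()
prof.runcall(myfunction, 'Hello World')
prof.dump_stats('my_profile_stats')
pstats.Stats('my_profile_stats').strip_dirs().sort_stats('time').print_stats()
```

Wrapping the call in a lambda means timeit measures the function call itself rather than timing a value that has already been computed, which is the main reason the original cell produced nothing useful.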