├── Class Quizzes
│   └── quiz1a1.pdf
├── ClassNotebooks
│   ├── .ipynb_checkpoints
│   │   ├── Assignment 2-checkpoint.ipynb
│   │   ├── Assignment 2-scratch-checkpoint.ipynb
│   │   ├── ReadingAndWritingCSVfiles-checkpoint.ipynb
│   │   ├── Untitled-checkpoint.ipynb
│   │   ├── Week 1-checkpoint.ipynb
│   │   ├── Week 2-checkpoint.ipynb
│   │   └── Week 4-checkpoint.ipynb
│   ├── Assignment 2-scratch.ipynb
│   ├── Assignment 2.ipynb
│   ├── Assignment 3.ipynb
│   ├── Assignment 4.ipynb
│   ├── City_Zhvi_AllHomes.csv
│   ├── Energy Indicators.xls
│   ├── ReadingAndWritingCSVfiles.ipynb
│   ├── Untitled.ipynb
│   ├── Week 1.ipynb
│   ├── Week 2.ipynb
│   ├── Week 3.ipynb
│   ├── Week 4.ipynb
│   ├── cars.csv
│   ├── census.csv
│   ├── gdplev.xls
│   ├── grades.csv
│   ├── log.csv
│   ├── log.txt
│   ├── mpg.csv
│   ├── olympics.csv
│   ├── requirements.txt
│   ├── scimagojr-3.xlsx
│   ├── university_towns.txt
│   └── world_bank.csv
├── MyNotebooks
│   ├── .ipynb_checkpoints
│   │   ├── Basic-Data-Processing-with-Pandas-checkpoint.ipynb
│   │   ├── Python-Dates-and-Times-checkpoint.ipynb
│   │   └── ReadingAndWritingCSVfiles-checkpoint.ipynb
│   ├── Basic-Data-Processing-with-Pandas.ipynb
│   ├── City_Zhvi_AllHomes.csv
│   ├── Energy Indicators.xls
│   ├── Python-Dates-and-Times.ipynb
│   ├── ReadingAndWritingCSVfiles.ipynb
│   ├── cars.csv
│   ├── census.csv
│   ├── gdplev.xls
│   ├── grades.csv
│   ├── log.csv
│   ├── log.txt
│   ├── mpg.csv
│   ├── olympics.csv
│   ├── scimagojr-3.xlsx
│   ├── university_towns.txt
│   └── world_bank.csv
├── README.md
└── course1_downloads
    ├── .ipynb_checkpoints
    │   ├── Assignment 2-checkpoint.ipynb
    │   └── Week 2-checkpoint.ipynb
    ├── Assignment 2.ipynb
    ├── Assignment 3.ipynb
    ├── Assignment 4.ipynb
    ├── Assignment+2.py
    ├── Assignment+3.py
    ├── Assignment+4.py
    ├── City_Zhvi_AllHomes.csv
    ├── Energy Indicators.xls
    ├── Week 1.ipynb
    ├── Week 2.ipynb
    ├── Week 3.ipynb
    ├── Week 4.ipynb
    ├── cars.csv
    ├── census.csv
    ├── gdplev.xls
    ├── grades.csv
    ├── log.csv
    ├── log.txt
    ├── mpg.csv
    ├── olympics.csv
    ├── scimagojr-3.xlsx
    ├── university_towns.txt
    └── world_bank.csv

/Class Quizzes/quiz1a1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/Class Quizzes/quiz1a1.pdf -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/ReadingAndWritingCSVfiles-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "[{'': '1',\n", 14 | " 'class': 'compact',\n", 15 | " 'cty': '18',\n", 16 | " 'cyl': '4',\n", 17 | " 'displ': '1.8',\n", 18 | " 'drv': 'f',\n", 19 | " 'fl': 'p',\n", 20 | " 'hwy': '29',\n", 21 | " 'manufacturer': 'audi',\n", 22 | " 'model': 'a4',\n", 23 | " 'trans': 'auto(l5)',\n", 24 | " 'year': '1999'},\n", 25 | " {'': '2',\n", 26 | " 'class': 'compact',\n", 27 | " 'cty': '21',\n", 28 | " 'cyl': '4',\n", 29 | " 'displ': '1.8',\n", 30 | " 'drv': 'f',\n", 31 | " 'fl': 'p',\n", 32 | " 'hwy': '29',\n", 33 | " 'manufacturer': 'audi',\n", 34 | " 'model': 'a4',\n", 35 | " 'trans': 'manual(m5)',\n", 36 | " 'year': '1999'},\n", 37 | " {'': '3',\n", 38 | " 'class': 'compact',\n", 39 | " 'cty': '20',\n", 40 | " 'cyl': '4',\n", 41 | " 'displ': '2',\n", 42 | " 'drv': 'f',\n", 43 | " 'fl': 'p',\n", 44 | " 'hwy': '31',\n", 45 | " 'manufacturer': 'audi',\n", 46 | " 'model': 'a4',\n", 47 | " 'trans': 'manual(m6)',\n", 48 | " 'year': '2008'}]" 49 | ] 50 | }, 51 | "execution_count": 1, 52 |
"metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "import csv\n", 58 | "\n", 59 | "# set floatpoint precision for printing to 2\n", 60 | "%precision 2\n", 61 | "\n", 62 | "with open('mpg.csv') as csvfile:\n", 63 | " mpg = list(csv.DictReader(csvfile))\n", 64 | "\n", 65 | "# view first 3 elements of list created from csv file\n", 66 | "mpg[:3]" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Here we can see each element of this list is a car in a dict form, the keys for the dict corresponds to a column in the csv file.\n", 74 | "\n", 75 | "Lets see how many dicts - or cars - we have in our dataset" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "234" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "len(mpg)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We have a dictionary for each of the 234 cars in the dataset.\n", 105 | "\n", 106 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys or _row names_" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "dict_keys(['', 'class', 'model', 'fl', 'cty', 'displ', 'drv', 'hwy', 'cyl', 'manufacturer', 'year', 'trans'])" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "mpg[0].keys()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Here we can see we have class, model, fuel type, cty, mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 136 | "\n", 137 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 138 | "\n", 139 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 140 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234 cars).\n", 141 | "\n", 142 | "So that should look something like: \n", 143 | "sum (each value of cty in dataset mpg) / size (mpg)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "16.86" 157 | ] 158 | }, 159 | "execution_count": 12, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 13, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "23.44" 186 | ] 187 | }, 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Note that the average mpg for highway is significantly better than for city. This makes sense, as anyone who drives knows they get much better gas mileage on the highway than they do in the city." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Py35]", 218 | "language": "python", 219 | "name": "Python [Py35]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.2" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 0 236 | } 237 | -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/Week 4-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook.
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "ename": "ImportError", 30 | "evalue": "No module named 'pandas'", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 36 | "\u001b[0;31mImportError\u001b[0m: No module named 'pandas'" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import pandas as pd\n", 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "np.random.binomial(1, 0.5)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "np.random.binomial(1000, 0.5)/1000" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01/100\n", 76 | "np.random.binomial(100000, chance_of_tornado)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "chance_of_tornado = 0.01\n", 88 | "\n", 89 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 90 | " \n", 91 | "two_days_in_a_row = 0\n", 92 | "for j in range(1,len(tornado_events)):\n", 93 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 94 | " two_days_in_a_row+=1\n", 95 | "\n", 96 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.uniform(0, 1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "np.random.normal(0.75)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Formula for standard deviation:\n", 126 | "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "distribution = np.random.normal(0.75,size=1000)\n", 138 | "\n", 139 |
"np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false, 147 | "scrolled": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.std(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "import scipy.stats as stats\n", 163 | "stats.kurtosis(distribution)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "stats.skew(distribution)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 186 | "stats.skew(chi_squared_df2)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 198 | "stats.skew(chi_squared_df5)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "%matplotlib inline\n", 210 | "import matplotlib\n", 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 214 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 215 | "plt.legend(loc='upper right')\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "# Hypothesis Testing" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "df = pd.read_csv('grades.csv')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "df.head()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "len(df)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 267 | "late = df[df['assignment1_submission'] > '2015-12-31']" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": false 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "early.mean()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "collapsed": false 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "late.mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "from scipy import stats\n", 301 | "stats.ttest_ind?" 
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "anaconda-cloud": {}, 340 | "kernelspec": { 341 | "display_name": "Python [Py35]", 342 | "language": "python", 343 | "name": "Python [Py35]" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.5.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } 361 | -------------------------------------------------------------------------------- /ClassNotebooks/Assignment 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "from scipy.stats import ttest_ind" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Assignment 4 - Hypothesis Testing\n", 32 | "This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff.\n", 33 | "\n", 34 | "Definitions:\n", 35 | "* A _quarter_ is a specific three-month period: Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December.\n", 36 | "* A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth.\n", 37 | "* A _recession bottom_ is the quarter within a recession which had the lowest GDP.\n", 38 | "* A _university town_ is a city which has a high percentage of university students compared to the total population of the city.\n", 39 | "\n", 40 | "**Hypothesis**: University towns have their mean housing prices less affected by recessions.
Run a t-test comparing the ratio of the mean price of houses in university towns between the quarter before the recession starts and the recession bottom. (`price_ratio=quarter_before_recession/recession_bottom`)\n", 41 | "\n", 42 | "The following data files are available for this assignment:\n", 43 | "* From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level.\n", 44 | "* From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```.\n", 45 | "* From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward.\n", 46 | "\n", 47 | "Each function below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "# Use this dictionary to map two letter acronyms to state names\n", 59 | "states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def get_list_of_university_towns():\n", 71 | " '''Returns a DataFrame of towns and the states they are in from the \n", 72 | " university_towns.txt list.
The format of the DataFrame should be:\n", 73 | " DataFrame( [ [\"Michigan\",\"Ann Arbor\"], [\"Michigan\", \"Ypsilanti\"] ], \n", 74 | " columns=[\"State\",\"RegionName\"] )'''\n", 75 | " \n", 76 | " return \"ANSWER\"" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def get_recession_start():\n", 88 | " '''Returns the year and quarter of the recession start time as a \n", 89 | " string value in a format such as 2005q3'''\n", 90 | " \n", 91 | " return \"ANSWER\"" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def get_recession_end():\n", 103 | " '''Returns the year and quarter of the recession end time as a \n", 104 | " string value in a format such as 2005q3'''\n", 105 | " \n", 106 | " return \"ANSWER\"" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def get_recession_bottom():\n", 118 | " '''Returns the year and quarter of the recession bottom time as a \n", 119 | " string value in a format such as 2005q3'''\n", 120 | " \n", 121 | " return \"ANSWER\"" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def convert_housing_data_to_quarters():\n", 133 | " '''Converts the housing data to quarters and returns it as mean \n", 134 | " values in a dataframe. This dataframe should be a dataframe with\n", 135 | " columns for 2000q1 through 2016q3, and should have a multi-index\n", 136 | " in the shape of [\"State\",\"RegionName\"].\n", 137 | " \n", 138 | " Note: Quarters are defined in the assignment description, they are\n", 139 | " not arbitrary three month periods.\n", 140 | " \n", 141 | " The resulting dataframe should have 67 columns, and 10,730 rows.\n", 142 | " '''\n", 143 | " \n", 144 | " return \"ANSWER\"" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def run_ttest():\n", 156 | " '''First creates new data showing the decline or growth of housing prices\n", 157 | " between the recession start and the recession bottom. Then runs a t-test\n", 158 | " comparing the university town values to the non-university town values, \n", 159 | " returning whether the null hypothesis (that the two groups are the same)\n", 160 | " can be rejected, together with the p-value from the test. \n", 161 | " \n", 162 | " Return the tuple (different, p, better) where different=True if the t-test is\n", 163 | " significant at a p<0.01 (we reject the null hypothesis), or different=False \n", 164 | " otherwise (we cannot reject the null hypothesis). The variable p should\n", 165 | " be equal to the exact p value returned from scipy.stats.ttest_ind().
The\n", 166 | " value for better should be either \"university town\" or \"non-university town\"\n", 167 | " depending on which has a lower mean price ratio (which is equivilent to a\n", 168 | " reduced market loss).'''\n", 169 | " \n", 170 | " return \"ANSWER\"" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "coursera": { 176 | "course_slug": "python-data-analysis", 177 | "graded_item_id": "Il9Fx", 178 | "launcher_item_id": "TeDW0", 179 | "part_id": "WGlun" 180 | }, 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.5.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 0 201 | } 202 | -------------------------------------------------------------------------------- /ClassNotebooks/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/Energy Indicators.xls -------------------------------------------------------------------------------- /ClassNotebooks/ReadingAndWritingCSVfiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "[{'': '1',\n", 14 | " 'class': 'compact',\n", 15 | " 'cty': '18',\n", 16 | " 'cyl': '4',\n", 17 | " 'displ': '1.8',\n", 18 | " 'drv': 'f',\n", 19 | " 'fl': 'p',\n", 20 | " 'hwy': '29',\n", 21 | " 'manufacturer': 'audi',\n", 22 | " 'model': 'a4',\n", 23 | " 'trans': 'auto(l5)',\n", 24 | " 'year': '1999'},\n", 25 | " {'': '2',\n", 26 | " 'class': 'compact',\n", 27 | " 'cty': '21',\n", 28 | " 'cyl': '4',\n", 29 | " 'displ': '1.8',\n", 30 | " 'drv': 'f',\n", 31 | " 'fl': 'p',\n", 32 | " 'hwy': '29',\n", 33 | " 'manufacturer': 'audi',\n", 34 | " 'model': 'a4',\n", 35 | " 'trans': 'manual(m5)',\n", 36 | " 'year': '1999'},\n", 37 | " {'': '3',\n", 38 | " 'class': 'compact',\n", 39 | " 'cty': '20',\n", 40 | " 'cyl': '4',\n", 41 | " 'displ': '2',\n", 42 | " 'drv': 'f',\n", 43 | " 'fl': 'p',\n", 44 | " 'hwy': '31',\n", 45 | " 'manufacturer': 'audi',\n", 46 | " 'model': 'a4',\n", 47 | " 'trans': 'manual(m6)',\n", 48 | " 'year': '2008'}]" 49 | ] 50 | }, 51 | "execution_count": 1, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "import csv\n", 58 | "\n", 59 | "# set floatpoint precision for printing to 2\n", 60 | "%precision 2\n", 61 | "\n", 62 | "with open('mpg.csv') as csvfile:\n", 63 | " mpg = list(csv.DictReader(csvfile))\n", 64 | "\n", 65 | "# view first 3 elements of list created from csv file\n", 66 | "mpg[:3]" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Here we can see each element of this list is a car in a dict form, the keys for the dict corresponds to a column in the csv file.\n", 74 | "\n", 75 | "Lets see how many dicts - or cars - we have in our dataset" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 
86 | "data": { 87 | "text/plain": [ 88 | "234" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "len(mpg)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We have a dictionary for each of the 234 cars in the dataset.\n", 105 | "\n", 106 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys or _row names_" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "dict_keys(['', 'class', 'model', 'fl', 'cty', 'displ', 'drv', 'hwy', 'cyl', 'manufacturer', 'year', 'trans'])" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "mpg[0].keys()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Here we can see we have class, model, fuel type, cty, mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 136 | "\n", 137 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. This can easily be found on google when looking at the values is not helping.\n", 138 | "\n", 139 | "Now let's try and find the average city MPG across all cars in our CSV file.\n", 140 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know the set to contain 234 cars).\n", 141 | "\n", 142 | "So that should look something like: \n", 143 | "sum (each value of cty in dataset mpg) / size (mpg)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "16.86" 157 | ] 158 | }, 159 | "execution_count": 12, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 13, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "23.44" 186 | ] 187 | }, 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Note that the average mpg for highway is significantly better than for city. This makes sense, as anyone who drives knows they get much better gas milage on the highway that they do in the city." 
202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Py35]", 218 | "language": "python", 219 | "name": "Python [Py35]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.2" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 0 236 | } 237 | -------------------------------------------------------------------------------- /ClassNotebooks/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /ClassNotebooks/Week 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "ename": "ImportError", 30 | "evalue": "No module named 'pandas'", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 36 | "\u001b[0;31mImportError\u001b[0m: No module named 'pandas'" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import pandas as pd\n", 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "np.random.binomial(1, 0.5)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "np.random.binomial(1000, 0.5)/1000" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01/100\n", 76 | "np.random.binomial(100000, chance_of_tornado)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | 
"collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "chance_of_tornado = 0.01\n", 88 | "\n", 89 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 90 | " \n", 91 | "two_days_in_a_row = 0\n", 92 | "for j in range(1,len(tornado_events)-1):\n", 93 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 94 | " two_days_in_a_row+=1\n", 95 | "\n", 96 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.uniform(0, 1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "np.random.normal(0.75)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Formula for standard deviation\n", 126 | "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "distribution = np.random.normal(0.75,size=1000)\n", 138 | "\n", 139 | "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false, 147 | "scrolled": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.std(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "import scipy.stats as stats\n", 163 | "stats.kurtosis(distribution)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "stats.skew(distribution)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 186 | "stats.skew(chi_squared_df2)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 198 | "stats.skew(chi_squared_df5)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "%matplotlib inline\n", 210 | "import matplotlib\n", 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 214 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 215 | "plt.legend(loc='upper right')\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "# Hypothesis Testing" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "df = pd.read_csv('grades.csv')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 
238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "df.head()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "len(df)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 267 | "late = df[df['assignment1_submission'] > '2015-12-31']" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": false 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "early.mean()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "collapsed": false 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "late.mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "from scipy import stats\n", 301 | "stats.ttest_ind?" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "anaconda-cloud": {}, 340 | "kernelspec": { 341 | "display_name": "Python [Py35]", 342 | "language": "python", 343 | "name": "Python [Py35]" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.5.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } 361 | -------------------------------------------------------------------------------- /ClassNotebooks/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE 
COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh 
battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /ClassNotebooks/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/gdplev.xls -------------------------------------------------------------------------------- /ClassNotebooks/log.csv: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /ClassNotebooks/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 
1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /ClassNotebooks/olympics.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia 
(GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar 
(QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /ClassNotebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.9 2 | anaconda-clean==1.0 3 | anaconda-client==1.5.1 4 | anaconda-navigator==1.3.1 5 | appnope==0.1.0 6 | appscript==1.0.1 7 | argcomplete==1.0.0 8 | astroid==1.4.7 9 | astropy==1.2.1 10 | Babel==2.3.4 11 | backports.shutil-get-terminal-size==1.0.0 12 | beautifulsoup4==4.5.1 13 | bitarray==0.8.1 14 | blaze==0.10.1 15 | bokeh==0.12.2 16 | boto==2.42.0 17 | Bottleneck==1.1.0 18 | cffi==1.7.0 19 | chest==0.2.3 20 | click==6.6 21 | cloudpickle==0.2.1 22 | clyent==1.2.2 23 | colorama==0.3.7 24 | configobj==5.0.6 25 | contextlib2==0.5.3 
26 | cryptography==1.5 27 | cycler==0.10.0 28 | Cython==0.24.1 29 | cytoolz==0.8.0 30 | dask==0.11.0 31 | datashape==0.5.2 32 | decorator==4.0.10 33 | dill==0.2.5 34 | docutils==0.12 35 | dynd==0.7.3.dev1 36 | et-xmlfile==1.0.1 37 | fastcache==1.0.2 38 | filelock==2.0.6 39 | Flask==0.11.1 40 | Flask-Cors==2.1.2 41 | gevent==1.1.2 42 | greenlet==0.4.10 43 | h5py==2.6.0 44 | HeapDict==1.0.0 45 | idna==2.1 46 | imagesize==0.7.1 47 | ipykernel==4.5.0 48 | ipython==5.1.0 49 | ipython-genutils==0.1.0 50 | ipywidgets==5.2.2 51 | itsdangerous==0.24 52 | jdcal==1.2 53 | jedi==0.9.0 54 | Jinja2==2.8 55 | jsonschema==2.5.1 56 | jupyter==1.0.0 57 | jupyter-client==4.4.0 58 | jupyter-console==5.0.0 59 | jupyter-core==4.2.0 60 | lazy-object-proxy==1.2.1 61 | llvmlite==0.13.0 62 | locket==0.2.0 63 | lxml==3.6.4 64 | MarkupSafe==0.23 65 | matplotlib==1.5.3 66 | mistune==0.7.3 67 | mpmath==0.19 68 | multipledispatch==0.4.8 69 | nb-anacondacloud==1.2.0 70 | nb-conda==2.0.0 71 | nb-conda-kernels==2.0.0 72 | nbconvert==4.2.0 73 | nbformat==4.1.0 74 | nbpresent==3.0.2 75 | networkx==1.11 76 | nltk==3.2.1 77 | nose==1.3.7 78 | notebook==4.2.3 79 | numba==0.28.1 80 | numexpr==2.6.1 81 | numpy==1.11.1 82 | odo==0.5.0 83 | openpyxl==2.3.2 84 | pandas==0.18.1 85 | partd==0.3.6 86 | path.py==0.0.0 87 | pathlib2==2.1.0 88 | patsy==0.4.1 89 | pep8==1.7.0 90 | pexpect==4.0.1 91 | pickleshare==0.7.4 92 | Pillow==3.3.1 93 | pkginfo==1.3.2 94 | ply==3.9 95 | prompt-toolkit==1.0.3 96 | psutil==4.3.1 97 | ptyprocess==0.5.1 98 | py==1.4.31 99 | pyasn1==0.1.9 100 | pycosat==0.6.1 101 | pycparser==2.14 102 | pycrypto==2.6.1 103 | pycurl==7.43.0 104 | pyflakes==1.3.0 105 | Pygments==2.1.3 106 | pylint==1.5.4 107 | pyOpenSSL==16.0.0 108 | pyparsing==2.1.4 109 | pytest==2.9.2 110 | python-dateutil==2.5.3 111 | pytz==2016.6.1 112 | PyYAML==3.12 113 | pyzmq==15.4.0 114 | QtAwesome==0.3.3 115 | qtconsole==4.2.1 116 | QtPy==1.1.2 117 | redis==2.10.5 118 | requests==2.11.1 119 | rope-py3k==0.9.4.post1 120 | scikit-image==0.12.3 121 | scikit-learn==0.17.1 122 | scipy==0.18.1 123 | simplegeneric==0.8.1 124 | singledispatch==3.4.0.3 125 | six==1.10.0 126 | snowballstemmer==1.2.1 127 | sockjs-tornado==1.0.3 128 | Sphinx==1.4.6 129 | spyder==3.0.0 130 | SQLAlchemy==1.0.13 131 | statsmodels==0.6.1 132 | sympy==1.0 133 | tables==3.2.3.1 134 | terminado==0.6 135 | toolz==0.8.0 136 | tornado==4.4.1 137 | traitlets==4.3.0 138 | unicodecsv==0.14.1 139 | wcwidth==0.1.7 140 | Werkzeug==0.11.11 141 | widgetsnbextension==1.2.6 142 | wrapt==1.10.6 143 | xlrd==1.0.0 144 | XlsxWriter==0.9.3 145 | xlwings==0.10.0 146 | xlwt==1.1.2 147 | -------------------------------------------------------------------------------- /ClassNotebooks/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/scimagojr-3.xlsx -------------------------------------------------------------------------------- /MyNotebooks/.ipynb_checkpoints/Python-Dates-and-Times-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Dates and Times" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Dates and times can be stored in many different ways. 
\n", 15 | "The offset from the **Epoch** is one of the most common nethods for storing dates and time. \n", 16 | "The Epoch is January 1, 1970. The measurements is usually the numer of miliseconds since this date. " 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "In Python you can get the current time since the epoch using the time module." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import time as tm" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "1478139144.00778" 48 | ] 49 | }, 50 | "execution_count": 2, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "tm.time()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "You can create a timestamp using the `fromtimestamp()` function on the datetime object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "import datetime as dt" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "datetime.datetime(2016, 11, 2, 19, 12, 25, 640018)" 88 | ] 89 | }, 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "dtnow = dt.datetime.fromtimestamp(tm.time())\n", 97 | "dtnow" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "The datetime object has attributes to get the representative hour, day, seconds, etc" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "datetime objects allow for simple math using time deltas. \n", 112 | "This allows us to use a date and a time delta to find another date seperated by that delta." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Let's find the date 100 days before today using the `timedelta()` function in the datetime library. 
" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "datetime.timedelta(100)" 133 | ] 134 | }, 135 | "execution_count": 5, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "delta = dt.timedelta(days = 100)\n", 142 | "delta" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "today = dt.date.today()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "datetime.date(2016, 7, 25)" 167 | ] 168 | }, 169 | "execution_count": 7, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "today - delta" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Here we see that 100 days before today - _previously shown at 2016, 11, 2_ - is 2016, 7, 25" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "We can also use conditionals as expected. Are timestamps equal to greater than less than etc, using are known conditional operators. \n", 190 | "For example: Today is certainly greater than 100 days ago we just computed (when measuring time since the Jan 1 1970 epoch)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "True" 204 | ] 205 | }, 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "today > today-delta" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "anaconda-cloud": {}, 218 | "kernelspec": { 219 | "display_name": "Python [Py35]", 220 | "language": "python", 221 | "name": "Python [Py35]" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.2" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 0 238 | } 239 | -------------------------------------------------------------------------------- /MyNotebooks/.ipynb_checkpoints/ReadingAndWritingCSVfiles-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading and Writing CSV files in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Reading a CSV file\n", 15 | "To open and read a CSV file, we will use the CSV package. \n", 16 | "precision set to 2 allows full floating point math while only printing 2 decimal places for legibility. \n", 17 | "Use `open('fileName')` to open **fileName** _from the current directory_. \n", 18 | "Finally, view the first 3 elements of the file we just loaded." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "[{'': '1',\n", 32 | " 'class': 'compact',\n", 33 | " 'cty': '18',\n", 34 | " 'cyl': '4',\n", 35 | " 'displ': '1.8',\n", 36 | " 'drv': 'f',\n", 37 | " 'fl': 'p',\n", 38 | " 'hwy': '29',\n", 39 | " 'manufacturer': 'audi',\n", 40 | " 'model': 'a4',\n", 41 | " 'trans': 'auto(l5)',\n", 42 | " 'year': '1999'},\n", 43 | " {'': '2',\n", 44 | " 'class': 'compact',\n", 45 | " 'cty': '21',\n", 46 | " 'cyl': '4',\n", 47 | " 'displ': '1.8',\n", 48 | " 'drv': 'f',\n", 49 | " 'fl': 'p',\n", 50 | " 'hwy': '29',\n", 51 | " 'manufacturer': 'audi',\n", 52 | " 'model': 'a4',\n", 53 | " 'trans': 'manual(m5)',\n", 54 | " 'year': '1999'},\n", 55 | " {'': '3',\n", 56 | " 'class': 'compact',\n", 57 | " 'cty': '20',\n", 58 | " 'cyl': '4',\n", 59 | " 'displ': '2',\n", 60 | " 'drv': 'f',\n", 61 | " 'fl': 'p',\n", 62 | " 'hwy': '31',\n", 63 | " 'manufacturer': 'audi',\n", 64 | " 'model': 'a4',\n", 65 | " 'trans': 'manual(m6)',\n", 66 | " 'year': '2008'}]" 67 | ] 68 | }, 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "import csv\n", 76 | "\n", 77 | "# set floatpoint precision for printing to 2\n", 78 | "%precision 2\n", 79 | "\n", 80 | "with open('mpg.csv') as csvfile:\n", 81 | " mpg = list(csv.DictReader(csvfile))\n", 82 | "\n", 83 | "# view first 3 elements of list created from csv file\n", 84 | "mpg[:3]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Here we can see each element of this list is a car in dict form; the keys of the dict correspond to the columns in the csv file.\n", 92 | "\n", 93 | "Let's see how many dicts - or cars - we have in our dataset" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 2, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "234" 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "len(mpg)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "We have a dictionary for each of the 234 cars in the dataset.\n", 123 | "\n", 124 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys, or _column names_" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "dict_keys(['', 'manufacturer', 'class', 'year', 'cyl', 'hwy', 'model', 'fl', 'displ', 'cty', 'drv', 'trans'])" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "mpg[0].keys()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Here we can see we have class, model, fuel type, city mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 154 | "\n", 155 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 156 | "\n", 157 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 158 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234).\n", 159 | "\n", 160 | "So that should look something like: \n", 161 | "sum (each value of cty in dataset mpg) / size (mpg)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "16.86" 175 | ] 176 | }, 177 | "execution_count": 4, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 5, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "23.44" 204 | ] 205 | }, 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "**Note:** the average mpg for highway is significantly better than for city. \n", 220 | "This makes sense, as cars get better gas mileage on the highway than in the city." 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "source": [ 229 | "## Grouping\n", 230 | "#### Find the average city mpg grouped by the number of cylinders a car has." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Sets\n", 238 | "**Sets** are unordered collections with no duplicate entries. \n", 239 | "We can see how many unique values - or _Levels_ - for cylinders the cars in this dataset have by defining a `set()` from the entire list of all cyl values. " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'4', '5', '6', '8'}" 253 | ] 254 | }, 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "cylinders = set(d['cyl'] for d in mpg)\n", 262 | "cylinders" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Here we see there are 4 unique levels for the cyl key: 4, 5, 6, and 8" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Now we can iterate across each of the cylinder levels, \n", 277 | "then iterate over all the dictionaries. \n", 278 | "If the level for the current dictionary matches the current cylinder being calculated, \n", 279 | "add the mpg to that cylinder's level summpg variable and increment the count in order to average the total. \n", 280 | "After going through each dictionary in the CSV, we can compute the average MPG and append it to our list." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]" 294 | ] 295 | }, 296 | "execution_count": 7, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "# create an empty list to store calculations\n", 303 | "CtyMpgByCyl = []\n", 304 | "\n", 305 | "# start with one cylinder level and iterate\n", 306 | "for c in cylinders:\n", 307 | " summpg = 0\n", 308 | " cyltypecount = 0\n", 309 | " \n", 310 | " # through each dictionary, checking for an equal level cyl value\n", 311 | " for d in mpg:\n", 312 | " # if a match is found, add cty to the sum and increase the count to compute the average\n", 313 | " if d['cyl'] == c:\n", 314 | " summpg += float(d['cty'])\n", 315 | " cyltypecount += 1\n", 316 | " # after iterating through all the dictionaries, append MPG calculation and go to the next cylinder level\n", 317 | " CtyMpgByCyl.append((c, summpg / cyltypecount))\n", 318 | "\n", 319 | "CtyMpgByCyl.sort(key=lambda x: x[0])\n", 320 | "CtyMpgByCyl" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "After sorting the list of calculations we see that as the number of cylinders increases, the city miles per gallon, `'cty'`, decreases. \n", 328 | "This makes sense, as we would expect a car with more cylinders to be larger and have poorer city fuel mileage." 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Find the average highway MPG for the different vehicle classes" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "First let's look at the different classes of vehicles in the CSV dataset _mpg_" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 8, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}" 356 | ] 357 | }, 358 | "execution_count": 8, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "vehicleclass = set(d['class'] for d in mpg)\n", 365 | "vehicleclass" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "Similarly, for each vehicle class we iterate through all the dictionaries. \n", 373 | "Each match will add highway mpg to the sum total and increase the count.\n", 374 | "After exhausting all the dictionaries for a given vehicle class, we can compute the average and append it to our list. 
" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 9, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "[('pickup', 16.88),\n", 388 | " ('suv', 18.13),\n", 389 | " ('minivan', 22.36),\n", 390 | " ('2seater', 24.80),\n", 391 | " ('midsize', 27.29),\n", 392 | " ('subcompact', 28.14),\n", 393 | " ('compact', 28.30)]" 394 | ] 395 | }, 396 | "execution_count": 9, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "HwyMpgByClass = []\n", 403 | "\n", 404 | "for v in vehicleclass: # iterate over all the vehicle classes\n", 405 | " summpg = 0\n", 406 | " vclasscount = 0\n", 407 | " for d in mpg: # check each dictionary in the mpg dataset\n", 408 | " if d['class'] == v: # to find a match in class\n", 409 | " summpg += float(d['hwy'])\n", 410 | " vclasscount += 1 # increment the count\n", 411 | " HwyMpgByClass.append((v, summpg / vclasscount)) # append the tuple ('vehicle class', 'avg mpg')\n", 412 | " \n", 413 | "HwyMpgByClass.sort(key=lambda x: x[1]) # this time, sort based on MPG - the second element in each tuple\n", 414 | "HwyMpgByClass" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "Here we have found the pickup to have the worst highway MPG while **the compact has the highest highway MPG.**" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Do not despair or completely write off Python as an inefficient iterator of data for summarization. \n", 429 | "The **Pandas** library will bring in many of the tools and tricks us R thoroughbreds have come to rely upon for speedy exploration and summarization of a dataset with a few quick key strokes - well maybe a few extra, but much better than this spiraling mess." 430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "anaconda-cloud": {}, 435 | "kernelspec": { 436 | "display_name": "Python [Py35]", 437 | "language": "python", 438 | "name": "Python [Py35]" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.5.2" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 0 455 | } 456 | -------------------------------------------------------------------------------- /MyNotebooks/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/Energy Indicators.xls -------------------------------------------------------------------------------- /MyNotebooks/Python-Dates-and-Times.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Dates and Times" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Dates and times can be stored in many different ways. \n", 15 | "The offset from the **Epoch** is one of the most common nethods for storing dates and time. \n", 16 | "The Epoch is January 1, 1970. The measurements is usually the numer of miliseconds since this date. 
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "In Python you can get the current time since the epoch using the time module." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import time as tm" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "ename": "NameError", 46 | "evalue": "name 'tm' is not defined", 47 | "output_type": "error", 48 | "traceback": [ 49 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 50 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 51 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 52 | "\u001b[0;31mNameError\u001b[0m: name 'tm' is not defined" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "tm.time()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "You can create a timestamp using the `fromtimestamp()` function on the datetime object" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "import datetime as dt" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "datetime.datetime(2016, 11, 2, 19, 12, 25, 640018)" 89 | ] 90 | }, 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "dtnow = dt.datetime.fromtimestamp(tm.time())\n", 98 | "dtnow" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "The datetime object has attributes to get the representative hour, day, seconds, etc" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "datetime objects allow for simple math using time deltas. \n", 113 | "This allows us to use a date and a time delta to find another date seperated by that delta." 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Let's find the date 100 days before today using the `timedelta()` function in the datetime library. 
" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "datetime.timedelta(100)" 134 | ] 135 | }, 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "delta = dt.timedelta(days = 100)\n", 143 | "delta" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 6, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "today = dt.date.today()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "datetime.date(2016, 7, 25)" 168 | ] 169 | }, 170 | "execution_count": 7, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "today - delta" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Here we see that 100 days before today - _previously shown at 2016, 11, 2_ - is 2016, 7, 25" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "We can also use conditionals as expected. Are timestamps equal to greater than less than etc, using are known conditional operators. \n", 191 | "For example: Today is certainly greater than 100 days ago we just computed (when measuring time since the Jan 1 1970 epoch)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 8, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "True" 205 | ] 206 | }, 207 | "execution_count": 8, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "today > today-delta" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "anaconda-cloud": {}, 219 | "kernelspec": { 220 | "display_name": "Python [conda env:py35]", 221 | "language": "python", 222 | "name": "conda-env-py35-py" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.5.2" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 0 239 | } 240 | -------------------------------------------------------------------------------- /MyNotebooks/ReadingAndWritingCSVfiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading and Writing CSV files in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Reading a CSV file\n", 15 | "To open and read a CSV file, we will use the CSV package. \n", 16 | "precision set to 2 allows full floating point math while only printing 2 decimal places for legibility. \n", 17 | "Use `open('fileName')` to open **fileName** _from the current directory_. \n", 18 | "Finally, view the first 3 elements of the file we just loaded." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "[{'': '1',\n", 32 | " 'class': 'compact',\n", 33 | " 'cty': '18',\n", 34 | " 'cyl': '4',\n", 35 | " 'displ': '1.8',\n", 36 | " 'drv': 'f',\n", 37 | " 'fl': 'p',\n", 38 | " 'hwy': '29',\n", 39 | " 'manufacturer': 'audi',\n", 40 | " 'model': 'a4',\n", 41 | " 'trans': 'auto(l5)',\n", 42 | " 'year': '1999'},\n", 43 | " {'': '2',\n", 44 | " 'class': 'compact',\n", 45 | " 'cty': '21',\n", 46 | " 'cyl': '4',\n", 47 | " 'displ': '1.8',\n", 48 | " 'drv': 'f',\n", 49 | " 'fl': 'p',\n", 50 | " 'hwy': '29',\n", 51 | " 'manufacturer': 'audi',\n", 52 | " 'model': 'a4',\n", 53 | " 'trans': 'manual(m5)',\n", 54 | " 'year': '1999'},\n", 55 | " {'': '3',\n", 56 | " 'class': 'compact',\n", 57 | " 'cty': '20',\n", 58 | " 'cyl': '4',\n", 59 | " 'displ': '2',\n", 60 | " 'drv': 'f',\n", 61 | " 'fl': 'p',\n", 62 | " 'hwy': '31',\n", 63 | " 'manufacturer': 'audi',\n", 64 | " 'model': 'a4',\n", 65 | " 'trans': 'manual(m6)',\n", 66 | " 'year': '2008'}]" 67 | ] 68 | }, 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "import csv\n", 76 | "\n", 77 | "# set floatpoint precision for printing to 2\n", 78 | "%precision 2\n", 79 | "\n", 80 | "with open('mpg.csv') as csvfile:\n", 81 | " mpg = list(csv.DictReader(csvfile))\n", 82 | "\n", 83 | "# view first 3 elements of list created from csv file\n", 84 | "mpg[:3]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Here we can see each element of this list is a car in dict form; the keys of the dict correspond to the columns in the csv file.\n", 92 | "\n", 93 | "Let's see how many dicts - or cars - we have in our dataset" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 2, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "234" 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "len(mpg)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "We have a dictionary for each of the 234 cars in the dataset.\n", 123 | "\n", 124 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys, or _column names_" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "dict_keys(['', 'manufacturer', 'class', 'year', 'cyl', 'hwy', 'model', 'fl', 'displ', 'cty', 'drv', 'trans'])" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "mpg[0].keys()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Here we can see we have class, model, fuel type, city mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 154 | "\n", 155 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 156 | "\n", 157 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 158 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234).\n", 159 | "\n", 160 | "So that should look something like: \n", 161 | "sum (each value of cty in dataset mpg) / size (mpg)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "16.86" 175 | ] 176 | }, 177 | "execution_count": 4, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 5, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "23.44" 204 | ] 205 | }, 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "**Note:** the average mpg for highway is significantly better than for city. \n", 220 | "This makes sense, as cars get better gas mileage on the highway than in the city." 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "source": [ 229 | "## Grouping\n", 230 | "#### Find the average city mpg grouped by the number of cylinders a car has." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Sets\n", 238 | "**Sets** are unordered collections with no duplicate entries. \n", 239 | "We can see how many unique values - or _Levels_ - for cylinders the cars in this dataset have by defining a `set()` from the entire list of all cyl values. " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'4', '5', '6', '8'}" 253 | ] 254 | }, 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "cylinders = set(d['cyl'] for d in mpg)\n", 262 | "cylinders" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Here we see there are 4 unique levels for the cyl key: 4, 5, 6, and 8" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Now we can iterate across each of the cylinder levels, \n", 277 | "then iterate over all the dictionaries. \n", 278 | "If the level for the current dictionary matches the current cylinder being calculated, \n", 279 | "add the mpg to that cylinder's level summpg variable and increment the count in order to average the total. \n", 280 | "After going through each dictionary in the CSV, we can compute the average MPG and append it to our list." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]" 294 | ] 295 | }, 296 | "execution_count": 7, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "# create an empty list to store calculations\n", 303 | "CtyMpgByCyl = []\n", 304 | "\n", 305 | "# start with one cylinder level and iterate\n", 306 | "for c in cylinders:\n", 307 | " summpg = 0\n", 308 | " cyltypecount = 0\n", 309 | " \n", 310 | " # through each dictionary, checking for an equal level cyl value\n", 311 | " for d in mpg:\n", 312 | " # if a match is found, add cty to the sum and increase the count to compute the average\n", 313 | " if d['cyl'] == c:\n", 314 | " summpg += float(d['cty'])\n", 315 | " cyltypecount += 1\n", 316 | " # after iterating through all the dictionaries, append MPG calculation and go to the next cylinder level\n", 317 | " CtyMpgByCyl.append((c, summpg / cyltypecount))\n", 318 | "\n", 319 | "CtyMpgByCyl.sort(key=lambda x: x[0])\n", 320 | "CtyMpgByCyl" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "After sorting the list of calculations we see that as the number of cylinders increases, the city miles per gallon, `'cty'`, decreases. \n", 328 | "This makes sense, as we would expect a car with more cylinders to be larger and have poorer city fuel mileage." 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Find the average highway MPG for the different vehicle classes" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "First let's look at the different classes of vehicles in the CSV dataset _mpg_" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 8, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}" 356 | ] 357 | }, 358 | "execution_count": 8, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "vehicleclass = set(d['class'] for d in mpg)\n", 365 | "vehicleclass" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "Similarly, for each vehicle class we iterate through all the dictionaries. \n", 373 | "Each match will add highway mpg to the sum total and increase the count.\n", 374 | "After exhausting all the dictionaries for a given vehicle class, we can compute the average and append it to our list. 
" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 9, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "[('pickup', 16.88),\n", 388 | " ('suv', 18.13),\n", 389 | " ('minivan', 22.36),\n", 390 | " ('2seater', 24.80),\n", 391 | " ('midsize', 27.29),\n", 392 | " ('subcompact', 28.14),\n", 393 | " ('compact', 28.30)]" 394 | ] 395 | }, 396 | "execution_count": 9, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "HwyMpgByClass = []\n", 403 | "\n", 404 | "for v in vehicleclass: # iterate over all the vehicle classes\n", 405 | " summpg = 0\n", 406 | " vclasscount = 0\n", 407 | " for d in mpg: # check each dictionary in the mpg dataset\n", 408 | " if d['class'] == v: # to find a match in class\n", 409 | " summpg += float(d['hwy'])\n", 410 | " vclasscount += 1 # increment the count\n", 411 | " HwyMpgByClass.append((v, summpg / vclasscount)) # append the tuple ('vehicle class', 'avg mpg')\n", 412 | " \n", 413 | "HwyMpgByClass.sort(key=lambda x: x[1]) # this time, sort based on MPG - the second element in each tuple\n", 414 | "HwyMpgByClass" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "Here we have found the pickup to have the worst highway MPG while **the compact has the highest highway MPG.**" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Do not despair or completely write off Python as an inefficient iterator of data for summarization. \n", 429 | "The **Pandas** library will bring in many of the tools and tricks us R thoroughbreds have come to rely upon for speedy exploration and summarization of a dataset with a few quick key strokes - well maybe a few extra, but much better than this spiraling mess." 
430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "anaconda-cloud": {}, 435 | "kernelspec": { 436 | "display_name": "Python [Py35]", 437 | "language": "python", 438 | "name": "Python [Py35]" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.5.2" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 0 455 | } 456 | -------------------------------------------------------------------------------- /MyNotebooks/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh 
battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /MyNotebooks/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/gdplev.xls -------------------------------------------------------------------------------- /MyNotebooks/log.csv: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 
1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /MyNotebooks/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /MyNotebooks/mpg.csv: -------------------------------------------------------------------------------- 1 | "","manufacturer","model","displ","year","cyl","trans","drv","cty","hwy","fl","class" 2 | "1","audi","a4",1.8,1999,4,"auto(l5)","f",18,29,"p","compact" 3 | "2","audi","a4",1.8,1999,4,"manual(m5)","f",21,29,"p","compact" 4 | "3","audi","a4",2,2008,4,"manual(m6)","f",20,31,"p","compact" 5 | "4","audi","a4",2,2008,4,"auto(av)","f",21,30,"p","compact" 6 | "5","audi","a4",2.8,1999,6,"auto(l5)","f",16,26,"p","compact" 7 | "6","audi","a4",2.8,1999,6,"manual(m5)","f",18,26,"p","compact" 8 | "7","audi","a4",3.1,2008,6,"auto(av)","f",18,27,"p","compact" 9 | "8","audi","a4 quattro",1.8,1999,4,"manual(m5)","4",18,26,"p","compact" 10 | "9","audi","a4 quattro",1.8,1999,4,"auto(l5)","4",16,25,"p","compact" 11 | "10","audi","a4 quattro",2,2008,4,"manual(m6)","4",20,28,"p","compact" 12 | "11","audi","a4 quattro",2,2008,4,"auto(s6)","4",19,27,"p","compact" 13 | "12","audi","a4 quattro",2.8,1999,6,"auto(l5)","4",15,25,"p","compact" 14 | "13","audi","a4 quattro",2.8,1999,6,"manual(m5)","4",17,25,"p","compact" 15 | "14","audi","a4 quattro",3.1,2008,6,"auto(s6)","4",17,25,"p","compact" 16 | "15","audi","a4 quattro",3.1,2008,6,"manual(m6)","4",15,25,"p","compact" 17 | "16","audi","a6 quattro",2.8,1999,6,"auto(l5)","4",15,24,"p","midsize" 18 | "17","audi","a6 
quattro",3.1,2008,6,"auto(s6)","4",17,25,"p","midsize" 19 | "18","audi","a6 quattro",4.2,2008,8,"auto(s6)","4",16,23,"p","midsize" 20 | "19","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",14,20,"r","suv" 21 | "20","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",11,15,"e","suv" 22 | "21","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",14,20,"r","suv" 23 | "22","chevrolet","c1500 suburban 2wd",5.7,1999,8,"auto(l4)","r",13,17,"r","suv" 24 | "23","chevrolet","c1500 suburban 2wd",6,2008,8,"auto(l4)","r",12,17,"r","suv" 25 | "24","chevrolet","corvette",5.7,1999,8,"manual(m6)","r",16,26,"p","2seater" 26 | "25","chevrolet","corvette",5.7,1999,8,"auto(l4)","r",15,23,"p","2seater" 27 | "26","chevrolet","corvette",6.2,2008,8,"manual(m6)","r",16,26,"p","2seater" 28 | "27","chevrolet","corvette",6.2,2008,8,"auto(s6)","r",15,25,"p","2seater" 29 | "28","chevrolet","corvette",7,2008,8,"manual(m6)","r",15,24,"p","2seater" 30 | "29","chevrolet","k1500 tahoe 4wd",5.3,2008,8,"auto(l4)","4",14,19,"r","suv" 31 | "30","chevrolet","k1500 tahoe 4wd",5.3,2008,8,"auto(l4)","4",11,14,"e","suv" 32 | "31","chevrolet","k1500 tahoe 4wd",5.7,1999,8,"auto(l4)","4",11,15,"r","suv" 33 | "32","chevrolet","k1500 tahoe 4wd",6.5,1999,8,"auto(l4)","4",14,17,"d","suv" 34 | "33","chevrolet","malibu",2.4,1999,4,"auto(l4)","f",19,27,"r","midsize" 35 | "34","chevrolet","malibu",2.4,2008,4,"auto(l4)","f",22,30,"r","midsize" 36 | "35","chevrolet","malibu",3.1,1999,6,"auto(l4)","f",18,26,"r","midsize" 37 | "36","chevrolet","malibu",3.5,2008,6,"auto(l4)","f",18,29,"r","midsize" 38 | "37","chevrolet","malibu",3.6,2008,6,"auto(s6)","f",17,26,"r","midsize" 39 | "38","dodge","caravan 2wd",2.4,1999,4,"auto(l3)","f",18,24,"r","minivan" 40 | "39","dodge","caravan 2wd",3,1999,6,"auto(l4)","f",17,24,"r","minivan" 41 | "40","dodge","caravan 2wd",3.3,1999,6,"auto(l4)","f",16,22,"r","minivan" 42 | "41","dodge","caravan 2wd",3.3,1999,6,"auto(l4)","f",16,22,"r","minivan" 43 | "42","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",17,24,"r","minivan" 44 | "43","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",17,24,"r","minivan" 45 | "44","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",11,17,"e","minivan" 46 | "45","dodge","caravan 2wd",3.8,1999,6,"auto(l4)","f",15,22,"r","minivan" 47 | "46","dodge","caravan 2wd",3.8,1999,6,"auto(l4)","f",15,21,"r","minivan" 48 | "47","dodge","caravan 2wd",3.8,2008,6,"auto(l6)","f",16,23,"r","minivan" 49 | "48","dodge","caravan 2wd",4,2008,6,"auto(l6)","f",16,23,"r","minivan" 50 | "49","dodge","dakota pickup 4wd",3.7,2008,6,"manual(m6)","4",15,19,"r","pickup" 51 | "50","dodge","dakota pickup 4wd",3.7,2008,6,"auto(l4)","4",14,18,"r","pickup" 52 | "51","dodge","dakota pickup 4wd",3.9,1999,6,"auto(l4)","4",13,17,"r","pickup" 53 | "52","dodge","dakota pickup 4wd",3.9,1999,6,"manual(m5)","4",14,17,"r","pickup" 54 | "53","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","pickup" 55 | "54","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","pickup" 56 | "55","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","pickup" 57 | "56","dodge","dakota pickup 4wd",5.2,1999,8,"manual(m5)","4",11,17,"r","pickup" 58 | "57","dodge","dakota pickup 4wd",5.2,1999,8,"auto(l4)","4",11,15,"r","pickup" 59 | "58","dodge","durango 4wd",3.9,1999,6,"auto(l4)","4",13,17,"r","suv" 60 | "59","dodge","durango 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","suv" 61 | "60","dodge","durango 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","suv" 62 | "61","dodge","durango 
4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","suv" 63 | "62","dodge","durango 4wd",5.2,1999,8,"auto(l4)","4",11,16,"r","suv" 64 | "63","dodge","durango 4wd",5.7,2008,8,"auto(l5)","4",13,18,"r","suv" 65 | "64","dodge","durango 4wd",5.9,1999,8,"auto(l4)","4",11,15,"r","suv" 66 | "65","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",12,16,"r","pickup" 67 | "66","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","pickup" 68 | "67","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 69 | "68","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 70 | "69","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",12,16,"r","pickup" 71 | "70","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",9,12,"e","pickup" 72 | "71","dodge","ram 1500 pickup 4wd",5.2,1999,8,"auto(l4)","4",11,15,"r","pickup" 73 | "72","dodge","ram 1500 pickup 4wd",5.2,1999,8,"manual(m5)","4",11,16,"r","pickup" 74 | "73","dodge","ram 1500 pickup 4wd",5.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 75 | "74","dodge","ram 1500 pickup 4wd",5.9,1999,8,"auto(l4)","4",11,15,"r","pickup" 76 | "75","ford","expedition 2wd",4.6,1999,8,"auto(l4)","r",11,17,"r","suv" 77 | "76","ford","expedition 2wd",5.4,1999,8,"auto(l4)","r",11,17,"r","suv" 78 | "77","ford","expedition 2wd",5.4,2008,8,"auto(l6)","r",12,18,"r","suv" 79 | "78","ford","explorer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 80 | "79","ford","explorer 4wd",4,1999,6,"manual(m5)","4",15,19,"r","suv" 81 | "80","ford","explorer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 82 | "81","ford","explorer 4wd",4,2008,6,"auto(l5)","4",13,19,"r","suv" 83 | "82","ford","explorer 4wd",4.6,2008,8,"auto(l6)","4",13,19,"r","suv" 84 | "83","ford","explorer 4wd",5,1999,8,"auto(l4)","4",13,17,"r","suv" 85 | "84","ford","f150 pickup 4wd",4.2,1999,6,"auto(l4)","4",14,17,"r","pickup" 86 | "85","ford","f150 pickup 4wd",4.2,1999,6,"manual(m5)","4",14,17,"r","pickup" 87 | "86","ford","f150 pickup 4wd",4.6,1999,8,"manual(m5)","4",13,16,"r","pickup" 88 | "87","ford","f150 pickup 4wd",4.6,1999,8,"auto(l4)","4",13,16,"r","pickup" 89 | "88","ford","f150 pickup 4wd",4.6,2008,8,"auto(l4)","4",13,17,"r","pickup" 90 | "89","ford","f150 pickup 4wd",5.4,1999,8,"auto(l4)","4",11,15,"r","pickup" 91 | "90","ford","f150 pickup 4wd",5.4,2008,8,"auto(l4)","4",13,17,"r","pickup" 92 | "91","ford","mustang",3.8,1999,6,"manual(m5)","r",18,26,"r","subcompact" 93 | "92","ford","mustang",3.8,1999,6,"auto(l4)","r",18,25,"r","subcompact" 94 | "93","ford","mustang",4,2008,6,"manual(m5)","r",17,26,"r","subcompact" 95 | "94","ford","mustang",4,2008,6,"auto(l5)","r",16,24,"r","subcompact" 96 | "95","ford","mustang",4.6,1999,8,"auto(l4)","r",15,21,"r","subcompact" 97 | "96","ford","mustang",4.6,1999,8,"manual(m5)","r",15,22,"r","subcompact" 98 | "97","ford","mustang",4.6,2008,8,"manual(m5)","r",15,23,"r","subcompact" 99 | "98","ford","mustang",4.6,2008,8,"auto(l5)","r",15,22,"r","subcompact" 100 | "99","ford","mustang",5.4,2008,8,"manual(m6)","r",14,20,"p","subcompact" 101 | "100","honda","civic",1.6,1999,4,"manual(m5)","f",28,33,"r","subcompact" 102 | "101","honda","civic",1.6,1999,4,"auto(l4)","f",24,32,"r","subcompact" 103 | "102","honda","civic",1.6,1999,4,"manual(m5)","f",25,32,"r","subcompact" 104 | "103","honda","civic",1.6,1999,4,"manual(m5)","f",23,29,"p","subcompact" 105 | "104","honda","civic",1.6,1999,4,"auto(l4)","f",24,32,"r","subcompact" 106 | "105","honda","civic",1.8,2008,4,"manual(m5)","f",26,34,"r","subcompact" 107 | 
"106","honda","civic",1.8,2008,4,"auto(l5)","f",25,36,"r","subcompact" 108 | "107","honda","civic",1.8,2008,4,"auto(l5)","f",24,36,"c","subcompact" 109 | "108","honda","civic",2,2008,4,"manual(m6)","f",21,29,"p","subcompact" 110 | "109","hyundai","sonata",2.4,1999,4,"auto(l4)","f",18,26,"r","midsize" 111 | "110","hyundai","sonata",2.4,1999,4,"manual(m5)","f",18,27,"r","midsize" 112 | "111","hyundai","sonata",2.4,2008,4,"auto(l4)","f",21,30,"r","midsize" 113 | "112","hyundai","sonata",2.4,2008,4,"manual(m5)","f",21,31,"r","midsize" 114 | "113","hyundai","sonata",2.5,1999,6,"auto(l4)","f",18,26,"r","midsize" 115 | "114","hyundai","sonata",2.5,1999,6,"manual(m5)","f",18,26,"r","midsize" 116 | "115","hyundai","sonata",3.3,2008,6,"auto(l5)","f",19,28,"r","midsize" 117 | "116","hyundai","tiburon",2,1999,4,"auto(l4)","f",19,26,"r","subcompact" 118 | "117","hyundai","tiburon",2,1999,4,"manual(m5)","f",19,29,"r","subcompact" 119 | "118","hyundai","tiburon",2,2008,4,"manual(m5)","f",20,28,"r","subcompact" 120 | "119","hyundai","tiburon",2,2008,4,"auto(l4)","f",20,27,"r","subcompact" 121 | "120","hyundai","tiburon",2.7,2008,6,"auto(l4)","f",17,24,"r","subcompact" 122 | "121","hyundai","tiburon",2.7,2008,6,"manual(m6)","f",16,24,"r","subcompact" 123 | "122","hyundai","tiburon",2.7,2008,6,"manual(m5)","f",17,24,"r","subcompact" 124 | "123","jeep","grand cherokee 4wd",3,2008,6,"auto(l5)","4",17,22,"d","suv" 125 | "124","jeep","grand cherokee 4wd",3.7,2008,6,"auto(l5)","4",15,19,"r","suv" 126 | "125","jeep","grand cherokee 4wd",4,1999,6,"auto(l4)","4",15,20,"r","suv" 127 | "126","jeep","grand cherokee 4wd",4.7,1999,8,"auto(l4)","4",14,17,"r","suv" 128 | "127","jeep","grand cherokee 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","suv" 129 | "128","jeep","grand cherokee 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","suv" 130 | "129","jeep","grand cherokee 4wd",5.7,2008,8,"auto(l5)","4",13,18,"r","suv" 131 | "130","jeep","grand cherokee 4wd",6.1,2008,8,"auto(l5)","4",11,14,"p","suv" 132 | "131","land rover","range rover",4,1999,8,"auto(l4)","4",11,15,"p","suv" 133 | "132","land rover","range rover",4.2,2008,8,"auto(s6)","4",12,18,"r","suv" 134 | "133","land rover","range rover",4.4,2008,8,"auto(s6)","4",12,18,"r","suv" 135 | "134","land rover","range rover",4.6,1999,8,"auto(l4)","4",11,15,"p","suv" 136 | "135","lincoln","navigator 2wd",5.4,1999,8,"auto(l4)","r",11,17,"r","suv" 137 | "136","lincoln","navigator 2wd",5.4,1999,8,"auto(l4)","r",11,16,"p","suv" 138 | "137","lincoln","navigator 2wd",5.4,2008,8,"auto(l6)","r",12,18,"r","suv" 139 | "138","mercury","mountaineer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 140 | "139","mercury","mountaineer 4wd",4,2008,6,"auto(l5)","4",13,19,"r","suv" 141 | "140","mercury","mountaineer 4wd",4.6,2008,8,"auto(l6)","4",13,19,"r","suv" 142 | "141","mercury","mountaineer 4wd",5,1999,8,"auto(l4)","4",13,17,"r","suv" 143 | "142","nissan","altima",2.4,1999,4,"manual(m5)","f",21,29,"r","compact" 144 | "143","nissan","altima",2.4,1999,4,"auto(l4)","f",19,27,"r","compact" 145 | "144","nissan","altima",2.5,2008,4,"auto(av)","f",23,31,"r","midsize" 146 | "145","nissan","altima",2.5,2008,4,"manual(m6)","f",23,32,"r","midsize" 147 | "146","nissan","altima",3.5,2008,6,"manual(m6)","f",19,27,"p","midsize" 148 | "147","nissan","altima",3.5,2008,6,"auto(av)","f",19,26,"p","midsize" 149 | "148","nissan","maxima",3,1999,6,"auto(l4)","f",18,26,"r","midsize" 150 | "149","nissan","maxima",3,1999,6,"manual(m5)","f",19,25,"r","midsize" 151 | 
"150","nissan","maxima",3.5,2008,6,"auto(av)","f",19,25,"p","midsize" 152 | "151","nissan","pathfinder 4wd",3.3,1999,6,"auto(l4)","4",14,17,"r","suv" 153 | "152","nissan","pathfinder 4wd",3.3,1999,6,"manual(m5)","4",15,17,"r","suv" 154 | "153","nissan","pathfinder 4wd",4,2008,6,"auto(l5)","4",14,20,"p","suv" 155 | "154","nissan","pathfinder 4wd",5.6,2008,8,"auto(s5)","4",12,18,"p","suv" 156 | "155","pontiac","grand prix",3.1,1999,6,"auto(l4)","f",18,26,"r","midsize" 157 | "156","pontiac","grand prix",3.8,1999,6,"auto(l4)","f",16,26,"p","midsize" 158 | "157","pontiac","grand prix",3.8,1999,6,"auto(l4)","f",17,27,"r","midsize" 159 | "158","pontiac","grand prix",3.8,2008,6,"auto(l4)","f",18,28,"r","midsize" 160 | "159","pontiac","grand prix",5.3,2008,8,"auto(s4)","f",16,25,"p","midsize" 161 | "160","subaru","forester awd",2.5,1999,4,"manual(m5)","4",18,25,"r","suv" 162 | "161","subaru","forester awd",2.5,1999,4,"auto(l4)","4",18,24,"r","suv" 163 | "162","subaru","forester awd",2.5,2008,4,"manual(m5)","4",20,27,"r","suv" 164 | "163","subaru","forester awd",2.5,2008,4,"manual(m5)","4",19,25,"p","suv" 165 | "164","subaru","forester awd",2.5,2008,4,"auto(l4)","4",20,26,"r","suv" 166 | "165","subaru","forester awd",2.5,2008,4,"auto(l4)","4",18,23,"p","suv" 167 | "166","subaru","impreza awd",2.2,1999,4,"auto(l4)","4",21,26,"r","subcompact" 168 | "167","subaru","impreza awd",2.2,1999,4,"manual(m5)","4",19,26,"r","subcompact" 169 | "168","subaru","impreza awd",2.5,1999,4,"manual(m5)","4",19,26,"r","subcompact" 170 | "169","subaru","impreza awd",2.5,1999,4,"auto(l4)","4",19,26,"r","subcompact" 171 | "170","subaru","impreza awd",2.5,2008,4,"auto(s4)","4",20,25,"p","compact" 172 | "171","subaru","impreza awd",2.5,2008,4,"auto(s4)","4",20,27,"r","compact" 173 | "172","subaru","impreza awd",2.5,2008,4,"manual(m5)","4",19,25,"p","compact" 174 | "173","subaru","impreza awd",2.5,2008,4,"manual(m5)","4",20,27,"r","compact" 175 | "174","toyota","4runner 4wd",2.7,1999,4,"manual(m5)","4",15,20,"r","suv" 176 | "175","toyota","4runner 4wd",2.7,1999,4,"auto(l4)","4",16,20,"r","suv" 177 | "176","toyota","4runner 4wd",3.4,1999,6,"auto(l4)","4",15,19,"r","suv" 178 | "177","toyota","4runner 4wd",3.4,1999,6,"manual(m5)","4",15,17,"r","suv" 179 | "178","toyota","4runner 4wd",4,2008,6,"auto(l5)","4",16,20,"r","suv" 180 | "179","toyota","4runner 4wd",4.7,2008,8,"auto(l5)","4",14,17,"r","suv" 181 | "180","toyota","camry",2.2,1999,4,"manual(m5)","f",21,29,"r","midsize" 182 | "181","toyota","camry",2.2,1999,4,"auto(l4)","f",21,27,"r","midsize" 183 | "182","toyota","camry",2.4,2008,4,"manual(m5)","f",21,31,"r","midsize" 184 | "183","toyota","camry",2.4,2008,4,"auto(l5)","f",21,31,"r","midsize" 185 | "184","toyota","camry",3,1999,6,"auto(l4)","f",18,26,"r","midsize" 186 | "185","toyota","camry",3,1999,6,"manual(m5)","f",18,26,"r","midsize" 187 | "186","toyota","camry",3.5,2008,6,"auto(s6)","f",19,28,"r","midsize" 188 | "187","toyota","camry solara",2.2,1999,4,"auto(l4)","f",21,27,"r","compact" 189 | "188","toyota","camry solara",2.2,1999,4,"manual(m5)","f",21,29,"r","compact" 190 | "189","toyota","camry solara",2.4,2008,4,"manual(m5)","f",21,31,"r","compact" 191 | "190","toyota","camry solara",2.4,2008,4,"auto(s5)","f",22,31,"r","compact" 192 | "191","toyota","camry solara",3,1999,6,"auto(l4)","f",18,26,"r","compact" 193 | "192","toyota","camry solara",3,1999,6,"manual(m5)","f",18,26,"r","compact" 194 | "193","toyota","camry solara",3.3,2008,6,"auto(s5)","f",18,27,"r","compact" 195 | 
"194","toyota","corolla",1.8,1999,4,"auto(l3)","f",24,30,"r","compact" 196 | "195","toyota","corolla",1.8,1999,4,"auto(l4)","f",24,33,"r","compact" 197 | "196","toyota","corolla",1.8,1999,4,"manual(m5)","f",26,35,"r","compact" 198 | "197","toyota","corolla",1.8,2008,4,"manual(m5)","f",28,37,"r","compact" 199 | "198","toyota","corolla",1.8,2008,4,"auto(l4)","f",26,35,"r","compact" 200 | "199","toyota","land cruiser wagon 4wd",4.7,1999,8,"auto(l4)","4",11,15,"r","suv" 201 | "200","toyota","land cruiser wagon 4wd",5.7,2008,8,"auto(s6)","4",13,18,"r","suv" 202 | "201","toyota","toyota tacoma 4wd",2.7,1999,4,"manual(m5)","4",15,20,"r","pickup" 203 | "202","toyota","toyota tacoma 4wd",2.7,1999,4,"auto(l4)","4",16,20,"r","pickup" 204 | "203","toyota","toyota tacoma 4wd",2.7,2008,4,"manual(m5)","4",17,22,"r","pickup" 205 | "204","toyota","toyota tacoma 4wd",3.4,1999,6,"manual(m5)","4",15,17,"r","pickup" 206 | "205","toyota","toyota tacoma 4wd",3.4,1999,6,"auto(l4)","4",15,19,"r","pickup" 207 | "206","toyota","toyota tacoma 4wd",4,2008,6,"manual(m6)","4",15,18,"r","pickup" 208 | "207","toyota","toyota tacoma 4wd",4,2008,6,"auto(l5)","4",16,20,"r","pickup" 209 | "208","volkswagen","gti",2,1999,4,"manual(m5)","f",21,29,"r","compact" 210 | "209","volkswagen","gti",2,1999,4,"auto(l4)","f",19,26,"r","compact" 211 | "210","volkswagen","gti",2,2008,4,"manual(m6)","f",21,29,"p","compact" 212 | "211","volkswagen","gti",2,2008,4,"auto(s6)","f",22,29,"p","compact" 213 | "212","volkswagen","gti",2.8,1999,6,"manual(m5)","f",17,24,"r","compact" 214 | "213","volkswagen","jetta",1.9,1999,4,"manual(m5)","f",33,44,"d","compact" 215 | "214","volkswagen","jetta",2,1999,4,"manual(m5)","f",21,29,"r","compact" 216 | "215","volkswagen","jetta",2,1999,4,"auto(l4)","f",19,26,"r","compact" 217 | "216","volkswagen","jetta",2,2008,4,"auto(s6)","f",22,29,"p","compact" 218 | "217","volkswagen","jetta",2,2008,4,"manual(m6)","f",21,29,"p","compact" 219 | "218","volkswagen","jetta",2.5,2008,5,"auto(s6)","f",21,29,"r","compact" 220 | "219","volkswagen","jetta",2.5,2008,5,"manual(m5)","f",21,29,"r","compact" 221 | "220","volkswagen","jetta",2.8,1999,6,"auto(l4)","f",16,23,"r","compact" 222 | "221","volkswagen","jetta",2.8,1999,6,"manual(m5)","f",17,24,"r","compact" 223 | "222","volkswagen","new beetle",1.9,1999,4,"manual(m5)","f",35,44,"d","subcompact" 224 | "223","volkswagen","new beetle",1.9,1999,4,"auto(l4)","f",29,41,"d","subcompact" 225 | "224","volkswagen","new beetle",2,1999,4,"manual(m5)","f",21,29,"r","subcompact" 226 | "225","volkswagen","new beetle",2,1999,4,"auto(l4)","f",19,26,"r","subcompact" 227 | "226","volkswagen","new beetle",2.5,2008,5,"manual(m5)","f",20,28,"r","subcompact" 228 | "227","volkswagen","new beetle",2.5,2008,5,"auto(s6)","f",20,29,"r","subcompact" 229 | "228","volkswagen","passat",1.8,1999,4,"manual(m5)","f",21,29,"p","midsize" 230 | "229","volkswagen","passat",1.8,1999,4,"auto(l5)","f",18,29,"p","midsize" 231 | "230","volkswagen","passat",2,2008,4,"auto(s6)","f",19,28,"p","midsize" 232 | "231","volkswagen","passat",2,2008,4,"manual(m6)","f",21,29,"p","midsize" 233 | "232","volkswagen","passat",2.8,1999,6,"auto(l5)","f",16,26,"p","midsize" 234 | "233","volkswagen","passat",2.8,1999,6,"manual(m5)","f",18,26,"p","midsize" 235 | "234","volkswagen","passat",3.6,2008,6,"auto(s6)","f",17,26,"p","midsize" 236 | -------------------------------------------------------------------------------- /MyNotebooks/olympics.csv: -------------------------------------------------------------------------------- 1 | 
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) 
[HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa 
(RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /MyNotebooks/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/scimagojr-3.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro-to-Data-Science-in-Python 2 | Repo for the first course of the Applied Data Science with Python Specialization, taught by the University of Michigan and hosted by Coursera 3 | -------------------------------------------------------------------------------- /course1_downloads/Assignment 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Assignment 2 - Pandas Introduction\n", 19 | "All questions are weighted the same in this assignment.\n", 20 | "## Part 1\n", 21 | "The following code loads the olympics dataset (olympics.csv), which was derived from the Wikipedia entry on [All Time Olympic Games Medals](https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table), and does some basic data cleaning. \n", 22 | "\n", 23 | "The columns are organized as # of Summer games, Summer medals, # of Winter games, Winter medals, total # of games, total # of medals. Use this dataset to answer the questions below."
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": false, 31 | "nbgrader": { 32 | "grade": false, 33 | "grade_id": "1", 34 | "locked": false, 35 | "solution": false 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import pandas as pd\n", 41 | "\n", 42 | "df = pd.read_csv('olympics.csv', index_col=0, skiprows=1)\n", 43 | "\n", 44 | "for col in df.columns:\n", 45 | " if col[:2]=='01':\n", 46 | " df.rename(columns={col:'Gold'+col[4:]}, inplace=True)\n", 47 | " if col[:2]=='02':\n", 48 | " df.rename(columns={col:'Silver'+col[4:]}, inplace=True)\n", 49 | " if col[:2]=='03':\n", 50 | " df.rename(columns={col:'Bronze'+col[4:]}, inplace=True)\n", 51 | " if col[:1]=='№':\n", 52 | " df.rename(columns={col:'#'+col[1:]}, inplace=True)\n", 53 | "\n", 54 | "names_ids = df.index.str.split('\\s\\(') # split the index by '('\n", 55 | "\n", 56 | "df.index = names_ids.str[0] # the [0] element is the country name (new index) \n", 57 | "df['ID'] = names_ids.str[1].str[:3] # the [1] element is the abbreviation or ID (take first 3 characters from that)\n", 58 | "\n", 59 | "df = df.drop('Totals')\n", 60 | "df.head()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Question 0 (Example)\n", 68 | "\n", 69 | "What is the first country in df?\n", 70 | "\n", 71 | "*This function should return a Series.*" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "# You should write your whole answer within the function provided. The autograder will call\n", 83 | "# this function and compare the return value against the correct solution value\n", 84 | "def answer_zero():\n", 85 | " # This function returns the row for Afghanistan, which is a Series object. The assignment\n", 86 | " # question description will tell you the general format the autograder is expecting\n", 87 | " return df.iloc[0]\n", 88 | "\n", 89 | "# You can examine what your function returns by calling it in the cell. 
If you have questions\n", 90 | "# about the assignment formats, check out the discussion forums for any FAQs\n", 91 | "answer_zero() " 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Question 1\n", 99 | "Which country has won the most gold medals in summer games?\n", 100 | "\n", 101 | "*This function should return a single string value.*" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false, 109 | "nbgrader": { 110 | "grade": false, 111 | "locked": false, 112 | "solution": false 113 | } 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def answer_one():\n", 118 | " return \"YOUR ANSWER HERE\"" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Question 2\n", 126 | "Which country had the biggest difference between their summer and winter gold medal counts?\n", 127 | "\n", 128 | "*This function should return a single string value.*" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "def answer_two():\n", 140 | " return \"YOUR ANSWER HERE\"" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Question 3\n", 148 | "Which country has the biggest difference between their summer gold medal counts and winter gold medal counts relative to their total gold medal count? \n", 149 | "\n", 150 | "$$\\frac{Summer~Gold - Winter~Gold}{Total~Gold}$$\n", 151 | "\n", 152 | "Only include countries that have won at least 1 gold in both summer and winter.\n", 153 | "\n", 154 | "*This function should return a single string value.*" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "def answer_three():\n", 166 | " return \"YOUR ANSWER HERE\"" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Question 4\n", 174 | "Write a function to update the dataframe to include a new column called \"Points\" which is a weighted value where each gold medal counts for 3 points, silver medals for 2 points, and bronze medals for 1 point. The function should return only the column (a Series object) which you created.\n", 175 | "\n", 176 | "*This function should return a Series named `Points` of length 146*" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "def answer_four():\n", 188 | " return \"YOUR ANSWER HERE\"" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Part 2\n", 196 | "For the next set of questions, we will be using census data from the [United States Census Bureau](http://www.census.gov/popest/data/counties/totals/2015/CO-EST2015-alldata.html). Counties are political and geographic subdivisions of states in the United States. This dataset contains population data for counties and states in the US from 2010 to 2015. [See this document](http://www.census.gov/popest/data/counties/totals/2015/files/CO-EST2015-alldata.pdf) for a description of the variable names.\n", 197 | "\n", 198 | "The census dataset (census.csv) should be loaded as census_df. 
Answer questions using this as appropriate.\n", 199 | "\n", 200 | "### Question 5\n", 201 | "Which state has the most counties in it? (hint: consider the sumlevel key carefully! You'll need this for future questions too...)\n", 202 | "\n", 203 | "*This function should return a single string value.*" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "census_df = pd.read_csv('census.csv')\n", 215 | "census_df.head()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "def answer_five():\n", 227 | " return \"YOUR ANSWER HERE\"" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Question 6\n", 235 | "Only looking at the three most populous counties for each state, what are the three most populous states (in order of highest population to lowest population)?\n", 236 | "\n", 237 | "*This function should return a list of string values.*" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def answer_six():\n", 249 | " return \"YOUR ANSWER HERE\"" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### Question 7\n", 257 | "Which county has had the largest absolute change in population within the period 2010-2015? (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.)\n", 258 | "\n", 259 | "e.g. If County Population in the 5 year period is 100, 120, 80, 105, 100, 130, then its largest change in the period would be |130-80| = 50.\n", 260 | "\n", 261 | "*This function should return a single string value.*" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "def answer_seven():\n", 273 | " return \"YOUR ANSWER HERE\"" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "### Question 8\n", 281 | "In this datafile, the United States is broken up into four regions using the \"REGION\" column. 
\n", 282 | "\n", 283 | "Create a query that finds the counties that belong to regions 1 or 2, whose name starts with 'Washington', and whose POPESTIMATE2015 was greater than their POPESTIMATE2014.\n", 284 | "\n", 285 | "*This function should return a 5x2 DataFrame with the columns = ['STNAME', 'CTYNAME'] and the same index ID as the census_df (sorted ascending by index).*" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": false 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "def answer_eight():\n", 297 | " return \"YOUR ANSWER HERE\"" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "anaconda-cloud": {}, 303 | "coursera": { 304 | "course_slug": "python-data-analysis", 305 | "graded_item_id": "tHmgx", 306 | "launcher_item_id": "Um6Bz", 307 | "part_id": "OQsnr" 308 | }, 309 | "kernelspec": { 310 | "display_name": "Python [default]", 311 | "language": "python", 312 | "name": "python3" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.5.2" 325 | } 326 | }, 327 | "nbformat": 4, 328 | "nbformat_minor": 0 329 | } 330 | -------------------------------------------------------------------------------- /course1_downloads/Assignment 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "from scipy.stats import ttest_ind" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Assignment 4 - Hypothesis Testing\n", 32 | "This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff.\n", 33 | "\n", 34 | "Definitions:\n", 35 | "* A _quarter_ is a specific three month period, Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December.\n", 36 | "* A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth.\n", 37 | "* A _recession bottom_ is the quarter within a recession which had the lowest GDP.\n", 38 | "* A _university town_ is a city which has a high percentage of university students compared to the total population of the city.\n", 39 | "\n", 40 | "**Hypothesis**: University towns have their mean housing prices less affected by recessions. 
Run a t-test using the ratio of the mean price of houses in university towns in the quarter before the recession starts to the mean price at the recession bottom. (`price_ratio=quarter_before_recession/recession_bottom`)\n", 41 | "\n", 42 | "The following data files are available for this assignment:\n", 43 | "* From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level.\n", 44 | "* From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```.\n", 45 | "* From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward.\n", 46 | "\n", 47 | "Each function in this assignment below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "# Use this dictionary to map two letter acronyms to state names\n", 59 | "states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def get_list_of_university_towns():\n", 71 | " '''Returns a DataFrame of towns and the states they are in from the \n", 72 | " university_towns.txt list. 
The format of the DataFrame should be:\n", 73 | " DataFrame( [ [\"Michigan\",\"Ann Arbor\"], [\"Michigan\", \"Ypsilanti\"] ], \n", 74 | " columns=[\"State\",\"RegionName\"] )'''\n", 75 | " \n", 76 | " return \"ANSWER\"" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def get_recession_start():\n", 88 | " '''Returns the year and quarter of the recession start time as a \n", 89 | " string value in a format such as 2005q3'''\n", 90 | " \n", 91 | " return \"ANSWER\"" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def get_recession_end():\n", 103 | " '''Returns the year and quarter of the recession end time as a \n", 104 | " string value in a format such as 2005q3'''\n", 105 | " \n", 106 | " return \"ANSWER\"" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def get_recession_bottom():\n", 118 | " '''Returns the year and quarter of the recession bottom time as a \n", 119 | " string value in a format such as 2005q3'''\n", 120 | " \n", 121 | " return \"ANSWER\"" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def convert_housing_data_to_quarters():\n", 133 | " '''Converts the housing data to quarters and returns it as mean \n", 134 | " values in a dataframe. This dataframe should be a dataframe with\n", 135 | " columns for 2000q1 through 2016q3, and should have a multi-index\n", 136 | " in the shape of [\"State\",\"RegionName\"].\n", 137 | " \n", 138 | " Note: Quarters are defined in the assignment description, they are\n", 139 | " not arbitrary three month periods.\n", 140 | " \n", 141 | " The resulting dataframe should have 67 columns, and 10,730 rows.\n", 142 | " '''\n", 143 | " \n", 144 | " return \"ANSWER\"" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def run_ttest():\n", 156 | " '''First creates new data showing the decline or growth of housing prices\n", 157 | " between the recession start and the recession bottom. Then runs a t-test\n", 158 | " comparing the university town values to the non-university town values, \n", 159 | " returning whether the alternative hypothesis (that the two groups are different)\n", 160 | " is true or not as well as the p-value of the test. \n", 161 | " \n", 162 | " Return the tuple (different, p, better) where different=True if the t-test is\n", 163 | " True at a p<0.01 (we reject the null hypothesis), or different=False if \n", 164 | " otherwise (we cannot reject the null hypothesis). The variable p should\n", 165 | " be equal to the exact p value returned from scipy.stats.ttest_ind(). 
The\n", 166 | " value for better should be either \"university town\" or \"non-university town\"\n", 167 | " depending on which has a lower mean price ratio (which is equivalent to a\n", 168 | " reduced market loss).'''\n", 169 | " \n", 170 | " return \"ANSWER\"" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "coursera": { 176 | "course_slug": "python-data-analysis", 177 | "graded_item_id": "Il9Fx", 178 | "launcher_item_id": "TeDW0", 179 | "part_id": "WGlun" 180 | }, 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.5.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 0 201 | } 202 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+2.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # # Assignment 2 - Pandas Introduction 11 | # All questions are weighted the same in this assignment. 12 | # ## Part 1 13 | # The following code loads the olympics dataset (olympics.csv), which was derived from the Wikipedia entry on [All Time Olympic Games Medals](https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table), and does some basic data cleaning. 14 | # 15 | # The columns are organized as # of Summer games, Summer medals, # of Winter games, Winter medals, total # of games, total # of medals. Use this dataset to answer the questions below. 16 | 17 | # In[ ]: 18 | 19 | import pandas as pd 20 | 21 | df = pd.read_csv('olympics.csv', index_col=0, skiprows=1) 22 | 23 | for col in df.columns: 24 | if col[:2]=='01': 25 | df.rename(columns={col:'Gold'+col[4:]}, inplace=True) 26 | if col[:2]=='02': 27 | df.rename(columns={col:'Silver'+col[4:]}, inplace=True) 28 | if col[:2]=='03': 29 | df.rename(columns={col:'Bronze'+col[4:]}, inplace=True) 30 | if col[:1]=='№': 31 | df.rename(columns={col:'#'+col[1:]}, inplace=True) 32 | 33 | names_ids = df.index.str.split('\s\(') # split the index by '(' 34 | 35 | df.index = names_ids.str[0] # the [0] element is the country name (new index) 36 | df['ID'] = names_ids.str[1].str[:3] # the [1] element is the abbreviation or ID (take first 3 characters from that) 37 | 38 | df = df.drop('Totals') 39 | df.head() 40 | 41 | 42 | # ### Question 0 (Example) 43 | # 44 | # What is the first country in df? 45 | # 46 | # *This function should return a Series.* 47 | 48 | # In[ ]: 49 | 50 | # You should write your whole answer within the function provided. The autograder will call 51 | # this function and compare the return value against the correct solution value 52 | def answer_zero(): 53 | # This function returns the row for Afghanistan, which is a Series object. The assignment 54 | # question description will tell you the general format the autograder is expecting 55 | return df.iloc[0] 56 | 57 | # You can examine what your function returns by calling it in the cell. If you have questions 58 | # about the assignment formats, check out the discussion forums for any FAQs 59 | answer_zero() 60 | 61 | 62 | # ### Question 1 63 | # Which country has won the most gold medals in summer games? 64 | # 65 | # *This function should return a single string value.* 66 | 67 | # In[ ]: 68 | 69 | def answer_one(): 70 | return "YOUR ANSWER HERE" 71 | 72 | 
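# A minimal illustrative sketch (an editor's addition, not part of the original
# assignment): with the renaming loop above, the summer gold counts end up in the
# 'Gold' column, so Question 1 reduces to asking for the index label of that
# column's maximum. The name `example_answer_one` is hypothetical, chosen so the
# graded `answer_one` stub above is not shadowed.
def example_answer_one():
    # idxmax() returns the index label -- here the country name -- of the largest value
    return df['Gold'].idxmax()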
The assignment 54 | # question description will tell you the general format the autograder is expecting 55 | return df.iloc[0] 56 | 57 | # You can examine what your function returns by calling it in the cell. If you have questions 58 | # about the assignment formats, check out the discussion forums for any FAQs 59 | answer_zero() 60 | 61 | 62 | # ### Question 1 63 | # Which country has won the most gold medals in summer games? 64 | # 65 | # *This function should return a single string value.* 66 | 67 | # In[ ]: 68 | 69 | def answer_one(): 70 | return "YOUR ANSWER HERE" 71 | 72 | 73 | # ### Question 2 74 | # Which country had the biggest difference between their summer and winter gold medal counts? 75 | # 76 | # *This function should return a single string value.* 77 | 78 | # In[ ]: 79 | 80 | def answer_two(): 81 | return "YOUR ANSWER HERE" 82 | 83 | 84 | # ### Question 3 85 | # Which country has the biggest difference between their summer gold medal counts and winter gold medal counts relative to their total gold medal count? 86 | # 87 | # $$\frac{Summer~Gold - Winter~Gold}{Total~Gold}$$ 88 | # 89 | # Only include countries that have won at least 1 gold in both summer and winter. 90 | # 91 | # *This function should return a single string value.* 92 | 93 | # In[ ]: 94 | 95 | def answer_three(): 96 | return "YOUR ANSWER HERE" 97 | 98 | 99 | # ### Question 4 100 | # Write a function to update the dataframe to include a new column called "Points" which is a weighted value where each gold medal counts for 3 points, silver medals for 2 points, and bronze medals for 1 point. The function should return only the column (a Series object) which you created. 101 | # 102 | # *This function should return a Series named `Points` of length 146* 103 | 104 | # In[ ]: 105 | 106 | def answer_four(): 107 | return "YOUR ANSWER HERE" 108 | 109 | 110 | # ## Part 2 111 | # For the next set of questions, we will be using census data from the [United States Census Bureau](http://www.census.gov/popest/data/counties/totals/2015/CO-EST2015-alldata.html). Counties are political and geographic subdivisions of states in the United States. This dataset contains population data for counties and states in the US from 2010 to 2015. [See this document](http://www.census.gov/popest/data/counties/totals/2015/files/CO-EST2015-alldata.pdf) for a description of the variable names. 112 | # 113 | # The census dataset (census.csv) should be loaded as census_df. Answer questions using this as appropriate. 114 | # 115 | # ### Question 5 116 | # Which state has the most counties in it? (hint: consider the sumlevel key carefully! You'll need this for future questions too...) 117 | # 118 | # *This function should return a single string value.* 119 | 120 | # In[ ]: 121 | 122 | census_df = pd.read_csv('census.csv') 123 | census_df.head() 124 | 125 | 126 | # In[ ]: 127 | 128 | def answer_five(): 129 | return "YOUR ANSWER HERE" 130 | 131 | 132 | # ### Question 6 133 | # Only looking at the three most populous counties for each state, what are the three most populous states (in order of highest population to lowest population)? 134 | # 135 | # *This function should return a list of string values.* 136 | 137 | # In[ ]: 138 | 139 | def answer_six(): 140 | return "YOUR ANSWER HERE" 141 | 142 | 143 | # ### Question 7 144 | # Which county has had the largest absolute change in population within the period 2010-2015? (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.) 
145 | # 146 | # e.g. If County Population in the 5 year period is 100, 120, 80, 105, 100, 130, then its largest change in the period would be |130-80| = 50. 147 | # 148 | # *This function should return a single string value.* 149 | 150 | # In[ ]: 151 | 152 | def answer_seven(): 153 | return "YOUR ANSWER HERE" 154 | 155 | 156 | # ### Question 8 157 | # In this datafile, the United States is broken up into four regions using the "REGION" column. 158 | # 159 | # Create a query that finds the counties that belong to regions 1 or 2, whose name starts with 'Washington', and whose POPESTIMATE2015 was greater than their POPESTIMATE2014. 160 | # 161 | # *This function should return a 5x2 DataFrame with the columns = ['STNAME', 'CTYNAME'] and the same index ID as the census_df (sorted ascending by index).* 162 | 163 | # In[ ]: 164 | 165 | def answer_eight(): 166 | return "YOUR ANSWER HERE" 167 | 168 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+3.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.2** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # # Assignment 3 - More Pandas 11 | # All questions are weighted the same in this assignment. This assignment requires more individual learning than the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff. 12 | 13 | # ### Question 1 (20%) 14 | # Load the energy data from the file `Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **energy**. 15 | # 16 | # Keep in mind that this is an Excel file, and not a comma-separated values file. Also, make sure to exclude the footer and header information from the datafile. The first two columns are unnecessary, so you should get rid of them, and you should change the column labels so that the columns are: 17 | # 18 | # `['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']` 19 | # 20 | # Convert `Energy Supply` to gigajoules (there are 1,000,000 gigajoules in a petajoule). For all countries which have missing data (e.g. data with "...") make sure this is reflected as `np.NaN` values. 21 | # 22 | # Rename the following list of countries (for use in later questions): 23 | # 24 | # ```"Republic of Korea": "South Korea", 25 | # "United States of America": "United States", 26 | # "United Kingdom of Great Britain and Northern Ireland": "United Kingdom", 27 | # "China, Hong Kong Special Administrative Region": "Hong Kong"``` 28 | # 29 | # There are also several countries with parentheses in their name. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`. 30 | # 31 | # 
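# A rough sketch (not the graded solution) of the cleaning described above; the
# `skiprows`/`skipfooter` counts here are assumptions about this file's layout:
#
# ```python
# import numpy as np
# import pandas as pd
#
# energy = pd.read_excel('Energy Indicators.xls', header=None, skiprows=18,
#                        skipfooter=38, usecols=[2, 3, 4, 5],
#                        names=['Country', 'Energy Supply',
#                               'Energy Supply per Capita', '% Renewable'])
# energy = energy.replace('...', np.NaN)                # "..." cells become NaN
# energy['Energy Supply'] = energy['Energy Supply'] * 1000000  # petajoules -> gigajoules
# energy['Country'] = (energy['Country']
#                      .str.replace(r'\d+', '', regex=True)       # drop footnote digits
#                      .str.replace(r' \(.*\)', '', regex=True)   # drop parentheticals
#                      .str.strip())
# energy['Country'] = energy['Country'].replace({
#     'Republic of Korea': 'South Korea',
#     'United States of America': 'United States',
#     'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
#     'China, Hong Kong Special Administrative Region': 'Hong Kong'})
# ```
#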
32 | # 33 | # Next, load the GDP data from the file `world_bank.csv`, which is a CSV containing countries' GDP from 1960 to 2015 from the [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. 34 | # 35 | # Make sure to skip the header, and rename the following list of countries: 36 | # 37 | # ```"Korea, Rep.": "South Korea", 38 | # "Iran, Islamic Rep.": "Iran", 39 | # "Hong Kong SAR, China": "Hong Kong"``` 40 | # 41 | # 
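# A minimal sketch of this load, assuming the standard World Bank export carries
# four metadata rows above the header (an assumption about this particular download):
#
# ```python
# GDP = pd.read_csv('world_bank.csv', skiprows=4)
# GDP = GDP.rename(columns={'Country Name': 'Country'})
# GDP['Country'] = GDP['Country'].replace({
#     'Korea, Rep.': 'South Korea',
#     'Iran, Islamic Rep.': 'Iran',
#     'Hong Kong SAR, China': 'Hong Kong'})
# ```
#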
42 | # 43 | # Finally, load the [Scimago Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. Call this DataFrame **ScimEn**. 44 | # 45 | # Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). 46 | # 47 | # The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', 48 | # 'Citations per document', 'H index', 'Energy Supply', 49 | # 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008', 50 | # '2009', '2010', '2011', '2012', '2013', '2014', '2015']. 51 | # 52 | # *This function should return a DataFrame with 20 columns and 15 entries.* 53 | 54 | # In[ ]: 55 | 56 | def answer_one(): 57 | return "ANSWER" 58 | 59 | 60 | # ### Question 2 (6.6%) 61 | # The previous question joined three datasets then reduced this to just the top 15 entries. When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose? 62 | # 63 | # *This function should return a single number.* 64 | 65 | # In[1]: 66 | 67 | get_ipython().run_cell_magic('HTML', '', '\n \n \n \n \n Everything but this!\n') 68 | 69 | 70 | # In[ ]: 71 | 72 | def answer_two(): 73 | return "ANSWER" 74 | 75 | 76 | # ### Question 3 (6.6%) 77 | # What are the top 15 countries for average GDP over the last 10 years? 78 | # 79 | # *This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.* 80 | 81 | # In[ ]: 82 | 83 | def answer_three(): 84 | Top15 = answer_one() 85 | return "ANSWER" 86 | 87 | 88 | # ### Question 4 (6.6%) 89 | # By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP? 90 | # 91 | # *This function should return a single number.* 92 | 93 | # In[ ]: 94 | 95 | def answer_four(): 96 | Top15 = answer_one() 97 | return "ANSWER" 98 | 99 | 100 | # ### Question 5 (6.6%) 101 | # What is the mean energy supply per capita? 102 | # 103 | # *This function should return a single number.* 104 | 105 | # In[ ]: 106 | 107 | def answer_five(): 108 | Top15 = answer_one() 109 | return "ANSWER" 110 | 111 | 112 | # ### Question 6 (6.6%) 113 | # What country has the maximum % Renewable and what is the percentage? 114 | # 115 | # *This function should return a tuple with the name of the country and the percentage.* 116 | 117 | # In[ ]: 118 | 119 | def answer_six(): 120 | Top15 = answer_one() 121 | return "ANSWER" 122 | 123 | 124 | # ### Question 7 (6.6%) 125 | # Create a new column that is the ratio of Self-Citations to Total Citations. 126 | # What is the maximum value for this new column, and what country has the highest ratio? 127 | # 128 | # *This function should return a tuple with the name of the country and the ratio.* 129 | 130 | # In[ ]: 131 | 132 | def answer_seven(): 133 | Top15 = answer_one() 134 | return "ANSWER" 135 | 136 | 137 | # ### Question 8 (6.6%) 138 | # 139 | # Create a column that estimates the population using Energy Supply and Energy Supply per capita. 140 | # What is the third most populous country according to this estimate? 
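# As the plotting helper `plot9()` further below also computes, the estimate
# divides total energy supply by per-capita supply. A sketch (assuming
# `answer_one()` has been implemented):
#
# ```python
# Top15 = answer_one()
# Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']
# third_most_populous = Top15['PopEst'].nlargest(3).index[-1]
# ```
#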
141 | # 142 | # *This function should return a single string value.* 143 | 144 | # In[ ]: 145 | 146 | def answer_eight(): 147 | Top15 = answer_one() 148 | return "ANSWER" 149 | 150 | 151 | # ### Question 9 152 | # Create a column that estimates the number of citable documents per person. 153 | # What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method (Pearson's correlation). 154 | # 155 | # *This function should return a single number.* 156 | # 157 | # *(Optional: Use the function `plot9()` defined below to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)* 158 | 159 | # In[ ]: 160 | 161 | def answer_nine(): 162 | Top15 = answer_one() 163 | return "ANSWER" 164 | 165 | 166 | # In[ ]: 167 | 168 | def plot9(): 169 | import matplotlib.pyplot as plt 170 | get_ipython().magic('matplotlib inline') 171 | 172 | Top15 = answer_one() 173 | Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita'] 174 | Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst'] 175 | Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006]) 176 | 177 | 178 | # In[ ]: 179 | 180 | #plot9() # Be sure to comment out plot9() before submitting the assignment! 181 | 182 | 183 | # ### Question 10 (6.6%) 184 | # Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median. 185 | # 186 | # *This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.* 187 | 188 | # In[ ]: 189 | 190 | def answer_ten(): 191 | Top15 = answer_one() 192 | return "ANSWER" 193 | 194 | 195 | # ### Question 11 (6.6%) 196 | # Use the following dictionary to group the Countries by Continent, then create a DataFrame that displays the sample size (the number of countries in each continent bin), and the sum, mean, and standard deviation of the estimated population of the countries in each continent. 197 | # 198 | # ```python 199 | # ContinentDict = {'China':'Asia', 200 | # 'United States':'North America', 201 | # 'Japan':'Asia', 202 | # 'United Kingdom':'Europe', 203 | # 'Russian Federation':'Europe', 204 | # 'Canada':'North America', 205 | # 'Germany':'Europe', 206 | # 'India':'Asia', 207 | # 'France':'Europe', 208 | # 'South Korea':'Asia', 209 | # 'Italy':'Europe', 210 | # 'Spain':'Europe', 211 | # 'Iran':'Asia', 212 | # 'Australia':'Australia', 213 | # 'Brazil':'South America'} 214 | # ``` 215 | # 216 | # *This function should return a DataFrame with index named Continent `['Asia', 'Australia', 'Europe', 'North America', 'South America']` and columns `['size', 'sum', 'mean', 'std']`* 217 | 218 | # In[ ]: 219 | 220 | def answer_eleven(): 221 | Top15 = answer_one() 222 | return "ANSWER" 223 | 224 | 225 | # ### Question 12 (6.6%) 226 | # Cut % Renewable into 5 bins. Group Top15 by the Continent, as well as these new % Renewable bins. How many countries are in each of these groups? 227 | # 228 | # *This function should return a Series with a MultiIndex of `Continent`, then the bins for `% Renewable`. Do not include groups with no countries.* 229 | 230 | # In[ ]: 231 | 232 | def answer_twelve(): 233 | Top15 = answer_one() 234 | return "ANSWER" 235 | 236 | 237 | # ### Question 13 (6.6%) 238 | # Convert the Population Estimate series to a string with thousands separator (using commas). 
Use all significant digits (do not round the results). 239 | # 240 | # e.g. 12345678.90 -> 12,345,678.90 241 | # 242 | # *This function should return a Series `PopEst` whose index is the country name and whose values are the population estimate string.* 243 | 244 | # In[ ]: 245 | 246 | def answer_thirteen(): 247 | Top15 = answer_one() 248 | return "ANSWER" 249 | 250 | 251 | # ### Optional 252 | # 253 | # Use the function `plot_optional()` below to see an example visualization. 254 | 255 | # In[ ]: 256 | 257 | def plot_optional(): 258 | import matplotlib.pyplot as plt 259 | get_ipython().magic('matplotlib inline') 260 | Top15 = answer_one() 261 | ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', 262 | c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c', 263 | '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], 264 | xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]); 265 | 266 | for i, txt in enumerate(Top15.index): 267 | ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center') 268 | 269 | print("This is an example of a visualization that can be created to help understand the data. This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' 2014 GDP, and the color corresponds to the continent.") 270 | 271 | 272 | # In[ ]: 273 | 274 | #plot_optional() # Be sure to comment out plot_optional() before submitting the assignment! 275 | 276 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+4.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # In[ ]: 11 | 12 | import pandas as pd 13 | import numpy as np 14 | from scipy.stats import ttest_ind 15 | 16 | 17 | # # Assignment 4 - Hypothesis Testing 18 | # This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff. 19 | # 20 | # Definitions: 21 | # * A _quarter_ is a specific three month period, Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December. 22 | # * A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth. 23 | # * A _recession bottom_ is the quarter within a recession which had the lowest GDP. 24 | # * A _university town_ is a city which has a high percentage of university students compared to the total population of the city. 25 | # 26 | # **Hypothesis**: University towns have their mean housing prices less affected by recessions. Run a t-test to compare the ratio of the mean price of houses in university towns the quarter before the recession starts compared to the recession bottom. 
(`price_ratio=quarter_before_recession/recession_bottom`) 27 | # 28 | # The following data files are available for this assignment: 29 | # * From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level. 30 | # * From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```. 31 | # * From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward. 32 | # 33 | # Each function in this assignment below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%. 34 | 35 | # In[ ]: 36 | 37 | # Use this dictionary to map two-letter state acronyms to full state names 38 | states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'} 39 | 40 | 41 | # In[ ]: 42 | 43 | def get_list_of_university_towns(): 44 | '''Returns a DataFrame of towns and the states they are in from the 45 | university_towns.txt list. The format of the DataFrame should be: 46 | DataFrame( [ ["Michigan","Ann Arbor"], ["Michigan", "Ypsilanti"] ], 47 | columns=["State","RegionName"] )''' 48 | 49 | return "ANSWER" 50 | 51 | 52 | # In[ ]: 53 | 54 | def get_recession_start(): 55 | '''Returns the year and quarter of the recession start time as a 56 | string value in a format such as 2005q3''' 57 | 58 | return "ANSWER" 59 | 60 | 61 | # In[ ]: 62 | 63 | def get_recession_end(): 64 | '''Returns the year and quarter of the recession end time as a 65 | string value in a format such as 2005q3''' 66 | 67 | return "ANSWER" 68 | 69 | 70 | # In[ ]: 71 | 72 | def get_recession_bottom(): 73 | '''Returns the year and quarter of the recession bottom time as a 74 | string value in a format such as 2005q3''' 75 | 76 | return "ANSWER" 77 | 78 | 79 | # In[ ]: 80 | 81 | def convert_housing_data_to_quarters(): 82 | '''Converts the housing data to quarters and returns it as mean 83 | values in a dataframe. 
This dataframe should be a dataframe with 84 | columns for 2000q1 through 2016q3, and should have a multi-index 85 | in the shape of ["State","RegionName"]. 86 | 87 | Note: Quarters are defined in the assignment description, they are 88 | not arbitrary three month periods. 89 | 90 | The resulting dataframe should have 67 columns, and 10,730 rows. 91 | ''' 92 | 93 | return "ANSWER" 94 | 95 | 96 | # In[ ]: 97 | 98 | def run_ttest(): 99 | '''First creates new data showing the decline or growth of housing prices 100 | between the recession start and the recession bottom. Then runs a t-test 101 | comparing the university town values to the non-university town values, 102 | returning whether the alternative hypothesis (that the two groups are different) 103 | can be supported or not, as well as the p-value of the test. 104 | 105 | Return the tuple (different, p, better) where different=True if the t-test is 106 | True at a p<0.01 (we reject the null hypothesis), or different=False if 107 | otherwise (we cannot reject the null hypothesis). The variable p should 108 | be equal to the exact p value returned from scipy.stats.ttest_ind(). The 109 | value for better should be either "university town" or "non-university town" 110 | depending on which has a lower mean price ratio (which is equivalent to a 111 | reduced market loss).''' 112 | 113 | return "ANSWER" 114 | 115 | -------------------------------------------------------------------------------- /course1_downloads/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/Energy Indicators.xls -------------------------------------------------------------------------------- /course1_downloads/Week 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "np.random.binomial(1, 0.5)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "np.random.binomial(1000, 0.5)/1000" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "chance_of_tornado = 0.01/100\n", 64 | "np.random.binomial(100000, chance_of_tornado)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01\n", 76 | "\n", 77 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 78 | " \n", 79 | "two_days_in_a_row = 0\n", 80 | "for j in range(1,len(tornado_events)):  # check every consecutive pair of days\n", 81 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 82 | " two_days_in_a_row+=1\n", 83 | "\n", 84 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "np.random.uniform(0, 1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "np.random.normal(0.75)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Formula for standard deviation\n", 114 | "$$\sqrt{\frac{1}{N} \sum_{i=1}^N (x_i - \overline{x})^2}$$" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "distribution = np.random.normal(0.75,size=1000)\n", 126 | "\n", 127 | "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "scrolled": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "np.std(distribution)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "import scipy.stats as stats\n", 151 | "stats.kurtosis(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "stats.skew(distribution)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 
null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 174 | "stats.skew(chi_squared_df2)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 186 | "stats.skew(chi_squared_df5)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "%matplotlib inline\n", 198 | "import matplotlib\n", 199 | "import matplotlib.pyplot as plt\n", 200 | "\n", 201 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 202 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 203 | "plt.legend(loc='upper right')\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "# Hypothesis Testing" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "df = pd.read_csv('grades.csv')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "df.head()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": false 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "len(df)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 255 | "late = df[df['assignment1_submission'] > '2015-12-31']" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early.mean()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "late.mean()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "from scipy import stats\n", 289 | "stats.ttest_ind?" 
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 323 | ] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.5.2" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 0 347 | } 348 | -------------------------------------------------------------------------------- /course1_downloads/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 
2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /course1_downloads/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/gdplev.xls -------------------------------------------------------------------------------- /course1_downloads/log.csv: 
-------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /course1_downloads/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /course1_downloads/olympics.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] 
[Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel 
(ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria 
(SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /course1_downloads/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/scimagojr-3.xlsx --------------------------------------------------------------------------------