├── Class Quizzes
│   └── quiz1a1.pdf
├── ClassNotebooks
│   ├── .ipynb_checkpoints
│   │   ├── Assignment 2-checkpoint.ipynb
│   │   ├── Assignment 2-scratch-checkpoint.ipynb
│   │   ├── ReadingAndWritingCSVfiles-checkpoint.ipynb
│   │   ├── Untitled-checkpoint.ipynb
│   │   ├── Week 1-checkpoint.ipynb
│   │   ├── Week 2-checkpoint.ipynb
│   │   └── Week 4-checkpoint.ipynb
│   ├── Assignment 2-scratch.ipynb
│   ├── Assignment 2.ipynb
│   ├── Assignment 3.ipynb
│   ├── Assignment 4.ipynb
│   ├── City_Zhvi_AllHomes.csv
│   ├── Energy Indicators.xls
│   ├── ReadingAndWritingCSVfiles.ipynb
│   ├── Untitled.ipynb
│   ├── Week 1.ipynb
│   ├── Week 2.ipynb
│   ├── Week 3.ipynb
│   ├── Week 4.ipynb
│   ├── cars.csv
│   ├── census.csv
│   ├── gdplev.xls
│   ├── grades.csv
│   ├── log.csv
│   ├── log.txt
│   ├── mpg.csv
│   ├── olympics.csv
│   ├── requirements.txt
│   ├── scimagojr-3.xlsx
│   ├── university_towns.txt
│   └── world_bank.csv
├── MyNotebooks
│   ├── .ipynb_checkpoints
│   │   ├── Basic-Data-Processing-with-Pandas-checkpoint.ipynb
│   │   ├── Python-Dates-and-Times-checkpoint.ipynb
│   │   └── ReadingAndWritingCSVfiles-checkpoint.ipynb
│   ├── Basic-Data-Processing-with-Pandas.ipynb
│   ├── City_Zhvi_AllHomes.csv
│   ├── Energy Indicators.xls
│   ├── Python-Dates-and-Times.ipynb
│   ├── ReadingAndWritingCSVfiles.ipynb
│   ├── cars.csv
│   ├── census.csv
│   ├── gdplev.xls
│   ├── grades.csv
│   ├── log.csv
│   ├── log.txt
│   ├── mpg.csv
│   ├── olympics.csv
│   ├── scimagojr-3.xlsx
│   ├── university_towns.txt
│   └── world_bank.csv
├── README.md
└── course1_downloads
    ├── .ipynb_checkpoints
    │   ├── Assignment 2-checkpoint.ipynb
    │   └── Week 2-checkpoint.ipynb
    ├── Assignment 2.ipynb
    ├── Assignment 3.ipynb
    ├── Assignment 4.ipynb
    ├── Assignment+2.py
    ├── Assignment+3.py
    ├── Assignment+4.py
    ├── City_Zhvi_AllHomes.csv
    ├── Energy Indicators.xls
    ├── Week 1.ipynb
    ├── Week 2.ipynb
    ├── Week 3.ipynb
    ├── Week 4.ipynb
    ├── cars.csv
    ├── census.csv
    ├── gdplev.xls
    ├── grades.csv
    ├── log.csv
    ├── log.txt
    ├── mpg.csv
    ├── olympics.csv
    ├── scimagojr-3.xlsx
    ├── university_towns.txt
    └── world_bank.csv

/Class Quizzes/quiz1a1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/Class Quizzes/quiz1a1.pdf -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/ReadingAndWritingCSVfiles-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "[{'': '1',\n", 14 | " 'class': 'compact',\n", 15 | " 'cty': '18',\n", 16 | " 'cyl': '4',\n", 17 | " 'displ': '1.8',\n", 18 | " 'drv': 'f',\n", 19 | " 'fl': 'p',\n", 20 | " 'hwy': '29',\n", 21 | " 'manufacturer': 'audi',\n", 22 | " 'model': 'a4',\n", 23 | " 'trans': 'auto(l5)',\n", 24 | " 'year': '1999'},\n", 25 | " {'': '2',\n", 26 | " 'class': 'compact',\n", 27 | " 'cty': '21',\n", 28 | " 'cyl': '4',\n", 29 | " 'displ': '1.8',\n", 30 | " 'drv': 'f',\n", 31 | " 'fl': 'p',\n", 32 | " 'hwy': '29',\n", 33 | " 'manufacturer': 'audi',\n", 34 | " 'model': 'a4',\n", 35 | " 'trans': 'manual(m5)',\n", 36 | " 'year': '1999'},\n", 37 | " {'': '3',\n", 38 | " 'class': 'compact',\n", 39 | " 'cty': '20',\n", 40 | " 'cyl': '4',\n", 41 | " 'displ': '2',\n", 42 | " 'drv': 'f',\n", 43 | " 'fl': 'p',\n", 44 | " 'hwy': '31',\n", 45 | " 'manufacturer': 'audi',\n", 46 | " 'model': 'a4',\n", 47 | " 'trans': 'manual(m6)',\n", 48 | " 'year': '2008'}]" 49 | ] 50 | }, 51 | "execution_count": 1, 52 |
"metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "import csv\n", 58 | "\n", 59 | "# set floatpoint precision for printing to 2\n", 60 | "%precision 2\n", 61 | "\n", 62 | "with open('mpg.csv') as csvfile:\n", 63 | " mpg = list(csv.DictReader(csvfile))\n", 64 | "\n", 65 | "# view first 3 elements of list created from csv file\n", 66 | "mpg[:3]" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Here we can see each element of this list is a car in a dict form, the keys for the dict corresponds to a column in the csv file.\n", 74 | "\n", 75 | "Lets see how many dicts - or cars - we have in our dataset" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "234" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "len(mpg)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We have a dictionary for each of the 234 cars in the dataset.\n", 105 | "\n", 106 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys or _row names_" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "dict_keys(['', 'class', 'model', 'fl', 'cty', 'displ', 'drv', 'hwy', 'cyl', 'manufacturer', 'year', 'trans'])" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "mpg[0].keys()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Here we can see we have class, model, fuel type, cty, mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 136 | "\n", 137 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 138 | "\n", 139 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 140 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234 cars).\n", 141 | "\n", 142 | "So that should look something like: \n", 143 | "sum (each value of cty in dataset mpg) / size (mpg)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "16.86" 157 | ] 158 | }, 159 | "execution_count": 12, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 13, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "23.44" 186 | ] 187 | }, 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Note that the average mpg for highway is significantly better than for city. This makes sense, as anyone who drives knows they get much better gas mileage on the highway than they do in the city." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Py35]", 218 | "language": "python", 219 | "name": "Python [Py35]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.2" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 0 236 | } 237 | -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /ClassNotebooks/.ipynb_checkpoints/Week 4-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook.
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "ename": "ImportError", 30 | "evalue": "No module named 'pandas'", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 36 | "\u001b[0;31mImportError\u001b[0m: No module named 'pandas'" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import pandas as pd\n", 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "np.random.binomial(1, 0.5)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "np.random.binomial(1000, 0.5)/1000" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01/100\n", 76 | "np.random.binomial(100000, chance_of_tornado)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "chance_of_tornado = 0.01\n", 88 | "\n", 89 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 90 | " \n", 91 | "two_days_in_a_row = 0\n", 92 | "for j in range(1,len(tornado_events)):\n", 93 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 94 | " two_days_in_a_row+=1\n", 95 | "\n", 96 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.uniform(0, 1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "np.random.normal(0.75)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Formula for standard deviation:\n", 126 | "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "distribution = np.random.normal(0.75,size=1000)\n", 138 | "\n", 139 |
"np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false, 147 | "scrolled": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.std(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "import scipy.stats as stats\n", 163 | "stats.kurtosis(distribution)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "stats.skew(distribution)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 186 | "stats.skew(chi_squared_df2)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 198 | "stats.skew(chi_squared_df5)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "%matplotlib inline\n", 210 | "import matplotlib\n", 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 214 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 215 | "plt.legend(loc='upper right')\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "# Hypothesis Testing" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "df = pd.read_csv('grades.csv')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "df.head()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "len(df)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 267 | "late = df[df['assignment1_submission'] > '2015-12-31']" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": false 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "early.mean()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "collapsed": false 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "late.mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "from scipy import stats\n", 301 | "stats.ttest_ind?" 
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "anaconda-cloud": {}, 340 | "kernelspec": { 341 | "display_name": "Python [Py35]", 342 | "language": "python", 343 | "name": "Python [Py35]" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.5.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } 361 | -------------------------------------------------------------------------------- /ClassNotebooks/Assignment 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "from scipy.stats import ttest_ind" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Assignment 4 - Hypothesis Testing\n", 32 | "This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff.\n", 33 | "\n", 34 | "Definitions:\n", 35 | "* A _quarter_ is a specific three-month period: Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December.\n", 36 | "* A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth.\n", 37 | "* A _recession bottom_ is the quarter within a recession which had the lowest GDP.\n", 38 | "* A _university town_ is a city which has a high percentage of university students compared to the total population of the city.\n", 39 | "\n", 40 | "**Hypothesis**: University towns have their mean housing prices less affected by recessions.
Run a t-test comparing the ratio of the mean price of houses in university towns between the quarter before the recession starts and the recession bottom. (`price_ratio=quarter_before_recession/recession_bottom`)\n", 41 | "\n", 42 | "The following data files are available for this assignment:\n", 43 | "* From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level.\n", 44 | "* From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```.\n", 45 | "* From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward.\n", 46 | "\n", 47 | "Each function below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "# Use this dictionary to map two letter acronyms to state names\n", 59 | "states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def get_list_of_university_towns():\n", 71 | " '''Returns a DataFrame of towns and the states they are in from the \n", 72 | " university_towns.txt list.
The format of the DataFrame should be:\n", 73 | " DataFrame( [ [\"Michigan\",\"Ann Arbor\"], [\"Michigan\", \"Ypsilanti\"] ], \n", 74 | " columns=[\"State\",\"RegionName\"] )'''\n", 75 | " \n", 76 | " return \"ANSWER\"" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def get_recession_start():\n", 88 | " '''Returns the year and quarter of the recession start time as a \n", 89 | " string value in a format such as 2005q3'''\n", 90 | " \n", 91 | " return \"ANSWER\"" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def get_recession_end():\n", 103 | " '''Returns the year and quarter of the recession end time as a \n", 104 | " string value in a format such as 2005q3'''\n", 105 | " \n", 106 | " return \"ANSWER\"" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def get_recession_bottom():\n", 118 | " '''Returns the year and quarter of the recession bottom time as a \n", 119 | " string value in a format such as 2005q3'''\n", 120 | " \n", 121 | " return \"ANSWER\"" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def convert_housing_data_to_quarters():\n", 133 | " '''Converts the housing data to quarters and returns it as mean \n", 134 | " values in a dataframe. This dataframe should be a dataframe with\n", 135 | " columns for 2000q1 through 2016q3, and should have a multi-index\n", 136 | " in the shape of [\"State\",\"RegionName\"].\n", 137 | " \n", 138 | " Note: Quarters are defined in the assignment description, they are\n", 139 | " not arbitrary three month periods.\n", 140 | " \n", 141 | " The resulting dataframe should have 67 columns, and 10,730 rows.\n", 142 | " '''\n", 143 | " \n", 144 | " return \"ANSWER\"" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def run_ttest():\n", 156 | " '''First creates new data showing the decline or growth of housing prices\n", 157 | " between the recession start and the recession bottom. Then runs a t-test\n", 158 | " comparing the university town values to the non-university town values, \n", 159 | " returning whether the null hypothesis (that the two groups are the same)\n", 160 | " can be rejected, together with the p-value from the test. \n", 161 | " \n", 162 | " Return the tuple (different, p, better) where different=True if the t-test is\n", 163 | " significant at a p<0.01 (we reject the null hypothesis), or different=False \n", 164 | " otherwise (we cannot reject the null hypothesis). The variable p should\n", 165 | " be equal to the exact p value returned from scipy.stats.ttest_ind().
The\n", 166 | " value for better should be either \"university town\" or \"non-university town\"\n", 167 | " depending on which has a lower mean price ratio (which is equivilent to a\n", 168 | " reduced market loss).'''\n", 169 | " \n", 170 | " return \"ANSWER\"" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "coursera": { 176 | "course_slug": "python-data-analysis", 177 | "graded_item_id": "Il9Fx", 178 | "launcher_item_id": "TeDW0", 179 | "part_id": "WGlun" 180 | }, 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.5.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 0 201 | } 202 | -------------------------------------------------------------------------------- /ClassNotebooks/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/Energy Indicators.xls -------------------------------------------------------------------------------- /ClassNotebooks/ReadingAndWritingCSVfiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "[{'': '1',\n", 14 | " 'class': 'compact',\n", 15 | " 'cty': '18',\n", 16 | " 'cyl': '4',\n", 17 | " 'displ': '1.8',\n", 18 | " 'drv': 'f',\n", 19 | " 'fl': 'p',\n", 20 | " 'hwy': '29',\n", 21 | " 'manufacturer': 'audi',\n", 22 | " 'model': 'a4',\n", 23 | " 'trans': 'auto(l5)',\n", 24 | " 'year': '1999'},\n", 25 | " {'': '2',\n", 26 | " 'class': 'compact',\n", 27 | " 'cty': '21',\n", 28 | " 'cyl': '4',\n", 29 | " 'displ': '1.8',\n", 30 | " 'drv': 'f',\n", 31 | " 'fl': 'p',\n", 32 | " 'hwy': '29',\n", 33 | " 'manufacturer': 'audi',\n", 34 | " 'model': 'a4',\n", 35 | " 'trans': 'manual(m5)',\n", 36 | " 'year': '1999'},\n", 37 | " {'': '3',\n", 38 | " 'class': 'compact',\n", 39 | " 'cty': '20',\n", 40 | " 'cyl': '4',\n", 41 | " 'displ': '2',\n", 42 | " 'drv': 'f',\n", 43 | " 'fl': 'p',\n", 44 | " 'hwy': '31',\n", 45 | " 'manufacturer': 'audi',\n", 46 | " 'model': 'a4',\n", 47 | " 'trans': 'manual(m6)',\n", 48 | " 'year': '2008'}]" 49 | ] 50 | }, 51 | "execution_count": 1, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "import csv\n", 58 | "\n", 59 | "# set floatpoint precision for printing to 2\n", 60 | "%precision 2\n", 61 | "\n", 62 | "with open('mpg.csv') as csvfile:\n", 63 | " mpg = list(csv.DictReader(csvfile))\n", 64 | "\n", 65 | "# view first 3 elements of list created from csv file\n", 66 | "mpg[:3]" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Here we can see each element of this list is a car in a dict form, the keys for the dict corresponds to a column in the csv file.\n", 74 | "\n", 75 | "Lets see how many dicts - or cars - we have in our dataset" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 
86 | "data": { 87 | "text/plain": [ 88 | "234" 89 | ] 90 | }, 91 | "execution_count": 2, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "len(mpg)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We have a dictionary for each of the 234 cars in the dataset.\n", 105 | "\n", 106 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys or _row names_" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "dict_keys(['', 'class', 'model', 'fl', 'cty', 'displ', 'drv', 'hwy', 'cyl', 'manufacturer', 'year', 'trans'])" 120 | ] 121 | }, 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "mpg[0].keys()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Here we can see we have class, model, fuel type, cty, mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 136 | "\n", 137 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. This can easily be found on google when looking at the values is not helping.\n", 138 | "\n", 139 | "Now let's try and find the average city MPG across all cars in our CSV file.\n", 140 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know the set to contain 234 cars).\n", 141 | "\n", 142 | "So that should look something like: \n", 143 | "sum (each value of cty in dataset mpg) / size (mpg)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 12, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "16.86" 157 | ] 158 | }, 159 | "execution_count": 12, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 13, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "23.44" 186 | ] 187 | }, 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "Note that the average mpg for highway is significantly better than for city. This makes sense, as anyone who drives knows they get much better gas milage on the highway that they do in the city." 
202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": true 209 | }, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "anaconda-cloud": {}, 216 | "kernelspec": { 217 | "display_name": "Python [Py35]", 218 | "language": "python", 219 | "name": "Python [Py35]" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.5.2" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 0 236 | } 237 | -------------------------------------------------------------------------------- /ClassNotebooks/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /ClassNotebooks/Week 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "ename": "ImportError", 30 | "evalue": "No module named 'pandas'", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 36 | "\u001b[0;31mImportError\u001b[0m: No module named 'pandas'" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "import pandas as pd\n", 42 | "import numpy as np" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "np.random.binomial(1, 0.5)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "np.random.binomial(1000, 0.5)/1000" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01/100\n", 76 | "np.random.binomial(100000, chance_of_tornado)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | 
"collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "chance_of_tornado = 0.01\n", 88 | "\n", 89 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 90 | " \n", 91 | "two_days_in_a_row = 0\n", 92 | "for j in range(1,len(tornado_events)-1):\n", 93 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 94 | " two_days_in_a_row+=1\n", 95 | "\n", 96 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.uniform(0, 1)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "np.random.normal(0.75)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Formula for standard deviation\n", 126 | "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "distribution = np.random.normal(0.75,size=1000)\n", 138 | "\n", 139 | "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false, 147 | "scrolled": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "np.std(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "import scipy.stats as stats\n", 163 | "stats.kurtosis(distribution)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "stats.skew(distribution)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 186 | "stats.skew(chi_squared_df2)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 198 | "stats.skew(chi_squared_df5)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": false 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "%matplotlib inline\n", 210 | "import matplotlib\n", 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 214 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 215 | "plt.legend(loc='upper right')\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "# Hypothesis Testing" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "df = pd.read_csv('grades.csv')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 
238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "df.head()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "len(df)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 267 | "late = df[df['assignment1_submission'] > '2015-12-31']" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": false 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "early.mean()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "collapsed": false 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "late.mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "from scipy import stats\n", 301 | "stats.ttest_ind?" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": { 330 | "collapsed": false 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "anaconda-cloud": {}, 340 | "kernelspec": { 341 | "display_name": "Python [Py35]", 342 | "language": "python", 343 | "name": "Python [Py35]" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.5.2" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } 361 | -------------------------------------------------------------------------------- /ClassNotebooks/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE 
COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh 
battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /ClassNotebooks/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/gdplev.xls -------------------------------------------------------------------------------- /ClassNotebooks/log.csv: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /ClassNotebooks/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 
1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /ClassNotebooks/olympics.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia 
(GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar 
(QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /ClassNotebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | alabaster==0.7.9 2 | anaconda-clean==1.0 3 | anaconda-client==1.5.1 4 | anaconda-navigator==1.3.1 5 | appnope==0.1.0 6 | appscript==1.0.1 7 | argcomplete==1.0.0 8 | astroid==1.4.7 9 | astropy==1.2.1 10 | Babel==2.3.4 11 | backports.shutil-get-terminal-size==1.0.0 12 | beautifulsoup4==4.5.1 13 | bitarray==0.8.1 14 | blaze==0.10.1 15 | bokeh==0.12.2 16 | boto==2.42.0 17 | Bottleneck==1.1.0 18 | cffi==1.7.0 19 | chest==0.2.3 20 | click==6.6 21 | cloudpickle==0.2.1 22 | clyent==1.2.2 23 | colorama==0.3.7 24 | configobj==5.0.6 25 | contextlib2==0.5.3 
26 | cryptography==1.5 27 | cycler==0.10.0 28 | Cython==0.24.1 29 | cytoolz==0.8.0 30 | dask==0.11.0 31 | datashape==0.5.2 32 | decorator==4.0.10 33 | dill==0.2.5 34 | docutils==0.12 35 | dynd==0.7.3.dev1 36 | et-xmlfile==1.0.1 37 | fastcache==1.0.2 38 | filelock==2.0.6 39 | Flask==0.11.1 40 | Flask-Cors==2.1.2 41 | gevent==1.1.2 42 | greenlet==0.4.10 43 | h5py==2.6.0 44 | HeapDict==1.0.0 45 | idna==2.1 46 | imagesize==0.7.1 47 | ipykernel==4.5.0 48 | ipython==5.1.0 49 | ipython-genutils==0.1.0 50 | ipywidgets==5.2.2 51 | itsdangerous==0.24 52 | jdcal==1.2 53 | jedi==0.9.0 54 | Jinja2==2.8 55 | jsonschema==2.5.1 56 | jupyter==1.0.0 57 | jupyter-client==4.4.0 58 | jupyter-console==5.0.0 59 | jupyter-core==4.2.0 60 | lazy-object-proxy==1.2.1 61 | llvmlite==0.13.0 62 | locket==0.2.0 63 | lxml==3.6.4 64 | MarkupSafe==0.23 65 | matplotlib==1.5.3 66 | mistune==0.7.3 67 | mpmath==0.19 68 | multipledispatch==0.4.8 69 | nb-anacondacloud==1.2.0 70 | nb-conda==2.0.0 71 | nb-conda-kernels==2.0.0 72 | nbconvert==4.2.0 73 | nbformat==4.1.0 74 | nbpresent==3.0.2 75 | networkx==1.11 76 | nltk==3.2.1 77 | nose==1.3.7 78 | notebook==4.2.3 79 | numba==0.28.1 80 | numexpr==2.6.1 81 | numpy==1.11.1 82 | odo==0.5.0 83 | openpyxl==2.3.2 84 | pandas==0.18.1 85 | partd==0.3.6 86 | path.py==0.0.0 87 | pathlib2==2.1.0 88 | patsy==0.4.1 89 | pep8==1.7.0 90 | pexpect==4.0.1 91 | pickleshare==0.7.4 92 | Pillow==3.3.1 93 | pkginfo==1.3.2 94 | ply==3.9 95 | prompt-toolkit==1.0.3 96 | psutil==4.3.1 97 | ptyprocess==0.5.1 98 | py==1.4.31 99 | pyasn1==0.1.9 100 | pycosat==0.6.1 101 | pycparser==2.14 102 | pycrypto==2.6.1 103 | pycurl==7.43.0 104 | pyflakes==1.3.0 105 | Pygments==2.1.3 106 | pylint==1.5.4 107 | pyOpenSSL==16.0.0 108 | pyparsing==2.1.4 109 | pytest==2.9.2 110 | python-dateutil==2.5.3 111 | pytz==2016.6.1 112 | PyYAML==3.12 113 | pyzmq==15.4.0 114 | QtAwesome==0.3.3 115 | qtconsole==4.2.1 116 | QtPy==1.1.2 117 | redis==2.10.5 118 | requests==2.11.1 119 | rope-py3k==0.9.4.post1 120 | scikit-image==0.12.3 121 | scikit-learn==0.17.1 122 | scipy==0.18.1 123 | simplegeneric==0.8.1 124 | singledispatch==3.4.0.3 125 | six==1.10.0 126 | snowballstemmer==1.2.1 127 | sockjs-tornado==1.0.3 128 | Sphinx==1.4.6 129 | spyder==3.0.0 130 | SQLAlchemy==1.0.13 131 | statsmodels==0.6.1 132 | sympy==1.0 133 | tables==3.2.3.1 134 | terminado==0.6 135 | toolz==0.8.0 136 | tornado==4.4.1 137 | traitlets==4.3.0 138 | unicodecsv==0.14.1 139 | wcwidth==0.1.7 140 | Werkzeug==0.11.11 141 | widgetsnbextension==1.2.6 142 | wrapt==1.10.6 143 | xlrd==1.0.0 144 | XlsxWriter==0.9.3 145 | xlwings==0.10.0 146 | xlwt==1.1.2 147 | -------------------------------------------------------------------------------- /ClassNotebooks/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/ClassNotebooks/scimagojr-3.xlsx -------------------------------------------------------------------------------- /MyNotebooks/.ipynb_checkpoints/Python-Dates-and-Times-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Dates and Times" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Dates and times can be stored in many different ways. 
\n", 15 | "The offset from the **Epoch** is one of the most common nethods for storing dates and time. \n", 16 | "The Epoch is January 1, 1970. The measurements is usually the numer of miliseconds since this date. " 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "In Python you can get the current time since the epoch using the time module." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import time as tm" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "1478139144.00778" 48 | ] 49 | }, 50 | "execution_count": 2, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "tm.time()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "You can create a timestamp using the `fromtimestamp()` function on the datetime object" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "import datetime as dt" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "datetime.datetime(2016, 11, 2, 19, 12, 25, 640018)" 88 | ] 89 | }, 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "dtnow = dt.datetime.fromtimestamp(tm.time())\n", 97 | "dtnow" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "The datetime object has attributes to get the representative hour, day, seconds, etc" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "datetime objects allow for simple math using time deltas. \n", 112 | "This allows us to use a date and a time delta to find another date seperated by that delta." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Let's find the date 100 days before today using the `timedelta()` function in the datetime library. 
" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "datetime.timedelta(100)" 133 | ] 134 | }, 135 | "execution_count": 5, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "delta = dt.timedelta(days = 100)\n", 142 | "delta" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "today = dt.date.today()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "datetime.date(2016, 7, 25)" 167 | ] 168 | }, 169 | "execution_count": 7, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "today - delta" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Here we see that 100 days before today - _previously shown at 2016, 11, 2_ - is 2016, 7, 25" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "We can also use conditionals as expected. Are timestamps equal to greater than less than etc, using are known conditional operators. \n", 190 | "For example: Today is certainly greater than 100 days ago we just computed (when measuring time since the Jan 1 1970 epoch)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 8, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "True" 204 | ] 205 | }, 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "today > today-delta" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "anaconda-cloud": {}, 218 | "kernelspec": { 219 | "display_name": "Python [Py35]", 220 | "language": "python", 221 | "name": "Python [Py35]" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.2" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 0 238 | } 239 | -------------------------------------------------------------------------------- /MyNotebooks/.ipynb_checkpoints/ReadingAndWritingCSVfiles-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading and Writing CSV files in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Reading a CSV file\n", 15 | "To open and read a CSV file, we will use the CSV package. \n", 16 | "precision set to 2 allows full floating point math while only printing 2 decimal places for legibility. \n", 17 | "Use `open('fileName')` to open **fileName** _from the current directory_. \n", 18 | "Finally, view the first 3 elements of the file we just loaded." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "[{'': '1',\n", 32 | " 'class': 'compact',\n", 33 | " 'cty': '18',\n", 34 | " 'cyl': '4',\n", 35 | " 'displ': '1.8',\n", 36 | " 'drv': 'f',\n", 37 | " 'fl': 'p',\n", 38 | " 'hwy': '29',\n", 39 | " 'manufacturer': 'audi',\n", 40 | " 'model': 'a4',\n", 41 | " 'trans': 'auto(l5)',\n", 42 | " 'year': '1999'},\n", 43 | " {'': '2',\n", 44 | " 'class': 'compact',\n", 45 | " 'cty': '21',\n", 46 | " 'cyl': '4',\n", 47 | " 'displ': '1.8',\n", 48 | " 'drv': 'f',\n", 49 | " 'fl': 'p',\n", 50 | " 'hwy': '29',\n", 51 | " 'manufacturer': 'audi',\n", 52 | " 'model': 'a4',\n", 53 | " 'trans': 'manual(m5)',\n", 54 | " 'year': '1999'},\n", 55 | " {'': '3',\n", 56 | " 'class': 'compact',\n", 57 | " 'cty': '20',\n", 58 | " 'cyl': '4',\n", 59 | " 'displ': '2',\n", 60 | " 'drv': 'f',\n", 61 | " 'fl': 'p',\n", 62 | " 'hwy': '31',\n", 63 | " 'manufacturer': 'audi',\n", 64 | " 'model': 'a4',\n", 65 | " 'trans': 'manual(m6)',\n", 66 | " 'year': '2008'}]" 67 | ] 68 | }, 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "import csv\n", 76 | "\n", 77 | "# set floatpoint precision for printing to 2\n", 78 | "%precision 2\n", 79 | "\n", 80 | "with open('mpg.csv') as csvfile:\n", 81 | " mpg = list(csv.DictReader(csvfile))\n", 82 | "\n", 83 | "# view first 3 elements of list created from csv file\n", 84 | "mpg[:3]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Here we can see each element of this list is a car in dict form; the keys of the dict correspond to the columns in the csv file.\n", 92 | "\n", 93 | "Let's see how many dicts - or cars - we have in our dataset" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 2, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "234" 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "len(mpg)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "We have a dictionary for each of the 234 cars in the dataset.\n", 123 | "\n", 124 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys, or _column names_" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "dict_keys(['', 'manufacturer', 'class', 'year', 'cyl', 'hwy', 'model', 'fl', 'displ', 'cty', 'drv', 'trans'])" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "mpg[0].keys()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Here we can see we have class, model, fuel type, city mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 154 | "\n", 155 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 156 | "\n", 157 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 158 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234).\n", 159 | "\n", 160 | "So that should look something like: \n", 161 | "sum (each value of cty in dataset mpg) / size (mpg)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "16.86" 175 | ] 176 | }, 177 | "execution_count": 4, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 5, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "23.44" 204 | ] 205 | }, 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "**Note:** the average mpg for highway is significantly better than for city. \n", 220 | "This makes sense, as cars get better gas mileage on the highway than in the city." 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "source": [ 229 | "## Grouping\n", 230 | "#### Find the average city mpg grouped by the number of cylinders a car has." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Sets\n", 238 | "**Sets** are unordered collections with no duplicate entries. \n", 239 | "We can see how many unique values - or _Levels_ - for cylinders the cars in this dataset have by defining a `set()` from the entire list of all cyl values. " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'4', '5', '6', '8'}" 253 | ] 254 | }, 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "cylinders = set(d['cyl'] for d in mpg)\n", 262 | "cylinders" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Here we see there are 4 unique levels for the cyl key: 4, 5, 6, and 8" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Now we can iterate across each of the cylinder levels, \n", 277 | "then iterate over all the dictionaries. \n", 278 | "If the level for the current dictionary matches the current cylinder being calculated, \n", 279 | "add the mpg to that cylinder's level summpg variable and increment the count in order to average the total. \n", 280 | "After going through each dictionary in the CSV, we can compute the average MPG and append it to our list." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]" 294 | ] 295 | }, 296 | "execution_count": 7, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "# create an empty list to store calculations\n", 303 | "CtyMpgByCyl = []\n", 304 | "\n", 305 | "# start with one cylinder level and iterate\n", 306 | "for c in cylinders:\n", 307 | " summpg = 0\n", 308 | " cyltypecount = 0\n", 309 | " \n", 310 | " # through each dictionary, checking for an equal level cyl value\n", 311 | " for d in mpg:\n", 312 | " # if a match is found, add cty to the sum and increase the count to compute the average\n", 313 | " if d['cyl'] == c:\n", 314 | " summpg += float(d['cty'])\n", 315 | " cyltypecount += 1\n", 316 | " # after iterating through all the dictionaries, append MPG calculation and go to the next cylinder level\n", 317 | " CtyMpgByCyl.append((c, summpg / cyltypecount))\n", 318 | "\n", 319 | "CtyMpgByCyl.sort(key=lambda x: x[0])\n", 320 | "CtyMpgByCyl" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "After sorting the list of calculations we see that as the number of cylinders increases, the city miles per gallon, `'cty'`, decreases. \n", 328 | "This makes sense, as we would expect a car with more cylinders to be larger and have poorer city fuel mileage." 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Find the average highway MPG for the different vehicle classes" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "First let's look at the different classes of vehicles in the CSV dataset _mpg_" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 8, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}" 356 | ] 357 | }, 358 | "execution_count": 8, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "vehicleclass = set(d['class'] for d in mpg)\n", 365 | "vehicleclass" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "Similarly, for each vehicle class we iterate through all the dictionaries. \n", 373 | "Each match will add highway mpg to the sum total and increase the count.\n", 374 | "After exhausting all the dictionaries for a given vehicle class, we can compute the average and append it to our list. 
" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 9, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "[('pickup', 16.88),\n", 388 | " ('suv', 18.13),\n", 389 | " ('minivan', 22.36),\n", 390 | " ('2seater', 24.80),\n", 391 | " ('midsize', 27.29),\n", 392 | " ('subcompact', 28.14),\n", 393 | " ('compact', 28.30)]" 394 | ] 395 | }, 396 | "execution_count": 9, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "HwyMpgByClass = []\n", 403 | "\n", 404 | "for v in vehicleclass: # iterate over all the vehicle classes\n", 405 | " summpg = 0\n", 406 | " vclasscount = 0\n", 407 | " for d in mpg: # check each dictionary in the mpg dataset\n", 408 | " if d['class'] == v: # to find a match in class\n", 409 | " summpg += float(d['hwy'])\n", 410 | " vclasscount += 1 # increment the count\n", 411 | " HwyMpgByClass.append((v, summpg / vclasscount)) # append the tuple ('vehicle class', 'avg mpg')\n", 412 | " \n", 413 | "HwyMpgByClass.sort(key=lambda x: x[1]) # this time, sort based on MPG - the second element in each tuple\n", 414 | "HwyMpgByClass" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "Here we have found the pickup to have the worst highway MPG while **the compact has the highest highway MPG.**" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Do not despair or completely write off Python as an inefficient iterator of data for summarization. \n", 429 | "The **Pandas** library will bring in many of the tools and tricks us R thoroughbreds have come to rely upon for speedy exploration and summarization of a dataset with a few quick key strokes - well maybe a few extra, but much better than this spiraling mess." 430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "anaconda-cloud": {}, 435 | "kernelspec": { 436 | "display_name": "Python [Py35]", 437 | "language": "python", 438 | "name": "Python [Py35]" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.5.2" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 0 455 | } 456 | -------------------------------------------------------------------------------- /MyNotebooks/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/Energy Indicators.xls -------------------------------------------------------------------------------- /MyNotebooks/Python-Dates-and-Times.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Dates and Times" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Dates and times can be stored in many different ways. \n", 15 | "The offset from the **Epoch** is one of the most common nethods for storing dates and time. \n", 16 | "The Epoch is January 1, 1970. The measurements is usually the numer of miliseconds since this date. 
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "In Python you can get the current time since the epoch using the time module." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import time as tm" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "ename": "NameError", 46 | "evalue": "name 'tm' is not defined", 47 | "output_type": "error", 48 | "traceback": [ 49 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 50 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 51 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 52 | "\u001b[0;31mNameError\u001b[0m: name 'tm' is not defined" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "tm.time()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "You can create a timestamp using the `fromtimestamp()` function on the datetime object" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "import datetime as dt" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 4, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "datetime.datetime(2016, 11, 2, 19, 12, 25, 640018)" 89 | ] 90 | }, 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "dtnow = dt.datetime.fromtimestamp(tm.time())\n", 98 | "dtnow" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "The datetime object has attributes to get the representative hour, day, seconds, etc" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "datetime objects allow for simple math using time deltas. \n", 113 | "This allows us to use a date and a time delta to find another date seperated by that delta." 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Let's find the date 100 days before today using the `timedelta()` function in the datetime library. 
" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "datetime.timedelta(100)" 134 | ] 135 | }, 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "delta = dt.timedelta(days = 100)\n", 143 | "delta" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 6, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "today = dt.date.today()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "datetime.date(2016, 7, 25)" 168 | ] 169 | }, 170 | "execution_count": 7, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "today - delta" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Here we see that 100 days before today - _previously shown at 2016, 11, 2_ - is 2016, 7, 25" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "We can also use conditionals as expected. Are timestamps equal to greater than less than etc, using are known conditional operators. \n", 191 | "For example: Today is certainly greater than 100 days ago we just computed (when measuring time since the Jan 1 1970 epoch)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 8, 197 | "metadata": { 198 | "collapsed": false 199 | }, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "True" 205 | ] 206 | }, 207 | "execution_count": 8, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "today > today-delta" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "anaconda-cloud": {}, 219 | "kernelspec": { 220 | "display_name": "Python [conda env:py35]", 221 | "language": "python", 222 | "name": "conda-env-py35-py" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.5.2" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 0 239 | } 240 | -------------------------------------------------------------------------------- /MyNotebooks/ReadingAndWritingCSVfiles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reading and Writing CSV files in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Reading a CSV file\n", 15 | "To open and read a CSV file, we will use the CSV package. \n", 16 | "precision set to 2 allows full floating point math while only printing 2 decimal places for legibility. \n", 17 | "Use `open('fileName')` to open **fileName** _from the current directory_. \n", 18 | "Finally, view the first 3 elements of the file we just loaded." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "[{'': '1',\n", 32 | " 'class': 'compact',\n", 33 | " 'cty': '18',\n", 34 | " 'cyl': '4',\n", 35 | " 'displ': '1.8',\n", 36 | " 'drv': 'f',\n", 37 | " 'fl': 'p',\n", 38 | " 'hwy': '29',\n", 39 | " 'manufacturer': 'audi',\n", 40 | " 'model': 'a4',\n", 41 | " 'trans': 'auto(l5)',\n", 42 | " 'year': '1999'},\n", 43 | " {'': '2',\n", 44 | " 'class': 'compact',\n", 45 | " 'cty': '21',\n", 46 | " 'cyl': '4',\n", 47 | " 'displ': '1.8',\n", 48 | " 'drv': 'f',\n", 49 | " 'fl': 'p',\n", 50 | " 'hwy': '29',\n", 51 | " 'manufacturer': 'audi',\n", 52 | " 'model': 'a4',\n", 53 | " 'trans': 'manual(m5)',\n", 54 | " 'year': '1999'},\n", 55 | " {'': '3',\n", 56 | " 'class': 'compact',\n", 57 | " 'cty': '20',\n", 58 | " 'cyl': '4',\n", 59 | " 'displ': '2',\n", 60 | " 'drv': 'f',\n", 61 | " 'fl': 'p',\n", 62 | " 'hwy': '31',\n", 63 | " 'manufacturer': 'audi',\n", 64 | " 'model': 'a4',\n", 65 | " 'trans': 'manual(m6)',\n", 66 | " 'year': '2008'}]" 67 | ] 68 | }, 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "import csv\n", 76 | "\n", 77 | "# set floatpoint precision for printing to 2\n", 78 | "%precision 2\n", 79 | "\n", 80 | "with open('mpg.csv') as csvfile:\n", 81 | " mpg = list(csv.DictReader(csvfile))\n", 82 | "\n", 83 | "# view first 3 elements of list created from csv file\n", 84 | "mpg[:3]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Here we can see each element of this list is a car in dict form; the keys of the dict correspond to the columns in the csv file.\n", 92 | "\n", 93 | "Let's see how many dicts - or cars - we have in our dataset" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 2, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "234" 107 | ] 108 | }, 109 | "execution_count": 2, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "len(mpg)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "We have a dictionary for each of the 234 cars in the dataset.\n", 123 | "\n", 124 | "To extract just the column names, or the keys in each of these dictionaries we can use the `keys()` function on the first element in the mpg dataset since we are assuming each element has the same keys, or _column names_" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "dict_keys(['', 'manufacturer', 'class', 'year', 'cyl', 'hwy', 'model', 'fl', 'displ', 'cty', 'drv', 'trans'])" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "mpg[0].keys()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Here we can see we have class, model, fuel type, city mpg, engine volume, front or rear wheel drive, highway mpg, manufacturer, model, year, and transmission type.\n", 154 | "\n", 155 | "**NOTE:** I happen to know this from working with the same __cars__ dataset in R. 
This can easily be found on Google when looking at the values alone does not help.\n", 156 | "\n", 157 | "Now let's try to find the average city MPG across all cars in our CSV file.\n", 158 | "To do this we want to sum the value for the 'cty' key for each car in the set and divide by the number of cars used (which we already know to be 234).\n", 159 | "\n", 160 | "So that should look something like: \n", 161 | "sum (each value of cty in dataset mpg) / size (mpg)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 4, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "16.86" 175 | ] 176 | }, 177 | "execution_count": 4, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "sum(float(d['cty']) for d in mpg) / len(mpg)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Now let's do the same thing for average hwy mpg across all cars in the dataset" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 5, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "23.44" 204 | ] 205 | }, 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "sum(float(d['hwy']) for d in mpg) / len(mpg)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "**Note:** the average mpg for highway is significantly better than for city. \n", 220 | "This makes sense, as cars get better gas mileage on the highway than in the city." 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "source": [ 229 | "## Grouping\n", 230 | "#### Find the average city mpg grouped by the number of cylinders a car has." 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "### Sets\n", 238 | "**Sets** are unordered collections with no duplicate entries. \n", 239 | "We can see how many unique values - or _Levels_ - for cylinders the cars in this dataset have by defining a `set()` from the entire list of all cyl values. " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "{'4', '5', '6', '8'}" 253 | ] 254 | }, 255 | "execution_count": 6, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "cylinders = set(d['cyl'] for d in mpg)\n", 262 | "cylinders" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Here we see there are 4 unique levels for the cyl key: 4, 5, 6, and 8" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Now we can iterate across each of the cylinder levels, \n", 277 | "then iterate over all the dictionaries. \n", 278 | "If the level for the current dictionary matches the current cylinder being calculated, \n", 279 | "add the mpg to that cylinder's level summpg variable and increment the count in order to average the total. \n", 280 | "After going through each dictionary in the CSV, we can compute the average MPG and append it to our list." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 7, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]" 294 | ] 295 | }, 296 | "execution_count": 7, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "# create an empty list to store calculations\n", 303 | "CtyMpgByCyl = []\n", 304 | "\n", 305 | "# start with one cylinder level and iterate\n", 306 | "for c in cylinders:\n", 307 | " summpg = 0\n", 308 | " cyltypecount = 0\n", 309 | " \n", 310 | " # through each dictionary, checking for an equal level cyl value\n", 311 | " for d in mpg:\n", 312 | " # if a match is found, add cty to the sum and increase the count to compute the average\n", 313 | " if d['cyl'] == c:\n", 314 | " summpg += float(d['cty'])\n", 315 | " cyltypecount += 1\n", 316 | " # after iterating through all the dictionaries, append MPG calculation and go to the next cylinder level\n", 317 | " CtyMpgByCyl.append((c, summpg / cyltypecount))\n", 318 | "\n", 319 | "CtyMpgByCyl.sort(key=lambda x: x[0])\n", 320 | "CtyMpgByCyl" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "After sorting the list of calculations we see that as the number of cylinders increases, the city miles per gallon, `'cty'`, decreases. \n", 328 | "This makes sense, as we would expect a car with more cylinders to be larger and have poorer city fuel mileage." 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "#### Find the average highway MPG for the different vehicle classes" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "First let's look at the different classes of vehicles in the CSV dataset _mpg_" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 8, 348 | "metadata": { 349 | "collapsed": false 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}" 356 | ] 357 | }, 358 | "execution_count": 8, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "vehicleclass = set(d['class'] for d in mpg)\n", 365 | "vehicleclass" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "Similarly, for each vehicle class we iterate through all the dictionaries. \n", 373 | "Each match will add highway mpg to the sum total and increase the count.\n", 374 | "After exhausting all the dictionaries for a given vehicle class, we can compute the average and append it to our list. 
" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 9, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "[('pickup', 16.88),\n", 388 | " ('suv', 18.13),\n", 389 | " ('minivan', 22.36),\n", 390 | " ('2seater', 24.80),\n", 391 | " ('midsize', 27.29),\n", 392 | " ('subcompact', 28.14),\n", 393 | " ('compact', 28.30)]" 394 | ] 395 | }, 396 | "execution_count": 9, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "HwyMpgByClass = []\n", 403 | "\n", 404 | "for v in vehicleclass: # iterate over all the vehicle classes\n", 405 | " summpg = 0\n", 406 | " vclasscount = 0\n", 407 | " for d in mpg: # check each dictionary in the mpg dataset\n", 408 | " if d['class'] == v: # to find a match in class\n", 409 | " summpg += float(d['hwy'])\n", 410 | " vclasscount += 1 # increment the count\n", 411 | " HwyMpgByClass.append((v, summpg / vclasscount)) # append the tuple ('vehicle class', 'avg mpg')\n", 412 | " \n", 413 | "HwyMpgByClass.sort(key=lambda x: x[1]) # this time, sort based on MPG - the second element in each tuple\n", 414 | "HwyMpgByClass" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "Here we have found the pickup to have the worst highway MPG while **the compact has the highest highway MPG.**" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Do not despair or completely write off Python as an inefficient iterator of data for summarization. \n", 429 | "The **Pandas** library will bring in many of the tools and tricks us R thoroughbreds have come to rely upon for speedy exploration and summarization of a dataset with a few quick key strokes - well maybe a few extra, but much better than this spiraling mess." 
430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "anaconda-cloud": {}, 435 | "kernelspec": { 436 | "display_name": "Python [Py35]", 437 | "language": "python", 438 | "name": "Python [Py35]" 439 | }, 440 | "language_info": { 441 | "codemirror_mode": { 442 | "name": "ipython", 443 | "version": 3 444 | }, 445 | "file_extension": ".py", 446 | "mimetype": "text/x-python", 447 | "name": "python", 448 | "nbconvert_exporter": "python", 449 | "pygments_lexer": "ipython3", 450 | "version": "3.5.2" 451 | } 452 | }, 453 | "nbformat": 4, 454 | "nbformat_minor": 0 455 | } 456 | -------------------------------------------------------------------------------- /MyNotebooks/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh 
battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /MyNotebooks/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/gdplev.xls -------------------------------------------------------------------------------- /MyNotebooks/log.csv: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 
1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /MyNotebooks/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /MyNotebooks/mpg.csv: -------------------------------------------------------------------------------- 1 | "","manufacturer","model","displ","year","cyl","trans","drv","cty","hwy","fl","class" 2 | "1","audi","a4",1.8,1999,4,"auto(l5)","f",18,29,"p","compact" 3 | "2","audi","a4",1.8,1999,4,"manual(m5)","f",21,29,"p","compact" 4 | "3","audi","a4",2,2008,4,"manual(m6)","f",20,31,"p","compact" 5 | "4","audi","a4",2,2008,4,"auto(av)","f",21,30,"p","compact" 6 | "5","audi","a4",2.8,1999,6,"auto(l5)","f",16,26,"p","compact" 7 | "6","audi","a4",2.8,1999,6,"manual(m5)","f",18,26,"p","compact" 8 | "7","audi","a4",3.1,2008,6,"auto(av)","f",18,27,"p","compact" 9 | "8","audi","a4 quattro",1.8,1999,4,"manual(m5)","4",18,26,"p","compact" 10 | "9","audi","a4 quattro",1.8,1999,4,"auto(l5)","4",16,25,"p","compact" 11 | "10","audi","a4 quattro",2,2008,4,"manual(m6)","4",20,28,"p","compact" 12 | "11","audi","a4 quattro",2,2008,4,"auto(s6)","4",19,27,"p","compact" 13 | "12","audi","a4 quattro",2.8,1999,6,"auto(l5)","4",15,25,"p","compact" 14 | "13","audi","a4 quattro",2.8,1999,6,"manual(m5)","4",17,25,"p","compact" 15 | "14","audi","a4 quattro",3.1,2008,6,"auto(s6)","4",17,25,"p","compact" 16 | "15","audi","a4 quattro",3.1,2008,6,"manual(m6)","4",15,25,"p","compact" 17 | "16","audi","a6 quattro",2.8,1999,6,"auto(l5)","4",15,24,"p","midsize" 18 | "17","audi","a6 
quattro",3.1,2008,6,"auto(s6)","4",17,25,"p","midsize" 19 | "18","audi","a6 quattro",4.2,2008,8,"auto(s6)","4",16,23,"p","midsize" 20 | "19","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",14,20,"r","suv" 21 | "20","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",11,15,"e","suv" 22 | "21","chevrolet","c1500 suburban 2wd",5.3,2008,8,"auto(l4)","r",14,20,"r","suv" 23 | "22","chevrolet","c1500 suburban 2wd",5.7,1999,8,"auto(l4)","r",13,17,"r","suv" 24 | "23","chevrolet","c1500 suburban 2wd",6,2008,8,"auto(l4)","r",12,17,"r","suv" 25 | "24","chevrolet","corvette",5.7,1999,8,"manual(m6)","r",16,26,"p","2seater" 26 | "25","chevrolet","corvette",5.7,1999,8,"auto(l4)","r",15,23,"p","2seater" 27 | "26","chevrolet","corvette",6.2,2008,8,"manual(m6)","r",16,26,"p","2seater" 28 | "27","chevrolet","corvette",6.2,2008,8,"auto(s6)","r",15,25,"p","2seater" 29 | "28","chevrolet","corvette",7,2008,8,"manual(m6)","r",15,24,"p","2seater" 30 | "29","chevrolet","k1500 tahoe 4wd",5.3,2008,8,"auto(l4)","4",14,19,"r","suv" 31 | "30","chevrolet","k1500 tahoe 4wd",5.3,2008,8,"auto(l4)","4",11,14,"e","suv" 32 | "31","chevrolet","k1500 tahoe 4wd",5.7,1999,8,"auto(l4)","4",11,15,"r","suv" 33 | "32","chevrolet","k1500 tahoe 4wd",6.5,1999,8,"auto(l4)","4",14,17,"d","suv" 34 | "33","chevrolet","malibu",2.4,1999,4,"auto(l4)","f",19,27,"r","midsize" 35 | "34","chevrolet","malibu",2.4,2008,4,"auto(l4)","f",22,30,"r","midsize" 36 | "35","chevrolet","malibu",3.1,1999,6,"auto(l4)","f",18,26,"r","midsize" 37 | "36","chevrolet","malibu",3.5,2008,6,"auto(l4)","f",18,29,"r","midsize" 38 | "37","chevrolet","malibu",3.6,2008,6,"auto(s6)","f",17,26,"r","midsize" 39 | "38","dodge","caravan 2wd",2.4,1999,4,"auto(l3)","f",18,24,"r","minivan" 40 | "39","dodge","caravan 2wd",3,1999,6,"auto(l4)","f",17,24,"r","minivan" 41 | "40","dodge","caravan 2wd",3.3,1999,6,"auto(l4)","f",16,22,"r","minivan" 42 | "41","dodge","caravan 2wd",3.3,1999,6,"auto(l4)","f",16,22,"r","minivan" 43 | "42","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",17,24,"r","minivan" 44 | "43","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",17,24,"r","minivan" 45 | "44","dodge","caravan 2wd",3.3,2008,6,"auto(l4)","f",11,17,"e","minivan" 46 | "45","dodge","caravan 2wd",3.8,1999,6,"auto(l4)","f",15,22,"r","minivan" 47 | "46","dodge","caravan 2wd",3.8,1999,6,"auto(l4)","f",15,21,"r","minivan" 48 | "47","dodge","caravan 2wd",3.8,2008,6,"auto(l6)","f",16,23,"r","minivan" 49 | "48","dodge","caravan 2wd",4,2008,6,"auto(l6)","f",16,23,"r","minivan" 50 | "49","dodge","dakota pickup 4wd",3.7,2008,6,"manual(m6)","4",15,19,"r","pickup" 51 | "50","dodge","dakota pickup 4wd",3.7,2008,6,"auto(l4)","4",14,18,"r","pickup" 52 | "51","dodge","dakota pickup 4wd",3.9,1999,6,"auto(l4)","4",13,17,"r","pickup" 53 | "52","dodge","dakota pickup 4wd",3.9,1999,6,"manual(m5)","4",14,17,"r","pickup" 54 | "53","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","pickup" 55 | "54","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","pickup" 56 | "55","dodge","dakota pickup 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","pickup" 57 | "56","dodge","dakota pickup 4wd",5.2,1999,8,"manual(m5)","4",11,17,"r","pickup" 58 | "57","dodge","dakota pickup 4wd",5.2,1999,8,"auto(l4)","4",11,15,"r","pickup" 59 | "58","dodge","durango 4wd",3.9,1999,6,"auto(l4)","4",13,17,"r","suv" 60 | "59","dodge","durango 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","suv" 61 | "60","dodge","durango 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","suv" 62 | "61","dodge","durango 
4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","suv" 63 | "62","dodge","durango 4wd",5.2,1999,8,"auto(l4)","4",11,16,"r","suv" 64 | "63","dodge","durango 4wd",5.7,2008,8,"auto(l5)","4",13,18,"r","suv" 65 | "64","dodge","durango 4wd",5.9,1999,8,"auto(l4)","4",11,15,"r","suv" 66 | "65","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",12,16,"r","pickup" 67 | "66","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","pickup" 68 | "67","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 69 | "68","dodge","ram 1500 pickup 4wd",4.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 70 | "69","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",12,16,"r","pickup" 71 | "70","dodge","ram 1500 pickup 4wd",4.7,2008,8,"manual(m6)","4",9,12,"e","pickup" 72 | "71","dodge","ram 1500 pickup 4wd",5.2,1999,8,"auto(l4)","4",11,15,"r","pickup" 73 | "72","dodge","ram 1500 pickup 4wd",5.2,1999,8,"manual(m5)","4",11,16,"r","pickup" 74 | "73","dodge","ram 1500 pickup 4wd",5.7,2008,8,"auto(l5)","4",13,17,"r","pickup" 75 | "74","dodge","ram 1500 pickup 4wd",5.9,1999,8,"auto(l4)","4",11,15,"r","pickup" 76 | "75","ford","expedition 2wd",4.6,1999,8,"auto(l4)","r",11,17,"r","suv" 77 | "76","ford","expedition 2wd",5.4,1999,8,"auto(l4)","r",11,17,"r","suv" 78 | "77","ford","expedition 2wd",5.4,2008,8,"auto(l6)","r",12,18,"r","suv" 79 | "78","ford","explorer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 80 | "79","ford","explorer 4wd",4,1999,6,"manual(m5)","4",15,19,"r","suv" 81 | "80","ford","explorer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 82 | "81","ford","explorer 4wd",4,2008,6,"auto(l5)","4",13,19,"r","suv" 83 | "82","ford","explorer 4wd",4.6,2008,8,"auto(l6)","4",13,19,"r","suv" 84 | "83","ford","explorer 4wd",5,1999,8,"auto(l4)","4",13,17,"r","suv" 85 | "84","ford","f150 pickup 4wd",4.2,1999,6,"auto(l4)","4",14,17,"r","pickup" 86 | "85","ford","f150 pickup 4wd",4.2,1999,6,"manual(m5)","4",14,17,"r","pickup" 87 | "86","ford","f150 pickup 4wd",4.6,1999,8,"manual(m5)","4",13,16,"r","pickup" 88 | "87","ford","f150 pickup 4wd",4.6,1999,8,"auto(l4)","4",13,16,"r","pickup" 89 | "88","ford","f150 pickup 4wd",4.6,2008,8,"auto(l4)","4",13,17,"r","pickup" 90 | "89","ford","f150 pickup 4wd",5.4,1999,8,"auto(l4)","4",11,15,"r","pickup" 91 | "90","ford","f150 pickup 4wd",5.4,2008,8,"auto(l4)","4",13,17,"r","pickup" 92 | "91","ford","mustang",3.8,1999,6,"manual(m5)","r",18,26,"r","subcompact" 93 | "92","ford","mustang",3.8,1999,6,"auto(l4)","r",18,25,"r","subcompact" 94 | "93","ford","mustang",4,2008,6,"manual(m5)","r",17,26,"r","subcompact" 95 | "94","ford","mustang",4,2008,6,"auto(l5)","r",16,24,"r","subcompact" 96 | "95","ford","mustang",4.6,1999,8,"auto(l4)","r",15,21,"r","subcompact" 97 | "96","ford","mustang",4.6,1999,8,"manual(m5)","r",15,22,"r","subcompact" 98 | "97","ford","mustang",4.6,2008,8,"manual(m5)","r",15,23,"r","subcompact" 99 | "98","ford","mustang",4.6,2008,8,"auto(l5)","r",15,22,"r","subcompact" 100 | "99","ford","mustang",5.4,2008,8,"manual(m6)","r",14,20,"p","subcompact" 101 | "100","honda","civic",1.6,1999,4,"manual(m5)","f",28,33,"r","subcompact" 102 | "101","honda","civic",1.6,1999,4,"auto(l4)","f",24,32,"r","subcompact" 103 | "102","honda","civic",1.6,1999,4,"manual(m5)","f",25,32,"r","subcompact" 104 | "103","honda","civic",1.6,1999,4,"manual(m5)","f",23,29,"p","subcompact" 105 | "104","honda","civic",1.6,1999,4,"auto(l4)","f",24,32,"r","subcompact" 106 | "105","honda","civic",1.8,2008,4,"manual(m5)","f",26,34,"r","subcompact" 107 | 
"106","honda","civic",1.8,2008,4,"auto(l5)","f",25,36,"r","subcompact" 108 | "107","honda","civic",1.8,2008,4,"auto(l5)","f",24,36,"c","subcompact" 109 | "108","honda","civic",2,2008,4,"manual(m6)","f",21,29,"p","subcompact" 110 | "109","hyundai","sonata",2.4,1999,4,"auto(l4)","f",18,26,"r","midsize" 111 | "110","hyundai","sonata",2.4,1999,4,"manual(m5)","f",18,27,"r","midsize" 112 | "111","hyundai","sonata",2.4,2008,4,"auto(l4)","f",21,30,"r","midsize" 113 | "112","hyundai","sonata",2.4,2008,4,"manual(m5)","f",21,31,"r","midsize" 114 | "113","hyundai","sonata",2.5,1999,6,"auto(l4)","f",18,26,"r","midsize" 115 | "114","hyundai","sonata",2.5,1999,6,"manual(m5)","f",18,26,"r","midsize" 116 | "115","hyundai","sonata",3.3,2008,6,"auto(l5)","f",19,28,"r","midsize" 117 | "116","hyundai","tiburon",2,1999,4,"auto(l4)","f",19,26,"r","subcompact" 118 | "117","hyundai","tiburon",2,1999,4,"manual(m5)","f",19,29,"r","subcompact" 119 | "118","hyundai","tiburon",2,2008,4,"manual(m5)","f",20,28,"r","subcompact" 120 | "119","hyundai","tiburon",2,2008,4,"auto(l4)","f",20,27,"r","subcompact" 121 | "120","hyundai","tiburon",2.7,2008,6,"auto(l4)","f",17,24,"r","subcompact" 122 | "121","hyundai","tiburon",2.7,2008,6,"manual(m6)","f",16,24,"r","subcompact" 123 | "122","hyundai","tiburon",2.7,2008,6,"manual(m5)","f",17,24,"r","subcompact" 124 | "123","jeep","grand cherokee 4wd",3,2008,6,"auto(l5)","4",17,22,"d","suv" 125 | "124","jeep","grand cherokee 4wd",3.7,2008,6,"auto(l5)","4",15,19,"r","suv" 126 | "125","jeep","grand cherokee 4wd",4,1999,6,"auto(l4)","4",15,20,"r","suv" 127 | "126","jeep","grand cherokee 4wd",4.7,1999,8,"auto(l4)","4",14,17,"r","suv" 128 | "127","jeep","grand cherokee 4wd",4.7,2008,8,"auto(l5)","4",9,12,"e","suv" 129 | "128","jeep","grand cherokee 4wd",4.7,2008,8,"auto(l5)","4",14,19,"r","suv" 130 | "129","jeep","grand cherokee 4wd",5.7,2008,8,"auto(l5)","4",13,18,"r","suv" 131 | "130","jeep","grand cherokee 4wd",6.1,2008,8,"auto(l5)","4",11,14,"p","suv" 132 | "131","land rover","range rover",4,1999,8,"auto(l4)","4",11,15,"p","suv" 133 | "132","land rover","range rover",4.2,2008,8,"auto(s6)","4",12,18,"r","suv" 134 | "133","land rover","range rover",4.4,2008,8,"auto(s6)","4",12,18,"r","suv" 135 | "134","land rover","range rover",4.6,1999,8,"auto(l4)","4",11,15,"p","suv" 136 | "135","lincoln","navigator 2wd",5.4,1999,8,"auto(l4)","r",11,17,"r","suv" 137 | "136","lincoln","navigator 2wd",5.4,1999,8,"auto(l4)","r",11,16,"p","suv" 138 | "137","lincoln","navigator 2wd",5.4,2008,8,"auto(l6)","r",12,18,"r","suv" 139 | "138","mercury","mountaineer 4wd",4,1999,6,"auto(l5)","4",14,17,"r","suv" 140 | "139","mercury","mountaineer 4wd",4,2008,6,"auto(l5)","4",13,19,"r","suv" 141 | "140","mercury","mountaineer 4wd",4.6,2008,8,"auto(l6)","4",13,19,"r","suv" 142 | "141","mercury","mountaineer 4wd",5,1999,8,"auto(l4)","4",13,17,"r","suv" 143 | "142","nissan","altima",2.4,1999,4,"manual(m5)","f",21,29,"r","compact" 144 | "143","nissan","altima",2.4,1999,4,"auto(l4)","f",19,27,"r","compact" 145 | "144","nissan","altima",2.5,2008,4,"auto(av)","f",23,31,"r","midsize" 146 | "145","nissan","altima",2.5,2008,4,"manual(m6)","f",23,32,"r","midsize" 147 | "146","nissan","altima",3.5,2008,6,"manual(m6)","f",19,27,"p","midsize" 148 | "147","nissan","altima",3.5,2008,6,"auto(av)","f",19,26,"p","midsize" 149 | "148","nissan","maxima",3,1999,6,"auto(l4)","f",18,26,"r","midsize" 150 | "149","nissan","maxima",3,1999,6,"manual(m5)","f",19,25,"r","midsize" 151 | 
"150","nissan","maxima",3.5,2008,6,"auto(av)","f",19,25,"p","midsize" 152 | "151","nissan","pathfinder 4wd",3.3,1999,6,"auto(l4)","4",14,17,"r","suv" 153 | "152","nissan","pathfinder 4wd",3.3,1999,6,"manual(m5)","4",15,17,"r","suv" 154 | "153","nissan","pathfinder 4wd",4,2008,6,"auto(l5)","4",14,20,"p","suv" 155 | "154","nissan","pathfinder 4wd",5.6,2008,8,"auto(s5)","4",12,18,"p","suv" 156 | "155","pontiac","grand prix",3.1,1999,6,"auto(l4)","f",18,26,"r","midsize" 157 | "156","pontiac","grand prix",3.8,1999,6,"auto(l4)","f",16,26,"p","midsize" 158 | "157","pontiac","grand prix",3.8,1999,6,"auto(l4)","f",17,27,"r","midsize" 159 | "158","pontiac","grand prix",3.8,2008,6,"auto(l4)","f",18,28,"r","midsize" 160 | "159","pontiac","grand prix",5.3,2008,8,"auto(s4)","f",16,25,"p","midsize" 161 | "160","subaru","forester awd",2.5,1999,4,"manual(m5)","4",18,25,"r","suv" 162 | "161","subaru","forester awd",2.5,1999,4,"auto(l4)","4",18,24,"r","suv" 163 | "162","subaru","forester awd",2.5,2008,4,"manual(m5)","4",20,27,"r","suv" 164 | "163","subaru","forester awd",2.5,2008,4,"manual(m5)","4",19,25,"p","suv" 165 | "164","subaru","forester awd",2.5,2008,4,"auto(l4)","4",20,26,"r","suv" 166 | "165","subaru","forester awd",2.5,2008,4,"auto(l4)","4",18,23,"p","suv" 167 | "166","subaru","impreza awd",2.2,1999,4,"auto(l4)","4",21,26,"r","subcompact" 168 | "167","subaru","impreza awd",2.2,1999,4,"manual(m5)","4",19,26,"r","subcompact" 169 | "168","subaru","impreza awd",2.5,1999,4,"manual(m5)","4",19,26,"r","subcompact" 170 | "169","subaru","impreza awd",2.5,1999,4,"auto(l4)","4",19,26,"r","subcompact" 171 | "170","subaru","impreza awd",2.5,2008,4,"auto(s4)","4",20,25,"p","compact" 172 | "171","subaru","impreza awd",2.5,2008,4,"auto(s4)","4",20,27,"r","compact" 173 | "172","subaru","impreza awd",2.5,2008,4,"manual(m5)","4",19,25,"p","compact" 174 | "173","subaru","impreza awd",2.5,2008,4,"manual(m5)","4",20,27,"r","compact" 175 | "174","toyota","4runner 4wd",2.7,1999,4,"manual(m5)","4",15,20,"r","suv" 176 | "175","toyota","4runner 4wd",2.7,1999,4,"auto(l4)","4",16,20,"r","suv" 177 | "176","toyota","4runner 4wd",3.4,1999,6,"auto(l4)","4",15,19,"r","suv" 178 | "177","toyota","4runner 4wd",3.4,1999,6,"manual(m5)","4",15,17,"r","suv" 179 | "178","toyota","4runner 4wd",4,2008,6,"auto(l5)","4",16,20,"r","suv" 180 | "179","toyota","4runner 4wd",4.7,2008,8,"auto(l5)","4",14,17,"r","suv" 181 | "180","toyota","camry",2.2,1999,4,"manual(m5)","f",21,29,"r","midsize" 182 | "181","toyota","camry",2.2,1999,4,"auto(l4)","f",21,27,"r","midsize" 183 | "182","toyota","camry",2.4,2008,4,"manual(m5)","f",21,31,"r","midsize" 184 | "183","toyota","camry",2.4,2008,4,"auto(l5)","f",21,31,"r","midsize" 185 | "184","toyota","camry",3,1999,6,"auto(l4)","f",18,26,"r","midsize" 186 | "185","toyota","camry",3,1999,6,"manual(m5)","f",18,26,"r","midsize" 187 | "186","toyota","camry",3.5,2008,6,"auto(s6)","f",19,28,"r","midsize" 188 | "187","toyota","camry solara",2.2,1999,4,"auto(l4)","f",21,27,"r","compact" 189 | "188","toyota","camry solara",2.2,1999,4,"manual(m5)","f",21,29,"r","compact" 190 | "189","toyota","camry solara",2.4,2008,4,"manual(m5)","f",21,31,"r","compact" 191 | "190","toyota","camry solara",2.4,2008,4,"auto(s5)","f",22,31,"r","compact" 192 | "191","toyota","camry solara",3,1999,6,"auto(l4)","f",18,26,"r","compact" 193 | "192","toyota","camry solara",3,1999,6,"manual(m5)","f",18,26,"r","compact" 194 | "193","toyota","camry solara",3.3,2008,6,"auto(s5)","f",18,27,"r","compact" 195 | 
"194","toyota","corolla",1.8,1999,4,"auto(l3)","f",24,30,"r","compact" 196 | "195","toyota","corolla",1.8,1999,4,"auto(l4)","f",24,33,"r","compact" 197 | "196","toyota","corolla",1.8,1999,4,"manual(m5)","f",26,35,"r","compact" 198 | "197","toyota","corolla",1.8,2008,4,"manual(m5)","f",28,37,"r","compact" 199 | "198","toyota","corolla",1.8,2008,4,"auto(l4)","f",26,35,"r","compact" 200 | "199","toyota","land cruiser wagon 4wd",4.7,1999,8,"auto(l4)","4",11,15,"r","suv" 201 | "200","toyota","land cruiser wagon 4wd",5.7,2008,8,"auto(s6)","4",13,18,"r","suv" 202 | "201","toyota","toyota tacoma 4wd",2.7,1999,4,"manual(m5)","4",15,20,"r","pickup" 203 | "202","toyota","toyota tacoma 4wd",2.7,1999,4,"auto(l4)","4",16,20,"r","pickup" 204 | "203","toyota","toyota tacoma 4wd",2.7,2008,4,"manual(m5)","4",17,22,"r","pickup" 205 | "204","toyota","toyota tacoma 4wd",3.4,1999,6,"manual(m5)","4",15,17,"r","pickup" 206 | "205","toyota","toyota tacoma 4wd",3.4,1999,6,"auto(l4)","4",15,19,"r","pickup" 207 | "206","toyota","toyota tacoma 4wd",4,2008,6,"manual(m6)","4",15,18,"r","pickup" 208 | "207","toyota","toyota tacoma 4wd",4,2008,6,"auto(l5)","4",16,20,"r","pickup" 209 | "208","volkswagen","gti",2,1999,4,"manual(m5)","f",21,29,"r","compact" 210 | "209","volkswagen","gti",2,1999,4,"auto(l4)","f",19,26,"r","compact" 211 | "210","volkswagen","gti",2,2008,4,"manual(m6)","f",21,29,"p","compact" 212 | "211","volkswagen","gti",2,2008,4,"auto(s6)","f",22,29,"p","compact" 213 | "212","volkswagen","gti",2.8,1999,6,"manual(m5)","f",17,24,"r","compact" 214 | "213","volkswagen","jetta",1.9,1999,4,"manual(m5)","f",33,44,"d","compact" 215 | "214","volkswagen","jetta",2,1999,4,"manual(m5)","f",21,29,"r","compact" 216 | "215","volkswagen","jetta",2,1999,4,"auto(l4)","f",19,26,"r","compact" 217 | "216","volkswagen","jetta",2,2008,4,"auto(s6)","f",22,29,"p","compact" 218 | "217","volkswagen","jetta",2,2008,4,"manual(m6)","f",21,29,"p","compact" 219 | "218","volkswagen","jetta",2.5,2008,5,"auto(s6)","f",21,29,"r","compact" 220 | "219","volkswagen","jetta",2.5,2008,5,"manual(m5)","f",21,29,"r","compact" 221 | "220","volkswagen","jetta",2.8,1999,6,"auto(l4)","f",16,23,"r","compact" 222 | "221","volkswagen","jetta",2.8,1999,6,"manual(m5)","f",17,24,"r","compact" 223 | "222","volkswagen","new beetle",1.9,1999,4,"manual(m5)","f",35,44,"d","subcompact" 224 | "223","volkswagen","new beetle",1.9,1999,4,"auto(l4)","f",29,41,"d","subcompact" 225 | "224","volkswagen","new beetle",2,1999,4,"manual(m5)","f",21,29,"r","subcompact" 226 | "225","volkswagen","new beetle",2,1999,4,"auto(l4)","f",19,26,"r","subcompact" 227 | "226","volkswagen","new beetle",2.5,2008,5,"manual(m5)","f",20,28,"r","subcompact" 228 | "227","volkswagen","new beetle",2.5,2008,5,"auto(s6)","f",20,29,"r","subcompact" 229 | "228","volkswagen","passat",1.8,1999,4,"manual(m5)","f",21,29,"p","midsize" 230 | "229","volkswagen","passat",1.8,1999,4,"auto(l5)","f",18,29,"p","midsize" 231 | "230","volkswagen","passat",2,2008,4,"auto(s6)","f",19,28,"p","midsize" 232 | "231","volkswagen","passat",2,2008,4,"manual(m6)","f",21,29,"p","midsize" 233 | "232","volkswagen","passat",2.8,1999,6,"auto(l5)","f",16,26,"p","midsize" 234 | "233","volkswagen","passat",2.8,1999,6,"manual(m5)","f",18,26,"p","midsize" 235 | "234","volkswagen","passat",3.6,2008,6,"auto(s6)","f",17,26,"p","midsize" 236 | -------------------------------------------------------------------------------- /MyNotebooks/olympics.csv: -------------------------------------------------------------------------------- 1 | 
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) 
[HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa 
(RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /MyNotebooks/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/MyNotebooks/scimagojr-3.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro-to-Data-Science-in-Python 2 | Repo for the first course of the Applied Data Science with Python Specialization, taught by the University of Michigan and hosted by Coursera 3 | -------------------------------------------------------------------------------- /course1_downloads/Assignment 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Assignment 2 - Pandas Introduction\n", 19 | "All questions are weighted the same in this assignment.\n", 20 | "## Part 1\n", 21 | "The following code loads the olympics dataset (olympics.csv), which was derived from the Wikipedia entry on [All Time Olympic Games Medals](https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table), and does some basic data cleaning. \n", 22 | "\n", 23 | "The columns are organized as # of Summer games, Summer medals, # of Winter games, Winter medals, total # of games, total # of medals. Use this dataset to answer the questions below."
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": false, 31 | "nbgrader": { 32 | "grade": false, 33 | "grade_id": "1", 34 | "locked": false, 35 | "solution": false 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import pandas as pd\n", 41 | "\n", 42 | "df = pd.read_csv('olympics.csv', index_col=0, skiprows=1)\n", 43 | "\n", 44 | "for col in df.columns:\n", 45 | " if col[:2]=='01':\n", 46 | " df.rename(columns={col:'Gold'+col[4:]}, inplace=True)\n", 47 | " if col[:2]=='02':\n", 48 | " df.rename(columns={col:'Silver'+col[4:]}, inplace=True)\n", 49 | " if col[:2]=='03':\n", 50 | " df.rename(columns={col:'Bronze'+col[4:]}, inplace=True)\n", 51 | " if col[:1]=='№':\n", 52 | " df.rename(columns={col:'#'+col[1:]}, inplace=True)\n", 53 | "\n", 54 | "names_ids = df.index.str.split('\\s\\(') # split the index by '('\n", 55 | "\n", 56 | "df.index = names_ids.str[0] # the [0] element is the country name (new index) \n", 57 | "df['ID'] = names_ids.str[1].str[:3] # the [1] element is the abbreviation or ID (take first 3 characters from that)\n", 58 | "\n", 59 | "df = df.drop('Totals')\n", 60 | "df.head()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Question 0 (Example)\n", 68 | "\n", 69 | "What is the first country in df?\n", 70 | "\n", 71 | "*This function should return a Series.*" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "# You should write your whole answer within the function provided. The autograder will call\n", 83 | "# this function and compare the return value against the correct solution value\n", 84 | "def answer_zero():\n", 85 | " # This function returns the row for Afghanistan, which is a Series object. The assignment\n", 86 | " # question description will tell you the general format the autograder is expecting\n", 87 | " return df.iloc[0]\n", 88 | "\n", 89 | "# You can examine what your function returns by calling it in the cell. 
If you have questions\n", 90 | "# about the assignment formats, check out the discussion forums for any FAQs\n", 91 | "answer_zero() " 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Question 1\n", 99 | "Which country has won the most gold medals in summer games?\n", 100 | "\n", 101 | "*This function should return a single string value.*" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false, 109 | "nbgrader": { 110 | "grade": false, 111 | "locked": false, 112 | "solution": false 113 | } 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def answer_one():\n", 118 | " return \"YOUR ANSWER HERE\"" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Question 2\n", 126 | "Which country had the biggest difference between their summer and winter gold medal counts?\n", 127 | "\n", 128 | "*This function should return a single string value.*" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "def answer_two():\n", 140 | " return \"YOUR ANSWER HERE\"" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Question 3\n", 148 | "Which country has the biggest difference between their summer gold medal counts and winter gold medal counts relative to their total gold medal count? \n", 149 | "\n", 150 | "$$\\frac{Summer~Gold - Winter~Gold}{Total~Gold}$$\n", 151 | "\n", 152 | "Only include countries that have won at least 1 gold in both summer and winter.\n", 153 | "\n", 154 | "*This function should return a single string value.*" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "def answer_three():\n", 166 | " return \"YOUR ANSWER HERE\"" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Question 4\n", 174 | "Write a function to update the dataframe to include a new column called \"Points\" which is a weighted value where each gold medal counts for 3 points, silver medals for 2 points, and bronze medals for 1 point. The function should return only the column (a Series object) which you created.\n", 175 | "\n", 176 | "*This function should return a Series named `Points` of length 146*" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "def answer_four():\n", 188 | " return \"YOUR ANSWER HERE\"" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Part 2\n", 196 | "For the next set of questions, we will be using census data from the [United States Census Bureau](http://www.census.gov/popest/data/counties/totals/2015/CO-EST2015-alldata.html). Counties are political and geographic subdivisions of states in the United States. This dataset contains population data for counties and states in the US from 2010 to 2015. [See this document](http://www.census.gov/popest/data/counties/totals/2015/files/CO-EST2015-alldata.pdf) for a description of the variable names.\n", 197 | "\n", 198 | "The census dataset (census.csv) should be loaded as census_df. 
Answer questions using this as appropriate.\n", 199 | "\n", 200 | "### Question 5\n", 201 | "Which state has the most counties in it? (hint: consider the sumlevel key carefully! You'll need this for future questions too...)\n", 202 | "\n", 203 | "*This function should return a single string value.*" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "census_df = pd.read_csv('census.csv')\n", 215 | "census_df.head()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "def answer_five():\n", 227 | " return \"YOUR ANSWER HERE\"" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Question 6\n", 235 | "Only looking at the three most populous counties for each state, what are the three most populous states (in order of highest population to lowest population)?\n", 236 | "\n", 237 | "*This function should return a list of string values.*" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def answer_six():\n", 249 | " return \"YOUR ANSWER HERE\"" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### Question 7\n", 257 | "Which county has had the largest absolute change in population within the period 2010-2015? (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.)\n", 258 | "\n", 259 | "e.g. If County Population in the 5 year period is 100, 120, 80, 105, 100, 130, then its largest change in the period would be |130-80| = 50.\n", 260 | "\n", 261 | "*This function should return a single string value.*" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "def answer_seven():\n", 273 | " return \"YOUR ANSWER HERE\"" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "### Question 8\n", 281 | "In this datafile, the United States is broken up into four regions using the \"REGION\" column. 
\n", 282 | "\n", 283 | "Create a query that finds the counties that belong to regions 1 or 2, whose name starts with 'Washington', and whose POPESTIMATE2015 was greater than their POPESTIMATE2014.\n", 284 | "\n", 285 | "*This function should return a 5x2 DataFrame with the columns = ['STNAME', 'CTYNAME'] and the same index ID as the census_df (sorted ascending by index).*" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": false 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "def answer_eight():\n", 297 | " return \"YOUR ANSWER HERE\"" 298 | ] 299 | } 300 | ], 301 | "metadata": { 302 | "anaconda-cloud": {}, 303 | "coursera": { 304 | "course_slug": "python-data-analysis", 305 | "graded_item_id": "tHmgx", 306 | "launcher_item_id": "Um6Bz", 307 | "part_id": "OQsnr" 308 | }, 309 | "kernelspec": { 310 | "display_name": "Python [default]", 311 | "language": "python", 312 | "name": "python3" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.5.2" 325 | } 326 | }, 327 | "nbformat": 4, 328 | "nbformat_minor": 0 329 | } 330 | -------------------------------------------------------------------------------- /course1_downloads/Assignment 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "from scipy.stats import ttest_ind" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# Assignment 4 - Hypothesis Testing\n", 32 | "This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff.\n", 33 | "\n", 34 | "Definitions:\n", 35 | "* A _quarter_ is a specific three month period, Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December.\n", 36 | "* A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth.\n", 37 | "* A _recession bottom_ is the quarter within a recession which had the lowest GDP.\n", 38 | "* A _university town_ is a city which has a high percentage of university students compared to the total population of the city.\n", 39 | "\n", 40 | "**Hypothesis**: University towns have their mean housing prices less affected by recessions. 
Run a t-test using the ratio of the mean price of houses in university towns in the quarter before the recession starts to the mean price at the recession bottom. (`price_ratio=quarter_before_recession/recession_bottom`)\n", 41 | "\n", 42 | "The following data files are available for this assignment:\n", 43 | "* From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level.\n", 44 | "* From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```.\n", 45 | "* From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward.\n", 46 | "\n", 47 | "Each function in this assignment below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": false 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "# Use this dictionary to map two letter acronyms to state names\n", 59 | "states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def get_list_of_university_towns():\n", 71 | " '''Returns a DataFrame of towns and the states they are in from the \n", 72 | " university_towns.txt list. 
The format of the DataFrame should be:\n", 73 | " DataFrame( [ [\"Michigan\",\"Ann Arbor\"], [\"Michigan\", \"Ypsilanti\"] ], \n", 74 | " columns=[\"State\",\"RegionName\"] )'''\n", 75 | " \n", 76 | " return \"ANSWER\"" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def get_recession_start():\n", 88 | " '''Returns the year and quarter of the recession start time as a \n", 89 | " string value in a format such as 2005q3'''\n", 90 | " \n", 91 | " return \"ANSWER\"" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def get_recession_end():\n", 103 | " '''Returns the year and quarter of the recession end time as a \n", 104 | " string value in a format such as 2005q3'''\n", 105 | " \n", 106 | " return \"ANSWER\"" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def get_recession_bottom():\n", 118 | " '''Returns the year and quarter of the recession bottom time as a \n", 119 | " string value in a format such as 2005q3'''\n", 120 | " \n", 121 | " return \"ANSWER\"" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "def convert_housing_data_to_quarters():\n", 133 | " '''Converts the housing data to quarters and returns it as mean \n", 134 | " values in a dataframe. This dataframe should be a dataframe with\n", 135 | " columns for 2000q1 through 2016q3, and should have a multi-index\n", 136 | " in the shape of [\"State\",\"RegionName\"].\n", 137 | " \n", 138 | " Note: Quarters are defined in the assignment description, they are\n", 139 | " not arbitrary three month periods.\n", 140 | " \n", 141 | " The resulting dataframe should have 67 columns, and 10,730 rows.\n", 142 | " '''\n", 143 | " \n", 144 | " return \"ANSWER\"" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "def run_ttest():\n", 156 | " '''First creates new data showing the decline or growth of housing prices\n", 157 | " between the recession start and the recession bottom. Then runs a t-test\n", 158 | " comparing the university town values to the non-university town values, \n", 159 | " returning whether the alternative hypothesis (that the two groups are different)\n", 160 | " is true or not as well as the p-value of the test. \n", 161 | " \n", 162 | " Return the tuple (different, p, better) where different=True if the t-test is\n", 163 | " True at a p<0.01 (we reject the null hypothesis), or different=False if \n", 164 | " otherwise (we cannot reject the null hypothesis). The variable p should\n", 165 | " be equal to the exact p value returned from scipy.stats.ttest_ind(). 
The\n", 166 | " value for better should be either \"university town\" or \"non-university town\"\n", 167 | " depending on which has a lower mean price ratio (which is equivalent to a\n", 168 | " reduced market loss).'''\n", 169 | " \n", 170 | " return \"ANSWER\"" 171 | ] 172 | } 173 | ], 174 | "metadata": { 175 | "coursera": { 176 | "course_slug": "python-data-analysis", 177 | "graded_item_id": "Il9Fx", 178 | "launcher_item_id": "TeDW0", 179 | "part_id": "WGlun" 180 | }, 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.5.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 0 201 | } 202 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+2.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # # Assignment 2 - Pandas Introduction 11 | # All questions are weighted the same in this assignment. 12 | # ## Part 1 13 | # The following code loads the olympics dataset (olympics.csv), which was derived from the Wikipedia entry on [All Time Olympic Games Medals](https://en.wikipedia.org/wiki/All-time_Olympic_Games_medal_table), and does some basic data cleaning. 14 | # 15 | # The columns are organized as # of Summer games, Summer medals, # of Winter games, Winter medals, total # of games, total # of medals. Use this dataset to answer the questions below. 16 | 17 | # In[ ]: 18 | 19 | import pandas as pd 20 | 21 | df = pd.read_csv('olympics.csv', index_col=0, skiprows=1) 22 | 23 | for col in df.columns: 24 | if col[:2]=='01': 25 | df.rename(columns={col:'Gold'+col[4:]}, inplace=True) 26 | if col[:2]=='02': 27 | df.rename(columns={col:'Silver'+col[4:]}, inplace=True) 28 | if col[:2]=='03': 29 | df.rename(columns={col:'Bronze'+col[4:]}, inplace=True) 30 | if col[:1]=='№': 31 | df.rename(columns={col:'#'+col[1:]}, inplace=True) 32 | 33 | names_ids = df.index.str.split('\s\(') # split the index by '(' 34 | 35 | df.index = names_ids.str[0] # the [0] element is the country name (new index) 36 | df['ID'] = names_ids.str[1].str[:3] # the [1] element is the abbreviation or ID (take first 3 characters from that) 37 | 38 | df = df.drop('Totals') 39 | df.head() 40 | 41 | 42 | # ### Question 0 (Example) 43 | # 44 | # What is the first country in df? 45 | # 46 | # *This function should return a Series.* 47 | 48 | # In[ ]: 49 | 50 | # You should write your whole answer within the function provided. The autograder will call 51 | # this function and compare the return value against the correct solution value 52 | def answer_zero(): 53 | # This function returns the row for Afghanistan, which is a Series object. The assignment 54 | # question description will tell you the general format the autograder is expecting 55 | return df.iloc[0] 56 | 57 | # You can examine what your function returns by calling it in the cell. If you have questions 58 | # about the assignment formats, check out the discussion forums for any FAQs 59 | answer_zero() 60 | 61 | 62 | # ### Question 1 63 | # Which country has won the most gold medals in summer games? 64 | # 65 | # *This function should return a single string value.* 66 | 67 | # In[ ]: 68 | 69 | def answer_one(): 70 | return "YOUR ANSWER HERE" 71 | 72 | 
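# A minimal illustrative sketch (an editor's addition, not part of the original
# assignment): with the renaming loop above, the summer gold counts end up in the
# 'Gold' column, so Question 1 reduces to asking for the index label of that
# column's maximum. The name `example_answer_one` is hypothetical, chosen so the
# graded `answer_one` stub above is not shadowed.
def example_answer_one():
    # idxmax() returns the index label -- here the country name -- of the largest value
    return df['Gold'].idxmax()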
The assignment 54 | # question description will tell you the general format the autograder is expecting 55 | return df.iloc[0] 56 | 57 | # You can examine what your function returns by calling it in the cell. If you have questions 58 | # about the assignment formats, check out the discussion forums for any FAQs 59 | answer_zero() 60 | 61 | 62 | # ### Question 1 63 | # Which country has won the most gold medals in summer games? 64 | # 65 | # *This function should return a single string value.* 66 | 67 | # In[ ]: 68 | 69 | def answer_one(): 70 | return "YOUR ANSWER HERE" 71 | 72 | 73 | # ### Question 2 74 | # Which country had the biggest difference between their summer and winter gold medal counts? 75 | # 76 | # *This function should return a single string value.* 77 | 78 | # In[ ]: 79 | 80 | def answer_two(): 81 | return "YOUR ANSWER HERE" 82 | 83 | 84 | # ### Question 3 85 | # Which country has the biggest difference between their summer gold medal counts and winter gold medal counts relative to their total gold medal count? 86 | # 87 | # $$\frac{Summer~Gold - Winter~Gold}{Total~Gold}$$ 88 | # 89 | # Only include countries that have won at least 1 gold in both summer and winter. 90 | # 91 | # *This function should return a single string value.* 92 | 93 | # In[ ]: 94 | 95 | def answer_three(): 96 | return "YOUR ANSWER HERE" 97 | 98 | 99 | # ### Question 4 100 | # Write a function to update the dataframe to include a new column called "Points" which is a weighted value where each gold medal counts for 3 points, silver medals for 2 points, and bronze medals for 1 point. The function should return only the column (a Series object) which you created. 101 | # 102 | # *This function should return a Series named `Points` of length 146* 103 | 104 | # In[ ]: 105 | 106 | def answer_four(): 107 | return "YOUR ANSWER HERE" 108 | 109 | 110 | # ## Part 2 111 | # For the next set of questions, we will be using census data from the [United States Census Bureau](http://www.census.gov/popest/data/counties/totals/2015/CO-EST2015-alldata.html). Counties are political and geographic subdivisions of states in the United States. This dataset contains population data for counties and states in the US from 2010 to 2015. [See this document](http://www.census.gov/popest/data/counties/totals/2015/files/CO-EST2015-alldata.pdf) for a description of the variable names. 112 | # 113 | # The census dataset (census.csv) should be loaded as census_df. Answer questions using this as appropriate. 114 | # 115 | # ### Question 5 116 | # Which state has the most counties in it? (hint: consider the sumlevel key carefully! You'll need this for future questions too...) 117 | # 118 | # *This function should return a single string value.* 119 | 120 | # In[ ]: 121 | 122 | census_df = pd.read_csv('census.csv') 123 | census_df.head() 124 | 125 | 126 | # In[ ]: 127 | 128 | def answer_five(): 129 | return "YOUR ANSWER HERE" 130 | 131 | 132 | # ### Question 6 133 | # Only looking at the three most populous counties for each state, what are the three most populous states (in order of highest population to lowest population)? 134 | # 135 | # *This function should return a list of string values.* 136 | 137 | # In[ ]: 138 | 139 | def answer_six(): 140 | return "YOUR ANSWER HERE" 141 | 142 | 143 | # ### Question 7 144 | # Which county has had the largest absolute change in population within the period 2010-2015? (Hint: population values are stored in columns POPESTIMATE2010 through POPESTIMATE2015, you need to consider all six columns.) 
145 | # 146 | # e.g. If County Population in the 5 year period is 100, 120, 80, 105, 100, 130, then its largest change in the period would be |130-80| = 50. 147 | # 148 | # *This function should return a single string value.* 149 | 150 | # In[ ]: 151 | 152 | def answer_seven(): 153 | return "YOUR ANSWER HERE" 154 | 155 | 156 | # ### Question 8 157 | # In this datafile, the United States is broken up into four regions using the "REGION" column. 158 | # 159 | # Create a query that finds the counties that belong to regions 1 or 2, whose name starts with 'Washington', and whose POPESTIMATE2015 was greater than their POPESTIMATE2014. 160 | # 161 | # *This function should return a 5x2 DataFrame with the columns = ['STNAME', 'CTYNAME'] and the same index ID as the census_df (sorted ascending by index).* 162 | 163 | # In[ ]: 164 | 165 | def answer_eight(): 166 | return "YOUR ANSWER HERE" 167 | 168 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+3.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.2** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # # Assignment 3 - More Pandas 11 | # All questions are weighted the same in this assignment. This assignment requires more individual learning than the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff. 12 | 13 | # ### Question 1 (20%) 14 | # Load the energy data from the file `Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **energy**. 15 | # 16 | # Keep in mind that this is an Excel file, and not a comma-separated values file. Also, make sure to exclude the footer and header information from the datafile. The first two columns are unnecessary, so you should get rid of them, and you should change the column labels so that the columns are: 17 | # 18 | # `['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']` 19 | # 20 | # Convert `Energy Supply` to gigajoules (there are 1,000,000 gigajoules in a petajoule). For all countries which have missing data (e.g. data with "...") make sure this is reflected as `np.NaN` values. 21 | # 22 | # Rename the following list of countries (for use in later questions): 23 | # 24 | # ```"Republic of Korea": "South Korea", 25 | # "United States of America": "United States", 26 | # "United Kingdom of Great Britain and Northern Ireland": "United Kingdom", 27 | # "China, Hong Kong Special Administrative Region": "Hong Kong"``` 28 | # 29 | # There are also several countries with parentheses in their name. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`. 30 | # 31 | # 
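# A rough sketch (not the graded solution) of the cleaning described above; the
# `skiprows`/`skipfooter` counts here are assumptions about this file's layout:
#
# ```python
# import numpy as np
# import pandas as pd
#
# energy = pd.read_excel('Energy Indicators.xls', header=None, skiprows=18,
#                        skipfooter=38, usecols=[2, 3, 4, 5],
#                        names=['Country', 'Energy Supply',
#                               'Energy Supply per Capita', '% Renewable'])
# energy = energy.replace('...', np.NaN)                # "..." cells become NaN
# energy['Energy Supply'] = energy['Energy Supply'] * 1000000  # petajoules -> gigajoules
# energy['Country'] = (energy['Country']
#                      .str.replace(r'\d+', '', regex=True)       # drop footnote digits
#                      .str.replace(r' \(.*\)', '', regex=True)   # drop parentheticals
#                      .str.strip())
# energy['Country'] = energy['Country'].replace({
#     'Republic of Korea': 'South Korea',
#     'United States of America': 'United States',
#     'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
#     'China, Hong Kong Special Administrative Region': 'Hong Kong'})
# ```
#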
32 | # 33 | # Next, load the GDP data from the file `world_bank.csv`, which is a CSV containing countries' GDP from 1960 to 2015 from the [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. 34 | # 35 | # Make sure to skip the header, and rename the following list of countries: 36 | # 37 | # ```"Korea, Rep.": "South Korea", 38 | # "Iran, Islamic Rep.": "Iran", 39 | # "Hong Kong SAR, China": "Hong Kong"``` 40 | # 41 | # 
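# A minimal sketch of this load, assuming the standard World Bank export carries
# four metadata rows above the header (an assumption about this particular download):
#
# ```python
# GDP = pd.read_csv('world_bank.csv', skiprows=4)
# GDP = GDP.rename(columns={'Country Name': 'Country'})
# GDP['Country'] = GDP['Country'].replace({
#     'Korea, Rep.': 'South Korea',
#     'Iran, Islamic Rep.': 'Iran',
#     'Hong Kong SAR, China': 'Hong Kong'})
# ```
#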
42 | # 43 | # Finally, load the [Scimago Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. Call this DataFrame **ScimEn**. 44 | # 45 | # Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). 46 | # 47 | # The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', 48 | # 'Citations per document', 'H index', 'Energy Supply', 49 | # 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008', 50 | # '2009', '2010', '2011', '2012', '2013', '2014', '2015']. 51 | # 52 | # *This function should return a DataFrame with 20 columns and 15 entries.* 53 | 54 | # In[ ]: 55 | 56 | def answer_one(): 57 | return "ANSWER" 58 | 59 | 60 | # ### Question 2 (6.6%) 61 | # The previous question joined three datasets then reduced this to just the top 15 entries. When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose? 62 | # 63 | # *This function should return a single number.* 64 | 65 | # In[1]: 66 | 67 | get_ipython().run_cell_magic('HTML', '', '\n \n \n \n \n Everything but this!\n') 68 | 69 | 70 | # In[ ]: 71 | 72 | def answer_two(): 73 | return "ANSWER" 74 | 75 | 76 | # ### Question 3 (6.6%) 77 | # What are the top 15 countries for average GDP over the last 10 years? 78 | # 79 | # *This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.* 80 | 81 | # In[ ]: 82 | 83 | def answer_three(): 84 | Top15 = answer_one() 85 | return "ANSWER" 86 | 87 | 88 | # ### Question 4 (6.6%) 89 | # By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP? 90 | # 91 | # *This function should return a single number.* 92 | 93 | # In[ ]: 94 | 95 | def answer_four(): 96 | Top15 = answer_one() 97 | return "ANSWER" 98 | 99 | 100 | # ### Question 5 (6.6%) 101 | # What is the mean energy supply per capita? 102 | # 103 | # *This function should return a single number.* 104 | 105 | # In[ ]: 106 | 107 | def answer_five(): 108 | Top15 = answer_one() 109 | return "ANSWER" 110 | 111 | 112 | # ### Question 6 (6.6%) 113 | # What country has the maximum % Renewable and what is the percentage? 114 | # 115 | # *This function should return a tuple with the name of the country and the percentage.* 116 | 117 | # In[ ]: 118 | 119 | def answer_six(): 120 | Top15 = answer_one() 121 | return "ANSWER" 122 | 123 | 124 | # ### Question 7 (6.6%) 125 | # Create a new column that is the ratio of Self-Citations to Total Citations. 126 | # What is the maximum value for this new column, and what country has the highest ratio? 127 | # 128 | # *This function should return a tuple with the name of the country and the ratio.* 129 | 130 | # In[ ]: 131 | 132 | def answer_seven(): 133 | Top15 = answer_one() 134 | return "ANSWER" 135 | 136 | 137 | # ### Question 8 (6.6%) 138 | # 139 | # Create a column that estimates the population using Energy Supply and Energy Supply per capita. 140 | # What is the third most populous country according to this estimate? 
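# As the plotting helper `plot9()` further below also computes, the estimate
# divides total energy supply by per-capita supply. A sketch (assuming
# `answer_one()` has been implemented):
#
# ```python
# Top15 = answer_one()
# Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']
# third_most_populous = Top15['PopEst'].nlargest(3).index[-1]
# ```
#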
141 | # 142 | # *This function should return a single string value.* 143 | 144 | # In[ ]: 145 | 146 | def answer_eight(): 147 | Top15 = answer_one() 148 | return "ANSWER" 149 | 150 | 151 | # ### Question 9 152 | # Create a column that estimates the number of citable documents per person. 153 | # What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method (Pearson's correlation). 154 | # 155 | # *This function should return a single number.* 156 | # 157 | # *(Optional: Use the function `plot9()` defined below to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)* 158 | 159 | # In[ ]: 160 | 161 | def answer_nine(): 162 | Top15 = answer_one() 163 | return "ANSWER" 164 | 165 | 166 | # In[ ]: 167 | 168 | def plot9(): 169 | import matplotlib.pyplot as plt 170 | get_ipython().magic('matplotlib inline') 171 | 172 | Top15 = answer_one() 173 | Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita'] 174 | Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst'] 175 | Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006]) 176 | 177 | 178 | # In[ ]: 179 | 180 | #plot9() # Be sure to comment out plot9() before submitting the assignment! 181 | 182 | 183 | # ### Question 10 (6.6%) 184 | # Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median. 185 | # 186 | # *This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.* 187 | 188 | # In[ ]: 189 | 190 | def answer_ten(): 191 | Top15 = answer_one() 192 | return "ANSWER" 193 | 194 | 195 | # ### Question 11 (6.6%) 196 | # Use the following dictionary to group the Countries by Continent, then create a DataFrame that displays the sample size (the number of countries in each continent bin), and the sum, mean, and standard deviation of the estimated population of the countries in each continent. 197 | # 198 | # ```python 199 | # ContinentDict = {'China':'Asia', 200 | # 'United States':'North America', 201 | # 'Japan':'Asia', 202 | # 'United Kingdom':'Europe', 203 | # 'Russian Federation':'Europe', 204 | # 'Canada':'North America', 205 | # 'Germany':'Europe', 206 | # 'India':'Asia', 207 | # 'France':'Europe', 208 | # 'South Korea':'Asia', 209 | # 'Italy':'Europe', 210 | # 'Spain':'Europe', 211 | # 'Iran':'Asia', 212 | # 'Australia':'Australia', 213 | # 'Brazil':'South America'} 214 | # ``` 215 | # 216 | # *This function should return a DataFrame with index named Continent `['Asia', 'Australia', 'Europe', 'North America', 'South America']` and columns `['size', 'sum', 'mean', 'std']`* 217 | 218 | # In[ ]: 219 | 220 | def answer_eleven(): 221 | Top15 = answer_one() 222 | return "ANSWER" 223 | 224 | 225 | # ### Question 12 (6.6%) 226 | # Cut % Renewable into 5 bins. Group Top15 by the Continent, as well as these new % Renewable bins. How many countries are in each of these groups? 227 | # 228 | # *This function should return a Series with a MultiIndex of `Continent`, then the bins for `% Renewable`. Do not include groups with no countries.* 229 | 230 | # In[ ]: 231 | 232 | def answer_twelve(): 233 | Top15 = answer_one() 234 | return "ANSWER" 235 | 236 | 237 | # ### Question 13 (6.6%) 238 | # Convert the Population Estimate series to a string with thousands separator (using commas). 
Use all significant digits (do not round the results). 239 | # 240 | # e.g. 12345678.90 -> 12,345,678.90 241 | # 242 | # *This function should return a Series `PopEst` whose index is the country name and whose values are the population estimate string.* 243 | 244 | # In[ ]: 245 | 246 | def answer_thirteen(): 247 | Top15 = answer_one() 248 | return "ANSWER" 249 | 250 | 251 | # ### Optional 252 | # 253 | # Use the function `plot_optional()` below to see an example visualization. 254 | 255 | # In[ ]: 256 | 257 | def plot_optional(): 258 | import matplotlib.pyplot as plt 259 | get_ipython().magic('matplotlib inline') 260 | Top15 = answer_one() 261 | ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', 262 | c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c', 263 | '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], 264 | xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]); 265 | 266 | for i, txt in enumerate(Top15.index): 267 | ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center') 268 | 269 | print("This is an example of a visualization that can be created to help understand the data. This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' 2014 GDP, and the color corresponds to the continent.") 270 | 271 | 272 | # In[ ]: 273 | 274 | #plot_optional() # Be sure to comment out plot_optional() before submitting the assignment! 275 | 276 | -------------------------------------------------------------------------------- /course1_downloads/Assignment+4.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | # --- 5 | # 6 | # _You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._ 7 | # 8 | # --- 9 | 10 | # In[ ]: 11 | 12 | import pandas as pd 13 | import numpy as np 14 | from scipy.stats import ttest_ind 15 | 16 | 17 | # # Assignment 4 - Hypothesis Testing 18 | # This assignment requires more individual learning than previous assignments - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. And of course, the discussion forums are open for interaction with your peers and the course staff. 19 | # 20 | # Definitions: 21 | # * A _quarter_ is a specific three month period, Q1 is January through March, Q2 is April through June, Q3 is July through September, Q4 is October through December. 22 | # * A _recession_ is defined as starting with two consecutive quarters of GDP decline, and ending with two consecutive quarters of GDP growth. 23 | # * A _recession bottom_ is the quarter within a recession which had the lowest GDP. 24 | # * A _university town_ is a city which has a high percentage of university students compared to the total population of the city. 25 | # 26 | # **Hypothesis**: University towns have their mean housing prices less affected by recessions. Run a t-test to compare the ratio of the mean price of houses in university towns the quarter before the recession starts compared to the recession bottom. 
(`price_ratio=quarter_before_recession/recession_bottom`) 27 | # 28 | # The following data files are available for this assignment: 29 | # * From the [Zillow research data site](http://www.zillow.com/research/data/) there is housing data for the United States. In particular the datafile for [all homes at a city level](http://files.zillowstatic.com/research/public/City/City_Zhvi_AllHomes.csv), ```City_Zhvi_AllHomes.csv```, has median home sale prices at a fine-grained level. 30 | # * From the Wikipedia page on college towns is a list of [university towns in the United States](https://en.wikipedia.org/wiki/List_of_college_towns#College_towns_in_the_United_States) which has been copied and pasted into the file ```university_towns.txt```. 31 | # * From the Bureau of Economic Analysis, US Department of Commerce, the [GDP over time](http://www.bea.gov/national/index.htm#gdp) of the United States in current dollars (use the chained value in 2009 dollars), in quarterly intervals, in the file ```gdplev.xls```. For this assignment, only look at GDP data from the first quarter of 2000 onward. 32 | # 33 | # Each function in this assignment below is worth 10%, with the exception of ```run_ttest()```, which is worth 50%. 34 | 35 | # In[ ]: 36 | 37 | # Use this dictionary to map two-letter state acronyms to full state names 38 | states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'} 39 | 40 | 41 | # In[ ]: 42 | 43 | def get_list_of_university_towns(): 44 | '''Returns a DataFrame of towns and the states they are in from the 45 | university_towns.txt list. The format of the DataFrame should be: 46 | DataFrame( [ ["Michigan","Ann Arbor"], ["Michigan", "Ypsilanti"] ], 47 | columns=["State","RegionName"] )''' 48 | 49 | return "ANSWER" 50 | 51 | 52 | # In[ ]: 53 | 54 | def get_recession_start(): 55 | '''Returns the year and quarter of the recession start time as a 56 | string value in a format such as 2005q3''' 57 | 58 | return "ANSWER" 59 | 60 | 61 | # In[ ]: 62 | 63 | def get_recession_end(): 64 | '''Returns the year and quarter of the recession end time as a 65 | string value in a format such as 2005q3''' 66 | 67 | return "ANSWER" 68 | 69 | 70 | # In[ ]: 71 | 72 | def get_recession_bottom(): 73 | '''Returns the year and quarter of the recession bottom time as a 74 | string value in a format such as 2005q3''' 75 | 76 | return "ANSWER" 77 | 78 | 79 | # In[ ]: 80 | 81 | def convert_housing_data_to_quarters(): 82 | '''Converts the housing data to quarters and returns it as mean 83 | values in a dataframe. 
This dataframe should be a dataframe with 84 | columns for 2000q1 through 2016q3, and should have a multi-index 85 | in the shape of ["State","RegionName"]. 86 | 87 | Note: Quarters are defined in the assignment description, they are 88 | not arbitrary three month periods. 89 | 90 | The resulting dataframe should have 67 columns, and 10,730 rows. 91 | ''' 92 | 93 | return "ANSWER" 94 | 95 | 96 | # In[ ]: 97 | 98 | def run_ttest(): 99 | '''First creates new data showing the decline or growth of housing prices 100 | between the recession start and the recession bottom. Then runs a t-test 101 | comparing the university town values to the non-university town values, 102 | returning whether the alternative hypothesis (that the two groups are different) 103 | can be supported or not, as well as the p-value of the test. 104 | 105 | Return the tuple (different, p, better) where different=True if the t-test is 106 | True at a p<0.01 (we reject the null hypothesis), or different=False if 107 | otherwise (we cannot reject the null hypothesis). The variable p should 108 | be equal to the exact p value returned from scipy.stats.ttest_ind(). The 109 | value for better should be either "university town" or "non-university town" 110 | depending on which has a lower mean price ratio (which is equivalent to a 111 | reduced market loss).''' 112 | 113 | return "ANSWER" 114 | 115 | -------------------------------------------------------------------------------- /course1_downloads/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/Energy Indicators.xls -------------------------------------------------------------------------------- /course1_downloads/Week 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "\n", 9 | "_You are currently looking at **version 1.0** of this notebook. 
To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n", 10 | "\n", 11 | "---" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Distributions in Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "np.random.binomial(1, 0.5)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "np.random.binomial(1000, 0.5)/1000" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "chance_of_tornado = 0.01/100\n", 64 | "np.random.binomial(100000, chance_of_tornado)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "chance_of_tornado = 0.01\n", 76 | "\n", 77 | "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n", 78 | " \n", 79 | "two_days_in_a_row = 0\n", 80 | "for j in range(1,len(tornado_events)):  # check every consecutive pair of days\n", 81 | " if tornado_events[j]==1 and tornado_events[j-1]==1:\n", 82 | " two_days_in_a_row+=1\n", 83 | "\n", 84 | "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "np.random.uniform(0, 1)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "np.random.normal(0.75)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Formula for standard deviation\n", 114 | "$$\sqrt{\frac{1}{N} \sum_{i=1}^N (x_i - \overline{x})^2}$$" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "distribution = np.random.normal(0.75,size=1000)\n", 126 | "\n", 127 | "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false, 135 | "scrolled": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "np.std(distribution)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "import scipy.stats as stats\n", 151 | "stats.kurtosis(distribution)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "stats.skew(distribution)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 
null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "chi_squared_df2 = np.random.chisquare(2, size=10000)\n", 174 | "stats.skew(chi_squared_df2)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "chi_squared_df5 = np.random.chisquare(5, size=10000)\n", 186 | "stats.skew(chi_squared_df5)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "%matplotlib inline\n", 198 | "import matplotlib\n", 199 | "import matplotlib.pyplot as plt\n", 200 | "\n", 201 | "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n", 202 | " label=['2 degrees of freedom','5 degrees of freedom'])\n", 203 | "plt.legend(loc='upper right')\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "# Hypothesis Testing" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": { 217 | "collapsed": false 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "df = pd.read_csv('grades.csv')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "df.head()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": { 239 | "collapsed": false 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "len(df)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": false 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "early = df[df['assignment1_submission'] <= '2015-12-31']\n", 255 | "late = df[df['assignment1_submission'] > '2015-12-31']" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "early.mean()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "collapsed": false 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "late.mean()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "from scipy import stats\n", 289 | "stats.ttest_ind?" 
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])" 323 | ] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.5.2" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 0 347 | } 348 | -------------------------------------------------------------------------------- /course1_downloads/cars.csv: -------------------------------------------------------------------------------- 1 | YEAR,Make,Model,Size,(kW),Unnamed: 5,TYPE,CITY (kWh/100 km),HWY (kWh/100 km),COMB (kWh/100 km),CITY (Le/100 km),HWY (Le/100 km),COMB (Le/100 km),(g/km),RATING,(km),TIME (h) 2 | 2012,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 3 | 2012,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 4 | 2013,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 5 | 2013,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 6 | 2013,NISSAN,LEAF,MID-SIZE,80,A1,B,19.3,23.0,21.1,2.2,2.6,2.4,0,n/a,117,7 7 | 2013,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 8 | 2013,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 9 | 2013,TESLA,MODEL S (40 kWh battery),FULL-SIZE,270,A1,B,22.4,21.9,22.2,2.5,2.5,2.5,0,n/a,224,6 10 | 2013,TESLA,MODEL S (60 kWh battery),FULL-SIZE,270,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 11 | 2013,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 12 | 2013,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 13 | 2014,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 14 | 2014,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 15 | 2014,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 16 | 2014,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 17 | 2014,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 18 | 2014,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 19 | 2014,TESLA,MODEL S (60 kWh battery),FULL-SIZE,225,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 20 | 2014,TESLA,MODEL S (85 kWh battery),FULL-SIZE,270,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 21 | 2014,TESLA,MODEL S PERFORMANCE,FULL-SIZE,310,A1,B,23.9,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 22 | 
2015,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,n/a,130,4 23 | 2015,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,n/a,131,7 24 | 2015,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,n/a,122,4 25 | 2015,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,n/a,149,4 26 | 2015,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,n/a,100,7 27 | 2015,NISSAN,LEAF,MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,n/a,135,5 28 | 2015,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 29 | 2015,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,n/a,109,8 30 | 2015,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,n/a,335,10 31 | 2015,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,377,12 32 | 2015,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,n/a,426,12 33 | 2015,TESLA,MODEL S 70D,FULL-SIZE,280,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,n/a,386,12 34 | 2015,TESLA,MODEL S 85D/90D,FULL-SIZE,280,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,n/a,435,12 35 | 2015,TESLA,MODEL S P85D/P90D,FULL-SIZE,515,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,n/a,407,12 36 | 2016,BMW,i3,SUBCOMPACT,125,A1,B,15.2,18.8,16.8,1.7,2.1,1.9,0,10,130,4 37 | 2016,CHEVROLET,SPARK EV,SUBCOMPACT,104,A1,B,16.0,19.6,17.8,1.8,2.2,2.0,0,10,131,7 38 | 2016,FORD,FOCUS ELECTRIC,COMPACT,107,A1,B,19.0,21.1,20.0,2.1,2.4,2.2,0,10,122,4 39 | 2016,KIA,SOUL EV,STATION WAGON - SMALL,81,A1,B,17.5,22.7,19.9,2.0,2.6,2.2,0,10,149,4 40 | 2016,MITSUBISHI,i-MiEV,SUBCOMPACT,49,A1,B,16.9,21.4,18.7,1.9,2.4,2.1,0,10,100,7 41 | 2016,NISSAN,LEAF (24 kWh battery),MID-SIZE,80,A1,B,16.5,20.8,18.4,1.9,2.3,2.1,0,10,135,5 42 | 2016,NISSAN,LEAF (30 kWh battery),MID-SIZE,80,A1,B,17.0,20.7,18.6,1.9,2.3,2.1,0,10,172,6 43 | 2016,SMART,FORTWO ELECTRIC DRIVE CABRIOLET,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 44 | 2016,SMART,FORTWO ELECTRIC DRIVE COUPE,TWO-SEATER,35,A1,B,17.2,22.5,19.6,1.9,2.5,2.2,0,10,109,8 45 | 2016,TESLA,MODEL S (60 kWh battery),FULL-SIZE,283,A1,B,22.2,21.7,21.9,2.5,2.4,2.5,0,10,335,10 46 | 2016,TESLA,MODEL S (70 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,377,12 47 | 2016,TESLA,MODEL S (85/90 kWh battery),FULL-SIZE,283,A1,B,23.8,23.2,23.6,2.7,2.6,2.6,0,10,426,12 48 | 2016,TESLA,MODEL S 70D,FULL-SIZE,386,A1,B,20.8,20.6,20.7,2.3,2.3,2.3,0,10,386,12 49 | 2016,TESLA,MODEL S 85D/90D,FULL-SIZE,386,A1,B,22.0,19.8,21.0,2.5,2.2,2.4,0,10,435,12 50 | 2016,TESLA,MODEL S 90D (Refresh),FULL-SIZE,386,A1,B,20.8,19.7,20.3,2.3,2.2,2.3,0,10,473,12 51 | 2016,TESLA,MODEL S P85D/P90D,FULL-SIZE,568,A1,B,23.4,21.5,22.5,2.6,2.4,2.5,0,10,407,12 52 | 2016,TESLA,MODEL S P90D (Refresh),FULL-SIZE,568,A1,B,22.9,21.0,22.1,2.6,2.4,2.5,0,10,435,12 53 | 2016,TESLA,MODEL X 90D,SUV - STANDARD,386,A1,B,23.2,22.2,22.7,2.6,2.5,2.6,0,10,414,12 54 | 2016,TESLA,MODEL X P90D,SUV - STANDARD,568,A1,B,23.6,23.3,23.5,2.7,2.6,2.6,0,10,402,12 55 | -------------------------------------------------------------------------------- /course1_downloads/gdplev.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/gdplev.xls -------------------------------------------------------------------------------- /course1_downloads/log.csv: 
-------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /course1_downloads/log.txt: -------------------------------------------------------------------------------- 1 | time,user,video,playback position,paused,volume 2 | 1469974424,cheryl,intro.html,5,FALSE,10 3 | 1469974454,cheryl,intro.html,6,, 4 | 1469974544,cheryl,intro.html,9,, 5 | 1469974574,cheryl,intro.html,10,, 6 | 1469977514,bob,intro.html,1,, 7 | 1469977544,bob,intro.html,1,, 8 | 1469977574,bob,intro.html,1,, 9 | 1469977604,bob,intro.html,1,, 10 | 1469974604,cheryl,intro.html,11,, 11 | 1469974694,cheryl,intro.html,14,, 12 | 1469974724,cheryl,intro.html,15,, 13 | 1469974454,sue,advanced.html,24,, 14 | 1469974524,sue,advanced.html,25,, 15 | 1469974424,sue,advanced.html,23,FALSE,10 16 | 1469974554,sue,advanced.html,26,, 17 | 1469974624,sue,advanced.html,27,, 18 | 1469974654,sue,advanced.html,28,,5 19 | 1469974724,sue,advanced.html,29,, 20 | 1469974484,cheryl,intro.html,7,, 21 | 1469974514,cheryl,intro.html,8,, 22 | 1469974754,sue,advanced.html,30,, 23 | 1469974824,sue,advanced.html,31,, 24 | 1469974854,sue,advanced.html,32,, 25 | 1469974924,sue,advanced.html,33,, 26 | 1469977424,bob,intro.html,1,TRUE,10 27 | 1469977454,bob,intro.html,1,, 28 | 1469977484,bob,intro.html,1,, 29 | 1469977634,bob,intro.html,1,, 30 | 1469977664,bob,intro.html,1,, 31 | 1469974634,cheryl,intro.html,12,, 32 | 1469974664,cheryl,intro.html,13,, 33 | 1469977694,bob,intro.html,1,, 34 | 1469977724,bob,intro.html,1,, 35 | -------------------------------------------------------------------------------- /course1_downloads/olympics.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 2 | ,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total 3 | Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2 4 | Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15 5 | Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70 6 | Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12 7 | Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12 8 | Australia (AUS) [AUS] 
[Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480 9 | Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304 10 | Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26 11 | Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12 12 | Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 13 | Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 14 | Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90 15 | Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147 16 | Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1 17 | Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4 18 | Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 19 | Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108 20 | British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2 21 | Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220 22 | Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1 23 | Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5 24 | Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449 25 | Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13 26 | China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526 27 | Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19 28 | Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4 29 | Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1 30 | Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34 31 | Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209 32 | Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1 33 | Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68 34 | Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168 35 | Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180 36 | Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1 37 | Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6 38 | Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2 39 | Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26 40 | Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1 41 | Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40 42 | Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45 43 | Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463 44 | France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780 45 | Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1 46 | Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25 47 | Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782 48 | United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137 49 | East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519 50 | West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243 51 | Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4 52 | Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806 53 | Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111 54 | Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 55 | Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1 56 | Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1 57 | Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2 58 | Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3 59 | Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482 60 | Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4 61 | India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26 62 | Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27 63 | Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60 64 | Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1 65 | Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29 66 | Israel 
(ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7 67 | Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663 68 | Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67 69 | Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443 70 | Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59 71 | Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86 72 | North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49 73 | South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296 74 | Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2 75 | Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3 76 | Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26 77 | Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4 78 | Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9 79 | Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21 80 | Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4 81 | Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1 82 | Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6 83 | Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1 84 | Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62 85 | Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7 86 | Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24 87 | Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1 88 | Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22 89 | Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2 90 | Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4 91 | Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376 92 | Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1 93 | New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100 94 | Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1 95 | Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23 96 | Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477 97 | Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10 98 | Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3 99 | Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1 100 | Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4 101 | Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9 102 | Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291 103 | Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23 104 | Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8 105 | Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4 106 | Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302 107 | Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519 108 | Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8 109 | Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204 110 | Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135 111 | Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3 112 | Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1 113 | Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7 114 | Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9 115 | Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4 116 | Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29 117 | Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34 118 | South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76 119 | Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133 120 | Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2 121 | Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1 122 | Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2 123 | Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627 124 | Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323 125 | Syria 
(SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3 126 | Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21 127 | Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3 128 | Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2 129 | Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24 130 | Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1 131 | Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1 132 | Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18 133 | Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10 134 | Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88 135 | Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7 136 | Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122 137 | United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1 138 | United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681 139 | Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10 140 | Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21 141 | Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12 142 | Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2 143 | Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1 144 | Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87 145 | Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3 146 | Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2 147 | Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8 148 | Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17 149 | Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579 150 | -------------------------------------------------------------------------------- /course1_downloads/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irJERAD/Intro-to-Data-Science-in-Python/950bb9291107265bb66cbde3584ffe52b82ae254/course1_downloads/scimagojr-3.xlsx --------------------------------------------------------------------------------