├── Assignment_2
    ├── assets
    │   └── NIS-PUF17-DUG.pdf
    ├── assignment2.ipynb
    └── .ipynb_checkpoints
    │   └── assignment2-checkpoint.ipynb
├── Assignment_3
    ├── assets
    │   ├── scimagojr-3.xlsx
    │   └── Energy Indicators.xls
    └── assignment3.ipynb
├── README.md
├── Assignment_1
    ├── assets
    │   └── grades.txt
    ├── assignment1.ipynb
    └── .ipynb_checkpoints
    │   └── assignment1-checkpoint.ipynb
└── Assignment_4
    ├── assets
    │   ├── mlb.csv
    │   ├── nba.csv
    │   ├── nhl.csv
    │   └── nfl.csv
    └── assignment4.ipynb


/Assignment_2/assets/NIS-PUF17-DUG.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_2/assets/NIS-PUF17-DUG.pdf


--------------------------------------------------------------------------------
/Assignment_3/assets/scimagojr-3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/scimagojr-3.xlsx


--------------------------------------------------------------------------------
/Assignment_3/assets/Energy Indicators.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/Energy Indicators.xls


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction-to-Data-Science-in-Python
2 | This repository contains the course assignments for Introduction to Data Science in Python, offered on Coursera by the University of Michigan.
3 | 


--------------------------------------------------------------------------------
/Assignment_1/assets/grades.txt:
--------------------------------------------------------------------------------
 1 | Ronald Mayr: A
 2 | Bell Kassulke: B
 3 | Jacqueline Rupp: A 
 4 | Alexander Zeller: C
 5 | Valentina Denk: C 
 6 | Simon Loidl: B 
 7 | Elias Jovanovic: B 
 8 | Stefanie Weninger: A 
 9 | Fabian Peer: C 
10 | Hakim Botros: B
11 | Emilie Lorentsen: B
12 | Herman Karlsen: C
13 | Nathalie Delacruz: C
14 | Casey Hartman: C
15 | Lily Walker : A
16 | Gerard Wang: C
17 | Tony Mcdowell: C
18 | Jake Wood: B
19 | Fatemeh Akhtar: B
20 | Kim Weston: B
21 | Nicholas Beatty: A
22 | Kirsten Williams: C
23 | Vaishali Surana: C
24 | Coby Mccormack: C
25 | Yasmin Dar: B
26 | Romy Donnelly: A
27 | Viswamitra Upandhye: B
28 | Kendrick Hilpert: A
29 | Killian Kaufman: B
30 | Elwood Page: B
31 | Mukti Patel: A
32 | Emily Lesch: C
33 | Elodie Booker: B
34 | Jedd Kim: A
35 | Annabel Davies: A
36 | Adnan Chen: B
37 | Jonathan Berg: C
38 | Hank Spinka: B
39 | Agnes Schneider: C
40 | Kimberly Green: A
41 | Lola-Rose Coates: C
42 | Rose Christiansen: C
43 | Shirley Hintz: C
44 | Hannah Bayer: B
45 | 


--------------------------------------------------------------------------------
/Assignment_4/assets/mlb.csv:
--------------------------------------------------------------------------------
  1 | team,W,L,W-L%,GB,year,League
  2 | Boston Red Sox,108,54,0.667,--,2018,MLB
  3 | New York Yankees,100,62,0.617,8.0,2018,MLB
  4 | Tampa Bay Rays,90,72,0.556,18.0,2018,MLB
  5 | Toronto Blue Jays,73,89,0.451,35.0,2018,MLB
  6 | Baltimore Orioles,47,115,0.29,61.0,2018,MLB
  7 | Cleveland Indians,91,71,0.562,--,2018,MLB
  8 | Minnesota Twins,78,84,0.48100000000000004,13.0,2018,MLB
  9 | Detroit Tigers,64,98,0.395,27.0,2018,MLB
 10 | Chicago White Sox,62,100,0.38299999999999995,29.0,2018,MLB
 11 | Kansas City Royals,58,104,0.358,33.0,2018,MLB
 12 | Houston Astros,103,59,0.636,--,2018,MLB
 13 | Oakland Athletics,97,65,0.599,6.0,2018,MLB
 14 | Seattle Mariners,89,73,0.5489999999999999,14.0,2018,MLB
 15 | Los Angeles Angels,80,82,0.494,23.0,2018,MLB
 16 | Texas Rangers,67,95,0.414,36.0,2018,MLB
 17 | Atlanta Braves,90,72,0.556,--,2018,MLB
 18 | Washington Nationals,82,80,0.506,8.0,2018,MLB
 19 | Philadelphia Phillies,80,82,0.494,10.0,2018,MLB
 20 | New York Mets,77,85,0.475,13.0,2018,MLB
 21 | Miami Marlins,63,98,0.391,26.5,2018,MLB
 22 | Milwaukee Brewers,96,67,0.589,--,2018,MLB
 23 | Chicago Cubs,95,68,0.583,1.0,2018,MLB
 24 | St. Louis Cardinals,88,74,0.5429999999999999,7.5,2018,MLB
 25 | Pittsburgh Pirates,82,79,0.509,13.0,2018,MLB
 26 | Cincinnati Reds,67,95,0.414,28.5,2018,MLB
 27 | Los Angeles Dodgers,92,71,0.564,--,2018,MLB
 28 | Colorado Rockies,91,72,0.5579999999999999,1.0,2018,MLB
 29 | Arizona Diamondbacks,82,80,0.506,9.5,2018,MLB
 30 | San Francisco Giants,73,89,0.451,18.5,2018,MLB
 31 | San Diego Padres,66,96,0.40700000000000003,25.5,2018,MLB
 32 | Boston Red Sox,93,69,0.574,--,2017,MLB
 33 | New York Yankees,91,71,0.562,2.0,2017,MLB
 34 | Tampa Bay Rays,80,82,0.494,13.0,2017,MLB
 35 | Toronto Blue Jays,76,86,0.469,17.0,2017,MLB
 36 | Baltimore Orioles,75,87,0.46299999999999997,18.0,2017,MLB
 37 | Cleveland Indians,102,60,0.63,--,2017,MLB
 38 | Minnesota Twins,85,77,0.525,17.0,2017,MLB
 39 | Kansas City Royals,80,82,0.494,22.0,2017,MLB
 40 | Chicago White Sox,67,95,0.414,35.0,2017,MLB
 41 | Detroit Tigers,64,98,0.395,38.0,2017,MLB
 42 | Houston Astros,101,61,0.623,--,2017,MLB
 43 | Los Angeles Angels,80,82,0.494,21.0,2017,MLB
 44 | Seattle Mariners,78,84,0.48100000000000004,23.0,2017,MLB
 45 | Texas Rangers,78,84,0.48100000000000004,23.0,2017,MLB
 46 | Oakland Athletics,75,87,0.46299999999999997,26.0,2017,MLB
 47 | Washington Nationals,97,65,0.599,--,2017,MLB
 48 | Miami Marlins,77,85,0.475,20.0,2017,MLB
 49 | Atlanta Braves,72,90,0.444,25.0,2017,MLB
 50 | New York Mets,70,92,0.43200000000000005,27.0,2017,MLB
 51 | Philadelphia Phillies,66,96,0.40700000000000003,31.0,2017,MLB
 52 | Chicago Cubs,92,70,0.568,--,2017,MLB
 53 | Milwaukee Brewers,86,76,0.531,6.0,2017,MLB
 54 | St. Louis Cardinals,83,79,0.512,9.0,2017,MLB
 55 | Pittsburgh Pirates,75,87,0.46299999999999997,17.0,2017,MLB
 56 | Cincinnati Reds,68,94,0.42,24.0,2017,MLB
 57 | Los Angeles Dodgers,104,58,0.642,--,2017,MLB
 58 | Arizona Diamondbacks,93,69,0.574,11.0,2017,MLB
 59 | Colorado Rockies,87,75,0.537,17.0,2017,MLB
 60 | San Diego Padres,71,91,0.43799999999999994,33.0,2017,MLB
 61 | San Francisco Giants,64,98,0.395,40.0,2017,MLB
 62 | Boston Red Sox,93,69,0.574,--,2016,MLB
 63 | Baltimore Orioles,89,73,0.5489999999999999,4.0,2016,MLB
 64 | Toronto Blue Jays,89,73,0.5489999999999999,4.0,2016,MLB
 65 | New York Yankees,84,78,0.519,9.0,2016,MLB
 66 | Tampa Bay Rays,68,94,0.42,25.0,2016,MLB
 67 | Cleveland Indians,94,67,0.584,--,2016,MLB
 68 | Detroit Tigers,86,75,0.534,8.0,2016,MLB
 69 | Kansas City Royals,81,81,0.5,13.5,2016,MLB
 70 | Chicago White Sox,78,84,0.48100000000000004,16.5,2016,MLB
 71 | Minnesota Twins,59,103,0.364,35.5,2016,MLB
 72 | Texas Rangers,95,67,0.586,--,2016,MLB
 73 | Seattle Mariners,86,76,0.531,9.0,2016,MLB
 74 | Houston Astros,84,78,0.519,11.0,2016,MLB
 75 | Los Angeles Angels,74,88,0.457,21.0,2016,MLB
 76 | Oakland Athletics,69,93,0.426,26.0,2016,MLB
 77 | Washington Nationals,95,67,0.586,--,2016,MLB
 78 | New York Mets,87,75,0.537,8.0,2016,MLB
 79 | Miami Marlins,79,82,0.491,15.5,2016,MLB
 80 | Philadelphia Phillies,71,91,0.43799999999999994,24.0,2016,MLB
 81 | Atlanta Braves,68,93,0.42200000000000004,26.5,2016,MLB
 82 | Chicago Cubs,103,58,0.64,--,2016,MLB
 83 | St. Louis Cardinals,86,76,0.531,17.5,2016,MLB
 84 | Pittsburgh Pirates,78,83,0.484,25.0,2016,MLB
 85 | Milwaukee Brewers,73,89,0.451,30.5,2016,MLB
 86 | Cincinnati Reds,68,94,0.42,35.5,2016,MLB
 87 | Los Angeles Dodgers,91,71,0.562,--,2016,MLB
 88 | San Francisco Giants,87,75,0.537,4.0,2016,MLB
 89 | Colorado Rockies,75,87,0.46299999999999997,16.0,2016,MLB
 90 | Arizona Diamondbacks,69,93,0.426,22.0,2016,MLB
 91 | San Diego Padres,68,94,0.42,23.0,2016,MLB
 92 | Toronto Blue Jays,93,69,0.574,--,2015,MLB
 93 | New York Yankees,87,75,0.537,6.0,2015,MLB
 94 | Baltimore Orioles,81,81,0.5,12.0,2015,MLB
 95 | Tampa Bay Rays,80,82,0.494,13.0,2015,MLB
 96 | Boston Red Sox,78,84,0.48100000000000004,15.0,2015,MLB
 97 | Kansas City Royals,95,67,0.586,--,2015,MLB
 98 | Minnesota Twins,83,79,0.512,12.0,2015,MLB
 99 | Cleveland Indians,81,80,0.503,13.5,2015,MLB
100 | Chicago White Sox,76,86,0.469,19.0,2015,MLB
101 | Detroit Tigers,74,87,0.46,20.5,2015,MLB
102 | Texas Rangers,88,74,0.5429999999999999,--,2015,MLB
103 | Houston Astros,86,76,0.531,2.0,2015,MLB
104 | Los Angeles Angels of Anaheim,85,77,0.525,3.0,2015,MLB
105 | Seattle Mariners,76,86,0.469,12.0,2015,MLB
106 | Oakland Athletics,68,94,0.42,20.0,2015,MLB
107 | New York Mets,90,72,0.556,--,2015,MLB
108 | Washington Nationals,83,79,0.512,7.0,2015,MLB
109 | Miami Marlins,71,91,0.43799999999999994,19.0,2015,MLB
110 | Atlanta Braves,67,95,0.414,23.0,2015,MLB
111 | Philadelphia Phillies,63,99,0.389,27.0,2015,MLB
112 | St. Louis Cardinals,100,62,0.617,--,2015,MLB
113 | Pittsburgh Pirates,98,64,0.605,2.0,2015,MLB
114 | Chicago Cubs,97,65,0.599,3.0,2015,MLB
115 | Milwaukee Brewers,68,94,0.42,32.0,2015,MLB
116 | Cincinnati Reds,64,98,0.395,36.0,2015,MLB
117 | Los Angeles Dodgers,92,70,0.568,--,2015,MLB
118 | San Francisco Giants,84,78,0.519,8.0,2015,MLB
119 | Arizona Diamondbacks,79,83,0.488,13.0,2015,MLB
120 | San Diego Padres,74,88,0.457,18.0,2015,MLB
121 | Colorado Rockies,68,94,0.42,24.0,2015,MLB
122 | Baltimore Orioles,96,66,0.593,--,2014,MLB
123 | New York Yankees,84,78,0.519,12.0,2014,MLB
124 | Toronto Blue Jays,83,79,0.512,13.0,2014,MLB
125 | Tampa Bay Rays,77,85,0.475,19.0,2014,MLB
126 | Boston Red Sox,71,91,0.43799999999999994,25.0,2014,MLB
127 | Detroit Tigers,90,72,0.556,--,2014,MLB
128 | Kansas City Royals,89,73,0.5489999999999999,1.0,2014,MLB
129 | Cleveland Indians,85,77,0.525,5.0,2014,MLB
130 | Chicago White Sox,73,89,0.451,17.0,2014,MLB
131 | Minnesota Twins,70,92,0.43200000000000005,20.0,2014,MLB
132 | Los Angeles Angels of Anaheim,98,64,0.605,--,2014,MLB
133 | Oakland Athletics,88,74,0.5429999999999999,10.0,2014,MLB
134 | Seattle Mariners,87,75,0.537,11.0,2014,MLB
135 | Houston Astros,70,92,0.43200000000000005,28.0,2014,MLB
136 | Texas Rangers,67,95,0.414,31.0,2014,MLB
137 | Washington Nationals,96,66,0.593,--,2014,MLB
138 | Atlanta Braves,79,83,0.488,17.0,2014,MLB
139 | New York Mets,79,83,0.488,17.0,2014,MLB
140 | Miami Marlins,77,85,0.475,19.0,2014,MLB
141 | Philadelphia Phillies,73,89,0.451,23.0,2014,MLB
142 | St. Louis Cardinals,90,72,0.556,--,2014,MLB
143 | Pittsburgh Pirates,88,74,0.5429999999999999,2.0,2014,MLB
144 | Milwaukee Brewers,82,80,0.506,8.0,2014,MLB
145 | Cincinnati Reds,76,86,0.469,14.0,2014,MLB
146 | Chicago Cubs,73,89,0.451,17.0,2014,MLB
147 | Los Angeles Dodgers,94,68,0.58,--,2014,MLB
148 | San Francisco Giants,88,74,0.5429999999999999,6.0,2014,MLB
149 | San Diego Padres,77,85,0.475,17.0,2014,MLB
150 | Colorado Rockies,66,96,0.40700000000000003,28.0,2014,MLB
151 | Arizona Diamondbacks,64,98,0.395,30.0,2014,MLB
152 | 


--------------------------------------------------------------------------------
/Assignment_1/assignment1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": false,
  7 |     "editable": false,
  8 |     "nbgrader": {
  9 |      "checksum": "a7460f3e3c42534125a0802936889559",
 10 |      "grade": false,
 11 |      "grade_id": "cell-fa48e7f1b94baa5b",
 12 |      "locked": true,
 13 |      "schema_version": 1,
 14 |      "solution": false
 15 |     }
 16 |    },
 17 |    "source": [
 18 |     "# Assignment 1\n",
  19 |     "For this assignment you are welcome to use other regex resources, such as regex \"cheat sheets\" you find on the web.\n",
 20 |     "\n"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {
 26 |     "deletable": false,
 27 |     "editable": false,
 28 |     "nbgrader": {
 29 |      "checksum": "d17f561e3c6c08092810b982d085f5be",
 30 |      "grade": false,
 31 |      "grade_id": "cell-d4da7eb9acee2a6d",
 32 |      "locked": true,
 33 |      "schema_version": 1,
 34 |      "solution": false
 35 |     }
 36 |    },
 37 |    "source": [
  38 |     "Before you start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. "
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 1,
 44 |    "metadata": {
 45 |     "deletable": false,
 46 |     "editable": false,
 47 |     "nbgrader": {
 48 |      "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1",
 49 |      "grade": false,
 50 |      "grade_id": "cell-4a96535829224b3f",
 51 |      "locked": true,
 52 |      "schema_version": 1,
 53 |      "solution": false
 54 |     }
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "def example_word_count():\n",
 59 |     "    # This example question requires counting words in the example_string below.\n",
 60 |     "    example_string = \"Amy is 5 years old\"\n",
 61 |     "    \n",
 62 |     "    # YOUR CODE HERE.\n",
 63 |     "    # You should write your solution here, and return your result, you can comment out or delete the\n",
 64 |     "    # NotImplementedError below.\n",
 65 |     "    result = example_string.split(\" \")\n",
 66 |     "    return len(result)\n",
 67 |     "\n",
 68 |     "    #raise NotImplementedError()"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "## Part A\n",
 76 |     "\n",
 77 |     "Find a list of all of the names in the following string using regex."
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 2,
 83 |    "metadata": {
 84 |     "deletable": false,
 85 |     "nbgrader": {
 86 |      "checksum": "29bc8c161c0e246c1e3ef4820cc164f7",
 87 |      "grade": false,
 88 |      "grade_id": "names",
 89 |      "locked": false,
 90 |      "schema_version": 1,
 91 |      "solution": true
 92 |     }
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "import re\n",
 97 |     "def names():\n",
 98 |     "    simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. \n",
 99 |     "    Ruth and Peter, their parents, have 3 kids.\"\"\"\n",
100 |     "    \n",
101 |     "    return re.findall('[A-Z][a-z]+', simple_string)\n",
102 |     "\n",
103 |     "    # YOUR CODE HERE\n",
104 |     "    # raise NotImplementedError()"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 3,
110 |    "metadata": {
111 |     "deletable": false,
112 |     "editable": false,
113 |     "nbgrader": {
114 |      "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4",
115 |      "grade": true,
116 |      "grade_id": "correct_names",
117 |      "locked": false,
118 |      "points": 1,
119 |      "schema_version": 1,
120 |      "solution": false
121 |     }
122 |    },
123 |    "outputs": [],
124 |    "source": [
125 |     "assert len(names()) == 4, \"There are four names in the simple_string\"\n"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {
131 |     "deletable": false,
132 |     "editable": false,
133 |     "nbgrader": {
134 |      "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c",
135 |      "grade": false,
136 |      "grade_id": "cell-ed64e3464ddd7ba7",
137 |      "locked": true,
138 |      "schema_version": 1,
139 |      "solution": false
140 |     }
141 |    },
142 |    "source": [
143 |     "## Part B\n",
144 |     "\n",
145 |     "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n",
146 |     "a class. Create a regex to generate a list of just those students who received a B in the course."
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 4,
152 |    "metadata": {
153 |     "deletable": false,
154 |     "nbgrader": {
155 |      "checksum": "e977a1df674e9fa684e6d172aec92824",
156 |      "grade": false,
157 |      "grade_id": "grades",
158 |      "locked": false,
159 |      "schema_version": 1,
160 |      "solution": true
161 |     }
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "import re\n",
166 |     "def grades():\n",
167 |     "    with open (\"assets/grades.txt\", \"r\") as file:\n",
168 |     "        grades = file.read()\n",
169 |     "    return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n",
170 |     "    # YOUR CODE HERE\n",
171 |     "    #raise NotImplementedError()"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 5,
177 |    "metadata": {
178 |     "deletable": false,
179 |     "editable": false,
180 |     "nbgrader": {
181 |      "checksum": "e0bcc452d60fc45259e58d3116d25477",
182 |      "grade": true,
183 |      "grade_id": "correct_grades",
184 |      "locked": false,
185 |      "points": 1,
186 |      "schema_version": 1,
187 |      "solution": false
188 |     }
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "assert len(grades()) == 16\n"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {
198 |     "deletable": false,
199 |     "editable": false,
200 |     "nbgrader": {
201 |      "checksum": "36e3e2a3a3e29fa7b823d22476392320",
202 |      "grade": false,
203 |      "grade_id": "cell-e253518e37d33f0c",
204 |      "locked": true,
205 |      "schema_version": 1,
206 |      "solution": false
207 |     }
208 |    },
209 |    "source": [
210 |     "## Part C\n",
211 |     "\n",
212 |     "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n",
213 |     "* a host (e.g., '146.204.224.152') \n",
214 |     "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! In this case, use '-' as the value for the username.**)\n",
215 |     "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n",
216 |     "* the post request type (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n",
217 |     "\n",
218 |     "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n",
219 |     "```\n",
220 |     "example_dict = {\"host\":\"146.204.224.152\", \n",
221 |     "                \"user_name\":\"feest6811\", \n",
222 |     "                \"time\":\"21/Jun/2019:15:45:24 -0700\",\n",
223 |     "                \"request\":\"POST /incentivize HTTP/1.1\"}\n",
224 |     "```"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 6,
230 |    "metadata": {
231 |     "deletable": false,
232 |     "nbgrader": {
233 |      "checksum": "c04017e59e48b2f4c77bf425ed84b356",
234 |      "grade": false,
235 |      "grade_id": "logs",
236 |      "locked": false,
237 |      "schema_version": 1,
238 |      "solution": true
239 |     }
240 |    },
241 |    "outputs": [],
242 |    "source": [
243 |     "import re\n",
244 |     "def logs():\n",
245 |     "    with open(\"assets/logdata.txt\", \"r\") as file:\n",
246 |     "        logdata = file.read()\n",
247 |     "    req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n",
248 |     "    \n",
249 |     "    l = []\n",
250 |     "    for i in req:\n",
251 |     "        l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n",
252 |     "    return l\n",
253 |     "    # YOUR CODE HERE\n",
254 |     "    #raise NotImplementedError()"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 7,
260 |    "metadata": {
261 |     "deletable": false,
262 |     "editable": false,
263 |     "nbgrader": {
264 |      "checksum": "1fd5f2cca190d37c667fb189352540d3",
265 |      "grade": true,
266 |      "grade_id": "cell-correct_logs",
267 |      "locked": true,
268 |      "points": 1,
269 |      "schema_version": 1,
270 |      "solution": false
271 |     }
272 |    },
273 |    "outputs": [],
274 |    "source": [
275 |     "assert len(logs()) == 979\n",
276 |     "\n",
277 |     "one_item={'host': '146.204.224.152',\n",
278 |     "  'user_name': 'feest6811',\n",
279 |     "  'time': '21/Jun/2019:15:45:24 -0700',\n",
280 |     "  'request': 'POST /incentivize HTTP/1.1'}\n",
281 |     "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {},
288 |    "outputs": [],
289 |    "source": []
290 |   }
291 |  ],
292 |  "metadata": {
293 |   "coursera": {
294 |    "schema_names": [
295 |     "mooc_adswpy_1_v2_assignment1"
296 |    ]
297 |   },
298 |   "kernelspec": {
299 |    "display_name": "Python 3",
300 |    "language": "python",
301 |    "name": "python3"
302 |   },
303 |   "language_info": {
304 |    "codemirror_mode": {
305 |     "name": "ipython",
306 |     "version": 3
307 |    },
308 |    "file_extension": ".py",
309 |    "mimetype": "text/x-python",
310 |    "name": "python",
311 |    "nbconvert_exporter": "python",
312 |    "pygments_lexer": "ipython3",
313 |    "version": "3.7.6"
314 |   }
315 |  },
316 |  "nbformat": 4,
317 |  "nbformat_minor": 4
318 | }
319 | 


--------------------------------------------------------------------------------
/Assignment_1/.ipynb_checkpoints/assignment1-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": false,
  7 |     "editable": false,
  8 |     "nbgrader": {
  9 |      "checksum": "a7460f3e3c42534125a0802936889559",
 10 |      "grade": false,
 11 |      "grade_id": "cell-fa48e7f1b94baa5b",
 12 |      "locked": true,
 13 |      "schema_version": 1,
 14 |      "solution": false
 15 |     }
 16 |    },
 17 |    "source": [
 18 |     "# Assignment 1\n",
  19 |     "For this assignment you are welcome to use other regex resources, such as regex \"cheat sheets\" you find on the web.\n",
 20 |     "\n"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {
 26 |     "deletable": false,
 27 |     "editable": false,
 28 |     "nbgrader": {
 29 |      "checksum": "d17f561e3c6c08092810b982d085f5be",
 30 |      "grade": false,
 31 |      "grade_id": "cell-d4da7eb9acee2a6d",
 32 |      "locked": true,
 33 |      "schema_version": 1,
 34 |      "solution": false
 35 |     }
 36 |    },
 37 |    "source": [
  38 |     "Before you start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. "
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 1,
 44 |    "metadata": {
 45 |     "deletable": false,
 46 |     "editable": false,
 47 |     "nbgrader": {
 48 |      "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1",
 49 |      "grade": false,
 50 |      "grade_id": "cell-4a96535829224b3f",
 51 |      "locked": true,
 52 |      "schema_version": 1,
 53 |      "solution": false
 54 |     }
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "def example_word_count():\n",
 59 |     "    # This example question requires counting words in the example_string below.\n",
 60 |     "    example_string = \"Amy is 5 years old\"\n",
 61 |     "    \n",
 62 |     "    # YOUR CODE HERE.\n",
 63 |     "    # You should write your solution here, and return your result, you can comment out or delete the\n",
 64 |     "    # NotImplementedError below.\n",
 65 |     "    result = example_string.split(\" \")\n",
 66 |     "    return len(result)\n",
 67 |     "\n",
 68 |     "    #raise NotImplementedError()"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "## Part A\n",
 76 |     "\n",
 77 |     "Find a list of all of the names in the following string using regex."
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 2,
 83 |    "metadata": {
 84 |     "deletable": false,
 85 |     "nbgrader": {
 86 |      "checksum": "29bc8c161c0e246c1e3ef4820cc164f7",
 87 |      "grade": false,
 88 |      "grade_id": "names",
 89 |      "locked": false,
 90 |      "schema_version": 1,
 91 |      "solution": true
 92 |     }
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "import re\n",
 97 |     "def names():\n",
 98 |     "    simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. \n",
 99 |     "    Ruth and Peter, their parents, have 3 kids.\"\"\"\n",
100 |     "    \n",
101 |     "    return re.findall('[A-Z][a-z]+', simple_string)\n",
102 |     "\n",
103 |     "    # YOUR CODE HERE\n",
104 |     "    # raise NotImplementedError()"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 3,
110 |    "metadata": {
111 |     "deletable": false,
112 |     "editable": false,
113 |     "nbgrader": {
114 |      "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4",
115 |      "grade": true,
116 |      "grade_id": "correct_names",
117 |      "locked": false,
118 |      "points": 1,
119 |      "schema_version": 1,
120 |      "solution": false
121 |     }
122 |    },
123 |    "outputs": [],
124 |    "source": [
125 |     "assert len(names()) == 4, \"There are four names in the simple_string\"\n"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {
131 |     "deletable": false,
132 |     "editable": false,
133 |     "nbgrader": {
134 |      "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c",
135 |      "grade": false,
136 |      "grade_id": "cell-ed64e3464ddd7ba7",
137 |      "locked": true,
138 |      "schema_version": 1,
139 |      "solution": false
140 |     }
141 |    },
142 |    "source": [
143 |     "## Part B\n",
144 |     "\n",
145 |     "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n",
146 |     "a class. Create a regex to generate a list of just those students who received a B in the course."
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 4,
152 |    "metadata": {
153 |     "deletable": false,
154 |     "nbgrader": {
155 |      "checksum": "e977a1df674e9fa684e6d172aec92824",
156 |      "grade": false,
157 |      "grade_id": "grades",
158 |      "locked": false,
159 |      "schema_version": 1,
160 |      "solution": true
161 |     }
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "import re\n",
166 |     "def grades():\n",
167 |     "    with open (\"assets/grades.txt\", \"r\") as file:\n",
168 |     "        grades = file.read()\n",
169 |     "    return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n",
170 |     "    # YOUR CODE HERE\n",
171 |     "    #raise NotImplementedError()"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 5,
177 |    "metadata": {
178 |     "deletable": false,
179 |     "editable": false,
180 |     "nbgrader": {
181 |      "checksum": "e0bcc452d60fc45259e58d3116d25477",
182 |      "grade": true,
183 |      "grade_id": "correct_grades",
184 |      "locked": false,
185 |      "points": 1,
186 |      "schema_version": 1,
187 |      "solution": false
188 |     }
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "assert len(grades()) == 16\n"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {
198 |     "deletable": false,
199 |     "editable": false,
200 |     "nbgrader": {
201 |      "checksum": "36e3e2a3a3e29fa7b823d22476392320",
202 |      "grade": false,
203 |      "grade_id": "cell-e253518e37d33f0c",
204 |      "locked": true,
205 |      "schema_version": 1,
206 |      "solution": false
207 |     }
208 |    },
209 |    "source": [
210 |     "## Part C\n",
211 |     "\n",
212 |     "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n",
213 |     "* a host (e.g., '146.204.224.152') \n",
214 |     "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! In this case, use '-' as the value for the username.**)\n",
215 |     "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n",
216 |     "* the post request type (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n",
217 |     "\n",
218 |     "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n",
219 |     "```\n",
220 |     "example_dict = {\"host\":\"146.204.224.152\", \n",
221 |     "                \"user_name\":\"feest6811\", \n",
222 |     "                \"time\":\"21/Jun/2019:15:45:24 -0700\",\n",
223 |     "                \"request\":\"POST /incentivize HTTP/1.1\"}\n",
224 |     "```"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": 6,
230 |    "metadata": {
231 |     "deletable": false,
232 |     "nbgrader": {
233 |      "checksum": "c04017e59e48b2f4c77bf425ed84b356",
234 |      "grade": false,
235 |      "grade_id": "logs",
236 |      "locked": false,
237 |      "schema_version": 1,
238 |      "solution": true
239 |     }
240 |    },
241 |    "outputs": [],
242 |    "source": [
243 |     "import re\n",
244 |     "def logs():\n",
245 |     "    with open(\"assets/logdata.txt\", \"r\") as file:\n",
246 |     "        logdata = file.read()\n",
247 |     "    req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n",
248 |     "    \n",
249 |     "    l = []\n",
250 |     "    for i in req:\n",
251 |     "        l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n",
252 |     "    return l\n",
253 |     "    # YOUR CODE HERE\n",
254 |     "    #raise NotImplementedError()"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 7,
260 |    "metadata": {
261 |     "deletable": false,
262 |     "editable": false,
263 |     "nbgrader": {
264 |      "checksum": "1fd5f2cca190d37c667fb189352540d3",
265 |      "grade": true,
266 |      "grade_id": "cell-correct_logs",
267 |      "locked": true,
268 |      "points": 1,
269 |      "schema_version": 1,
270 |      "solution": false
271 |     }
272 |    },
273 |    "outputs": [],
274 |    "source": [
275 |     "assert len(logs()) == 979\n",
276 |     "\n",
277 |     "one_item={'host': '146.204.224.152',\n",
278 |     "  'user_name': 'feest6811',\n",
279 |     "  'time': '21/Jun/2019:15:45:24 -0700',\n",
280 |     "  'request': 'POST /incentivize HTTP/1.1'}\n",
281 |     "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {},
288 |    "outputs": [],
289 |    "source": []
290 |   }
291 |  ],
292 |  "metadata": {
293 |   "coursera": {
294 |    "schema_names": [
295 |     "mooc_adswpy_1_v2_assignment1"
296 |    ]
297 |   },
298 |   "kernelspec": {
299 |    "display_name": "Python 3",
300 |    "language": "python",
301 |    "name": "python3"
302 |   },
303 |   "language_info": {
304 |    "codemirror_mode": {
305 |     "name": "ipython",
306 |     "version": 3
307 |    },
308 |    "file_extension": ".py",
309 |    "mimetype": "text/x-python",
310 |    "name": "python",
311 |    "nbconvert_exporter": "python",
312 |    "pygments_lexer": "ipython3",
313 |    "version": "3.7.6"
314 |   }
315 |  },
316 |  "nbformat": 4,
317 |  "nbformat_minor": 4
318 | }
319 | 


--------------------------------------------------------------------------------
/Assignment_4/assets/nba.csv:
--------------------------------------------------------------------------------
  1 | team,W,L,W/L%,GB,PS/G,PA/G,SRS,year,League
  2 | Toronto Raptors* (1),59,23,0.72,—,111.7,103.9,7.29,2018,NBA
  3 | Boston Celtics* (2),55,27,0.6709999999999999,4.0,104.0,100.4,3.23,2018,NBA
  4 | Philadelphia 76ers* (3),52,30,0.634,7.0,109.8,105.3,4.3,2018,NBA
  5 | Cleveland Cavaliers* (4),50,32,0.61,9.0,110.9,109.9,0.59,2018,NBA
  6 | Indiana Pacers* (5),48,34,0.585,11.0,105.6,104.2,1.18,2018,NBA
  7 | Miami Heat* (6),44,38,0.537,15.0,103.4,102.9,0.15,2018,NBA
  8 | Milwaukee Bucks* (7),44,38,0.537,15.0,106.5,106.8,-0.45,2018,NBA
  9 | Washington Wizards* (8),43,39,0.524,16.0,106.6,106.0,0.53,2018,NBA
 10 | Detroit Pistons (9),39,43,0.47600000000000003,20.0,103.8,103.9,-0.26,2018,NBA
 11 | Charlotte Hornets (10),36,46,0.439,23.0,108.2,108.0,0.07,2018,NBA
 12 | New York Knicks (11),29,53,0.354,30.0,104.5,108.0,-3.53,2018,NBA
 13 | Brooklyn Nets (12),28,54,0.341,31.0,106.6,110.3,-3.67,2018,NBA
 14 | Chicago Bulls (13),27,55,0.32899999999999996,32.0,102.9,110.0,-6.84,2018,NBA
 15 | Orlando Magic (14),25,57,0.305,34.0,103.4,108.2,-4.92,2018,NBA
 16 | Atlanta Hawks (15),24,58,0.293,35.0,103.4,108.8,-5.3,2018,NBA
 17 | Houston Rockets* (1),65,17,0.7929999999999999,—,112.4,103.9,8.21,2018,NBA
 18 | Golden State Warriors* (2),58,24,0.7070000000000001,7.0,113.5,107.5,5.79,2018,NBA
 19 | Portland Trail Blazers* (3),49,33,0.598,16.0,105.6,103.0,2.6,2018,NBA
 20 | Oklahoma City Thunder* (4),48,34,0.585,17.0,107.9,104.4,3.42,2018,NBA
 21 | Utah Jazz* (5),48,34,0.585,17.0,104.1,99.8,4.47,2018,NBA
 22 | New Orleans Pelicans* (6),48,34,0.585,17.0,111.7,110.4,1.48,2018,NBA
 23 | San Antonio Spurs* (7),47,35,0.573,18.0,102.7,99.8,2.89,2018,NBA
 24 | Minnesota Timberwolves* (8),47,35,0.573,18.0,109.5,107.3,2.35,2018,NBA
 25 | Denver Nuggets (9),46,36,0.561,19.0,110.0,108.5,1.57,2018,NBA
 26 | Los Angeles Clippers (10),42,40,0.512,23.0,109.0,109.0,0.15,2018,NBA
 27 | Los Angeles Lakers (11),35,47,0.42700000000000005,30.0,108.1,109.6,-1.44,2018,NBA
 28 | Sacramento Kings (12),27,55,0.32899999999999996,38.0,98.8,105.8,-6.6,2018,NBA
 29 | Dallas Mavericks (13),24,58,0.293,41.0,102.3,105.4,-2.7,2018,NBA
 30 | Memphis Grizzlies (14),22,60,0.268,43.0,99.3,105.5,-5.81,2018,NBA
 31 | Phoenix Suns (15),21,61,0.256,44.0,103.9,113.3,-8.8,2018,NBA
 32 | Boston Celtics* (1),53,29,0.6459999999999999,—,108.0,105.4,2.25,2017,NBA
 33 | Cleveland Cavaliers* (2),51,31,0.622,2.0,110.3,107.2,2.87,2017,NBA
 34 | Toronto Raptors* (3),51,31,0.622,2.0,106.9,102.6,3.65,2017,NBA
 35 | Washington Wizards* (4),49,33,0.598,4.0,109.2,107.4,1.36,2017,NBA
 36 | Atlanta Hawks* (5),43,39,0.524,10.0,103.2,104.0,-1.23,2017,NBA
 37 | Milwaukee Bucks* (6),42,40,0.512,11.0,103.6,103.8,-0.45,2017,NBA
 38 | Indiana Pacers* (7),42,40,0.512,11.0,105.1,105.3,-0.64,2017,NBA
 39 | Chicago Bulls* (8),41,41,0.5,12.0,102.9,102.4,0.03,2017,NBA
 40 | Miami Heat (9),41,41,0.5,12.0,103.2,102.1,0.77,2017,NBA
 41 | Detroit Pistons (10),37,45,0.451,16.0,101.3,102.5,-1.29,2017,NBA
 42 | Charlotte Hornets (11),36,46,0.439,17.0,104.9,104.7,-0.07,2017,NBA
 43 | New York Knicks (12),31,51,0.37799999999999995,22.0,104.3,108.0,-3.87,2017,NBA
 44 | Orlando Magic (13),29,53,0.354,24.0,101.1,107.6,-6.61,2017,NBA
 45 | Philadelphia 76ers (14),28,54,0.341,25.0,102.4,108.1,-5.83,2017,NBA
 46 | Brooklyn Nets (15),20,62,0.244,33.0,105.8,112.5,-6.74,2017,NBA
 47 | Golden State Warriors* (1),67,15,0.8170000000000001,—,115.9,104.3,11.35,2017,NBA
 48 | San Antonio Spurs* (2),61,21,0.7440000000000001,6.0,105.3,98.1,7.13,2017,NBA
 49 | Houston Rockets* (3),55,27,0.6709999999999999,12.0,115.3,109.6,5.84,2017,NBA
 50 | Los Angeles Clippers* (4),51,31,0.622,16.0,108.7,104.4,4.42,2017,NBA
 51 | Utah Jazz* (5),51,31,0.622,16.0,100.7,96.8,4.0,2017,NBA
 52 | Oklahoma City Thunder* (6),47,35,0.573,20.0,106.6,105.8,1.14,2017,NBA
 53 | Memphis Grizzlies* (7),43,39,0.524,24.0,100.5,100.0,0.96,2017,NBA
 54 | Portland Trail Blazers* (8),41,41,0.5,26.0,107.9,108.5,-0.23,2017,NBA
 55 | Denver Nuggets (9),40,42,0.488,27.0,111.7,111.2,0.7,2017,NBA
 56 | New Orleans Pelicans (10),34,48,0.415,33.0,104.3,106.4,-1.69,2017,NBA
 57 | Dallas Mavericks (11),33,49,0.402,34.0,97.9,100.8,-2.53,2017,NBA
 58 | Sacramento Kings (12),32,50,0.39,35.0,102.8,106.7,-3.29,2017,NBA
 59 | Minnesota Timberwolves (13),31,51,0.37799999999999995,36.0,105.6,106.7,-0.64,2017,NBA
 60 | Los Angeles Lakers (14),26,56,0.317,41.0,104.6,111.5,-6.29,2017,NBA
 61 | Phoenix Suns (15),24,58,0.293,43.0,107.7,113.3,-5.14,2017,NBA
 62 | Cleveland Cavaliers* (1),57,25,0.695,—,104.3,98.3,5.45,2016,NBA
 63 | Toronto Raptors* (2),56,26,0.6829999999999999,1.0,102.7,98.2,4.08,2016,NBA
 64 | Miami Heat* (3),48,34,0.585,9.0,100.0,98.4,1.5,2016,NBA
 65 | Atlanta Hawks* (4),48,34,0.585,9.0,102.8,99.2,3.49,2016,NBA
 66 | Boston Celtics* (5),48,34,0.585,9.0,105.7,102.5,2.84,2016,NBA
 67 | Charlotte Hornets* (6),48,34,0.585,9.0,103.4,100.7,2.36,2016,NBA
 68 | Indiana Pacers* (7),45,37,0.5489999999999999,12.0,102.2,100.5,1.62,2016,NBA
 69 | Detroit Pistons* (8),44,38,0.537,13.0,102.0,101.4,0.43,2016,NBA
 70 | Chicago Bulls (9),42,40,0.512,15.0,101.6,103.1,-1.46,2016,NBA
 71 | Washington Wizards (10),41,41,0.5,16.0,104.1,104.6,-0.5,2016,NBA
 72 | Orlando Magic (11),35,47,0.42700000000000005,22.0,102.1,103.7,-1.68,2016,NBA
 73 | Milwaukee Bucks (12),33,49,0.402,24.0,99.0,103.2,-3.98,2016,NBA
 74 | New York Knicks (13),32,50,0.39,25.0,98.4,101.1,-2.74,2016,NBA
 75 | Brooklyn Nets (14),21,61,0.256,36.0,98.6,106.0,-7.12,2016,NBA
 76 | Philadelphia 76ers (15),10,72,0.122,47.0,97.4,107.6,-9.92,2016,NBA
 77 | Golden State Warriors* (1),73,9,0.89,—,114.9,104.1,10.38,2016,NBA
 78 | San Antonio Spurs* (2),67,15,0.8170000000000001,6.0,103.5,92.9,10.28,2016,NBA
 79 | Oklahoma City Thunder* (3),55,27,0.6709999999999999,18.0,110.2,102.9,7.09,2016,NBA
 80 | Los Angeles Clippers* (4),53,29,0.6459999999999999,20.0,104.5,100.2,4.13,2016,NBA
 81 | Portland Trail Blazers* (5),44,38,0.537,29.0,105.1,104.3,0.98,2016,NBA
 82 | Dallas Mavericks* (6),42,40,0.512,31.0,102.3,102.6,-0.02,2016,NBA
 83 | Memphis Grizzlies* (7),42,40,0.512,31.0,99.1,101.3,-2.14,2016,NBA
 84 | Houston Rockets* (8),41,41,0.5,32.0,106.5,106.4,0.34,2016,NBA
 85 | Utah Jazz (9),40,42,0.488,33.0,97.7,95.9,1.84,2016,NBA
 86 | Sacramento Kings (10),33,49,0.402,40.0,106.6,109.1,-2.32,2016,NBA
 87 | Denver Nuggets (10),33,49,0.402,40.0,101.9,105.0,-2.81,2016,NBA
 88 | New Orleans Pelicans (12),30,52,0.366,43.0,102.7,106.5,-3.56,2016,NBA
 89 | Minnesota Timberwolves (13),29,53,0.354,44.0,102.4,106.0,-3.38,2016,NBA
 90 | Phoenix Suns (14),23,59,0.28,50.0,100.9,107.5,-6.32,2016,NBA
 91 | Los Angeles Lakers (15),17,65,0.207,56.0,97.3,106.9,-8.92,2016,NBA
 92 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NBA
 93 | Toronto Raptors* (4),49,33,.598,—,104.0,100.9,2.45,2015,NBA
 94 | Boston Celtics* (7),40,42,.488,9.0,101.4,101.2,-0.40,2015,NBA
 95 | Brooklyn Nets* (8),38,44,.463,11.0,98.0,100.9,-3.13,2015,NBA
 96 | Philadelphia 76ers (14),18,64,.220,31.0,92.0,101.0,-9.04,2015,NBA
 97 | New York Knicks (15),17,65,.207,32.0,91.9,101.2,-9.50,2015,NBA
 98 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NBA
 99 | Cleveland Cavaliers* (2),53,29,.646,—,103.1,98.7,4.08,2015,NBA
100 | Chicago Bulls* (3),50,32,.610,3.0,100.8,97.8,2.54,2015,NBA
101 | Milwaukee Bucks* (6),41,41,.500,12.0,97.8,97.4,-0.09,2015,NBA
102 | Indiana Pacers (9),38,44,.463,15.0,97.3,97.0,-0.23,2015,NBA
103 | Detroit Pistons (12),32,50,.390,21.0,98.5,99.5,-1.39,2015,NBA
104 | Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2015,NBA
105 | Atlanta Hawks* (1),60,22,.732,—,102.5,97.1,4.75,2015,NBA
106 | Washington Wizards* (5),46,36,.561,14.0,98.5,97.8,0.17,2015,NBA
107 | Miami Heat (10),37,45,.451,23.0,94.7,97.3,-2.92,2015,NBA
108 | Charlotte Hornets (11),33,49,.402,27.0,94.2,97.3,-3.44,2015,NBA
109 | Orlando Magic (13),25,57,.305,35.0,95.7,101.4,-5.87,2015,NBA
110 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2015,NBA
111 | Portland Trail Blazers* (4),51,31,.622,—,102.8,98.6,4.41,2015,NBA
112 | Oklahoma City Thunder (9),45,37,.549,6.0,104.0,101.8,2.47,2015,NBA
113 | Utah Jazz (11),38,44,.463,13.0,95.1,94.9,0.71,2015,NBA
114 | Denver Nuggets (12),30,52,.366,21.0,101.5,105.0,-3.07,2015,NBA
115 | Minnesota Timberwolves (15),16,66,.195,35.0,97.8,106.5,-8.00,2015,NBA
116 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NBA
117 | Golden State Warriors* (1),67,15,.817,—,110.0,99.9,10.01,2015,NBA
118 | Los Angeles Clippers* (2),56,26,.683,11.0,106.7,100.1,6.80,2015,NBA
119 | Phoenix Suns (10),39,43,.476,28.0,102.4,103.3,-0.38,2015,NBA
120 | Sacramento Kings (13),29,53,.354,38.0,101.3,105.0,-3.07,2015,NBA
121 | Los Angeles Lakers (14),21,61,.256,46.0,98.5,105.3,-6.17,2015,NBA
122 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2015,NBA
123 | Houston Rockets* (2),56,26,.683,—,103.9,100.5,3.82,2015,NBA
124 | San Antonio Spurs* (5),55,27,.671,1.0,103.2,97.0,6.34,2015,NBA
125 | Memphis Grizzlies* (5),55,27,.671,1.0,98.3,95.1,3.62,2015,NBA
126 | Dallas Mavericks* (7),50,32,.610,6.0,105.2,102.3,3.36,2015,NBA
127 | New Orleans Pelicans* (8),45,37,.549,11.0,99.4,98.6,1.13,2015,NBA
128 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NBA
129 | Toronto Raptors* (3),48,34,.585,—,101.3,98.0,2.55,2014,NBA
130 | Brooklyn Nets* (6),44,38,.537,4.0,98.5,99.5,-1.58,2014,NBA
131 | New York Knicks (9),37,45,.451,11.0,98.6,99.4,-1.40,2014,NBA
132 | Boston Celtics (12),25,57,.305,23.0,96.2,100.7,-4.97,2014,NBA
133 | Philadelphia 76ers (14),19,63,.232,29.0,99.5,109.9,-10.66,2014,NBA
134 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NBA
135 | Indiana Pacers* (1),56,26,.683,—,96.7,92.3,3.63,2014,NBA
136 | Chicago Bulls* (4),48,34,.585,8.0,93.7,91.8,1.20,2014,NBA
137 | Cleveland Cavaliers (10),33,49,.402,23.0,98.2,101.5,-3.86,2014,NBA
138 | Detroit Pistons (11),29,53,.354,27.0,101.0,104.7,-4.13,2014,NBA
139 | Milwaukee Bucks (15),15,67,.183,41.0,95.5,103.7,-8.41,2014,NBA
140 | Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2014,NBA
141 | Miami Heat* (2),54,28,.659,—,102.2,97.4,4.15,2014,NBA
142 | Washington Wizards* (5),44,38,.537,10.0,100.7,99.4,0.48,2014,NBA
143 | Charlotte Bobcats* (7),43,39,.524,11.0,96.9,97.1,-0.89,2014,NBA
144 | Atlanta Hawks* (8),38,44,.463,16.0,101.0,101.5,-0.88,2014,NBA
145 | Orlando Magic (13),23,59,.280,31.0,96.5,102.0,-5.88,2014,NBA
146 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2014,NBA
147 | Oklahoma City Thunder* (2),59,23,.720,—,106.2,99.8,6.66,2014,NBA
148 | Portland Trail Blazers* (4),54,28,.659,5.0,106.7,102.8,4.44,2014,NBA
149 | Minnesota Timberwolves (10),40,42,.488,19.0,106.9,104.3,3.10,2014,NBA
150 | Denver Nuggets (11),36,46,.439,23.0,104.4,106.5,-1.40,2014,NBA
151 | Utah Jazz (15),25,57,.305,34.0,95.0,102.2,-6.27,2014,NBA
152 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NBA
153 | Los Angeles Clippers* (3),57,25,.695,—,107.9,101.0,7.27,2014,NBA
154 | Golden State Warriors* (6),51,31,.622,6.0,104.3,99.5,5.15,2014,NBA
155 | Phoenix Suns (9),48,34,.585,9.0,105.2,102.6,3.02,2014,NBA
156 | Sacramento Kings (13),28,54,.341,29.0,100.5,103.4,-2.08,2014,NBA
157 | Los Angeles Lakers (14),27,55,.329,30.0,103.0,109.2,-5.33,2014,NBA
158 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2014,NBA
159 | San Antonio Spurs* (1),62,20,.756,—,105.4,97.6,8.00,2014,NBA
160 | Houston Rockets* (4),54,28,.659,8.0,107.7,103.1,5.06,2014,NBA
161 | Memphis Grizzlies* (7),50,32,.610,12.0,96.1,94.6,2.18,2014,NBA
162 | Dallas Mavericks* (8),49,33,.598,13.0,104.8,102.4,2.91,2014,NBA
163 | New Orleans Pelicans (12),34,48,.415,28.0,99.7,102.4,-1.98,2014,NBA
164 | 


--------------------------------------------------------------------------------
/Assignment_2/assignment2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": false,
  7 |     "editable": false,
  8 |     "nbgrader": {
  9 |      "checksum": "5a8d839ee00398fa3bd3bc58ec642beb",
 10 |      "grade": false,
 11 |      "grade_id": "cell-a839e7b47494b4c3",
 12 |      "locked": true,
 13 |      "schema_version": 1,
 14 |      "solution": false
 15 |     }
 16 |    },
 17 |    "source": [
 18 |     "# Assignment 2\n",
 19 |     "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data user's guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). **Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file.**"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {
 25 |     "deletable": false,
 26 |     "editable": false,
 27 |     "hideCode": false,
 28 |     "hidePrompt": false,
 29 |     "nbgrader": {
 30 |      "checksum": "aaa5e730f40ba21c1bc94f864bad4742",
 31 |      "grade": false,
 32 |      "grade_id": "cell-58fc2e5938733f6a",
 33 |      "locked": true,
 34 |      "schema_version": 1,
 35 |      "solution": false
 36 |     }
 37 |    },
 38 |    "source": [
 39 |     "## Question 1\n",
 40 |     "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n",
 41 |     "\n",
 42 |     "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n",
 43 |     "```\n",
 44 |     "    {\"less than high school\":0.2,\n",
 45 |     "    \"high school\":0.4,\n",
 46 |     "    \"more than high school but not college\":0.2,\n",
 47 |     "    \"college\":0.2}\n",
 48 |     "```\n"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 1,
 54 |    "metadata": {
 55 |     "deletable": false,
 56 |     "nbgrader": {
 57 |      "checksum": "0ac58deb3f5ac988c643e903cbee7f3a",
 58 |      "grade": false,
 59 |      "grade_id": "cell-eea16d020eb52ae7",
 60 |      "locked": false,
 61 |      "schema_version": 1,
 62 |      "solution": true
 63 |     }
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "def proportion_of_education():\n",
 68 |     "    # your code goes here\n",
 69 |     "    # YOUR CODE HERE\n",
 70 |     "    import pandas as pd\n",
 71 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
 72 |     "    n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n",
 73 |     "    li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n",
 74 |     "    di = {}\n",
 75 |     "    i = 0\n",
 76 |     "    for l in li:\n",
 77 |     "        di[l] = n[i+1] \n",
 78 |     "        i += 1\n",
 79 |     "    \n",
 80 |     "    return di\n",
 81 |     "    #raise NotImplementedError()"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 2,
 87 |    "metadata": {
 88 |     "deletable": false,
 89 |     "editable": false,
 90 |     "hideCode": false,
 91 |     "hidePrompt": false,
 92 |     "nbgrader": {
 93 |      "checksum": "ac5d91a24a7f72f66c25d242c3d24a50",
 94 |      "grade": true,
 95 |      "grade_id": "cell-c0eeef201366f51c",
 96 |      "locked": true,
 97 |      "points": 1,
 98 |      "schema_version": 1,
 99 |      "solution": false
100 |     }
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n",
105 |     "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n",
106 |     "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
107 |     "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
108 |     "assert \"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
109 |     "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {
115 |     "deletable": false,
116 |     "editable": false,
117 |     "nbgrader": {
118 |      "checksum": "562b78b7b9b79580269be0a3bebf4b42",
119 |      "grade": false,
120 |      "grade_id": "cell-8fcbb64516283f52",
121 |      "locked": true,
122 |      "schema_version": 1,
123 |      "solution": false
124 |     }
125 |    },
126 |    "source": [
127 |     "## Question 2\n",
128 |     "\n",
129 |     "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those we know did not.\n",
130 |     "\n",
131 |     "*This function should return a tuple in the form (use the correct numbers):*\n",
132 |     "```\n",
133 |     "(2.5, 0.1)\n",
134 |     "```"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 3,
140 |    "metadata": {
141 |     "deletable": false,
142 |     "nbgrader": {
143 |      "checksum": "a405d639063c4a6408365479f29c95c9",
144 |      "grade": false,
145 |      "grade_id": "cell-77f18c512324eabb",
146 |      "locked": false,
147 |      "schema_version": 1,
148 |      "solution": true
149 |     }
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "def average_influenza_doses():\n",
154 |     "    # YOUR CODE HERE\n",
155 |     "    \n",
156 |     "    import pandas as pd\n",
157 |     "    import numpy as np\n",
158 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
159 |     "    gp1 = mb[mb['CBF_01'] == 1]\n",
160 |     "    gp2 = mb[mb['CBF_01'] == 2]\n",
161 |     "    \n",
162 |     "    return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n",
163 |     "    #raise NotImplementedError()"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 4,
169 |    "metadata": {
170 |     "deletable": false,
171 |     "editable": false,
172 |     "nbgrader": {
173 |      "checksum": "19be955e97fdf7162d43fbb7c2c40951",
174 |      "grade": true,
175 |      "grade_id": "cell-54a3ba6cff31caa7",
176 |      "locked": true,
177 |      "points": 1,
178 |      "schema_version": 1,
179 |      "solution": false
180 |     }
181 |    },
182 |    "outputs": [],
183 |    "source": [
184 |     "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {
190 |     "deletable": false,
191 |     "editable": false,
192 |     "nbgrader": {
193 |      "checksum": "e10e2163f5957a0c398ef4f0b76b4efe",
194 |      "grade": false,
195 |      "grade_id": "cell-f63377f3c97aa7c4",
196 |      "locked": true,
197 |      "schema_version": 1,
198 |      "solution": false
199 |     }
200 |    },
201 |    "source": [
202 |     "## Question 3\n",
203 |     "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chicken pox. Return results by sex. \n",
204 |     "\n",
205 |     "*This function should return a dictionary in the form of (use the correct numbers):* \n",
206 |     "```\n",
207 |     "    {\"male\":0.2,\n",
208 |     "    \"female\":0.4}\n",
209 |     "```\n",
210 |     "\n",
211 |     "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`."
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 5,
217 |    "metadata": {
218 |     "deletable": false,
219 |     "nbgrader": {
220 |      "checksum": "b4d1b58acae002bc73eb0b19f95bc4af",
221 |      "grade": false,
222 |      "grade_id": "cell-a0a9e6fe67698006",
223 |      "locked": false,
224 |      "schema_version": 1,
225 |      "solution": true
226 |     }
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "def chickenpox_by_sex():\n",
231 |     "    # YOUR CODE HERE\n",
232 |     "    import pandas as pd\n",
233 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
234 |     "    \n",
235 |     "    v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n",
236 |     "    v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n",
237 |     "    v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n",
238 |     "    v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n",
239 |     "    \n",
240 |     "    rm = v1m.shape[0]/v2m.shape[0]\n",
241 |     "    rf = v1f.shape[0]/v2f.shape[0]\n",
242 |     "    r = [rm, rf]\n",
243 |     "    \n",
244 |     "    sex = ['male', 'female']\n",
245 |     "    d = {}\n",
246 |     "    i = 0\n",
247 |     "    for s in sex:\n",
248 |     "        d[s] = r[i]\n",
249 |     "        i+=1\n",
250 |     "    return d  \n",
251 |     "    #raise NotImplementedError()"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 6,
257 |    "metadata": {
258 |     "deletable": false,
259 |     "editable": false,
260 |     "nbgrader": {
261 |      "checksum": "1b6a113a633c55699ae478a3a9ee9c33",
262 |      "grade": true,
263 |      "grade_id": "cell-c4f1714db100c865",
264 |      "locked": true,
265 |      "points": 1,
266 |      "schema_version": 1,
267 |      "solution": false
268 |     }
269 |    },
270 |    "outputs": [],
271 |    "source": [
272 |     "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "## Question 4\n",
280 |     "A correlation is a statistical relationship between two variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had the chicken pox and the number of chickenpox vaccine doses given (varicella).\n",
281 |     "\n",
282 |     "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses a child has been given of the varicella vaccine. A positive correlation (i.e., `corr > 0`) means that an increase in `had_chickenpox_column` (which means more no’s) would also increase the values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). If there is a negative correlation (i.e., `corr < 0`), it indicates that having had chickenpox is related to an increase in the number of vaccine doses.\n",
283 |     "\n",
284 |     "Also, `pval` is the probability of observing, purely by chance, a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` that is at least as strong as the one actually measured. A small `pval` means that the observed correlation is highly unlikely to have occurred by chance. In this case, `pval` should be very small (it will end in `e-18`, indicating a very small number).\n",
285 |     "\n",
286 |     "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 7,
292 |    "metadata": {
293 |     "deletable": false,
294 |     "nbgrader": {
295 |      "checksum": "3e645859949447913cd11d30eb33cb1e",
296 |      "grade": false,
297 |      "grade_id": "cell-8afff07f564cf79a",
298 |      "locked": false,
299 |      "schema_version": 1,
300 |      "solution": true
301 |     }
302 |    },
303 |    "outputs": [],
304 |    "source": [
305 |     "def corr_chickenpox():\n",
306 |     "    import scipy.stats as stats\n",
307 |     "    import numpy as np\n",
308 |     "    import pandas as pd\n",
309 |     "    \n",
310 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
311 |     "    \n",
312 |     "    v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n",
313 |     "    \n",
314 |     "    no_yes = v1['HAD_CPOX']\n",
315 |     "    \n",
316 |     "    est_vaccine = v1['P_NUMVRC']\n",
317 |     "    \n",
318 |     "    # this is just an example dataframe\n",
319 |     "    df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n",
320 |     "                   \"num_chickenpox_vaccine_column\":est_vaccine})\n",
321 |     "\n",
322 |     "    # here is some stub code to actually run the correlation\n",
323 |     "    corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n",
324 |     "    \n",
325 |     "    # just return the correlation\n",
326 |     "    return corr\n",
327 |     "\n",
328 |     "    # YOUR CODE HERE\n",
329 |     "    #raise NotImplementedError()"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "code",
334 |    "execution_count": 8,
335 |    "metadata": {
336 |     "deletable": false,
337 |     "editable": false,
338 |     "nbgrader": {
339 |      "checksum": "ac50ccb747b99f6bbcc76da017e66528",
340 |      "grade": true,
341 |      "grade_id": "cell-73408733533a29a5",
342 |      "locked": true,
343 |      "points": 1,
344 |      "schema_version": 1,
345 |      "solution": false
346 |     }
347 |    },
348 |    "outputs": [],
349 |    "source": [
350 |     "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": null,
356 |    "metadata": {},
357 |    "outputs": [],
358 |    "source": []
359 |   }
360 |  ],
361 |  "metadata": {
362 |   "anaconda-cloud": {},
363 |   "coursera": {
364 |    "schema_names": [
365 |     "mooc_adswpy_1_v2_assignment2"
366 |    ]
367 |   },
368 |   "hide_code_all_hidden": false,
369 |   "kernelspec": {
370 |    "display_name": "Python 3",
371 |    "language": "python",
372 |    "name": "python3"
373 |   },
374 |   "language_info": {
375 |    "codemirror_mode": {
376 |     "name": "ipython",
377 |     "version": 3
378 |    },
379 |    "file_extension": ".py",
380 |    "mimetype": "text/x-python",
381 |    "name": "python",
382 |    "nbconvert_exporter": "python",
383 |    "pygments_lexer": "ipython3",
384 |    "version": "3.7.6"
385 |   }
386 |  },
387 |  "nbformat": 4,
388 |  "nbformat_minor": 1
389 | }
390 | 


--------------------------------------------------------------------------------
/Assignment_2/.ipynb_checkpoints/assignment2-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "deletable": false,
  7 |     "editable": false,
  8 |     "nbgrader": {
  9 |      "checksum": "5a8d839ee00398fa3bd3bc58ec642beb",
 10 |      "grade": false,
 11 |      "grade_id": "cell-a839e7b47494b4c3",
 12 |      "locked": true,
 13 |      "schema_version": 1,
 14 |      "solution": false
 15 |     }
 16 |    },
 17 |    "source": [
 18 |     "# Assignment 2\n",
 19 |     "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data users guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). **Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file).**"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {
 25 |     "deletable": false,
 26 |     "editable": false,
 27 |     "hideCode": false,
 28 |     "hidePrompt": false,
 29 |     "nbgrader": {
 30 |      "checksum": "aaa5e730f40ba21c1bc94f864bad4742",
 31 |      "grade": false,
 32 |      "grade_id": "cell-58fc2e5938733f6a",
 33 |      "locked": true,
 34 |      "schema_version": 1,
 35 |      "solution": false
 36 |     }
 37 |    },
 38 |    "source": [
 39 |     "## Question 1\n",
 40 |     "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n",
 41 |     "\n",
 42 |     "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n",
 43 |     "```\n",
 44 |     "    {\"less than high school\":0.2,\n",
 45 |     "    \"high school\":0.4,\n",
 46 |     "    \"more than high school but not college\":0.2,\n",
 47 |     "    \"college\":0.2}\n",
 48 |     "```\n"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 1,
 54 |    "metadata": {
 55 |     "deletable": false,
 56 |     "nbgrader": {
 57 |      "checksum": "0ac58deb3f5ac988c643e903cbee7f3a",
 58 |      "grade": false,
 59 |      "grade_id": "cell-eea16d020eb52ae7",
 60 |      "locked": false,
 61 |      "schema_version": 1,
 62 |      "solution": true
 63 |     }
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "def proportion_of_education():\n",
 68 |     "    # your code goes here\n",
 69 |     "    # YOUR CODE HERE\n",
 70 |     "    import pandas as pd\n",
 71 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
 72 |     "    n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n",
 73 |     "    li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n",
 74 |     "    di = {}\n",
 75 |     "    i = 0\n",
 76 |     "    for l in li:\n",
 77 |     "        di[l] = n[i+1] \n",
 78 |     "        i += 1\n",
 79 |     "    \n",
 80 |     "    return di\n",
 81 |     "    #raise NotImplementedError()"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 2,
 87 |    "metadata": {
 88 |     "deletable": false,
 89 |     "editable": false,
 90 |     "hideCode": false,
 91 |     "hidePrompt": false,
 92 |     "nbgrader": {
 93 |      "checksum": "ac5d91a24a7f72f66c25d242c3d24a50",
 94 |      "grade": true,
 95 |      "grade_id": "cell-c0eeef201366f51c",
 96 |      "locked": true,
 97 |      "points": 1,
 98 |      "schema_version": 1,
 99 |      "solution": false
100 |     }
101 |    },
102 |    "outputs": [],
103 |    "source": [
104 |     "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n",
105 |     "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n",
106 |     "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
107 |     "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
108 |     "assert \"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
109 |     "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {
115 |     "deletable": false,
116 |     "editable": false,
117 |     "nbgrader": {
118 |      "checksum": "562b78b7b9b79580269be0a3bebf4b42",
119 |      "grade": false,
120 |      "grade_id": "cell-8fcbb64516283f52",
121 |      "locked": true,
122 |      "schema_version": 1,
123 |      "solution": false
124 |     }
125 |    },
126 |    "source": [
127 |     "## Question 2\n",
128 |     "\n",
129 |     "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those we know did not.\n",
130 |     "\n",
131 |     "*This function should return a tuple in the form (use the correct numbers):*\n",
132 |     "```\n",
133 |     "(2.5, 0.1)\n",
134 |     "```"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 3,
140 |    "metadata": {
141 |     "deletable": false,
142 |     "nbgrader": {
143 |      "checksum": "a405d639063c4a6408365479f29c95c9",
144 |      "grade": false,
145 |      "grade_id": "cell-77f18c512324eabb",
146 |      "locked": false,
147 |      "schema_version": 1,
148 |      "solution": true
149 |     }
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "def average_influenza_doses():\n",
154 |     "    # YOUR CODE HERE\n",
155 |     "    \n",
156 |     "    import pandas as pd\n",
157 |     "    import numpy as np\n",
158 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
159 |     "    gp1 = mb[mb['CBF_01'] == 1]\n",
160 |     "    gp2 = mb[mb['CBF_01'] == 2]\n",
161 |     "    \n",
162 |     "    return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n",
163 |     "    #raise NotImplementedError()"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 4,
169 |    "metadata": {
170 |     "deletable": false,
171 |     "editable": false,
172 |     "nbgrader": {
173 |      "checksum": "19be955e97fdf7162d43fbb7c2c40951",
174 |      "grade": true,
175 |      "grade_id": "cell-54a3ba6cff31caa7",
176 |      "locked": true,
177 |      "points": 1,
178 |      "schema_version": 1,
179 |      "solution": false
180 |     }
181 |    },
182 |    "outputs": [],
183 |    "source": [
184 |     "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {
190 |     "deletable": false,
191 |     "editable": false,
192 |     "nbgrader": {
193 |      "checksum": "e10e2163f5957a0c398ef4f0b76b4efe",
194 |      "grade": false,
195 |      "grade_id": "cell-f63377f3c97aa7c4",
196 |      "locked": true,
197 |      "schema_version": 1,
198 |      "solution": false
199 |     }
200 |    },
201 |    "source": [
202 |     "## Question 3\n",
203 |     "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chicken pox. Return results by sex. \n",
204 |     "\n",
205 |     "*This function should return a dictionary in the form of (use the correct numbers):* \n",
206 |     "```\n",
207 |     "    {\"male\":0.2,\n",
208 |     "    \"female\":0.4}\n",
209 |     "```\n",
210 |     "\n",
211 |     "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`."
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 5,
217 |    "metadata": {
218 |     "deletable": false,
219 |     "nbgrader": {
220 |      "checksum": "b4d1b58acae002bc73eb0b19f95bc4af",
221 |      "grade": false,
222 |      "grade_id": "cell-a0a9e6fe67698006",
223 |      "locked": false,
224 |      "schema_version": 1,
225 |      "solution": true
226 |     }
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "def chickenpox_by_sex():\n",
231 |     "    # YOUR CODE HERE\n",
232 |     "    import pandas as pd\n",
233 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
234 |     "    \n",
235 |     "    v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n",
236 |     "    v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n",
237 |     "    v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n",
238 |     "    v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n",
239 |     "    \n",
240 |     "    rm = v1m.shape[0]/v2m.shape[0]\n",
241 |     "    rf = v1f.shape[0]/v2f.shape[0]\n",
242 |     "    r = [rm, rf]\n",
243 |     "    \n",
244 |     "    sex = ['male', 'female']\n",
245 |     "    d = {}\n",
246 |     "    i = 0\n",
247 |     "    for s in sex:\n",
248 |     "        d[s] = r[i]\n",
249 |     "        i+=1\n",
250 |     "    return d  \n",
251 |     "    #raise NotImplementedError()"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 6,
257 |    "metadata": {
258 |     "deletable": false,
259 |     "editable": false,
260 |     "nbgrader": {
261 |      "checksum": "1b6a113a633c55699ae478a3a9ee9c33",
262 |      "grade": true,
263 |      "grade_id": "cell-c4f1714db100c865",
264 |      "locked": true,
265 |      "points": 1,
266 |      "schema_version": 1,
267 |      "solution": false
268 |     }
269 |    },
270 |    "outputs": [],
271 |    "source": [
272 |     "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "## Question 4\n",
280 |     "A correlation is a statistical relationship between two variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had the chicken pox and the number of chickenpox vaccine doses given (varicella).\n",
281 |     "\n",
282 |     "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses a child has been given of the varicella vaccine. A positive correlation (e.g., `corr > 0`) means that an increase in `had_chickenpox_column` (which means more no’s) would also increase the values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). If there is a negative correlation (e.g., `corr < 0`), it indicates that having had chickenpox is related to an increase in the number of vaccine doses.\n",
283 |     "\n",
284 |     "Also, `pval` is the probability that a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` at least as strong as the one observed would occur by chance. A small `pval` means that the observed correlation is highly unlikely to occur by chance. In this case, `pval` should be very small (will end in `e-18` indicating a very small number).\n",
285 |     "\n",
286 |     "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 7,
292 |    "metadata": {
293 |     "deletable": false,
294 |     "nbgrader": {
295 |      "checksum": "3e645859949447913cd11d30eb33cb1e",
296 |      "grade": false,
297 |      "grade_id": "cell-8afff07f564cf79a",
298 |      "locked": false,
299 |      "schema_version": 1,
300 |      "solution": true
301 |     }
302 |    },
303 |    "outputs": [],
304 |    "source": [
305 |     "def corr_chickenpox():\n",
306 |     "    import scipy.stats as stats\n",
307 |     "    import numpy as np\n",
308 |     "    import pandas as pd\n",
309 |     "    \n",
310 |     "    mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
311 |     "    \n",
312 |     "    v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n",
313 |     "    \n",
314 |     "    no_yes = v1['HAD_CPOX']\n",
315 |     "    \n",
316 |     "    est_vaccine = v1['P_NUMVRC']\n",
317 |     "    \n",
318 |     "    # this is just an example dataframe\n",
319 |     "    df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n",
320 |     "                   \"num_chickenpox_vaccine_column\":est_vaccine})\n",
321 |     "\n",
322 |     "    # here is some stub code to actually run the correlation\n",
323 |     "    corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n",
324 |     "    \n",
325 |     "    # just return the correlation\n",
326 |     "    return corr\n",
327 |     "\n",
328 |     "    # YOUR CODE HERE\n",
329 |     "    #raise NotImplementedError()"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "code",
334 |    "execution_count": 8,
335 |    "metadata": {
336 |     "deletable": false,
337 |     "editable": false,
338 |     "nbgrader": {
339 |      "checksum": "ac50ccb747b99f6bbcc76da017e66528",
340 |      "grade": true,
341 |      "grade_id": "cell-73408733533a29a5",
342 |      "locked": true,
343 |      "points": 1,
344 |      "schema_version": 1,
345 |      "solution": false
346 |     }
347 |    },
348 |    "outputs": [],
349 |    "source": [
350 |     "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": null,
356 |    "metadata": {},
357 |    "outputs": [],
358 |    "source": []
359 |   }
360 |  ],
361 |  "metadata": {
362 |   "anaconda-cloud": {},
363 |   "coursera": {
364 |    "schema_names": [
365 |     "mooc_adswpy_1_v2_assignment2"
366 |    ]
367 |   },
368 |   "hide_code_all_hidden": false,
369 |   "kernelspec": {
370 |    "display_name": "Python 3",
371 |    "language": "python",
372 |    "name": "python3"
373 |   },
374 |   "language_info": {
375 |    "codemirror_mode": {
376 |     "name": "ipython",
377 |     "version": 3
378 |    },
379 |    "file_extension": ".py",
380 |    "mimetype": "text/x-python",
381 |    "name": "python",
382 |    "nbconvert_exporter": "python",
383 |    "pygments_lexer": "ipython3",
384 |    "version": "3.7.6"
385 |   }
386 |  },
387 |  "nbformat": 4,
388 |  "nbformat_minor": 1
389 | }
390 | 


--------------------------------------------------------------------------------
/Assignment_4/assets/nhl.csv:
--------------------------------------------------------------------------------
  1 | team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,ROW,year,League
  2 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2018,NHL
  3 | Tampa Bay Lightning*,82,54,23,5,113,.689,296,236,0.66,-0.07,.634,48,2018,NHL
  4 | Boston Bruins*,82,50,20,12,112,.683,270,214,0.62,-0.07,.610,47,2018,NHL
  5 | Toronto Maple Leafs*,82,49,26,7,105,.640,277,232,0.49,-0.06,.567,42,2018,NHL
  6 | Florida Panthers,82,44,30,8,96,.585,248,246,-0.01,-0.04,.537,41,2018,NHL
  7 | Detroit Red Wings,82,30,39,13,73,.445,217,255,-0.48,-0.01,.341,25,2018,NHL
  8 | Montreal Canadiens,82,29,40,13,71,.433,209,264,-0.68,0.00,.378,27,2018,NHL
  9 | Ottawa Senators,82,28,43,11,67,.409,221,291,-0.85,0.00,.372,26,2018,NHL
 10 | Buffalo Sabres,82,25,45,12,62,.378,199,280,-0.98,0.01,.311,24,2018,NHL
 11 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2018,NHL
 12 | Washington Capitals*,82,49,26,7,105,.640,259,239,0.21,-0.04,.585,46,2018,NHL
 13 | Pittsburgh Penguins*,82,47,29,6,100,.610,272,250,0.23,-0.04,.573,45,2018,NHL
 14 | Philadelphia Flyers*,82,42,26,14,98,.598,251,243,0.07,-0.03,.543,40,2018,NHL
 15 | Columbus Blue Jackets*,82,45,30,7,97,.591,242,230,0.11,-0.04,.537,39,2018,NHL
 16 | New Jersey Devils*,82,44,29,9,97,.591,248,244,0.02,-0.03,.530,39,2018,NHL
 17 | Carolina Hurricanes,82,36,35,11,83,.506,228,256,-0.35,-0.01,.439,33,2018,NHL
 18 | New York Islanders,82,35,37,10,80,.488,264,296,-0.40,-0.01,.427,32,2018,NHL
 19 | New York Rangers,82,34,39,9,77,.470,231,268,-0.46,-0.01,.427,31,2018,NHL
 20 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2018,NHL
 21 | Nashville Predators*,82,53,18,11,117,.713,267,211,0.71,0.03,.652,47,2018,NHL
 22 | Winnipeg Jets*,82,52,20,10,114,.695,277,218,0.74,0.02,.622,48,2018,NHL
 23 | Minnesota Wild*,82,45,26,11,101,.616,253,232,0.29,0.04,.549,42,2018,NHL
 24 | Colorado Avalanche*,82,43,30,9,95,.579,257,237,0.28,0.04,.518,41,2018,NHL
 25 | St. Louis Blues,82,44,32,6,94,.573,226,222,0.10,0.05,.518,41,2018,NHL
 26 | Dallas Stars,82,42,32,8,92,.561,235,225,0.17,0.04,.506,38,2018,NHL
 27 | Chicago Blackhawks,82,33,39,10,76,.463,229,256,-0.26,0.07,.409,32,2018,NHL
 28 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2018,NHL
 29 | Vegas Golden Knights*,82,51,24,7,109,.665,272,228,0.52,-0.01,.616,47,2018,NHL
 30 | Anaheim Ducks*,82,44,25,13,101,.616,235,216,0.24,0.01,.555,40,2018,NHL
 31 | San Jose Sharks*,82,45,27,10,100,.610,252,229,0.28,0.00,.537,40,2018,NHL
 32 | Los Angeles Kings*,82,45,29,8,98,.598,239,203,0.44,0.00,.543,43,2018,NHL
 33 | Calgary Flames,82,37,35,10,84,.512,218,248,-0.33,0.03,.470,35,2018,NHL
 34 | Edmonton Oilers,82,36,40,6,78,.476,234,263,-0.32,0.03,.415,31,2018,NHL
 35 | Vancouver Canucks,82,31,40,11,73,.445,218,264,-0.51,0.05,.409,31,2018,NHL
 36 | Arizona Coyotes,82,29,41,12,70,.427,208,256,-0.53,0.05,.372,27,2018,NHL
 37 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL
 38 | Montreal Canadiens*,82,47,26,9,103,.628,226,200,0.31,-0.01,.567,44,2017,NHL
 39 | Ottawa Senators*,82,44,28,10,98,.598,212,214,-0.01,0.01,.524,38,2017,NHL
 40 | Boston Bruins*,82,44,31,7,95,.579,234,212,0.27,0.00,.543,42,2017,NHL
 41 | Toronto Maple Leafs*,82,40,27,15,95,.579,251,242,0.11,0.00,.530,39,2017,NHL
 42 | Tampa Bay Lightning,82,42,30,10,94,.573,234,227,0.09,0.01,.506,38,2017,NHL
 43 | Florida Panthers,82,35,36,11,81,.494,210,237,-0.30,0.03,.433,30,2017,NHL
 44 | Detroit Red Wings,82,33,36,13,79,.482,207,244,-0.41,0.04,.348,24,2017,NHL
 45 | Buffalo Sabres,82,33,37,12,78,.476,201,237,-0.41,0.03,.427,31,2017,NHL
 46 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL
 47 | Washington Capitals*,82,55,19,8,118,.720,263,182,0.99,0.00,.689,53,2017,NHL
 48 | Pittsburgh Penguins*,82,50,21,11,111,.677,282,234,0.59,0.01,.616,46,2017,NHL
 49 | Columbus Blue Jackets*,82,50,24,8,108,.659,249,195,0.68,0.02,.610,48,2017,NHL
 50 | New York Rangers*,82,48,28,6,102,.622,256,220,0.47,0.03,.591,45,2017,NHL
 51 | New York Islanders,82,41,29,12,94,.573,241,242,0.03,0.05,.512,39,2017,NHL
 52 | Philadelphia Flyers,82,39,33,10,88,.537,219,236,-0.17,0.04,.463,32,2017,NHL
 53 | Carolina Hurricanes,82,36,31,15,87,.530,215,236,-0.21,0.05,.457,33,2017,NHL
 54 | New Jersey Devils,82,28,40,14,70,.427,183,244,-0.67,0.08,.341,25,2017,NHL
 55 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL
 56 | Chicago Blackhawks*,82,50,23,9,109,.665,244,213,0.32,-0.06,.591,46,2017,NHL
 57 | Minnesota Wild*,82,49,25,8,106,.646,266,208,0.63,-0.08,.591,46,2017,NHL
 58 | St. Louis Blues*,82,46,29,7,99,.604,235,218,0.17,-0.04,.561,44,2017,NHL
 59 | Nashville Predators*,82,41,29,12,94,.573,240,224,0.16,-0.04,.512,39,2017,NHL
 60 | Winnipeg Jets,82,40,35,7,87,.530,249,256,-0.11,-0.03,.476,37,2017,NHL
 61 | Dallas Stars,82,34,37,11,79,.482,223,262,-0.48,-0.01,.421,33,2017,NHL
 62 | Colorado Avalanche,82,22,56,4,48,.293,166,278,-1.32,0.05,.274,21,2017,NHL
 63 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL
 64 | Anaheim Ducks*,82,46,23,13,105,.640,223,200,0.24,-0.04,.561,43,2017,NHL
 65 | Edmonton Oilers*,82,47,26,9,103,.628,247,212,0.37,-0.06,.579,43,2017,NHL
 66 | San Jose Sharks*,82,46,29,7,99,.604,221,201,0.21,-0.03,.555,44,2017,NHL
 67 | Calgary Flames*,82,45,33,4,94,.573,226,221,0.02,-0.04,.537,41,2017,NHL
 68 | Los Angeles Kings,82,39,35,8,86,.524,201,205,-0.07,-0.02,.488,37,2017,NHL
 69 | Arizona Coyotes,82,30,42,10,70,.427,197,260,-0.76,0.01,.341,24,2017,NHL
 70 | Vancouver Canucks,82,30,43,9,69,.421,182,243,-0.73,0.01,.354,26,2017,NHL
 71 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL
 72 | Florida Panthers*,82,47,26,9,103,.628,239,203,0.42,-0.02,.549,40,2017,NHL
 73 | Tampa Bay Lightning*,82,46,31,5,97,.591,227,201,0.30,-0.02,.561,43,2017,NHL
 74 | Detroit Red Wings*,82,41,30,11,93,.567,211,224,-0.16,0.00,.518,39,2017,NHL
 75 | Boston Bruins,82,42,31,9,93,.567,240,230,0.11,-0.02,.500,38,2017,NHL
 76 | Ottawa Senators,82,38,35,9,85,.518,236,247,-0.13,0.01,.463,32,2017,NHL
 77 | Montreal Canadiens,82,38,38,6,82,.500,221,236,-0.18,0.00,.451,33,2017,NHL
 78 | Buffalo Sabres,82,35,36,11,81,.494,201,222,-0.25,0.01,.457,33,2017,NHL
 79 | Toronto Maple Leafs,82,29,42,11,69,.421,198,246,-0.56,0.03,.354,23,2017,NHL
 80 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL
 81 | Washington Capitals*,82,56,18,8,120,.732,252,193,0.70,-0.02,.671,52,2017,NHL
 82 | Pittsburgh Penguins*,82,48,26,8,104,.634,245,203,0.50,-0.01,.585,44,2017,NHL
 83 | New York Rangers*,82,46,27,9,101,.616,236,217,0.24,0.01,.555,43,2017,NHL
 84 | New York Islanders*,82,45,27,10,100,.610,232,216,0.19,0.00,.549,40,2017,NHL
 85 | Philadelphia Flyers*,82,41,27,14,96,.585,214,218,-0.03,0.02,.530,38,2017,NHL
 86 | Carolina Hurricanes,82,35,31,16,86,.524,198,226,-0.31,0.03,.445,33,2017,NHL
 87 | New Jersey Devils,82,38,36,8,84,.512,184,208,-0.28,0.02,.488,36,2017,NHL
 88 | Columbus Blue Jackets,82,34,40,8,76,.463,219,252,-0.38,0.03,.402,28,2017,NHL
 89 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL
 90 | Dallas Stars*,82,50,23,9,109,.665,267,230,0.45,-0.01,.610,48,2017,NHL
 91 | St. Louis Blues*,82,49,24,9,107,.652,224,201,0.28,0.00,.591,44,2017,NHL
 92 | Chicago Blackhawks*,82,47,26,9,103,.628,235,209,0.32,0.00,.579,46,2017,NHL
 93 | Nashville Predators*,82,41,27,14,96,.585,228,215,0.17,0.01,.488,37,2017,NHL
 94 | Minnesota Wild*,82,38,33,11,87,.530,216,206,0.13,0.01,.457,35,2017,NHL
 95 | Colorado Avalanche,82,39,39,4,82,.500,216,240,-0.26,0.03,.451,35,2017,NHL
 96 | Winnipeg Jets,82,35,39,8,78,.476,215,239,-0.26,0.03,.427,32,2017,NHL
 97 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL
 98 | Anaheim Ducks*,82,46,25,11,103,.628,218,192,0.27,-0.05,.567,43,2017,NHL
 99 | Los Angeles Kings*,82,48,28,6,102,.622,225,195,0.32,-0.04,.591,46,2017,NHL
100 | San Jose Sharks*,82,46,30,6,98,.598,241,210,0.33,-0.05,.555,42,2017,NHL
101 | Arizona Coyotes,82,35,39,8,78,.476,209,245,-0.43,0.01,.427,34,2017,NHL
102 | Calgary Flames,82,35,40,7,77,.470,231,260,-0.36,-0.01,.433,33,2017,NHL
103 | Vancouver Canucks,82,31,38,13,75,.457,191,243,-0.62,0.02,.372,26,2017,NHL
104 | Edmonton Oilers,82,31,43,8,70,.427,203,245,-0.51,0.00,.372,27,2017,NHL
105 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NHL
106 | Montreal Canadiens*,82,50,22,10,110,.671,221,189,0.36,-0.03,.598,43,2015,NHL
107 | Tampa Bay Lightning*,82,50,24,8,108,.659,262,211,0.57,-0.06,.622,47,2015,NHL
108 | Detroit Red Wings*,82,43,25,14,100,.610,235,221,0.14,-0.03,.561,39,2015,NHL
109 | Ottawa Senators*,82,43,26,13,99,.604,238,215,0.25,-0.04,.530,37,2015,NHL
110 | Boston Bruins,82,41,27,14,96,.585,213,211,0.01,-0.01,.537,37,2015,NHL
111 | Florida Panthers,82,38,29,15,91,.555,206,223,-0.23,-0.02,.476,30,2015,NHL
112 | Toronto Maple Leafs,82,30,44,8,68,.415,211,262,-0.61,0.02,.366,25,2015,NHL
113 | Buffalo Sabres,82,23,51,8,54,.329,161,274,-1.33,0.05,.262,15,2015,NHL
114 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2015,NHL
115 | New York Rangers*,82,53,22,7,113,.689,252,192,0.69,-0.04,.652,49,2015,NHL
116 | Washington Capitals*,82,45,26,11,101,.616,242,203,0.44,-0.03,.543,40,2015,NHL
117 | New York Islanders*,82,47,28,7,101,.616,252,230,0.26,-0.01,.567,40,2015,NHL
118 | Pittsburgh Penguins*,82,43,27,12,98,.598,221,210,0.12,-0.01,.537,39,2015,NHL
119 | Columbus Blue Jackets,82,42,35,5,89,.543,236,250,-0.16,0.01,.470,33,2015,NHL
120 | Philadelphia Flyers,82,33,31,18,84,.512,215,234,-0.23,0.00,.451,30,2015,NHL
121 | New Jersey Devils,82,32,36,14,78,.476,181,216,-0.40,0.02,.402,27,2015,NHL
122 | Carolina Hurricanes,82,30,41,11,71,.433,188,226,-0.44,0.02,.378,25,2015,NHL
123 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NHL
124 | St. Louis Blues*,82,51,24,7,109,.665,248,201,0.59,0.02,.591,42,2015,NHL
125 | Nashville Predators*,82,47,25,10,104,.634,232,208,0.33,0.03,.573,41,2015,NHL
126 | Chicago Blackhawks*,82,48,28,6,102,.622,229,189,0.51,0.02,.549,39,2015,NHL
127 | Minnesota Wild*,82,46,28,8,100,.610,231,201,0.39,0.03,.555,42,2015,NHL
128 | Winnipeg Jets*,82,43,26,13,99,.604,230,210,0.29,0.04,.518,36,2015,NHL
129 | Dallas Stars,82,41,31,10,92,.561,261,260,0.06,0.05,.494,37,2015,NHL
130 | Colorado Avalanche,82,39,31,12,90,.549,219,227,-0.04,0.06,.439,29,2015,NHL
131 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NHL
132 | Anaheim Ducks*,82,51,24,7,109,.665,236,226,0.10,-0.03,.604,43,2015,NHL
133 | Vancouver Canucks*,82,48,29,5,101,.616,242,222,0.20,-0.05,.561,42,2015,NHL
134 | Calgary Flames*,82,45,30,7,97,.591,241,216,0.26,-0.05,.543,41,2015,NHL
135 | Los Angeles Kings,82,40,27,15,95,.579,220,205,0.16,-0.03,.524,38,2015,NHL
136 | San Jose Sharks,82,40,33,9,89,.543,228,232,-0.08,-0.03,.500,36,2015,NHL
137 | Edmonton Oilers,82,24,44,14,62,.378,198,283,-1.01,0.03,.305,19,2015,NHL
138 | Arizona Coyotes,82,24,50,8,56,.341,170,272,-1.20,0.04,.293,19,2015,NHL
139 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NHL
140 | Boston Bruins*,82,54,19,9,117,.713,261,177,0.92,-0.11,.677,51,2014,NHL
141 | Tampa Bay Lightning*,82,46,27,9,101,.616,240,215,0.25,-0.06,.549,38,2014,NHL
142 | Montreal Canadiens*,82,46,28,8,100,.610,215,204,0.08,-0.05,.543,40,2014,NHL
143 | Detroit Red Wings*,82,39,28,15,93,.567,222,230,-0.14,-0.04,.500,34,2014,NHL
144 | Ottawa Senators,82,37,31,14,88,.537,236,265,-0.37,-0.02,.451,30,2014,NHL
145 | Toronto Maple Leafs,82,38,36,8,84,.512,231,256,-0.34,-0.04,.433,29,2014,NHL
146 | Florida Panthers,82,29,45,8,66,.402,196,268,-0.87,0.01,.341,21,2014,NHL
147 | Buffalo Sabres,82,21,51,10,52,.317,157,248,-1.09,0.02,.244,14,2014,NHL
148 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2014,NHL
149 | Pittsburgh Penguins*,82,51,24,7,109,.665,249,207,0.47,-0.04,.598,44,2014,NHL
150 | New York Rangers*,82,45,31,6,96,.585,218,193,0.26,-0.04,.543,41,2014,NHL
151 | Philadelphia Flyers*,82,42,30,10,94,.573,236,235,-0.01,-0.02,.543,39,2014,NHL
152 | Columbus Blue Jackets*,82,43,32,7,93,.567,231,216,0.16,-0.03,.506,38,2014,NHL
153 | Washington Capitals,82,38,30,14,90,.549,235,240,-0.08,-0.02,.470,28,2014,NHL
154 | New Jersey Devils,82,35,29,18,88,.537,197,208,-0.15,-0.02,.506,35,2014,NHL
155 | Carolina Hurricanes,82,36,35,11,83,.506,207,230,-0.29,-0.01,.451,34,2014,NHL
156 | New York Islanders,82,34,37,11,79,.482,225,267,-0.51,0.00,.396,25,2014,NHL
157 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NHL
158 | Colorado Avalanche*,82,52,22,8,112,.683,250,220,0.40,0.04,.628,47,2014,NHL
159 | St. Louis Blues*,82,52,23,7,111,.677,248,191,0.71,0.01,.598,43,2014,NHL
160 | Chicago Blackhawks*,82,46,21,15,107,.652,267,220,0.60,0.02,.573,40,2014,NHL
161 | Minnesota Wild*,82,43,27,12,98,.598,207,206,0.07,0.05,.524,35,2014,NHL
162 | Dallas Stars*,82,40,31,11,91,.555,235,228,0.14,0.05,.494,36,2014,NHL
163 | Nashville Predators,82,38,32,12,88,.537,216,242,-0.25,0.07,.506,36,2014,NHL
164 | Winnipeg Jets,82,37,35,10,84,.512,227,237,-0.07,0.06,.439,29,2014,NHL
165 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NHL
166 | Anaheim Ducks*,82,54,20,8,116,.707,266,209,0.68,-0.01,.677,51,2014,NHL
167 | San Jose Sharks*,82,51,22,9,111,.677,249,200,0.60,0.00,.604,41,2014,NHL
168 | Los Angeles Kings*,82,46,28,8,100,.610,206,174,0.40,0.01,.549,38,2014,NHL
169 | Phoenix Coyotes,82,37,30,15,89,.543,216,231,-0.16,0.03,.457,31,2014,NHL
170 | Vancouver Canucks,82,36,35,11,83,.506,196,223,-0.30,0.03,.451,31,2014,NHL
171 | Calgary Flames,82,35,40,7,77,.470,209,241,-0.35,0.04,.402,28,2014,NHL
172 | Edmonton Oilers,82,29,44,9,67,.409,203,270,-0.75,0.07,.348,25,2014,NHL
173 | 


--------------------------------------------------------------------------------
/Assignment_4/assets/nfl.csv:
--------------------------------------------------------------------------------
  1 | DSRS,L,League,MoV,OSRS,PA,PD,PF,SRS,SoS,T,W,W-L%,team,year
  2 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2018
  3 | 2.1,5,NFL,6.9,3.1,325,111,436,5.2,-1.8,0,11,.688,New England Patriots*,2018
  4 | -5.2,9,NFL,-7.1,-3.6,433,-114,319,-8.8,-1.7,0,7,.438,Miami Dolphins,2018
  5 | -0.6,10,NFL,-6.6,-6.3,374,-105,269,-6.9,-0.3,0,6,.375,Buffalo Bills,2018
  6 | -5.9,12,NFL,-6.8,-2.0,441,-108,333,-7.8,-1.1,0,4,.250,New York Jets,2018
  7 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2018
  8 | 6.4,6,NFL,6.4,0.6,287,102,389,7.0,0.6,0,10,.625,Baltimore Ravens*,2018
  9 | 1.7,6,NFL,4.3,3.9,360,68,428,5.6,1.3,1,9,.594,Pittsburgh Steelers,2018
 10 | 0.6,8,NFL,-2.1,-1.0,392,-33,359,-0.3,1.7,1,7,.469,Cleveland Browns,2018
 11 | -3.4,10,NFL,-5.4,0.0,455,-87,368,-3.4,2.0,0,6,.375,Cincinnati Bengals,2018
 12 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2018
 13 | 1.4,5,NFL,5.4,2.4,316,86,402,3.8,-1.5,0,11,.688,Houston Texans*,2018
 14 | -0.6,6,NFL,5.6,3.9,344,89,433,3.4,-2.2,0,10,.625,Indianapolis Colts+,2018
 15 | 3.5,7,NFL,0.4,-3.2,303,7,310,0.2,-0.2,0,9,.563,Tennessee Titans,2018
 16 | 4.0,11,NFL,-4.4,-8.1,316,-71,245,-4.0,0.4,0,5,.313,Jacksonville Jaguars,2018
 17 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2018
 18 | -3.8,4,NFL,9.0,12.6,421,144,565,8.9,-0.1,0,12,.750,Kansas City Chiefs*,2018
 19 | 2.9,4,NFL,6.2,3.0,329,99,428,6.0,-0.2,0,12,.750,Los Angeles Chargers+,2018
 20 | 3.1,10,NFL,-1.3,-3.6,349,-20,329,-0.5,0.7,0,6,.375,Denver Broncos,2018
 21 | -4.1,12,NFL,-11.1,-5.2,467,-177,290,-9.3,1.8,0,4,.250,Oakland Raiders,2018
 22 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2018
 23 | 2.9,6,NFL,0.9,-1.9,324,15,339,1.1,0.2,0,10,.625,Dallas Cowboys*,2018
 24 | 1.8,7,NFL,1.2,0.0,348,19,367,1.7,0.5,0,9,.563,Philadelphia Eagles+,2018
 25 | 0.6,9,NFL,-4.9,-5.6,359,-78,281,-4.9,-0.1,0,7,.438,Washington Redskins,2018
 26 | -2.9,11,NFL,-2.7,0.8,412,-43,369,-2.2,0.5,0,5,.313,New York Giants,2018
 27 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2018
 28 | 4.8,4,NFL,8.6,1.5,283,138,421,6.3,-2.3,0,12,.750,Chicago Bears*,2018
 29 | 1.8,7,NFL,1.2,-1.2,341,19,360,0.6,-0.6,1,8,.531,Minnesota Vikings,2018
 30 | -2.7,9,NFL,-1.5,0.0,400,-24,376,-2.7,-1.2,1,6,.406,Green Bay Packers,2018
 31 | 0.3,10,NFL,-2.3,-3.3,360,-36,324,-3.0,-0.8,0,6,.375,Detroit Lions,2018
 32 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2018
 33 | 2.2,3,NFL,9.4,7.9,353,151,504,10.1,0.6,0,13,.813,New Orleans Saints*,2018
 34 | 0.8,9,NFL,-0.4,0.1,382,-6,376,0.9,1.3,0,7,.438,Carolina Panthers,2018
 35 | -2.6,9,NFL,-0.6,2.5,423,-9,414,-0.1,0.4,0,7,.438,Atlanta Falcons,2018
 36 | -4.6,11,NFL,-4.3,2.0,464,-68,396,-2.6,1.7,0,5,.313,Tampa Bay Buccaneers,2018
 37 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2018
 38 | -1.1,3,NFL,8.9,9.5,384,143,527,8.5,-0.4,0,13,.813,Los Angeles Rams*,2018
 39 | 1.5,6,NFL,5.1,3.0,347,81,428,4.5,-0.6,0,10,.625,Seattle Seahawks+,2018
 40 | -3.1,12,NFL,-5.8,-2.5,435,-93,342,-5.5,0.3,0,4,.250,San Francisco 49ers,2018
 41 | -1.9,13,NFL,-12.5,-9.6,425,-200,225,-11.5,1.0,0,3,.188,Arizona Cardinals,2018
 42 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2017
 43 | 2.6,3,NFL,10.1,6.3,296,162,458,8.9,-1.2,,13,.813,New England Patriots*,2017
 44 | -1.0,7,NFL,-3.6,-3.0,359,-57,302,-4.0,-0.5,,9,.563,Buffalo Bills+,2017
 45 | -2.4,10,NFL,-7.0,-3.9,393,-112,281,-6.3,0.7,,6,.375,Miami Dolphins,2017
 46 | -2.1,11,NFL,-5.3,-2.9,382,-84,298,-4.9,0.3,,5,.313,New York Jets,2017
 47 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2017
 48 | 1.8,3,NFL,6.1,3.2,308,98,406,5.0,-1.1,,13,.813,Pittsburgh Steelers*,2017
 49 | 1.2,7,NFL,5.8,2.2,303,92,395,3.4,-2.4,,9,.563,Baltimore Ravens,2017
 50 | -0.9,9,NFL,-3.7,-4.1,349,-59,290,-5.0,-1.3,,7,.438,Cincinnati Bengals,2017
 51 | -4.1,16,NFL,-11.0,-6.8,410,-176,234,-11.0,0.0,,0,.000,Cleveland Browns,2017
 52 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2017
 53 | 3.6,6,NFL,9.3,3.0,268,149,417,6.5,-2.8,,10,.625,Jacksonville Jaguars*,2017
 54 | -1.5,7,NFL,-1.4,-2.0,356,-22,334,-3.5,-2.1,,9,.563,Tennessee Titans+,2017
 55 | -5.6,12,NFL,-6.1,-0.8,436,-98,338,-6.4,-0.3,,4,.250,Houston Texans,2017
 56 | -4.0,12,NFL,-8.8,-6.1,404,-141,263,-10.1,-1.3,,4,.250,Indianapolis Colts,2017
 57 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2017
 58 | -0.3,6,NFL,4.8,3.8,339,76,415,3.4,-1.3,,10,.625,Kansas City Chiefs*,2017
 59 | 4.0,7,NFL,5.2,-0.3,272,83,355,3.6,-1.5,,9,.563,Los Angeles Chargers,2017
 60 | -1.8,10,NFL,-4.5,-3.0,373,-72,301,-4.7,-0.2,,6,.375,Oakland Raiders,2017
 61 | -2.9,11,NFL,-5.8,-3.9,382,-93,289,-6.7,-0.9,,5,.313,Denver Broncos,2017
 62 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2017
 63 | 2.5,3,NFL,10.1,7.0,295,162,457,9.4,-0.7,,13,.813,Philadelphia Eagles*,2017
 64 | 1.2,7,NFL,1.4,0.4,332,22,354,1.6,0.2,,9,.563,Dallas Cowboys,2017
 65 | -1.8,9,NFL,-2.9,0.5,388,-46,342,-1.3,1.6,,7,.438,Washington Redskins,2017
 66 | -1.2,13,NFL,-8.9,-6.4,388,-142,246,-7.6,1.3,,3,.188,New York Giants,2017
 67 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2017
 68 | 6.8,3,NFL,8.1,2.3,252,130,382,9.1,1.0,,13,.813,Minnesota Vikings*,2017
 69 | -2.5,7,NFL,2.1,5.2,376,34,410,2.7,0.6,,9,.563,Detroit Lions,2017
 70 | -1.6,9,NFL,-4.0,-0.3,384,-64,320,-1.9,2.1,,7,.438,Green Bay Packers,2017
 71 | 3.3,11,NFL,-3.5,-4.6,320,-56,264,-1.3,2.2,,5,.313,Chicago Bears,2017
 72 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2017
 73 | 2.2,5,NFL,7.6,7.0,326,122,448,9.2,1.5,,11,.688,New Orleans Saints*,2017
 74 | 2.7,5,NFL,2.3,1.7,327,36,363,4.3,2.1,,11,.688,Carolina Panthers+,2017
 75 | 3.2,6,NFL,2.4,1.1,315,38,353,4.3,1.9,,10,.625,Atlanta Falcons+,2017
 76 | -1.7,11,NFL,-2.9,0.4,382,-47,335,-1.3,1.7,,5,.313,Tampa Bay Buccaneers,2017
 77 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2017
 78 | 1.0,5,NFL,9.3,8.2,329,149,478,9.2,-0.2,,11,.688,Los Angeles Rams*,2017
 79 | 1.2,7,NFL,2.1,0.7,332,34,366,1.9,-0.2,,9,.563,Seattle Seahawks,2017
 80 | 0.2,8,NFL,-4.1,-4.0,361,-66,295,-3.7,0.4,,8,.500,Arizona Cardinals,2017
 81 | -2.1,10,NFL,-3.3,-0.8,383,-52,331,-2.9,0.4,,6,.375,San Francisco 49ers,2017
 82 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2016
 83 | 5.0,2,NFL,11.9,4.3,250,191,441,9.3,-2.7,0,14,.875,New England Patriots*,2016
 84 | -1.8,6,NFL,-1.1,-0.6,380,-17,363,-2.4,-1.3,0,10,.625,Miami Dolphins+,2016
 85 | -2.2,9,NFL,1.3,1.8,378,21,399,-0.3,-1.6,0,7,.438,Buffalo Bills,2016
 86 | -3.0,11,NFL,-8.4,-5.5,409,-134,275,-8.5,-0.1,0,5,.313,New York Jets,2016
 87 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2016
 88 | 2.0,5,NFL,4.5,2.8,327,72,399,4.7,0.2,0,11,.688,Pittsburgh Steelers*,2016
 89 | 2.6,8,NFL,1.4,-1.1,321,22,343,1.5,0.2,0,8,.500,Baltimore Ravens,2016
 90 | 2.5,9,NFL,0.6,-1.5,315,10,325,1.0,0.4,1,6,.406,Cincinnati Bengals,2016
 91 | -4.9,15,NFL,-11.8,-5.2,452,-188,264,-10.1,1.7,0,1,.063,Cleveland Browns,2016
 92 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2016
 93 | 2.7,7,NFL,-3.1,-5.3,328,-49,279,-2.6,0.4,0,9,.563,Houston Texans*,2016
 94 | -1.7,7,NFL,0.2,0.7,378,3,381,-1.0,-1.2,0,9,.563,Tennessee Titans,2016
 95 | -2.7,8,NFL,1.2,3.1,392,19,411,0.4,-0.8,0,8,.500,Indianapolis Colts,2016
 96 | -2.3,13,NFL,-5.1,-2.7,400,-82,318,-5.0,0.2,0,3,.188,Jacksonville Jaguars,2016
 97 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2016
 98 | 4.4,4,NFL,4.9,1.2,311,78,389,5.6,0.7,0,12,.750,Kansas City Chiefs*,2016
 99 | -0.3,4,NFL,1.9,3.5,385,31,416,3.3,1.3,0,12,.750,Oakland Raiders+,2016
100 | 6.1,7,NFL,2.3,-2.0,297,36,333,4.0,1.8,0,9,.563,Denver Broncos,2016
101 | -3.0,11,NFL,-0.8,3.0,423,-13,410,0.1,0.9,0,5,.313,San Diego Chargers,2016
102 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2016
103 | 2.9,3,NFL,7.2,4.1,306,115,421,7.0,-0.2,0,13,.813,Dallas Cowboys*,2016
104 | 5.4,5,NFL,1.6,-3.2,284,26,310,2.1,0.5,0,11,.688,New York Giants+,2016
105 | -1.3,7,NFL,0.8,3.3,383,13,396,2.0,1.2,1,8,.531,Washington Redskins,2016
106 | 2.5,9,NFL,2.3,1.3,331,36,367,3.8,1.6,0,7,.438,Philadelphia Eagles,2016
107 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2016
108 | -2.0,6,NFL,2.8,4.9,388,44,432,2.8,0.1,0,10,.625,Green Bay Packers*,2016
109 | -0.1,7,NFL,-0.8,-1.3,358,-12,346,-1.4,-0.6,0,9,.563,Detroit Lions+,2016
110 | 3.6,8,NFL,1.3,-2.6,307,20,327,0.9,-0.3,0,8,.500,Minnesota Vikings,2016
111 | -2.3,13,NFL,-7.5,-5.2,399,-120,279,-7.5,0.0,0,3,.188,Chicago Bears,2016
112 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2016
113 | -2.0,5,NFL,8.4,10.5,406,134,540,8.5,0.1,0,11,.688,Atlanta Falcons*,2016
114 | 1.3,7,NFL,-0.9,-1.5,369,-15,354,-0.2,0.7,0,9,.563,Tampa Bay Buccaneers,2016
115 | -5.3,9,NFL,0.9,6.8,454,15,469,1.5,0.6,0,7,.438,New Orleans Saints,2016
116 | -0.8,10,NFL,-2.1,-0.2,402,-33,369,-1.0,1.1,0,6,.375,Carolina Panthers,2016
117 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2016
118 | 4.5,5,NFL,3.9,-2.4,292,62,354,2.1,-1.7,1,10,.656,Seattle Seahawks*,2016
119 | -0.8,8,NFL,3.5,2.4,362,56,418,1.6,-1.9,1,7,.469,Arizona Cardinals,2016
120 | -1.6,12,NFL,-10.6,-9.5,394,-170,224,-11.1,-0.5,0,4,.250,Los Angeles Rams,2016
121 | -7.5,14,NFL,-10.7,-3.7,480,-171,309,-11.2,-0.5,0,2,.125,San Francisco 49ers,2016
122 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2015
123 | 1.7,4,NFL,9.4,5.3,315,150,465,7.0,-2.4,,12,.750,New England Patriots*,2015
124 | 2.0,6,NFL,4.6,-0.5,314,73,387,1.5,-3.0,,10,.625,New York Jets,2015
125 | -0.2,8,NFL,1.3,0.3,359,20,379,0.0,-1.2,,8,.500,Buffalo Bills,2015
126 | -2.2,10,NFL,-4.9,-4.7,389,-79,310,-6.8,-1.9,,6,.375,Miami Dolphins,2015
127 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2015
128 | 5.8,4,NFL,8.8,4.8,279,140,419,10.6,1.9,,12,.750,Cincinnati Bengals*,2015
129 | 3.6,6,NFL,6.5,5.1,319,104,423,8.7,2.2,,10,.625,Pittsburgh Steelers+,2015
130 | -1.2,11,NFL,-4.6,-0.7,401,-73,328,-1.9,2.6,,5,.313,Baltimore Ravens,2015
131 | -2.9,13,NFL,-9.6,-3.2,432,-154,278,-6.1,3.5,,3,.188,Cleveland Browns,2015
132 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2015
133 | 2.6,7,NFL,1.6,-3.3,313,26,339,-0.8,-2.4,,9,.563,Houston Texans*,2015
134 | -3.6,8,NFL,-4.7,-3.1,408,-75,333,-6.7,-2.0,,8,.500,Indianapolis Colts,2015
135 | -6.9,11,NFL,-4.5,-0.7,448,-72,376,-7.5,-3.0,,5,.313,Jacksonville Jaguars,2015
136 | -4.6,13,NFL,-7.8,-5.9,423,-124,299,-10.5,-2.8,,3,.188,Tennessee Titans,2015
137 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2015
138 | 5.5,4,NFL,3.7,0.3,296,59,355,5.8,2.1,,12,.750,Denver Broncos*,2015
139 | 5.3,5,NFL,7.4,3.7,287,118,405,9.0,1.6,,11,.688,Kansas City Chiefs+,2015
140 | -1.6,9,NFL,-2.5,1.4,399,-40,359,-0.2,2.3,,7,.438,Oakland Raiders,2015
141 | -1.1,12,NFL,-4.9,-1.5,398,-78,320,-2.6,2.2,,4,.250,San Diego Chargers,2015
142 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2015
143 | -1.8,7,NFL,0.6,-0.2,379,9,388,-1.9,-2.5,,9,.563,Washington Redskins*,2015
144 | -4.0,9,NFL,-3.3,-0.6,430,-53,377,-4.6,-1.3,,7,.438,Philadelphia Eagles,2015
145 | -6.1,10,NFL,-1.4,2.5,442,-22,420,-3.6,-2.2,,6,.375,New York Giants,2015
146 | 0.1,12,NFL,-6.2,-7.0,374,-99,275,-6.9,-0.7,,4,.250,Dallas Cowboys,2015
147 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2015
148 | 4.7,5,NFL,3.9,1.1,302,63,365,5.8,1.9,,11,.688,Minnesota Vikings*,2015
149 | 3.3,6,NFL,2.8,2.0,323,45,368,5.3,2.5,,10,.625,Green Bay Packers+,2015
150 | -1.3,9,NFL,-2.6,1.0,400,-42,358,-0.2,2.4,,7,.438,Detroit Lions,2015
151 | -1.2,10,NFL,-3.9,-0.1,397,-62,335,-1.3,2.6,,6,.375,Chicago Bears,2015
152 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2015
153 | 2.1,1,NFL,12.0,6.0,308,192,500,8.1,-3.9,,15,.938,Carolina Panthers*,2015
154 | 0.3,8,NFL,-0.4,-4.0,345,-6,339,-3.8,-3.4,,8,.500,Atlanta Falcons,2015
155 | -7.6,9,NFL,-4.3,1.1,476,-68,408,-6.6,-2.3,,7,.438,New Orleans Saints,2015
156 | -4.2,10,NFL,-4.7,-3.5,417,-75,342,-7.7,-3.0,,6,.375,Tampa Bay Buccaneers,2015
157 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2015
158 | 3.4,3,NFL,11.0,9.0,313,176,489,12.3,1.3,,13,.813,Arizona Cardinals*,2015
159 | 6.0,6,NFL,9.1,5.4,277,146,423,11.3,2.2,,10,.625,Seattle Seahawks+,2015
160 | 3.6,9,NFL,-3.1,-3.8,330,-50,280,-0.2,3.0,,7,.438,St. Louis Rams,2015
161 | 0.5,11,NFL,-9.3,-6.0,387,-149,238,-5.5,3.8,,5,.313,San Francisco 49ers,2015
162 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2014
163 | 3.5,4,NFL,9.7,7.5,313,155,468,10.9,1.3,0,12,.750,New England Patriots*,2014
164 | 5.3,7,NFL,3.4,-0.4,289,54,343,4.9,1.6,0,9,.563,Buffalo Bills,2014
165 | -0.4,8,NFL,0.9,2.9,373,15,388,2.6,1.6,0,8,.500,Miami Dolphins,2014
166 | -1.0,12,NFL,-7.4,-4.0,401,-118,283,-5.0,2.3,0,4,.250,New York Jets,2014
167 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2014
168 | -2.1,5,NFL,4.3,4.4,368,68,436,2.2,-2.0,0,11,.688,Pittsburgh Steelers*,2014
169 | 1.3,5,NFL,1.3,-0.5,344,21,365,0.7,-0.6,1,10,.656,Cincinnati Bengals+,2014
170 | 2.8,6,NFL,6.7,1.8,302,107,409,4.6,-2.1,0,10,.625,Baltimore Ravens+,2014
171 | 0.9,9,NFL,-2.4,-4.8,337,-38,299,-3.9,-1.5,0,7,.438,Cleveland Browns,2014
172 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2014
173 | -0.8,5,NFL,5.6,5.2,369,89,458,4.4,-1.1,0,11,.688,Indianapolis Colts*,2014
174 | 2.5,7,NFL,4.1,-0.8,307,65,372,1.7,-2.3,0,9,.563,Houston Texans,2014
175 | -2.7,13,NFL,-10.2,-7.8,412,-163,249,-10.5,-0.3,0,3,.188,Jacksonville Jaguars,2014
176 | -4.9,14,NFL,-11.5,-7.0,438,-184,254,-11.8,-0.3,0,2,.125,Tennessee Titans,2014
177 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2014
178 | 0.4,4,NFL,8.0,9.2,354,128,482,9.6,1.6,0,12,.750,Denver Broncos*,2014
179 | 5.7,7,NFL,4.5,0.0,281,72,353,5.7,1.2,0,9,.563,Kansas City Chiefs,2014
180 | 1.2,7,NFL,0.0,0.7,348,0,348,1.9,1.9,0,9,.563,San Diego Chargers,2014
181 | -4.7,13,NFL,-12.4,-4.3,452,-199,253,-9.0,3.4,0,3,.188,Oakland Raiders,2014
182 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2014
183 | 0.1,4,NFL,7.2,5.3,352,115,467,5.4,-1.8,0,12,.750,Dallas Cowboys*,2014
184 | -2.7,6,NFL,4.6,6.6,400,74,474,3.9,-0.7,0,10,.625,Philadelphia Eagles,2014
185 | -2.5,10,NFL,-1.3,0.8,400,-20,380,-1.7,-0.4,0,6,.375,New York Giants,2014
186 | -4.7,12,NFL,-8.6,-4.0,438,-137,301,-8.7,-0.2,0,4,.250,Washington Redskins,2014
187 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2014
188 | 0.4,4,NFL,8.6,7.9,348,138,486,8.3,-0.3,0,12,.750,Green Bay Packers*,2014
189 | 5.2,5,NFL,2.4,-3.2,282,39,321,2.1,-0.4,0,11,.688,Detroit Lions+,2014
190 | 1.1,9,NFL,-1.1,-2.8,343,-18,325,-1.7,-0.5,0,7,.438,Minnesota Vikings,2014
191 | -4.7,11,NFL,-7.7,-2.0,442,-123,319,-6.7,1.0,0,5,.313,Chicago Bears,2014
192 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2014
193 | -0.7,8,NFL,-2.2,-2.4,374,-35,339,-3.1,-0.9,1,7,.469,Carolina Panthers*,2014
194 | -4.8,9,NFL,-1.4,1.9,424,-23,401,-2.9,-1.5,0,7,.438,New Orleans Saints,2014
195 | -4.4,10,NFL,-2.3,0.6,417,-36,381,-3.8,-1.6,0,6,.375,Atlanta Falcons,2014
196 | -3.3,14,NFL,-8.3,-6.5,410,-133,277,-9.8,-1.5,0,2,.125,Tampa Bay Buccaneers,2014
197 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2014
198 | 7.1,4,NFL,8.8,2.4,254,140,394,9.5,0.8,0,12,.750,Seattle Seahawks*,2014
199 | 4.4,5,NFL,0.7,-2.4,299,11,310,2.0,1.3,0,11,.688,Arizona Cardinals+,2014
200 | 2.1,8,NFL,-2.1,-3.0,340,-34,306,-1.0,1.2,0,8,.500,San Francisco 49ers,2014
201 | 0.4,10,NFL,-1.9,-1.2,354,-30,324,-0.8,1.0,0,6,.375,St. Louis Rams,2014
202 | 


--------------------------------------------------------------------------------
/Assignment_4/assignment4.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {
   6 |     "deletable": false,
   7 |     "editable": false,
   8 |     "nbgrader": {
   9 |      "checksum": "48770f8b5f5d3062d3badd51fcafc401",
  10 |      "grade": false,
  11 |      "grade_id": "cell-a6c4f74309fc2379",
  12 |      "locked": true,
  13 |      "schema_version": 1,
  14 |      "solution": false
  15 |     }
  16 |    },
  17 |    "source": [
  18 |     "# Assignment 4\n",
  19 |     "## Description\n",
  20 |     "In this assignment you must read in a file of metropolitan regions and associated sports teams from [assets/wikipedia_data.html](assets/wikipedia_data.html) and answer some questions about each metropolitan region. Each of these regions may have one or more teams from the \"Big 4\": NFL (football, in [assets/nfl.csv](assets/nfl.csv)), MLB (baseball, in [assets/mlb.csv](assets/mlb.csv)), NBA (basketball, in [assets/nba.csv](assets/nba.csv)), or NHL (hockey, in [assets/nhl.csv](assets/nhl.csv)). Please keep in mind that all questions are from the perspective of the metropolitan region, and that this file is the \"source of authority\" for the location of a given sports team. Thus teams which are commonly known by a different area (e.g. \"Oakland Raiders\") need to be mapped into the metropolitan region given (e.g. San Francisco Bay Area). This will require some human data understanding outside of the data you've been given (e.g. you will have to hand-code some names, and might need to google to find out where teams are)!\n",
  21 |     "\n",
  22 |     "For each sport I would like you to answer the question: **what is the win/loss ratio's correlation with the population of the city it is in?** Win/Loss ratio refers to the number of wins over the number of wins plus the number of losses. Remember to calculate the correlation with [`pearsonr`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html), so you are going to send in two ordered lists of values: the populations from the wikipedia_data.html file and the win/loss ratios for a given sport, in the same order. Average the win/loss ratios for those cities which have multiple teams of a single sport. Each sport is worth an equal amount (20%\\*4=80% of the grade for this assignment). You should only use data **from year 2018** for your analysis -- this is important!\n",
  23 |     "\n",
  24 |     "## Notes\n",
  25 |     "\n",
  26 |     "1. Do not include data about the MLS or CFL in any of the work you are doing, we're only interested in the Big 4 in this assignment.\n",
  27 |     "2. I highly suggest that you first tackle the four correlation questions in order, as they are all similar and worth the majority of grades for this assignment. This is by design!\n",
  28 |     "3. It's fair game to talk with peers about high level strategy as well as the relationship between metropolitan areas and sports teams. However, do not post code solving aspects of the assignment (including things such as dictionaries mapping areas to teams, or regexes which will clean up names).\n",
  29 |     "4. There may be more teams than the assert statements test, remember to collapse multiple teams in one city into a single value!"
  30 |    ]
  31 |   },
  32 |   {
  33 |    "cell_type": "markdown",
  34 |    "metadata": {
  35 |     "deletable": false,
  36 |     "editable": false,
  37 |     "nbgrader": {
  38 |      "checksum": "369ff9ecf0ee04640574205cbc697f94",
  39 |      "grade": false,
  40 |      "grade_id": "cell-712b2b5da63d4505",
  41 |      "locked": true,
  42 |      "schema_version": 1,
  43 |      "solution": false
  44 |     }
  45 |    },
  46 |    "source": [
  47 |     "## Question 1\n",
  48 |     "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NHL** using **2018** data."
  49 |    ]
  50 |   },
  51 |   {
  52 |    "cell_type": "code",
  53 |    "execution_count": 1,
  54 |    "metadata": {
  55 |     "deletable": false,
  56 |     "nbgrader": {
  57 |      "checksum": "1cac4803b02502929f5b1612d48db2b5",
  58 |      "grade": false,
  59 |      "grade_id": "cell-69b16e4386e58030",
  60 |      "locked": false,
  61 |      "schema_version": 1,
  62 |      "solution": true
  63 |     }
  64 |    },
  65 |    "outputs": [],
  66 |    "source": [
  67 |     "import pandas as pd\n",
  68 |     "import numpy as np\n",
  69 |     "import scipy.stats as stats\n",
  70 |     "import re\n",
  71 |     "\n",
  72 |     "\n",
  73 |     "\n",
  74 |     "def nhl_correlation(): \n",
  75 |     "    # YOUR CODE HERE\n",
  76 |     "    #raise NotImplementedError()\n",
  77 |     "    \n",
  78 |     "    nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
  79 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
  80 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
  81 |     "    \n",
  82 |     "    nhl_df.drop([0,9,18,26],0,inplace=True)\n",
  83 |     "    cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50],0,inplace=True)\n",
  84 |     "\n",
  85 |     "    l= []\n",
  86 |     "    for i in cities['NHL']:\n",
  87 |     "        i=i.split('[')\n",
  88 |     "        l.append(i[0])\n",
  89 |     "    cities['NHL'] = l\n",
  90 |     "\n",
  91 |     "    li = []\n",
  92 |     "    for i in nhl_df['team']:\n",
  93 |     "        i = re.findall(\"[^*]+\", i)\n",
  94 |     "        li.append(i[0])\n",
  95 |     "    nhl_df['team'] = li\n",
  96 |     "\n",
  97 |     "    nhl_df = nhl_df.head(31)\n",
  98 |     "\n",
  99 |     "    nhl_df['team_ville'] = nhl_df['team']\n",
 100 |     "    nhl_df['team_ville'] = nhl_df['team_ville'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n",
 101 |     "     'Boston Bruins':'Boston',\n",
 102 |     "     'Toronto Maple Leafs':'Toronto',\n",
 103 |     "     'Florida Panthers':'Miami–Fort Lauderdale',\n",
 104 |     "     'Detroit Red Wings':'Detroit',\n",
 105 |     "     'Montreal Canadiens':'Montreal',\n",
 106 |     "     'Ottawa Senators':'Ottawa',\n",
 107 |     "     'Buffalo Sabres':'Buffalo',\n",
 108 |     "     'Washington Capitals':'Washington, D.C.',\n",
 109 |     "     'Pittsburgh Penguins':'Pittsburgh',\n",
 110 |     "     'Philadelphia Flyers':'Philadelphia',\n",
 111 |     "     'Columbus Blue Jackets':'Columbus',\n",
 112 |     "     'New Jersey Devils':'New York City',\n",
 113 |     "     'Carolina Hurricanes':'Raleigh',\n",
 114 |     "     'New York Islanders':'New York City',\n",
 115 |     "     'New York Rangers':'New York City',\n",
 116 |     "     'Nashville Predators':'Nashville',\n",
 117 |     "     'Winnipeg Jets':'Winnipeg',\n",
 118 |     "     'Minnesota Wild':'Minneapolis–Saint Paul',\n",
 119 |     "     'Colorado Avalanche':'Denver',\n",
 120 |     "     'St. Louis Blues':'St. Louis',\n",
 121 |     "     'Dallas Stars':'Dallas–Fort Worth',\n",
 122 |     "     'Chicago Blackhawks':'Chicago',\n",
 123 |     "     'Vegas Golden Knights':'Las Vegas',\n",
 124 |     "     'Anaheim Ducks':'Los Angeles',\n",
 125 |     "     'San Jose Sharks':'San Francisco Bay Area',\n",
 126 |     "     'Los Angeles Kings':'Los Angeles',\n",
 127 |     "     'Calgary Flames':'Calgary',\n",
 128 |     "     'Edmonton Oilers':'Edmonton',\n",
 129 |     "     'Vancouver Canucks':'Vancouver',\n",
 130 |     "     'Arizona Coyotes':'Phoenix'})\n",
 131 |     "\n",
 132 |     "    df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 133 |     "\n",
 134 |     "    df['W'] = pd.to_numeric(df['W'])\n",
 135 |     "    df['L'] = pd.to_numeric(df['L'])\n",
 136 |     "    df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n",
 137 |     "\n",
 138 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 139 |     "\n",
 140 |     "    df = df[he]\n",
 141 |     "\n",
 142 |     "    df['W/L'] = df['W']/(df['L']+df['W'])\n",
 143 |     "\n",
 144 |     "    df = df.groupby('Metropolitan area').mean().reset_index()\n",
 145 |     "    \n",
 146 |     "    population_by_region = df['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n",
 147 |     "    win_loss_by_region = df['W/L'] # pass in win/loss ratio from nhl_df in the same order as cities[\"Metropolitan area\"]\n",
 148 |     "\n",
 149 |     "    assert len(population_by_region) == len(win_loss_by_region), \"Q1: Your lists must be the same length\"\n",
 150 |     "    assert len(population_by_region) == 28, \"Q1: There should be 28 teams being analysed for NHL\"\n",
 151 |     "    \n",
 152 |     "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
 153 |    ]
 154 |   },
 155 |   {
 156 |    "cell_type": "code",
 157 |    "execution_count": null,
 158 |    "metadata": {
 159 |     "deletable": false,
 160 |     "editable": false,
 161 |     "nbgrader": {
 162 |      "checksum": "52a581df513c71153e105b93764cda4b",
 163 |      "grade": true,
 164 |      "grade_id": "cell-ebe0b2dfe1067e63",
 165 |      "locked": true,
 166 |      "points": 20,
 167 |      "schema_version": 1,
 168 |      "solution": false
 169 |     }
 170 |    },
 171 |    "outputs": [],
 172 |    "source": []
 173 |   },
 174 |   {
 175 |    "cell_type": "markdown",
 176 |    "metadata": {
 177 |     "deletable": false,
 178 |     "editable": false,
 179 |     "nbgrader": {
 180 |      "checksum": "988912cae4968d81473f46d783e79c16",
 181 |      "grade": false,
 182 |      "grade_id": "cell-cb964e690298b71d",
 183 |      "locked": true,
 184 |      "schema_version": 1,
 185 |      "solution": false
 186 |     }
 187 |    },
 188 |    "source": [
 189 |     "## Question 2\n",
 190 |     "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NBA** using **2018** data."
 191 |    ]
 192 |   },
 193 |   {
 194 |    "cell_type": "code",
 195 |    "execution_count": 2,
 196 |    "metadata": {
 197 |     "deletable": false,
 198 |     "nbgrader": {
 199 |      "checksum": "9394222aafc8ccab0a228098ba0d6010",
 200 |      "grade": false,
 201 |      "grade_id": "cell-5a5f21279e3d3572",
 202 |      "locked": false,
 203 |      "schema_version": 1,
 204 |      "solution": true
 205 |     }
 206 |    },
 207 |    "outputs": [],
 208 |    "source": [
 209 |     "import pandas as pd\n",
 210 |     "import numpy as np\n",
 211 |     "import scipy.stats as stats\n",
 212 |     "import re\n",
 213 |     "\n",
 214 |     "\n",
 215 |     "\n",
 216 |     "def nba_correlation():\n",
 217 |     "    # YOUR CODE HERE\n",
 218 |     "    #raise NotImplementedError()\n",
 219 |     "    \n",
 220 |     "    nba_df=pd.read_csv(\"assets/nba.csv\")\n",
 221 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 222 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 223 |     "    \n",
 224 |     "    cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50],0,inplace=True)\n",
 225 |     "    \n",
 226 |     "    l1 = []\n",
 227 |     "    for i in nba_df['team']:\n",
 228 |     "        #i=i.rstrip()\n",
 229 |     "        i=i.split('*')\n",
 230 |     "        l1.append(i[0])\n",
 231 |     "    nba_df['team'] = l1\n",
 232 |     "    \n",
 233 |     "    l2 = []\n",
 234 |     "    for i in nba_df['team']:\n",
 235 |     "        i=i.split('(')\n",
 236 |     "        l2.append(i[0])\n",
 237 |     "    nba_df['team'] = l2\n",
 238 |     "    \n",
 239 |     "    l3 = []\n",
 240 |     "    for i in nba_df['team']:\n",
 241 |     "        i=i.rstrip()\n",
 242 |     "        l3.append(i)\n",
 243 |     "    nba_df['team'] = l3\n",
 244 |     "    \n",
 245 |     "    \n",
 246 |     "    \n",
 247 |     "    nba_df = nba_df.head(30)\n",
 248 |     "\n",
 249 |     "    nba_df['team_ville'] = nba_df['team']\n",
 250 |     "    nba_df['team_ville'] = nba_df['team_ville'].map({'Toronto Raptors':'Toronto',\n",
 251 |     "     'Boston Celtics':'Boston',\n",
 252 |     "     'Philadelphia 76ers':'Philadelphia',\n",
 253 |     "     'Cleveland Cavaliers':'Cleveland',\n",
 254 |     "     'Indiana Pacers':'Indianapolis',\n",
 255 |     "     'Miami Heat':'Miami–Fort Lauderdale',\n",
 256 |     "     'Milwaukee Bucks':'Milwaukee',\n",
 257 |     "     'Washington Wizards':'Washington, D.C.',\n",
 258 |     "     'Detroit Pistons':'Detroit',\n",
 259 |     "     'Charlotte Hornets':'Charlotte',\n",
 260 |     "     'New York Knicks':'New York City',\n",
 261 |     "     'Brooklyn Nets':'New York City',\n",
 262 |     "     'Chicago Bulls':'Chicago',\n",
 263 |     "     'Orlando Magic':'Orlando',\n",
 264 |     "     'Atlanta Hawks':'Atlanta',\n",
 265 |     "     'Houston Rockets':'Houston',\n",
 266 |     "     'Golden State Warriors':'San Francisco Bay Area',\n",
 267 |     "     'Portland Trail Blazers':'Portland',\n",
 268 |     "     'Oklahoma City Thunder':'Oklahoma City',\n",
 269 |     "     'Utah Jazz':'Salt Lake City',\n",
 270 |     "     'New Orleans Pelicans':'New Orleans',\n",
 271 |     "     'San Antonio Spurs':'San Antonio',\n",
 272 |     "     'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n",
 273 |     "     'Denver Nuggets':'Denver',\n",
 274 |     "     'Los Angeles Clippers':'Los Angeles',\n",
 275 |     "     'Los Angeles Lakers':'Los Angeles',\n",
 276 |     "     'Sacramento Kings':'Sacramento',\n",
 277 |     "     'Dallas Mavericks':'Dallas–Fort Worth',\n",
 278 |     "     'Memphis Grizzlies':'Memphis',\n",
 279 |     "     'Phoenix Suns':'Phoenix'})\n",
 280 |     "    \n",
 281 |     "    df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 282 |     "    \n",
 283 |     "    df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
 284 |     "    df2['W'] = pd.to_numeric(df2['W'])\n",
 285 |     "    df2['L'] = pd.to_numeric(df2['L'])\n",
 286 |     "    df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n",
 287 |     "    he = ['team','W','L','W/L%','Metropolitan area','Population (2016 est.)[8]']\n",
 288 |     "    df2 = df2[he]\n",
 289 |     "    df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n",
 290 |     "    df2 = df2.groupby('Metropolitan area').mean().reset_index()\n",
 291 |     "    \n",
 292 |     "    population_by_region = df2['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n",
 293 |     "    win_loss_by_region = df2['W/L'] # pass in win/loss ratio from nba_df in the same order as cities[\"Metropolitan area\"]\n",
 294 |     "\n",
 295 |     "    assert len(population_by_region) == len(win_loss_by_region), \"Q2: Your lists must be the same length\"\n",
 296 |     "    assert len(population_by_region) == 28, \"Q2: There should be 28 teams being analysed for NBA\"\n",
 297 |     "\n",
 298 |     "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
 299 |    ]
 300 |   },
 301 |   {
 302 |    "cell_type": "code",
 303 |    "execution_count": null,
 304 |    "metadata": {
 305 |     "deletable": false,
 306 |     "editable": false,
 307 |     "nbgrader": {
 308 |      "checksum": "bbdeb8eb22f525a34c10dc8798324e42",
 309 |      "grade": true,
 310 |      "grade_id": "cell-e573b2b4a282b470",
 311 |      "locked": true,
 312 |      "points": 20,
 313 |      "schema_version": 1,
 314 |      "solution": false
 315 |     }
 316 |    },
 317 |    "outputs": [],
 318 |    "source": []
 319 |   },
 320 |   {
 321 |    "cell_type": "markdown",
 322 |    "metadata": {
 323 |     "deletable": false,
 324 |     "editable": false,
 325 |     "nbgrader": {
 326 |      "checksum": "1a1a5809f675ca033086422007cd73bd",
 327 |      "grade": false,
 328 |      "grade_id": "cell-96e15e4335df78f4",
 329 |      "locked": true,
 330 |      "schema_version": 1,
 331 |      "solution": false
 332 |     }
 333 |    },
 334 |    "source": [
 335 |     "## Question 3\n",
 336 |     "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **MLB** using **2018** data."
 337 |    ]
 338 |   },
 339 |   {
 340 |    "cell_type": "code",
 341 |    "execution_count": 3,
 342 |    "metadata": {
 343 |     "deletable": false,
 344 |     "nbgrader": {
 345 |      "checksum": "27e8c0da6c9fa0dffc10488314335b6c",
 346 |      "grade": false,
 347 |      "grade_id": "cell-33b00fc3f3467b0c",
 348 |      "locked": false,
 349 |      "schema_version": 1,
 350 |      "solution": true
 351 |     }
 352 |    },
 353 |    "outputs": [],
 354 |    "source": [
 355 |     "import pandas as pd\n",
 356 |     "import numpy as np\n",
 357 |     "import scipy.stats as stats\n",
 358 |     "import re\n",
 359 |     "\n",
 360 |     "\n",
 361 |     "def mlb_correlation(): \n",
 362 |     "    # YOUR CODE HERE\n",
 363 |     "    #raise NotImplementedError()\n",
 364 |     "    \n",
 365 |     "    mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
 366 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 367 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 368 |     "    \n",
 369 |     "    cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50],0,inplace=True)\n",
 370 |     "    \n",
 371 |     "    mlb_df = mlb_df.head(30)\n",
 372 |     "    \n",
 373 |     "    mlb_df['team_ville'] = mlb_df['team']\n",
 374 |     "    mlb_df['team_ville'] = mlb_df['team_ville'].map({'Boston Red Sox':'Boston',\n",
 375 |     "     'New York Yankees':'New York City',\n",
 376 |     "     'Tampa Bay Rays':'Tampa Bay Area',\n",
 377 |     "     'Toronto Blue Jays':'Toronto',\n",
 378 |     "     'Baltimore Orioles':'Baltimore',\n",
 379 |     "     'Cleveland Indians':'Cleveland',\n",
 380 |     "     'Minnesota Twins':'Minneapolis–Saint Paul',\n",
 381 |     "     'Detroit Tigers':'Detroit',\n",
 382 |     "     'Chicago White Sox':'Chicago',\n",
 383 |     "     'Kansas City Royals':'Kansas City',\n",
 384 |     "     'Houston Astros':'Houston',\n",
 385 |     "     'Oakland Athletics':'San Francisco Bay Area',\n",
 386 |     "     'Seattle Mariners':'Seattle',\n",
 387 |     "     'Los Angeles Angels':'Los Angeles',\n",
 388 |     "     'Texas Rangers':'Dallas–Fort Worth',\n",
 389 |     "     'Atlanta Braves':'Atlanta',\n",
 390 |     "     'Washington Nationals':'Washington, D.C.',\n",
 391 |     "     'Philadelphia Phillies':'Philadelphia',\n",
 392 |     "     'New York Mets':'New York City',\n",
 393 |     "     'Miami Marlins':'Miami–Fort Lauderdale',\n",
 394 |     "     'Milwaukee Brewers':'Milwaukee',\n",
 395 |     "     'Chicago Cubs':'Chicago',\n",
 396 |     "     'St. Louis Cardinals':'St. Louis',\n",
 397 |     "     'Pittsburgh Pirates':'Pittsburgh',\n",
 398 |     "     'Cincinnati Reds':'Cincinnati',\n",
 399 |     "     'Los Angeles Dodgers':'Los Angeles',\n",
 400 |     "     'Colorado Rockies':'Denver',\n",
 401 |     "     'Arizona Diamondbacks':'Phoenix',\n",
 402 |     "     'San Francisco Giants':'San Francisco Bay Area',\n",
 403 |     "     'San Diego Padres':'San Diego'})\n",
 404 |     "    \n",
 405 |     "    df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 406 |     "    \n",
 407 |     "    #df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
 408 |     "    df3['W'] = pd.to_numeric(df3['W'])\n",
 409 |     "    df3['L'] = pd.to_numeric(df3['L'])\n",
 410 |     "    df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n",
 411 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 412 |     "    df3 = df3[he]\n",
 413 |     "    df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n",
 414 |     "    df3 = df3.groupby('Metropolitan area').mean().reset_index()\n",
 415 |     "    \n",
 416 |     "    \n",
 417 |     "    population_by_region = df3['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n",
 418 |     "    win_loss_by_region = df3['W/L'] # pass in win/loss ratio from mlb_df in the same order as cities[\"Metropolitan area\"]\n",
 419 |     "\n",
 420 |     "    assert len(population_by_region) == len(win_loss_by_region), \"Q3: Your lists must be the same length\"\n",
 421 |     "    assert len(population_by_region) == 26, \"Q3: There should be 26 teams being analysed for MLB\"\n",
 422 |     "\n",
 423 |     "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
 424 |    ]
 425 |   },
 426 |   {
 427 |    "cell_type": "code",
 428 |    "execution_count": null,
 429 |    "metadata": {
 430 |     "deletable": false,
 431 |     "editable": false,
 432 |     "nbgrader": {
 433 |      "checksum": "cda33b094ba19ccc37a481e0dd29e0bc",
 434 |      "grade": true,
 435 |      "grade_id": "cell-764d4476f425c5a2",
 436 |      "locked": true,
 437 |      "points": 20,
 438 |      "schema_version": 1,
 439 |      "solution": false
 440 |     }
 441 |    },
 442 |    "outputs": [],
 443 |    "source": []
 444 |   },
 445 |   {
 446 |    "cell_type": "markdown",
 447 |    "metadata": {
 448 |     "deletable": false,
 449 |     "editable": false,
 450 |     "nbgrader": {
 451 |      "checksum": "6977a6da9ed6d8b7a0b7e37bbeda709b",
 452 |      "grade": false,
 453 |      "grade_id": "cell-793df6c04dfb126e",
 454 |      "locked": true,
 455 |      "schema_version": 1,
 456 |      "solution": false
 457 |     }
 458 |    },
 459 |    "source": [
 460 |     "## Question 4\n",
 461 |     "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NFL** using **2018** data."
 462 |    ]
 463 |   },
 464 |   {
 465 |    "cell_type": "code",
 466 |    "execution_count": 4,
 467 |    "metadata": {
 468 |     "deletable": false,
 469 |     "nbgrader": {
 470 |      "checksum": "c4914ad1e119278ec2bd567c52640b66",
 471 |      "grade": false,
 472 |      "grade_id": "cell-8ccebc209aeec8d9",
 473 |      "locked": false,
 474 |      "schema_version": 1,
 475 |      "solution": true
 476 |     }
 477 |    },
 478 |    "outputs": [],
 479 |    "source": [
 480 |     "import pandas as pd\n",
 481 |     "import numpy as np\n",
 482 |     "import scipy.stats as stats\n",
 483 |     "import re\n",
 484 |     "\n",
 485 |     "\n",
 486 |     "\n",
 487 |     "def nfl_correlation(): \n",
 488 |     "    # YOUR CODE HERE\n",
 489 |     "    #raise NotImplementedError()\n",
 490 |     "    \n",
 491 |     "    nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
 492 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 493 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 494 |     "    \n",
 495 |     "    nfl_df.drop([0,5,10,15,20,25,30,35],0,inplace=True)\n",
 496 |     "    \n",
 497 |     "    cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50],0,inplace=True)\n",
 498 |     "    \n",
 499 |     "    l1 = []\n",
 500 |     "    for i in nfl_df['team']:\n",
 501 |     "        #i=i.rstrip()\n",
 502 |     "        i=i.split('*')\n",
 503 |     "        l1.append(i[0])\n",
 504 |     "    nfl_df['team'] = l1\n",
 505 |     "    \n",
 506 |     "    l2 = []\n",
 507 |     "    for i in nfl_df['team']:\n",
 508 |     "        i=i.split('+')\n",
 509 |     "        l2.append(i[0])\n",
 510 |     "    nfl_df['team'] = l2\n",
 511 |     "    \n",
 512 |     "    nfl_df = nfl_df.head(32)\n",
 513 |     "    \n",
 514 |     "    nfl_df['team_ville'] = nfl_df['team']\n",
 515 |     "    nfl_df['team_ville'] = nfl_df['team_ville'].map({'New England Patriots':'Boston',\n",
 516 |     "     'Miami Dolphins':'Miami–Fort Lauderdale',\n",
 517 |     "     'Buffalo Bills':'Buffalo',\n",
 518 |     "     'New York Jets':'New York City',\n",
 519 |     "     'Baltimore Ravens':'Baltimore',\n",
 520 |     "     'Pittsburgh Steelers':'Pittsburgh',\n",
 521 |     "     'Cleveland Browns':'Cleveland',\n",
 522 |     "     'Cincinnati Bengals':'Cincinnati',\n",
 523 |     "     'Houston Texans':'Houston',\n",
 524 |     "     'Indianapolis Colts':'Indianapolis',\n",
 525 |     "     'Tennessee Titans':'Nashville',\n",
 526 |     "     'Jacksonville Jaguars':'Jacksonville',\n",
 527 |     "     'Kansas City Chiefs':'Kansas City',\n",
 528 |     "     'Los Angeles Chargers':'Los Angeles',\n",
 529 |     "     'Denver Broncos':'Denver',\n",
 530 |     "     'Oakland Raiders':'San Francisco Bay Area',\n",
 531 |     "     'Dallas Cowboys':'Dallas–Fort Worth',\n",
 532 |     "     'Philadelphia Eagles':'Philadelphia',\n",
 533 |     "     'Washington Redskins':'Washington, D.C.',\n",
 534 |     "     'New York Giants':'New York City',\n",
 535 |     "     'Chicago Bears':'Chicago',\n",
 536 |     "     'Minnesota Vikings':'Minneapolis–Saint Paul',\n",
 537 |     "     'Green Bay Packers':'Green Bay',\n",
 538 |     "     'Detroit Lions':'Detroit',\n",
 539 |     "     'New Orleans Saints':'New Orleans',\n",
 540 |     "     'Carolina Panthers':'Charlotte',\n",
 541 |     "     'Atlanta Falcons':'Atlanta',\n",
 542 |     "     'Tampa Bay Buccaneers':'Tampa Bay Area',\n",
 543 |     "     'Los Angeles Rams':'Los Angeles',\n",
 544 |     "     'Seattle Seahawks':'Seattle',\n",
 545 |     "     'San Francisco 49ers':'San Francisco Bay Area',\n",
 546 |     "     'Arizona Cardinals':'Phoenix'}) \n",
 547 |     "    \n",
 548 |     "    df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 549 |     "    \n",
 550 |     "    \n",
 551 |     "    df4['W'] = pd.to_numeric(df4['W'])\n",
 552 |     "    df4['L'] = pd.to_numeric(df4['L'])\n",
 553 |     "    df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n",
 554 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 555 |     "    df4 = df4[he]\n",
 556 |     "    df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n",
 557 |     "    df4 = df4.groupby('Metropolitan area').mean().reset_index()\n",
 558 |     "\n",
 559 |     "    \n",
 560 |     "    population_by_region = df4['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n",
 561 |     "    win_loss_by_region = df4['W/L'] # pass in win/loss ratio from nfl_df in the same order as cities[\"Metropolitan area\"]\n",
 562 |     "\n",
 563 |     "    assert len(population_by_region) == len(win_loss_by_region), \"Q4: Your lists must be the same length\"\n",
 564 |     "    assert len(population_by_region) == 29, \"Q4: There should be 29 teams being analysed for NFL\"\n",
 565 |     "\n",
 566 |     "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
 567 |    ]
 568 |   },
 569 |   {
 570 |    "cell_type": "code",
 571 |    "execution_count": null,
 572 |    "metadata": {
 573 |     "deletable": false,
 574 |     "editable": false,
 575 |     "nbgrader": {
 576 |      "checksum": "e9415d6399aa49e3a1a60813afdefa3b",
 577 |      "grade": true,
 578 |      "grade_id": "cell-de7b148b9554dbda",
 579 |      "locked": true,
 580 |      "points": 20,
 581 |      "schema_version": 1,
 582 |      "solution": false
 583 |     }
 584 |    },
 585 |    "outputs": [],
 586 |    "source": []
 587 |   },
 588 |   {
 589 |    "cell_type": "markdown",
 590 |    "metadata": {
 591 |     "deletable": false,
 592 |     "editable": false,
 593 |     "nbgrader": {
 594 |      "checksum": "b02d5cd3273f561e4ae939bb2a41740c",
 595 |      "grade": false,
 596 |      "grade_id": "cell-97b49d8639e908c4",
 597 |      "locked": true,
 598 |      "schema_version": 1,
 599 |      "solution": false
 600 |     }
 601 |    },
 602 |    "source": [
 603 |     "## Question 5\n",
 604 |     "In this question I would like you to explore the hypothesis that **given that an area has two sports teams in different sports, those teams will perform the same within their respective sports**. I would like to see this explored with a series of paired t-tests (so use [`ttest_rel`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html)) between all pairs of sports. Are there any sports where we can reject the null hypothesis? Again, average values where a sport has multiple teams in one region. Remember, you will only be including, for each sport, cities which have teams engaged in that sport; drop others as appropriate. This question is worth 20% of the grade for this assignment."
 605 |    ]
 606 |   },
 607 |   {
 608 |    "cell_type": "code",
 609 |    "execution_count": 6,
 610 |    "metadata": {
 611 |     "deletable": false,
 612 |     "nbgrader": {
 613 |      "checksum": "6d78c961eb66f8d8c81f06d33ae8f393",
 614 |      "grade": false,
 615 |      "grade_id": "cell-92f25f44b8d1179f",
 616 |      "locked": false,
 617 |      "schema_version": 1,
 618 |      "solution": true
 619 |     }
 620 |    },
 621 |    "outputs": [],
 622 |    "source": [
 623 |     "import pandas as pd\n",
 624 |     "import numpy as np\n",
 625 |     "import scipy.stats as stats\n",
 626 |     "import re\n",
 627 |     "\n",
 628 |     "#mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
 629 |     "#nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
 630 |     "#nba_df=pd.read_csv(\"assets/nba.csv\")\n",
 631 |     "#nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
 632 |     "#cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 633 |     "#cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 634 |     "\n",
 635 |     "def nhl_correla(): \n",
 636 |     "    # YOUR CODE HERE\n",
 637 |     "    #raise NotImplementedError()\n",
 638 |     "    \n",
 639 |     "    nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
 640 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 641 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 642 |     "    \n",
 643 |     "    nhl_df.drop([0,9,18,26],0,inplace=True)\n",
 644 |     "    cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50],0,inplace=True)\n",
 645 |     "\n",
 646 |     "    l= []\n",
 647 |     "    for i in cities['NHL']:\n",
 648 |     "        i=i.split('[')\n",
 649 |     "        l.append(i[0])\n",
 650 |     "    cities['NHL'] = l\n",
 651 |     "\n",
 652 |     "    li = []\n",
 653 |     "    for i in nhl_df['team']:\n",
 654 |     "        i = re.findall(\"[^*]+\", i)\n",
 655 |     "        li.append(i[0])\n",
 656 |     "    nhl_df['team'] = li\n",
 657 |     "\n",
 658 |     "    nhl_df = nhl_df.head(31)\n",
 659 |     "\n",
 660 |     "    nhl_df['team_ville'] = nhl_df['team']\n",
 661 |     "    nhl_df['team_ville'] = nhl_df['team_ville'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n",
 662 |     "     'Boston Bruins':'Boston',\n",
 663 |     "     'Toronto Maple Leafs':'Toronto',\n",
 664 |     "     'Florida Panthers':'Miami–Fort Lauderdale',\n",
 665 |     "     'Detroit Red Wings':'Detroit',\n",
 666 |     "     'Montreal Canadiens':'Montreal',\n",
 667 |     "     'Ottawa Senators':'Ottawa',\n",
 668 |     "     'Buffalo Sabres':'Buffalo',\n",
 669 |     "     'Washington Capitals':'Washington, D.C.',\n",
 670 |     "     'Pittsburgh Penguins':'Pittsburgh',\n",
 671 |     "     'Philadelphia Flyers':'Philadelphia',\n",
 672 |     "     'Columbus Blue Jackets':'Columbus',\n",
 673 |     "     'New Jersey Devils':'New York City',\n",
 674 |     "     'Carolina Hurricanes':'Raleigh',\n",
 675 |     "     'New York Islanders':'New York City',\n",
 676 |     "     'New York Rangers':'New York City',\n",
 677 |     "     'Nashville Predators':'Nashville',\n",
 678 |     "     'Winnipeg Jets':'Winnipeg',\n",
 679 |     "     'Minnesota Wild':'Minneapolis–Saint Paul',\n",
 680 |     "     'Colorado Avalanche':'Denver',\n",
 681 |     "     'St. Louis Blues':'St. Louis',\n",
 682 |     "     'Dallas Stars':'Dallas–Fort Worth',\n",
 683 |     "     'Chicago Blackhawks':'Chicago',\n",
 684 |     "     'Vegas Golden Knights':'Las Vegas',\n",
 685 |     "     'Anaheim Ducks':'Los Angeles',\n",
 686 |     "     'San Jose Sharks':'San Francisco Bay Area',\n",
 687 |     "     'Los Angeles Kings':'Los Angeles',\n",
 688 |     "     'Calgary Flames':'Calgary',\n",
 689 |     "     'Edmonton Oilers':'Edmonton',\n",
 690 |     "     'Vancouver Canucks':'Vancouver',\n",
 691 |     "     'Arizona Coyotes':'Phoenix'})\n",
 692 |     "\n",
 693 |     "    df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 694 |     "\n",
 695 |     "    df['W'] = pd.to_numeric(df['W'])\n",
 696 |     "    df['L'] = pd.to_numeric(df['L'])\n",
 697 |     "    df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n",
 698 |     "\n",
 699 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 700 |     "\n",
 701 |     "    df = df[he]\n",
 702 |     "\n",
 703 |     "    df['W/L'] = df['W']/(df['L']+df['W'])\n",
 704 |     "\n",
 705 |     "    df = df.groupby('Metropolitan area').mean().reset_index()\n",
 706 |     "    \n",
 707 |     "    return df\n",
 708 |     "\n",
 709 |     "def nba_correla():\n",
 710 |     "    # YOUR CODE HERE\n",
 711 |     "    #raise NotImplementedError()\n",
 712 |     "    \n",
 713 |     "    nba_df=pd.read_csv(\"assets/nba.csv\")\n",
 714 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 715 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 716 |     "    \n",
 717 |     "    cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50],0,inplace=True)\n",
 718 |     "    \n",
 719 |     "    l1 = []\n",
 720 |     "    for i in nba_df['team']:\n",
 721 |     "        #i=i.rstrip()\n",
 722 |     "        i=i.split('*')\n",
 723 |     "        l1.append(i[0])\n",
 724 |     "    nba_df['team'] = l1\n",
 725 |     "    \n",
 726 |     "    l2 = []\n",
 727 |     "    for i in nba_df['team']:\n",
 728 |     "        i=i.split('(')\n",
 729 |     "        l2.append(i[0])\n",
 730 |     "    nba_df['team'] = l2\n",
 731 |     "    \n",
 732 |     "    l3 = []\n",
 733 |     "    for i in nba_df['team']:\n",
 734 |     "        i=i.rstrip()\n",
 735 |     "        l3.append(i)\n",
 736 |     "    nba_df['team'] = l3\n",
 737 |     "    \n",
 738 |     "    \n",
 739 |     "    \n",
 740 |     "    nba_df = nba_df.head(30)\n",
 741 |     "\n",
 742 |     "    nba_df['team_ville'] = nba_df['team']\n",
 743 |     "    nba_df['team_ville'] = nba_df['team_ville'].map({'Toronto Raptors':'Toronto',\n",
 744 |     "     'Boston Celtics':'Boston',\n",
 745 |     "     'Philadelphia 76ers':'Philadelphia',\n",
 746 |     "     'Cleveland Cavaliers':'Cleveland',\n",
 747 |     "     'Indiana Pacers':'Indianapolis',\n",
 748 |     "     'Miami Heat':'Miami–Fort Lauderdale',\n",
 749 |     "     'Milwaukee Bucks':'Milwaukee',\n",
 750 |     "     'Washington Wizards':'Washington, D.C.',\n",
 751 |     "     'Detroit Pistons':'Detroit',\n",
 752 |     "     'Charlotte Hornets':'Charlotte',\n",
 753 |     "     'New York Knicks':'New York City',\n",
 754 |     "     'Brooklyn Nets':'New York City',\n",
 755 |     "     'Chicago Bulls':'Chicago',\n",
 756 |     "     'Orlando Magic':'Orlando',\n",
 757 |     "     'Atlanta Hawks':'Atlanta',\n",
 758 |     "     'Houston Rockets':'Houston',\n",
 759 |     "     'Golden State Warriors':'San Francisco Bay Area',\n",
 760 |     "     'Portland Trail Blazers':'Portland',\n",
 761 |     "     'Oklahoma City Thunder':'Oklahoma City',\n",
 762 |     "     'Utah Jazz':'Salt Lake City',\n",
 763 |     "     'New Orleans Pelicans':'New Orleans',\n",
 764 |     "     'San Antonio Spurs':'San Antonio',\n",
 765 |     "     'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n",
 766 |     "     'Denver Nuggets':'Denver',\n",
 767 |     "     'Los Angeles Clippers':'Los Angeles',\n",
 768 |     "     'Los Angeles Lakers':'Los Angeles',\n",
 769 |     "     'Sacramento Kings':'Sacramento',\n",
 770 |     "     'Dallas Mavericks':'Dallas–Fort Worth',\n",
 771 |     "     'Memphis Grizzlies':'Memphis',\n",
 772 |     "     'Phoenix Suns':'Phoenix'})\n",
 773 |     "    \n",
 774 |     "    df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 775 |     "    \n",
 776 |     "    df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
 777 |     "    df2['W'] = pd.to_numeric(df2['W'])\n",
 778 |     "    df2['L'] = pd.to_numeric(df2['L'])\n",
 779 |     "    df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n",
 780 |     "    he = ['team','W','L','W/L%','Metropolitan area','Population (2016 est.)[8]']\n",
 781 |     "    df2 = df2[he]\n",
 782 |     "    df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n",
 783 |     "    df2 = df2.groupby('Metropolitan area').mean().reset_index()\n",
 784 |     "    \n",
 785 |     "    return df2\n",
 786 |     "\n",
 787 |     "def mlb_correla(): \n",
 788 |     "    # YOUR CODE HERE\n",
 789 |     "    #raise NotImplementedError()\n",
 790 |     "    \n",
 791 |     "    mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
 792 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 793 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 794 |     "    \n",
 795 |     "    cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50],0,inplace=True)\n",
 796 |     "    \n",
 797 |     "    mlb_df = mlb_df.head(30)\n",
 798 |     "    \n",
 799 |     "    mlb_df['team_ville'] = mlb_df['team']\n",
 800 |     "    mlb_df['team_ville'] = mlb_df['team_ville'].map({'Boston Red Sox':'Boston',\n",
 801 |     "     'New York Yankees':'New York City',\n",
 802 |     "     'Tampa Bay Rays':'Tampa Bay Area',\n",
 803 |     "     'Toronto Blue Jays':'Toronto',\n",
 804 |     "     'Baltimore Orioles':'Baltimore',\n",
 805 |     "     'Cleveland Indians':'Cleveland',\n",
 806 |     "     'Minnesota Twins':'Minneapolis–Saint Paul',\n",
 807 |     "     'Detroit Tigers':'Detroit',\n",
 808 |     "     'Chicago White Sox':'Chicago',\n",
 809 |     "     'Kansas City Royals':'Kansas City',\n",
 810 |     "     'Houston Astros':'Houston',\n",
 811 |     "     'Oakland Athletics':'San Francisco Bay Area',\n",
 812 |     "     'Seattle Mariners':'Seattle',\n",
 813 |     "     'Los Angeles Angels':'Los Angeles',\n",
 814 |     "     'Texas Rangers':'Dallas–Fort Worth',\n",
 815 |     "     'Atlanta Braves':'Atlanta',\n",
 816 |     "     'Washington Nationals':'Washington, D.C.',\n",
 817 |     "     'Philadelphia Phillies':'Philadelphia',\n",
 818 |     "     'New York Mets':'New York City',\n",
 819 |     "     'Miami Marlins':'Miami–Fort Lauderdale',\n",
 820 |     "     'Milwaukee Brewers':'Milwaukee',\n",
 821 |     "     'Chicago Cubs':'Chicago',\n",
 822 |     "     'St. Louis Cardinals':'St. Louis',\n",
 823 |     "     'Pittsburgh Pirates':'Pittsburgh',\n",
 824 |     "     'Cincinnati Reds':'Cincinnati',\n",
 825 |     "     'Los Angeles Dodgers':'Los Angeles',\n",
 826 |     "     'Colorado Rockies':'Denver',\n",
 827 |     "     'Arizona Diamondbacks':'Phoenix',\n",
 828 |     "     'San Francisco Giants':'San Francisco Bay Area',\n",
 829 |     "     'San Diego Padres':'San Diego'})\n",
 830 |     "    \n",
 831 |     "    df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 832 |     "    \n",
 833 |     "    #df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
 834 |     "    df3['W'] = pd.to_numeric(df3['W'])\n",
 835 |     "    df3['L'] = pd.to_numeric(df3['L'])\n",
 836 |     "    df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n",
 837 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 838 |     "    df3 = df3[he]\n",
 839 |     "    df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n",
 840 |     "    df3 = df3.groupby('Metropolitan area').mean().reset_index()\n",
 841 |     "    \n",
 842 |     "    return df3\n",
 843 |     "\n",
 844 |     "def nfl_correla(): \n",
 845 |     "    # YOUR CODE HERE\n",
 846 |     "    #raise NotImplementedError()\n",
 847 |     "    \n",
 848 |     "    nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
 849 |     "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
 850 |     "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
 851 |     "    \n",
 852 |     "    nfl_df.drop([0,5,10,15,20,25,30,35],0,inplace=True)\n",
 853 |     "    \n",
 854 |     "    cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50],0,inplace=True)\n",
 855 |     "    \n",
 856 |     "    l1 = []\n",
 857 |     "    for i in nfl_df['team']:\n",
 858 |     "        #i=i.rstrip()\n",
 859 |     "        i=i.split('*')\n",
 860 |     "        l1.append(i[0])\n",
 861 |     "    nfl_df['team'] = l1\n",
 862 |     "    \n",
 863 |     "    l2 = []\n",
 864 |     "    for i in nfl_df['team']:\n",
 865 |     "        i=i.split('+')\n",
 866 |     "        l2.append(i[0])\n",
 867 |     "    nfl_df['team'] = l2\n",
 868 |     "    \n",
 869 |     "    nfl_df = nfl_df.head(32)\n",
 870 |     "    \n",
 871 |     "    nfl_df['team_ville'] = nfl_df['team']\n",
 872 |     "    nfl_df['team_ville'] = nfl_df['team_ville'].map({'New England Patriots':'Boston',\n",
 873 |     "     'Miami Dolphins':'Miami–Fort Lauderdale',\n",
 874 |     "     'Buffalo Bills':'Buffalo',\n",
 875 |     "     'New York Jets':'New York City',\n",
 876 |     "     'Baltimore Ravens':'Baltimore',\n",
 877 |     "     'Pittsburgh Steelers':'Pittsburgh',\n",
 878 |     "     'Cleveland Browns':'Cleveland',\n",
 879 |     "     'Cincinnati Bengals':'Cincinnati',\n",
 880 |     "     'Houston Texans':'Houston',\n",
 881 |     "     'Indianapolis Colts':'Indianapolis',\n",
 882 |     "     'Tennessee Titans':'Nashville',\n",
 883 |     "     'Jacksonville Jaguars':'Jacksonville',\n",
 884 |     "     'Kansas City Chiefs':'Kansas City',\n",
 885 |     "     'Los Angeles Chargers':'Los Angeles',\n",
 886 |     "     'Denver Broncos':'Denver',\n",
 887 |     "     'Oakland Raiders':'San Francisco Bay Area',\n",
 888 |     "     'Dallas Cowboys':'Dallas–Fort Worth',\n",
 889 |     "     'Philadelphia Eagles':'Philadelphia',\n",
 890 |     "     'Washington Redskins':'Washington, D.C.',\n",
 891 |     "     'New York Giants':'New York City',\n",
 892 |     "     'Chicago Bears':'Chicago',\n",
 893 |     "     'Minnesota Vikings':'Minneapolis–Saint Paul',\n",
 894 |     "     'Green Bay Packers':'Green Bay',\n",
 895 |     "     'Detroit Lions':'Detroit',\n",
 896 |     "     'New Orleans Saints':'New Orleans',\n",
 897 |     "     'Carolina Panthers':'Charlotte',\n",
 898 |     "     'Atlanta Falcons':'Atlanta',\n",
 899 |     "     'Tampa Bay Buccaneers':'Tampa Bay Area',\n",
 900 |     "     'Los Angeles Rams':'Los Angeles',\n",
 901 |     "     'Seattle Seahawks':'Seattle',\n",
 902 |     "     'San Francisco 49ers':'San Francisco Bay Area',\n",
 903 |     "     'Arizona Cardinals':'Phoenix'}) \n",
 904 |     "    \n",
 905 |     "    df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
 906 |     "    \n",
 907 |     "    \n",
 908 |     "    df4['W'] = pd.to_numeric(df4['W'])\n",
 909 |     "    df4['L'] = pd.to_numeric(df4['L'])\n",
 910 |     "    df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n",
 911 |     "    he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
 912 |     "    df4 = df4[he]\n",
 913 |     "    df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n",
 914 |     "    df4 = df4.groupby('Metropolitan area').mean().reset_index()\n",
 915 |     "    \n",
 916 |     "    return df4\n",
 917 |     "\n",
 918 |     "\n",
 919 |     "\n",
 920 |     "\n",
 921 |     "\n",
 922 |     "def sports_team_performance():\n",
 923 |     "    # YOUR CODE HERE\n",
 924 |     "    #raise NotImplementedError()\n",
 925 |     "    \n",
 926 |     "    nfl = nfl_correla()\n",
 927 |     "    nba = nba_correla()\n",
 928 |     "    mlb = mlb_correla()\n",
 929 |     "    nhl = nhl_correla()\n",
 930 |     "\n",
 931 |     "    nba_nfl = pd.merge(nba,nfl, on='Metropolitan area')\n",
 932 |     "    pval_nba_nfl = stats.ttest_rel(nba_nfl['W/L_x'],nba_nfl['W/L_y'])[1]\n",
 933 |     "    nba_nhl = pd.merge(nba,nhl, on='Metropolitan area')\n",
 934 |     "    pval_nba_nhl = stats.ttest_rel(nba_nhl['W/L_x'],nba_nhl['W/L_y'])[1]\n",
 935 |     "    mlb_nfl = pd.merge(mlb,nfl, on='Metropolitan area')\n",
 936 |     "    pval_mlb_nfl = stats.ttest_rel(mlb_nfl['W/L_x'],mlb_nfl['W/L_y'])[1]\n",
 937 |     "    mlb_nhl = pd.merge(mlb,nhl, on='Metropolitan area')\n",
 938 |     "    pval_mlb_nhl = stats.ttest_rel(mlb_nhl['W/L_x'],mlb_nhl['W/L_y'])[1]\n",
 939 |     "    mlb_nba = pd.merge(mlb,nba, on='Metropolitan area')\n",
 940 |     "    pval_mlb_nba = stats.ttest_rel(mlb_nba['W/L_x'],mlb_nba['W/L_y'])[1]\n",
 941 |     "    nhl_nfl = pd.merge(nhl,nfl, on='Metropolitan area')\n",
 942 |     "    pval_nhl_nfl = stats.ttest_rel(nhl_nfl['W/L_x'],nhl_nfl['W/L_y'])[1]\n",
 943 |     "    \n",
 944 |     "    pv = {'NFL': {\"NFL\": np.nan, 'NBA': pval_nba_nfl, 'NHL': pval_nhl_nfl, 'MLB': pval_mlb_nfl},\n",
 945 |     "       'NBA': {\"NFL\": pval_nba_nfl, 'NBA': np.nan, 'NHL': pval_nba_nhl, 'MLB': pval_mlb_nba},\n",
 946 |     "       'NHL': {\"NFL\": pval_nhl_nfl, 'NBA': pval_nba_nhl, 'NHL': np.nan, 'MLB': pval_mlb_nhl},\n",
 947 |     "       'MLB': {\"NFL\": pval_mlb_nfl, 'NBA': pval_mlb_nba, 'NHL': pval_mlb_nhl, 'MLB': np.nan}\n",
 948 |     "      }\n",
 949 |     "\n",
 950 |     "    \n",
 951 |     "    # Note: p_values is a full dataframe, so df.loc[\"NFL\",\"NBA\"] should be the same as df.loc[\"NBA\",\"NFL\"] and\n",
 952 |     "    # df.loc[\"NFL\",\"NFL\"] should return np.nan\n",
 953 |     "    #sports = ['NFL', 'NBA', 'NHL', 'MLB']\n",
 954 |     "    #p_values = pd.DataFrame({k:np.nan for k in sports}, index=sports)\n",
 955 |     "    p_values = pd.DataFrame(pv)\n",
 956 |     "    \n",
 957 |     "    assert abs(p_values.loc[\"NBA\", \"NHL\"] - 0.02) <= 1e-2, \"The NBA-NHL p-value should be around 0.02\"\n",
 958 |     "    assert abs(p_values.loc[\"MLB\", \"NFL\"] - 0.80) <= 1e-2, \"The MLB-NFL p-value should be around 0.80\"\n",
 959 |     "    return p_values"
 960 |    ]
 961 |   },
 962 |   {
 963 |    "cell_type": "code",
 964 |    "execution_count": null,
 965 |    "metadata": {
 966 |     "deletable": false,
 967 |     "editable": false,
 968 |     "nbgrader": {
 969 |      "checksum": "2a596ab421a45cc01168d10e8fbb8f89",
 970 |      "grade": true,
 971 |      "grade_id": "cell-fb4b9cb5ff4570a6",
 972 |      "locked": true,
 973 |      "points": 20,
 974 |      "schema_version": 1,
 975 |      "solution": false
 976 |     }
 977 |    },
 978 |    "outputs": [],
 979 |    "source": []
 980 |   }
 981 |  ],
 982 |  "metadata": {
 983 |   "coursera": {
 984 |    "schema_names": [
 985 |     "mooc_adswpy_1_v2_assignment4"
 986 |    ]
 987 |   },
 988 |   "kernelspec": {
 989 |    "display_name": "Python 3",
 990 |    "language": "python",
 991 |    "name": "python3"
 992 |   },
 993 |   "language_info": {
 994 |    "codemirror_mode": {
 995 |     "name": "ipython",
 996 |     "version": 3
 997 |    },
 998 |    "file_extension": ".py",
 999 |    "mimetype": "text/x-python",
1000 |    "name": "python",
1001 |    "nbconvert_exporter": "python",
1002 |    "pygments_lexer": "ipython3",
1003 |    "version": "3.7.3"
1004 |   }
1005 |  },
1006 |  "nbformat": 4,
1007 |  "nbformat_minor": 4
1008 | }
1009 | 


--------------------------------------------------------------------------------
/Assignment_3/assignment3.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {
   6 |     "deletable": false,
   7 |     "editable": false,
   8 |     "nbgrader": {
   9 |      "checksum": "ab80976c194c2c1bfebb7f3a12fc4e58",
  10 |      "grade": false,
  11 |      "grade_id": "cell-018440ed2f1b6a62",
  12 |      "locked": true,
  13 |      "schema_version": 1,
  14 |      "solution": false
  15 |     }
  16 |    },
  17 |    "source": [
  18 |     "# Assignment 3\n",
  19 |     "All questions are weighted the same in this assignment. This assignment requires more individual learning than the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. All questions are worth the same number of points except question 1 which is worth 17% of the assignment grade.\n",
  20 |     "\n",
  21 |     "**Note**: Questions 2-13 rely on your question 1 answer."
  22 |    ]
  23 |   },
  24 |   {
  25 |    "cell_type": "code",
  26 |    "execution_count": 1,
  27 |    "metadata": {},
  28 |    "outputs": [],
  29 |    "source": [
  30 |     "import pandas as pd\n",
  31 |     "import numpy as np\n",
  32 |     "import re\n",
  33 |     "\n",
  34 |     "# Filter all warnings. If you would like to see the warnings, please comment the two lines below.\n",
  35 |     "import warnings\n",
  36 |     "warnings.filterwarnings('ignore')"
  37 |    ]
  38 |   },
  39 |   {
  40 |    "cell_type": "markdown",
  41 |    "metadata": {
  42 |     "deletable": false,
  43 |     "editable": false,
  44 |     "nbgrader": {
  45 |      "checksum": "68063b8b0783f3d8122b516e0cce5f45",
  46 |      "grade": false,
  47 |      "grade_id": "cell-7e5190c7ff1f2e42",
  48 |      "locked": true,
  49 |      "schema_version": 1,
  50 |      "solution": false
  51 |     }
  52 |    },
  53 |    "source": [
  54 |     "### Question 1\n",
  55 |     "Load the energy data from the file `assets/Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](assets/Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **Energy**.\n",
  56 |     "\n",
  57 |     "Keep in mind that this is an Excel file, and not a comma separated values file. Also, make sure to exclude the footer and header information from the datafile. The first two columns are unnecessary, so you should get rid of them, and you should change the column labels so that the columns are:\n",
  58 |     "\n",
  59 |     "`['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']`\n",
  60 |     "\n",
  61 |     "Convert `Energy Supply` to gigajoules (**Note: there are 1,000,000 gigajoules in a petajoule**). For all countries which have missing data (e.g. data with \"...\") make sure this is reflected as `np.NaN` values.\n",
  62 |     "\n",
  63 |     "Rename the following list of countries (for use in later questions):\n",
  64 |     "\n",
  65 |     "```\"Republic of Korea\": \"South Korea\",\n",
  66 |     "\"United States of America\": \"United States\",\n",
  67 |     "\"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
  68 |     "\"China, Hong Kong Special Administrative Region\": \"Hong Kong\"```\n",
  69 |     "\n",
  70 |     "There are also several countries with numbers and/or parentheses in their name. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`.  `'Switzerland17'` should be `'Switzerland'`.\n",
  71 |     "\n",
  72 |     "Next, load the GDP data from the file `assets/world_bank.csv`, which is a csv containing countries' GDP from 1960 to 2015 from [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. \n",
  73 |     "\n",
  74 |     "Make sure to skip the header, and rename the following list of countries:\n",
  75 |     "\n",
  76 |     "```\"Korea, Rep.\": \"South Korea\", \n",
  77 |     "\"Iran, Islamic Rep.\": \"Iran\",\n",
  78 |     "\"Hong Kong SAR, China\": \"Hong Kong\"```\n",
  79 |     "\n",
  80 |     "Finally, load the [Scimago Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `assets/scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. Call this DataFrame **ScimEn**.\n",
  81 |     "\n",
  82 |     "Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). \n",
  83 |     "\n",
  84 |     "The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',\n",
  85 |     "       'Citations per document', 'H index', 'Energy Supply',\n",
  86 |     "       'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008',\n",
  87 |     "       '2009', '2010', '2011', '2012', '2013', '2014', '2015'].\n",
  88 |     "\n",
  89 |     "*This function should return a DataFrame with 20 columns and 15 entries, and the rows of the DataFrame should be sorted by \"Rank\".*"
  90 |    ]
  91 |   },
  92 |   {
  93 |    "cell_type": "code",
  94 |    "execution_count": 2,
  95 |    "metadata": {
  96 |     "deletable": false,
  97 |     "nbgrader": {
  98 |      "checksum": "57e040f07954f979910eddc0f489ffe5",
  99 |      "grade": false,
 100 |      "grade_id": "cell-bce4d6f2ecdd1297",
 101 |      "locked": false,
 102 |      "schema_version": 1,
 103 |      "solution": true
 104 |     }
 105 |    },
 106 |    "outputs": [],
 107 |    "source": [
 108 |     "def answer_one():\n",
 109 |     "    \"\"\"Build the Top15 table by joining the ScimEn, Energy and GDP datasets.\n",
 110 |     "\n",
 111 |     "    Cleans the energy sheet (header/footer rows, '...' -> NaN, petajoules ->\n",
 112 |     "    gigajoules, country names), relabels the GDP columns from their header row,\n",
 113 |     "    inner-joins the three tables and keeps the first 15 rows ordered by 'Rank'.\n",
 114 |     "\n",
 115 |     "    Returns:\n",
 116 |     "        pd.DataFrame indexed by 'Country' holding the ScimEn metrics, the three\n",
 117 |     "        energy columns and the GDP columns '2006' through '2015'.\n",
 118 |     "    \"\"\"\n",
 119 |     "    Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n",
 120 |     "    Energy = Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'])\n",
 121 |     "    # Drop the first 17 rows, then everything past the next 227 rows. The axis is\n",
 122 |     "    # left at its default (rows): pandas 2.x no longer accepts it positionally.\n",
 123 |     "    Energy = Energy.drop(Energy.index[0:17])\n",
 124 |     "    Energy = Energy.drop(Energy.index[227:])\n",
 125 |     "    Energy = Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable'})\n",
 126 |     "    Energy = Energy.replace({'...': np.nan})\n",
 127 |     "    Energy['Energy Supply'] = Energy['Energy Supply']*1000000  # petajoules -> gigajoules\n",
 128 |     "    \n",
 129 |     "    # Cut the name at the first ' (' and keep the leading run of non-digits, e.g.\n",
 130 |     "    # 'Bolivia (Plurinational State of)' -> 'Bolivia', 'Switzerland17' -> 'Switzerland'.\n",
 131 |     "    Energy['Country'] = [re.findall(\"[^0-9]+\", name.split(' (')[0])[0] for name in Energy['Country']]\n",
 132 |     "    \n",
 133 |     "    Energy = Energy.replace({\"Republic of Korea\": \"South Korea\",\n",
 134 |     "    \"United States of America\": \"United States\",\n",
 135 |     "    \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
 136 |     "    \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"})\n",
 137 |     "    \n",
 138 |     "    GDP = pd.read_csv(\"assets/world_bank.csv\")\n",
 139 |     "    GDP = GDP.drop(GDP.index[0:3])\n",
 140 |     "    GDP = GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"})\n",
 141 |     "    \n",
 142 |     "    # The first remaining row carries the real column labels. Labels stored as\n",
 143 |     "    # np.float64 (the years) become integer strings such as '2006'. Pairing the\n",
 144 |     "    # columns with the row values avoids integer keys on a label-indexed Series.\n",
 145 |     "    labels = {}\n",
 146 |     "    for column, label in zip(GDP.columns, GDP.iloc[0]):\n",
 147 |     "        labels[column] = str(int(label)) if type(label) == np.float64 else label\n",
 148 |     "    GDP = GDP.rename(columns=labels)\n",
 149 |     "    GDP = GDP.drop(GDP.index[0:1])\n",
 150 |     "    GDP = GDP.rename(columns={'Country Name': 'Country'})\n",
 151 |     "    \n",
 152 |     "    ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n",
 153 |     "    \n",
 154 |     "    # pd.merge defaults to an inner join on the columns the frames have in common.\n",
 155 |     "    merged = pd.merge(pd.merge(ScimEn, Energy), GDP).set_index('Country')\n",
 156 |     "    # Sort before slicing so the 15 kept rows are the best-ranked ones.\n",
 157 |     "    Top15 = merged.sort_values('Rank').iloc[0:15]\n",
 158 |     "    # Pick the required columns by name rather than dropping positions 10..58.\n",
 159 |     "    wanted = ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', 'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']\n",
 160 |     "    wanted += [str(year) for year in range(2006, 2016)]\n",
 161 |     "    return Top15[wanted].copy()\n",
 162 |     "    \n",
 163 |     "    #raise NotImplementedError()"
 164 |    ]
 165 |   },
 166 |   {
 167 |    "cell_type": "code",
 168 |    "execution_count": 3,
 169 |    "metadata": {
 170 |     "deletable": false,
 171 |     "editable": false,
 172 |     "nbgrader": {
 173 |      "checksum": "7bcc18b325d2935427ac2566cddd3661",
 174 |      "grade": true,
 175 |      "grade_id": "cell-780b5a4da845dbc3",
 176 |      "locked": true,
 177 |      "points": 5,
 178 |      "schema_version": 1,
 179 |      "solution": false
 180 |     }
 181 |    },
 182 |    "outputs": [],
 183 |    "source": [
 184 |     "assert type(answer_one()) == pd.DataFrame, \"Q1: You should return a DataFrame!\"\n",
 185 |     "\n",
 186 |     "assert answer_one().shape == (15,20), \"Q1: Your DataFrame should have 20 columns and 15 entries!\"\n"
 187 |    ]
 188 |   },
 189 |   {
 190 |    "cell_type": "code",
 191 |    "execution_count": 4,
 192 |    "metadata": {
 193 |     "deletable": false,
 194 |     "editable": false,
 195 |     "nbgrader": {
 196 |      "checksum": "e241830bcf3f63326b4c9cdf50be8f86",
 197 |      "grade": true,
 198 |      "grade_id": "cell-74b5f0b971379f64",
 199 |      "locked": true,
 200 |      "points": 10,
 201 |      "schema_version": 1,
 202 |      "solution": false
 203 |     }
 204 |    },
 205 |    "outputs": [],
 206 |    "source": [
 207 |     "# Cell for autograder.\n"
 208 |    ]
 209 |   },
 210 |   {
 211 |    "cell_type": "markdown",
 212 |    "metadata": {
 213 |     "deletable": false,
 214 |     "editable": false,
 215 |     "nbgrader": {
 216 |      "checksum": "596280cd22ed98c5540580c62954ec2f",
 217 |      "grade": false,
 218 |      "grade_id": "cell-babe0ff2a1fc6b17",
 219 |      "locked": true,
 220 |      "schema_version": 1,
 221 |      "solution": false
 222 |     }
 223 |    },
 224 |    "source": [
 225 |     "### Question 2\n",
 226 |     "The previous question joined three datasets then reduced this to just the top 15 entries. When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose?\n",
 227 |     "\n",
 228 |     "*This function should return a single number.*"
 229 |    ]
 230 |   },
 231 |   {
 232 |    "cell_type": "code",
 233 |    "execution_count": 5,
 234 |    "metadata": {
 235 |     "deletable": false,
 236 |     "editable": false,
 237 |     "nbgrader": {
 238 |      "checksum": "c9a34da81c80126fd23ae2eac33f98f8",
 239 |      "grade": false,
 240 |      "grade_id": "cell-96f84e7b693bef63",
 241 |      "locked": true,
 242 |      "schema_version": 1,
 243 |      "solution": false
 244 |     }
 245 |    },
 246 |    "outputs": [
 247 |     {
 248 |      "data": {
 249 |       "text/html": [
 250 |        "<svg width=\"800\" height=\"300\">\n",
 251 |        "  <circle cx=\"150\" cy=\"180\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"blue\" />\n",
 252 |        "  <circle cx=\"200\" cy=\"100\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"red\" />\n",
 253 |        "  <circle cx=\"100\" cy=\"100\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"green\" />\n",
 254 |        "  <line x1=\"150\" y1=\"125\" x2=\"300\" y2=\"150\" stroke=\"black\" stroke-width=\"2\" fill=\"black\" stroke-dasharray=\"5,3\"/>\n",
 255 |        "  <text x=\"300\" y=\"165\" font-family=\"Verdana\" font-size=\"35\">Everything but this!</text>\n",
 256 |        "</svg>\n"
 257 |       ],
 258 |       "text/plain": [
 259 |        "<IPython.core.display.HTML object>"
 260 |       ]
 261 |      },
 262 |      "metadata": {},
 263 |      "output_type": "display_data"
 264 |     }
 265 |    ],
 266 |    "source": [
 267 |     "%%HTML\n",
 268 |     "<svg width=\"800\" height=\"300\">\n",
 269 |     "  <circle cx=\"150\" cy=\"180\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"blue\" />\n",
 270 |     "  <circle cx=\"200\" cy=\"100\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"red\" />\n",
 271 |     "  <circle cx=\"100\" cy=\"100\" r=\"80\" fill-opacity=\"0.2\" stroke=\"black\" stroke-width=\"2\" fill=\"green\" />\n",
 272 |     "  <line x1=\"150\" y1=\"125\" x2=\"300\" y2=\"150\" stroke=\"black\" stroke-width=\"2\" fill=\"black\" stroke-dasharray=\"5,3\"/>\n",
 273 |     "  <text x=\"300\" y=\"165\" font-family=\"Verdana\" font-size=\"35\">Everything but this!</text>\n",
 274 |     "</svg>"
 275 |    ]
 276 |   },
 277 |   {
 278 |    "cell_type": "code",
 279 |    "execution_count": 6,
 280 |    "metadata": {
 281 |     "deletable": false,
 282 |     "nbgrader": {
 283 |      "checksum": "aeeb01fb73054269dd7b818d0945e2f7",
 284 |      "grade": false,
 285 |      "grade_id": "cell-b0c3202c000aada4",
 286 |      "locked": false,
 287 |      "schema_version": 1,
 288 |      "solution": true
 289 |     }
 290 |    },
 291 |    "outputs": [],
 292 |    "source": [
 293 |     "def answer_two():\n",
 294 |     "    \"\"\"Count the entries lost when the three datasets are inner-joined.\n",
 295 |     "\n",
 296 |     "    Reads `assets/Energy Indicators.xls`, `assets/world_bank.csv` and\n",
 297 |     "    `assets/scimagojr-3.xlsx`, cleans them, then compares the size of the outer\n",
 298 |     "    join of the three tables with the size of their inner join, both taken\n",
 299 |     "    before any reduction to the top 15.\n",
 300 |     "\n",
 301 |     "    Returns:\n",
 302 |     "        int: rows in the outer join minus rows in the inner join.\n",
 303 |     "    \"\"\"\n",
 304 |     "    Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n",
 305 |     "    Energy = Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'])\n",
 306 |     "    # Drop the first 17 rows, then everything past the next 227 rows. The axis is\n",
 307 |     "    # left at its default (rows): pandas 2.x no longer accepts it positionally.\n",
 308 |     "    Energy = Energy.drop(Energy.index[0:17])\n",
 309 |     "    Energy = Energy.drop(Energy.index[227:])\n",
 310 |     "    Energy = Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable'})\n",
 311 |     "    Energy = Energy.replace({'...': np.nan})\n",
 312 |     "    Energy['Energy Supply'] = Energy['Energy Supply']*1000000  # petajoules -> gigajoules\n",
 313 |     "    \n",
 314 |     "    # Cut the name at the first ' (' and keep the leading run of non-digits, e.g.\n",
 315 |     "    # 'Bolivia (Plurinational State of)' -> 'Bolivia', 'Switzerland17' -> 'Switzerland'.\n",
 316 |     "    Energy['Country'] = [re.findall(\"[^0-9]+\", name.split(' (')[0])[0] for name in Energy['Country']]\n",
 317 |     "    \n",
 318 |     "    Energy = Energy.replace({\"Republic of Korea\": \"South Korea\",\n",
 319 |     "    \"United States of America\": \"United States\",\n",
 320 |     "    \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
 321 |     "    \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"})\n",
 322 |     "    \n",
 323 |     "    GDP = pd.read_csv(\"assets/world_bank.csv\")\n",
 324 |     "    GDP = GDP.drop(GDP.index[0:3])\n",
 325 |     "    GDP = GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"})\n",
 326 |     "    \n",
 327 |     "    # The first remaining row carries the real column labels. Labels stored as\n",
 328 |     "    # np.float64 (the years) become integer strings such as '2006'. Pairing the\n",
 329 |     "    # columns with the row values avoids integer keys on a label-indexed Series.\n",
 330 |     "    labels = {}\n",
 331 |     "    for column, label in zip(GDP.columns, GDP.iloc[0]):\n",
 332 |     "        labels[column] = str(int(label)) if type(label) == np.float64 else label\n",
 333 |     "    GDP = GDP.rename(columns=labels)\n",
 334 |     "    GDP = GDP.drop(GDP.index[0:1])\n",
 335 |     "    GDP = GDP.rename(columns={'Country Name': 'Country'})\n",
 336 |     "    \n",
 337 |     "    ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n",
 338 |     "    \n",
 339 |     "    # pd.merge defaults to an inner join on the columns the frames have in common.\n",
 340 |     "    inner = pd.merge(ScimEn, Energy)\n",
 341 |     "    inner = pd.merge(inner, GDP)\n",
 342 |     "    \n",
 343 |     "    # Same joins with how=\"outer\": countries missing from a table are kept.\n",
 344 |     "    outer = pd.merge(ScimEn, Energy, how=\"outer\")\n",
 345 |     "    outer = pd.merge(outer, GDP, how=\"outer\")\n",
 346 |     "    \n",
 347 |     "    # The outer join holds every country seen in any table; the inner join only\n",
 348 |     "    # those present in all three. The difference is the number of entries lost.\n",
 349 |     "    diff = len(outer) - len(inner)\n",
 350 |     "    \n",
 351 |     "    return diff\n",
 352 |     "    #raise NotImplementedError()"
 353 |    ]
 354 |   },
 355 |   {
 356 |    "cell_type": "code",
 357 |    "execution_count": 7,
 358 |    "metadata": {
 359 |     "deletable": false,
 360 |     "editable": false,
 361 |     "nbgrader": {
 362 |      "checksum": "19759b4a6c03f34b647f66d343952619",
 363 |      "grade": true,
 364 |      "grade_id": "cell-be24cfcaa87ab071",
 365 |      "locked": true,
 366 |      "points": 6.66,
 367 |      "schema_version": 1,
 368 |      "solution": false
 369 |     }
 370 |    },
 371 |    "outputs": [],
 372 |    "source": [
 373 |     "assert type(answer_two()) == int, \"Q2: You should return an int number!\"\n"
 374 |    ]
 375 |   },
 376 |   {
 377 |    "cell_type": "markdown",
 378 |    "metadata": {
 379 |     "deletable": false,
 380 |     "editable": false,
 381 |     "nbgrader": {
 382 |      "checksum": "5af1b4f99cd383263130f4c00442a133",
 383 |      "grade": false,
 384 |      "grade_id": "cell-2e54816014e48c18",
 385 |      "locked": true,
 386 |      "schema_version": 1,
 387 |      "solution": false
 388 |     }
 389 |    },
 390 |    "source": [
 391 |     "### Question 3\n",
 392 |     "What are the top 15 countries for average GDP over the last 10 years?\n",
 393 |     "\n",
 394 |     "*This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.*"
 395 |    ]
 396 |   },
 397 |   {
 398 |    "cell_type": "code",
 399 |    "execution_count": 8,
 400 |    "metadata": {
 401 |     "deletable": false,
 402 |     "nbgrader": {
 403 |      "checksum": "a3490fd71a46cecfa3da698e006fe729",
 404 |      "grade": false,
 405 |      "grade_id": "cell-8c3d74335c0d489a",
 406 |      "locked": false,
 407 |      "schema_version": 1,
 408 |      "solution": true
 409 |     }
 410 |    },
 411 |    "outputs": [],
 412 |    "source": [
 413 |     "def answer_three():\n",
 414 |     "    \"\"\"Return the Top15 countries' average GDP as a Series named `avgGDP`, largest first.\"\"\"\n",
 415 |     "    Top15 = answer_one()  # build the frame once; each call re-reads the source files\n",
 416 |     "    # Columns 10..19 of the answer_one() frame are the GDP year columns. Slice them\n",
 417 |     "    # directly: wrapping the arange in a list makes a 2-D indexer newer pandas rejects.\n",
 418 |     "    gdp_years = Top15.columns[10:20]\n",
 419 |     "    avgGDP = Top15[gdp_years].mean(axis=1).sort_values(ascending=False).rename('avgGDP')\n",
 420 |     "    return avgGDP\n",
 421 |     "    #raise NotImplementedError()"
 422 |    ]
 423 |   },
 424 |   {
 425 |    "cell_type": "code",
 426 |    "execution_count": 9,
 427 |    "metadata": {
 428 |     "deletable": false,
 429 |     "editable": false,
 430 |     "nbgrader": {
 431 |      "checksum": "2f9c90ee07138f94c027c5d2f907ab13",
 432 |      "grade": true,
 433 |      "grade_id": "cell-aaaa11ef7d26f4cf",
 434 |      "locked": true,
 435 |      "points": 6.66,
 436 |      "schema_version": 1,
 437 |      "solution": false
 438 |     }
 439 |    },
 440 |    "outputs": [],
 441 |    "source": [
 442 |     "assert type(answer_three()) == pd.Series, \"Q3: You should return a Series!\"\n"
 443 |    ]
 444 |   },
 445 |   {
 446 |    "cell_type": "markdown",
 447 |    "metadata": {
 448 |     "deletable": false,
 449 |     "editable": false,
 450 |     "locked": true
 451 |    },
 452 |    "source": [
 453 |     "### Question 4\n",
 454 |     "By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP?\n",
 455 |     "\n",
 456 |     "*This function should return a single number.*"
 457 |    ]
 458 |   },
 459 |   {
 460 |    "cell_type": "code",
 461 |    "execution_count": 30,
 462 |    "metadata": {
 463 |     "deletable": false,
 464 |     "nbgrader": {
 465 |      "checksum": "768a19bcc8adc6991fe5c757e95ba784",
 466 |      "grade": false,
 467 |      "grade_id": "cell-7f77d099e3e0bbee",
 468 |      "locked": false,
 469 |      "schema_version": 1,
 470 |      "solution": true
 471 |     }
 472 |    },
 473 |    "outputs": [],
 474 |    "source": [
 475 |     "def answer_four():\n",
 476 |     "    \"\"\"Return the 2006-to-2015 GDP change of the country with the 6th largest average GDP.\"\"\"\n",
 477 |     "    Top15 = answer_one()  # build the merged frame once and reuse it\n",
 478 |     "    years = [str(year) for year in range(2006, 2016)]\n",
 479 |     "    # Rank countries by mean GDP over those years and take the 6th label, so the\n",
 480 |     "    # answer follows the data instead of a hard-coded country name.\n",
 481 |     "    sixth = Top15[years].mean(axis=1).sort_values(ascending=False).index[5]\n",
 482 |     "    return Top15.loc[sixth, '2015'] - Top15.loc[sixth, '2006']\n",
 483 |     "    #raise NotImplementedError()"
 484 |    ]
 485 |   },
 486 |   {
 487 |    "cell_type": "code",
 488 |    "execution_count": 11,
 489 |    "metadata": {
 490 |     "deletable": false,
 491 |     "editable": false,
 492 |     "nbgrader": {
 493 |      "checksum": "a7770c49cdfac4fa6368dfe8b39e6474",
 494 |      "grade": true,
 495 |      "grade_id": "cell-564dd8e5e24b0f83",
 496 |      "locked": true,
 497 |      "points": 6.66,
 498 |      "schema_version": 1,
 499 |      "solution": false
 500 |     }
 501 |    },
 502 |    "outputs": [],
 503 |    "source": [
 504 |     "# Cell for autograder.\n"
 505 |    ]
 506 |   },
 507 |   {
 508 |    "cell_type": "markdown",
 509 |    "metadata": {
 510 |     "deletable": false,
 511 |     "editable": false,
 512 |     "nbgrader": {
 513 |      "checksum": "ed6dbc94ff1b6268873413fee12741cd",
 514 |      "grade": false,
 515 |      "grade_id": "cell-617669111e38ca15",
 516 |      "locked": true,
 517 |      "schema_version": 1,
 518 |      "solution": false
 519 |     }
 520 |    },
 521 |    "source": [
 522 |     "### Question 5\n",
 523 |     "What is the mean energy supply per capita?\n",
 524 |     "\n",
 525 |     "*This function should return a single number.*"
 526 |    ]
 527 |   },
 528 |   {
 529 |    "cell_type": "code",
 530 |    "execution_count": 12,
 531 |    "metadata": {
 532 |     "deletable": false,
 533 |     "nbgrader": {
 534 |      "checksum": "cfd61a1735889e7ef20692ca0d28ddcb",
 535 |      "grade": false,
 536 |      "grade_id": "cell-58e79d558e982eef",
 537 |      "locked": false,
 538 |      "schema_version": 1,
 539 |      "solution": true
 540 |     }
 541 |    },
 542 |    "outputs": [],
 543 |    "source": [
 544 |     "def answer_five():\n",
 545 |     "    \"\"\"Return the mean of the 'Energy Supply per Capita' column of answer_one().\"\"\"\n",
 546 |     "    Top15 = answer_one()\n",
 547 |     "    per_capita = Top15['Energy Supply per Capita']\n",
 548 |     "    return per_capita.mean()\n",
 549 |     "    #raise NotImplementedError()"
 550 |    ]
 551 |   },
 552 |   {
 553 |    "cell_type": "code",
 554 |    "execution_count": 13,
 555 |    "metadata": {
 556 |     "deletable": false,
 557 |     "editable": false,
 558 |     "nbgrader": {
 559 |      "checksum": "9d61bf22656baeecc77f63d54448590e",
 560 |      "grade": true,
 561 |      "grade_id": "cell-30cc66180851638c",
 562 |      "locked": true,
 563 |      "points": 6.66,
 564 |      "schema_version": 1,
 565 |      "solution": false
 566 |     }
 567 |    },
 568 |    "outputs": [],
 569 |    "source": [
 570 |     "# Cell for autograder.\n"
 571 |    ]
 572 |   },
 573 |   {
 574 |    "cell_type": "markdown",
 575 |    "metadata": {
 576 |     "deletable": false,
 577 |     "editable": false,
 578 |     "nbgrader": {
 579 |      "checksum": "2c7a163ae96f56317756456b0d9d695b",
 580 |      "grade": false,
 581 |      "grade_id": "cell-5c11ddd12fd71b3f",
 582 |      "locked": true,
 583 |      "schema_version": 1,
 584 |      "solution": false
 585 |     }
 586 |    },
 587 |    "source": [
 588 |     "### Question 6\n",
 589 |     "What country has the maximum % Renewable and what is the percentage?\n",
 590 |     "\n",
 591 |     "*This function should return a tuple with the name of the country and the percentage.*"
 592 |    ]
 593 |   },
 594 |   {
 595 |    "cell_type": "code",
 596 |    "execution_count": 14,
 597 |    "metadata": {
 598 |     "deletable": false,
 599 |     "nbgrader": {
 600 |      "checksum": "f8657f18c77eb0f752bca3cc48561da3",
 601 |      "grade": false,
 602 |      "grade_id": "cell-b6824b78e74619f9",
 603 |      "locked": false,
 604 |      "schema_version": 1,
 605 |      "solution": true
 606 |     }
 607 |    },
 608 |    "outputs": [],
 609 |    "source": [
 610 |     "def answer_six():\n",
 611 |     "    \"\"\"Return (country, value) for the largest '% Renewable' in the answer_one() frame.\"\"\"\n",
 612 |     "    renewable = answer_one()['% Renewable']  # build the frame once, not three times\n",
 613 |     "    max_ren = renewable.max()\n",
 614 |     "    return renewable.index[renewable == max_ren][0], max_ren  # first country on a tie\n",
 615 |     "    #raise NotImplementedError()"
 616 |    ]
 617 |   },
 618 |   {
 619 |    "cell_type": "code",
 620 |    "execution_count": 15,
 621 |    "metadata": {
 622 |     "deletable": false,
 623 |     "editable": false,
 624 |     "nbgrader": {
 625 |      "checksum": "f8b28b0a824a3b76a6244c1273648ccd",
 626 |      "grade": true,
 627 |      "grade_id": "cell-2bd201c5c7bdd80f",
 628 |      "locked": true,
 629 |      "points": 6.66,
 630 |      "schema_version": 1,
 631 |      "solution": false
 632 |     }
 633 |    },
 634 |    "outputs": [],
 635 |    "source": [
 636 |     "assert type(answer_six()) == tuple, \"Q6: You should return a tuple!\"\n",
 637 |     "\n",
 638 |     "assert type(answer_six()[0]) == str, \"Q6: The first element in your result should be the name of the country!\"\n"
 639 |    ]
 640 |   },
 641 |   {
 642 |    "cell_type": "markdown",
 643 |    "metadata": {
 644 |     "deletable": false,
 645 |     "editable": false,
 646 |     "nbgrader": {
 647 |      "checksum": "a7b561a486a28ee4ba80a40715617c6d",
 648 |      "grade": false,
 649 |      "grade_id": "cell-ddf52a85ad3d5a11",
 650 |      "locked": true,
 651 |      "schema_version": 1,
 652 |      "solution": false
 653 |     }
 654 |    },
 655 |    "source": [
 656 |     "### Question 7\n",
 657 |     "Create a new column that is the ratio of Self-Citations to Total Citations. \n",
 658 |     "What is the maximum value for this new column, and what country has the highest ratio?\n",
 659 |     "\n",
 660 |     "*This function should return a tuple with the name of the country and the ratio.*"
 661 |    ]
 662 |   },
 663 |   {
 664 |    "cell_type": "code",
 665 |    "execution_count": 16,
 666 |    "metadata": {
 667 |     "deletable": false,
 668 |     "nbgrader": {
 669 |      "checksum": "e4b1cc5e3deefd24be992fbee18d0e74",
 670 |      "grade": false,
 671 |      "grade_id": "cell-a4f39737f38aa53c",
 672 |      "locked": false,
 673 |      "schema_version": 1,
 674 |      "solution": true
 675 |     }
 676 |    },
 677 |    "outputs": [],
 678 |    "source": [
 679 |     "def answer_seven():\n",
 680 |     "    \"\"\"Return (country, ratio) for the highest Self-citations / Citations ratio in answer_one().\"\"\"\n",
 681 |     "    Top15 = answer_one()  # build the frame once instead of three times\n",
 682 |     "    Top15 = Top15.assign(ratio=Top15['Self-citations'] / Top15['Citations'])\n",
 683 |     "    max_ra = Top15['ratio'].max()\n",
 684 |     "    return Top15.index[Top15['ratio'] == max_ra][0], max_ra\n",
 685 |     "    #raise NotImplementedError()"
 686 |    ]
 687 |   },
 688 |   {
 689 |    "cell_type": "code",
 690 |    "execution_count": 17,
 691 |    "metadata": {
 692 |     "deletable": false,
 693 |     "editable": false,
 694 |     "nbgrader": {
 695 |      "checksum": "ca448b3a16b65a3a08533cac736cc4d9",
 696 |      "grade": true,
 697 |      "grade_id": "cell-b7a163e9231b88c9",
 698 |      "locked": true,
 699 |      "points": 6.66,
 700 |      "schema_version": 1,
 701 |      "solution": false
 702 |     }
 703 |    },
 704 |    "outputs": [],
 705 |    "source": [
 706 |     "assert type(answer_seven()) == tuple, \"Q7: You should return a tuple!\"\n",
 707 |     "\n",
 708 |     "assert type(answer_seven()[0]) == str, \"Q7: The first element in your result should be the name of the country!\"\n"
 709 |    ]
 710 |   },
 711 |   {
 712 |    "cell_type": "markdown",
 713 |    "metadata": {
 714 |     "deletable": false,
 715 |     "editable": false,
 716 |     "nbgrader": {
 717 |      "checksum": "7be7b86ee7467539dd88746818c78c0e",
 718 |      "grade": false,
 719 |      "grade_id": "cell-5c89296ab6f94218",
 720 |      "locked": true,
 721 |      "schema_version": 1,
 722 |      "solution": false
 723 |     }
 724 |    },
 725 |    "source": [
 726 |     "### Question 8\n",
 727 |     "\n",
 728 |     "Create a column that estimates the population using Energy Supply and Energy Supply per capita. \n",
 729 |     "What is the third most populous country according to this estimate?\n",
 730 |     "\n",
 731 |     "*This function should return the name of the country*"
 732 |    ]
 733 |   },
 734 |   {
 735 |    "cell_type": "code",
 736 |    "execution_count": 18,
 737 |    "metadata": {
 738 |     "deletable": false,
 739 |     "nbgrader": {
 740 |      "checksum": "9d733b2abf089b1931e2e792ff51d488",
 741 |      "grade": false,
 742 |      "grade_id": "cell-9ca58137846b84d6",
 743 |      "locked": false,
 744 |      "schema_version": 1,
 745 |      "solution": true
 746 |     }
 747 |    },
 748 |    "outputs": [],
 749 |    "source": [
 750 |     "def answer_eight():\n",
 751 |     "    \"\"\"Return the country with the third largest population estimate (Energy Supply / Energy Supply per Capita).\"\"\"\n",
 752 |     "    Top15 = answer_one()\n",
 753 |     "    Top15['pop'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']  # population estimate\n",
 754 |     "    # Positional .iloc[2]: a bare [2] on a country-labelled Series relies on a deprecated fallback.\n",
 755 |     "    third_pop = Top15['pop'].sort_values(ascending=False).iloc[2]\n",
 756 |     "    return Top15.index[Top15['pop'] == third_pop][0]\n",
 757 |     "    #raise NotImplementedError()"
 758 |    ]
 759 |   },
 760 |   {
 761 |    "cell_type": "code",
 762 |    "execution_count": 19,
 763 |    "metadata": {
 764 |     "deletable": false,
 765 |     "editable": false,
 766 |     "nbgrader": {
 767 |      "checksum": "ba2ad50cf8198767b0bd2f75b8d97e87",
 768 |      "grade": true,
 769 |      "grade_id": "cell-3f3620c88df08b20",
 770 |      "locked": true,
 771 |      "points": 0,
 772 |      "schema_version": 1,
 773 |      "solution": false
 774 |     }
 775 |    },
 776 |    "outputs": [],
 777 |    "source": [
 778 |     "assert type(answer_eight()) == str, \"Q8: You should return the name of the country!\"\n"
 779 |    ]
 780 |   },
 781 |   {
 782 |    "cell_type": "markdown",
 783 |    "metadata": {
 784 |     "deletable": false,
 785 |     "editable": false,
 786 |     "nbgrader": {
 787 |      "checksum": "164cba98164a1045db7de10dd37115c8",
 788 |      "grade": false,
 789 |      "grade_id": "cell-2065207e66e5ec01",
 790 |      "locked": true,
 791 |      "schema_version": 1,
 792 |      "solution": false
 793 |     }
 794 |    },
 795 |    "source": [
 796 |     "### Question 9\n",
 797 |     "Create a column that estimates the number of citable documents per person. \n",
 798 |     "What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method, (Pearson's correlation).\n",
 799 |     "\n",
 800 |     "*This function should return a single number.*\n",
 801 |     "\n",
 802 |     "*(Optional: Use the built-in function `plot9()` to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)*"
 803 |    ]
 804 |   },
 805 |   {
 806 |    "cell_type": "code",
 807 |    "execution_count": 20,
 808 |    "metadata": {
 809 |     "deletable": false,
 810 |     "nbgrader": {
 811 |      "checksum": "94e06c4c3a9618b94dbb0e86913b546c",
 812 |      "grade": false,
 813 |      "grade_id": "cell-033679ea456bfb9d",
 814 |      "locked": false,
 815 |      "schema_version": 1,
 816 |      "solution": true
 817 |     }
 818 |    },
 819 |    "outputs": [],
 820 |    "source": [
 821 |     "def answer_nine():\n",
 822 |     "    \"\"\"Pearson correlation of citable docs per capita vs. energy supply per capita.\"\"\"\n",
 823 |     "    df = answer_one()\n",
 824 |     "    # Population estimate: total energy supply over per-capita energy supply.\n",
 825 |     "    population = df['Energy Supply'] / df['Energy Supply per Capita']\n",
 826 |     "    docs_per_capita = df['Citable documents'] / population\n",
 827 |     "    # Series.corr uses Pearson's method by default.\n",
 828 |     "    return docs_per_capita.corr(df['Energy Supply per Capita'])\n",
 829 |     "    #raise NotImplementedError()"
 830 |    ]
 831 |   },
 832 |   {
 833 |    "cell_type": "code",
 834 |    "execution_count": 21,
 835 |    "metadata": {
 836 |     "deletable": false,
 837 |     "editable": false,
 838 |     "nbgrader": {
 839 |      "checksum": "01a146bbcca0fa9c9c13e71ab52e710f",
 840 |      "grade": false,
 841 |      "grade_id": "cell-644824f6c708bf80",
 842 |      "locked": true,
 843 |      "schema_version": 1,
 844 |      "solution": false
 845 |     }
 846 |    },
 847 |    "outputs": [],
 848 |    "source": [
 849 |     "def plot9():\n",
 850 |     "    import matplotlib as plt\n",
 851 |     "    %matplotlib inline\n",
 852 |     "    \n",
 853 |     "    Top15 = answer_one()\n",
 854 |     "    Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
 855 |     "    Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst']\n",
 856 |     "    Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006])"
 857 |    ]
 858 |   },
 859 |   {
 860 |    "cell_type": "code",
 861 |    "execution_count": 22,
 862 |    "metadata": {
 863 |     "deletable": false,
 864 |     "editable": false,
 865 |     "nbgrader": {
 866 |      "checksum": "8dced1dde88b6877f89bdec482870476",
 867 |      "grade": true,
 868 |      "grade_id": "cell-3cb5c699065a4a20",
 869 |      "locked": true,
 870 |      "points": 6.66,
 871 |      "schema_version": 1,
 872 |      "solution": false
 873 |     }
 874 |    },
 875 |    "outputs": [],
 876 |    "source": [
 877 |     "assert answer_nine() >= -1. and answer_nine() <= 1., \"Q9: A valid correlation should between -1 to 1!\"\n"
 878 |    ]
 879 |   },
 880 |   {
 881 |    "cell_type": "markdown",
 882 |    "metadata": {
 883 |     "deletable": false,
 884 |     "editable": false,
 885 |     "nbgrader": {
 886 |      "checksum": "8af5ffad89be1e5c6292438724d6f8d5",
 887 |      "grade": false,
 888 |      "grade_id": "cell-ad09765e29b91157",
 889 |      "locked": true,
 890 |      "schema_version": 1,
 891 |      "solution": false
 892 |     }
 893 |    },
 894 |    "source": [
 895 |     "### Question 10\n",
 896 |     "Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median.\n",
 897 |     "\n",
 898 |     "*This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.*"
 899 |    ]
 900 |   },
 901 |   {
 902 |    "cell_type": "code",
 903 |    "execution_count": 23,
 904 |    "metadata": {
 905 |     "deletable": false,
 906 |     "nbgrader": {
 907 |      "checksum": "340c06bd50a9a027a2190674cfb981b9",
 908 |      "grade": false,
 909 |      "grade_id": "cell-0fdf60e64bf1a4f9",
 910 |      "locked": false,
 911 |      "schema_version": 1,
 912 |      "solution": true
 913 |     }
 914 |    },
 915 |    "outputs": [],
 916 |    "source": [
 917 |     "def answer_ten():\n",
 918 |     "    \"\"\"Flag Top15 countries whose % Renewable is at or above the Top15 median.\n",
 919 |     "\n",
 920 |     "    Returns an int64 Series named `HighRenew` (1 = at/above median, 0 = below)\n",
 921 |     "    that keeps the index and row order of the frame from answer_one().\n",
 922 |     "    \"\"\"\n",
 923 |     "    Top15 = answer_one()\n",
 924 |     "    renewable = Top15['% Renewable']\n",
 925 |     "    # Vectorised comparison: the median is computed once, and there is no\n",
 926 |     "    # chained `df[col].iloc[j] = v` write (that form targets a temporary and\n",
 927 |     "    # leaves the frame unchanged under pandas copy-on-write).\n",
 928 |     "    high_renew = (renewable >= renewable.median()).astype('int64')\n",
 929 |     "    return high_renew.rename('HighRenew')\n",
 930 |     "    #raise NotImplementedError()"
 931 |    ]
 932 |   },
 933 |   {
 934 |    "cell_type": "code",
 935 |    "execution_count": 24,
 936 |    "metadata": {
 937 |     "deletable": false,
 938 |     "editable": false,
 939 |     "nbgrader": {
 940 |      "checksum": "f624e6996eca5796eaf27fb4d0593175",
 941 |      "grade": true,
 942 |      "grade_id": "cell-b29a631fd9a7730f",
 943 |      "locked": true,
 944 |      "points": 6.66,
 945 |      "schema_version": 1,
 946 |      "solution": false
 947 |     }
 948 |    },
 949 |    "outputs": [],
 950 |    "source": [
 951 |     "assert type(answer_ten()) == pd.Series, \"Q10: You should return a Series!\"\n"
 952 |    ]
 953 |   },
 954 |   {
 955 |    "cell_type": "markdown",
 956 |    "metadata": {
 957 |     "deletable": false,
 958 |     "editable": false,
 959 |     "nbgrader": {
 960 |      "checksum": "52f682e7066791c34cd3b2402855cbf5",
 961 |      "grade": false,
 962 |      "grade_id": "cell-677c51ba711c3af7",
 963 |      "locked": true,
 964 |      "schema_version": 1,
 965 |      "solution": false
 966 |     }
 967 |    },
 968 |    "source": [
 969 |     "### Question 11\n",
 970 |     "Use the following dictionary to group the Countries by Continent, then create a DataFrame that displays the sample size (the number of countries in each continent bin), and the sum, mean, and std deviation for the estimated population of each country.\n",
 971 |     "\n",
 972 |     "```python\n",
 973 |     "ContinentDict  = {'China':'Asia', \n",
 974 |     "                  'United States':'North America', \n",
 975 |     "                  'Japan':'Asia', \n",
 976 |     "                  'United Kingdom':'Europe', \n",
 977 |     "                  'Russian Federation':'Europe', \n",
 978 |     "                  'Canada':'North America', \n",
 979 |     "                  'Germany':'Europe', \n",
 980 |     "                  'India':'Asia',\n",
 981 |     "                  'France':'Europe', \n",
 982 |     "                  'South Korea':'Asia', \n",
 983 |     "                  'Italy':'Europe', \n",
 984 |     "                  'Spain':'Europe', \n",
 985 |     "                  'Iran':'Asia',\n",
 986 |     "                  'Australia':'Australia', \n",
 987 |     "                  'Brazil':'South America'}\n",
 988 |     "```\n",
 989 |     "\n",
 990 |     "*This function should return a DataFrame with index named Continent `['Asia', 'Australia', 'Europe', 'North America', 'South America']` and columns `['size', 'sum', 'mean', 'std']`*"
 991 |    ]
 992 |   },
 993 |   {
 994 |    "cell_type": "code",
 995 |    "execution_count": 25,
 996 |    "metadata": {
 997 |     "deletable": false,
 998 |     "nbgrader": {
 999 |      "checksum": "b55846bc20cd01b0acbcb776504a766d",
1000 |      "grade": false,
1001 |      "grade_id": "cell-a5e0c0df27304f98",
1002 |      "locked": false,
1003 |      "schema_version": 1,
1004 |      "solution": true
1005 |     }
1006 |    },
1007 |    "outputs": [],
1008 |    "source": [
1009 |     "def answer_eleven():\n",
1010 |     "    \"\"\"Summarise the estimated population of the Top15 countries by continent.\n",
1011 |     "\n",
1012 |     "    Returns a DataFrame indexed by `Continent` with the columns\n",
1013 |     "    ['size', 'sum', 'mean', 'std'] computed over the per-country estimate.\n",
1014 |     "    \"\"\"\n",
1015 |     "    ContinentDict  = {'China':'Asia', \n",
1016 |     "                  'United States':'North America', \n",
1017 |     "                  'Japan':'Asia', \n",
1018 |     "                  'United Kingdom':'Europe', \n",
1019 |     "                  'Russian Federation':'Europe', \n",
1020 |     "                  'Canada':'North America', \n",
1021 |     "                  'Germany':'Europe', \n",
1022 |     "                  'India':'Asia',\n",
1023 |     "                  'France':'Europe', \n",
1024 |     "                  'South Korea':'Asia', \n",
1025 |     "                  'Italy':'Europe', \n",
1026 |     "                  'Spain':'Europe', \n",
1027 |     "                  'Iran':'Asia',\n",
1028 |     "                  'Australia':'Australia', \n",
1029 |     "                  'Brazil':'South America'}\n",
1030 |     "    \n",
1031 |     "    Top15 = answer_one()\n",
1032 |     "    # Population estimate: total energy supply over per-capita energy supply.\n",
1033 |     "    Top15['pop'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
1034 |     "    # Align continents on the index labels (assumes answer_one() is indexed by\n",
1035 |     "    # country name, as the questions state). The previous positional, chained\n",
1036 |     "    # `df[col].iloc[j] = v` loop relied on dict order matching row order and\n",
1037 |     "    # writes to a temporary under pandas copy-on-write.\n",
1038 |     "    Top15['Continent'] = pd.Series(ContinentDict)\n",
1039 |     "    # One aggregation pass; the list fixes the output column order.\n",
1040 |     "    summary = Top15.groupby('Continent')['pop'].agg(['size', 'sum', 'mean', 'std'])\n",
1041 |     "    return summary\n",
1042 |     "    #raise NotImplementedError()"
1043 |    ]
1044 |   },
1045 |   {
1046 |    "cell_type": "code",
1047 |    "execution_count": 26,
1048 |    "metadata": {
1049 |     "deletable": false,
1050 |     "editable": false,
1051 |     "nbgrader": {
1052 |      "checksum": "233318097d9c94fdc87395c967da14c4",
1053 |      "grade": true,
1054 |      "grade_id": "cell-18d1a07971b25743",
1055 |      "locked": true,
1056 |      "points": 6.66,
1057 |      "schema_version": 1,
1058 |      "solution": false
1059 |     }
1060 |    },
1061 |    "outputs": [],
1062 |    "source": [
1063 |     "assert type(answer_eleven()) == pd.DataFrame, \"Q11: You should return a DataFrame!\"\n",
1064 |     "\n",
1065 |     "assert answer_eleven().shape[0] == 5, \"Q11: Wrong row numbers!\"\n",
1066 |     "\n",
1067 |     "assert answer_eleven().shape[1] == 4, \"Q11: Wrong column numbers!\"\n"
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "markdown",
1072 |    "metadata": {
1073 |     "deletable": false,
1074 |     "editable": false,
1075 |     "nbgrader": {
1076 |      "checksum": "78d9dbb8ff6e0a1ac1e0d16e026a7d98",
1077 |      "grade": false,
1078 |      "grade_id": "cell-fa26f5c1eac39c6c",
1079 |      "locked": true,
1080 |      "schema_version": 1,
1081 |      "solution": false
1082 |     }
1083 |    },
1084 |    "source": [
1085 |     "### Question 12\n",
1086 |     "Cut % Renewable into 5 bins. Group Top15 by the Continent, as well as these new % Renewable bins. How many countries are in each of these groups?\n",
1087 |     "\n",
1088 |     "*This function should return a Series with a MultiIndex of `Continent`, then the bins for `% Renewable`. Do not include groups with no countries.*"
1089 |    ]
1090 |   },
1091 |   {
1092 |    "cell_type": "code",
1093 |    "execution_count": 27,
1094 |    "metadata": {
1095 |     "deletable": false,
1096 |     "nbgrader": {
1097 |      "checksum": "27eb27ec7a3347530174f7047288a881",
1098 |      "grade": false,
1099 |      "grade_id": "cell-2ecd9a4076abd8f0",
1100 |      "locked": false,
1101 |      "schema_version": 1,
1102 |      "solution": true
1103 |     }
1104 |    },
1105 |    "outputs": [],
1106 |    "source": [
1107 |     "def answer_twelve():\n",
1108 |     "    \"\"\"Count Top15 countries per (Continent, % Renewable bin); empty groups are omitted.\"\"\"\n",
1109 |     "    ContinentDict  = {'China':'Asia', \n",
1110 |     "                  'United States':'North America', \n",
1111 |     "                  'Japan':'Asia', \n",
1112 |     "                  'United Kingdom':'Europe', \n",
1113 |     "                  'Russian Federation':'Europe', \n",
1114 |     "                  'Canada':'North America', \n",
1115 |     "                  'Germany':'Europe', \n",
1116 |     "                  'India':'Asia',\n",
1117 |     "                  'France':'Europe', \n",
1118 |     "                  'South Korea':'Asia', \n",
1119 |     "                  'Italy':'Europe', \n",
1120 |     "                  'Spain':'Europe', \n",
1121 |     "                  'Iran':'Asia',\n",
1122 |     "                  'Australia':'Australia', \n",
1123 |     "                  'Brazil':'South America'}\n",
1124 |     "    \n",
1125 |     "    Top15 = answer_one()\n",
1126 |     "    # Align continents on the index labels (country names) instead of the old\n",
1127 |     "    # positional, chained `df[col].iloc[j] = v` loop (no-op under copy-on-write).\n",
1128 |     "    Top15['Continent'] = pd.Series(ContinentDict)\n",
1129 |     "    # Keep the bins in their own Series so the numeric column is not overwritten.\n",
1130 |     "    bins = pd.cut(Top15['% Renewable'], bins=5)\n",
1131 |     "    # observed=True leaves out (continent, bin) combinations with no countries.\n",
1132 |     "    counts = Top15.groupby(['Continent', bins], observed=True).size()\n",
1133 |     "    return counts\n",
1134 |     "    #raise NotImplementedError()\n"
1135 |    ]
1136 |   },
1137 |   {
1138 |    "cell_type": "code",
1139 |    "execution_count": null,
1140 |    "metadata": {
1141 |     "deletable": false,
1142 |     "editable": false,
1143 |     "nbgrader": {
1144 |      "checksum": "79ed0cf577c7941dc357efd8bf1c5d26",
1145 |      "grade": true,
1146 |      "grade_id": "cell-6c665602d6babab9",
1147 |      "locked": true,
1148 |      "points": 6.66,
1149 |      "schema_version": 1,
1150 |      "solution": false
1151 |     }
1152 |    },
1153 |    "outputs": [],
1154 |    "source": [
1155 |     "assert type(answer_twelve()) == pd.Series, \"Q12: You should return a Series!\"\n",
1156 |     "\n",
1157 |     "assert len(answer_twelve()) == 9, \"Q12: Wrong result numbers!\"\n"
1158 |    ]
1159 |   },
1160 |   {
1161 |    "cell_type": "markdown",
1162 |    "metadata": {
1163 |     "deletable": false,
1164 |     "editable": false,
1165 |     "nbgrader": {
1166 |      "checksum": "bdfd9b1bb897304b6337fdc47a05967c",
1167 |      "grade": false,
1168 |      "grade_id": "cell-4209a10d8f208739",
1169 |      "locked": true,
1170 |      "schema_version": 1,
1171 |      "solution": false
1172 |     }
1173 |    },
1174 |    "source": [
1175 |     "### Question 13\n",
1176 |     "Convert the Population Estimate series to a string with thousands separator (using commas). Use all significant digits (do not round the results).\n",
1177 |     "\n",
1178 |     "e.g. 12345678.90 -> 12,345,678.90\n",
1179 |     "\n",
1180 |     "*This function should return a series `PopEst` whose index is the country name and whose values are the population estimate string*"
1181 |    ]
1182 |   },
1183 |   {
1184 |    "cell_type": "code",
1185 |    "execution_count": 28,
1186 |    "metadata": {
1187 |     "deletable": false,
1188 |     "nbgrader": {
1189 |      "checksum": "1efd09964334b7d6100d81d4b3ead3e9",
1190 |      "grade": false,
1191 |      "grade_id": "cell-58eb0ee0921d93fb",
1192 |      "locked": false,
1193 |      "schema_version": 1,
1194 |      "solution": true
1195 |     }
1196 |    },
1197 |    "outputs": [],
1198 |    "source": [
1199 |     "def answer_thirteen():\n",
1200 |     "    \"\"\"Return the per-country population estimate as a comma-separated string Series.\"\"\"\n",
1201 |     "    Top15 = answer_one()\n",
1202 |     "    population = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
1203 |     "    # '{:,}' inserts thousands separators without imposing a fixed precision.\n",
1204 |     "    formatted = population.apply(lambda value: '{:,}'.format(value))\n",
1205 |     "    return formatted.rename('PopEst')\n",
1206 |     "    #raise NotImplementedError()"
1207 |    ]
1208 |   },
1209 |   {
1210 |    "cell_type": "code",
1211 |    "execution_count": 29,
1212 |    "metadata": {
1213 |     "deletable": false,
1214 |     "editable": false,
1215 |     "nbgrader": {
1216 |      "checksum": "e014781df77c7edab2a181d2d943be8f",
1217 |      "grade": true,
1218 |      "grade_id": "cell-10fee7228cf973f6",
1219 |      "locked": true,
1220 |      "points": 6.74,
1221 |      "schema_version": 1,
1222 |      "solution": false
1223 |     }
1224 |    },
1225 |    "outputs": [],
1226 |    "source": [
1227 |     "assert type(answer_thirteen()) == pd.Series, \"Q13: You should return a Series!\"\n",
1228 |     "\n",
1229 |     "assert len(answer_thirteen()) == 15, \"Q13: Wrong result numbers!\"\n"
1230 |    ]
1231 |   },
1232 |   {
1233 |    "cell_type": "markdown",
1234 |    "metadata": {
1235 |     "deletable": false,
1236 |     "editable": false,
1237 |     "nbgrader": {
1238 |      "checksum": "61562b9b667bd5efbcec0dcd7becbfaa",
1239 |      "grade": false,
1240 |      "grade_id": "cell-998b62d4f390ef15",
1241 |      "locked": true,
1242 |      "schema_version": 1,
1243 |      "solution": false
1244 |     }
1245 |    },
1246 |    "source": [
1247 |     "### Optional\n",
1248 |     "\n",
1249 |     "Use the built in function `plot_optional()` to see an example visualization."
1250 |    ]
1251 |   },
1252 |   {
1253 |    "cell_type": "code",
1254 |    "execution_count": null,
1255 |    "metadata": {
1256 |     "deletable": false,
1257 |     "editable": false,
1258 |     "nbgrader": {
1259 |      "checksum": "479786c97cb5f34d07231c6d7c602a47",
1260 |      "grade": false,
1261 |      "grade_id": "cell-741fd55ea57cd40a",
1262 |      "locked": true,
1263 |      "schema_version": 1,
1264 |      "solution": false
1265 |     }
1266 |    },
1267 |    "outputs": [],
1268 |    "source": [
1269 |     "def plot_optional():\n",
1270 |     "    import matplotlib as plt\n",
1271 |     "    %matplotlib inline\n",
1272 |     "    Top15 = answer_one()\n",
1273 |     "    ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', \n",
1274 |     "                    c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c',\n",
1275 |     "                       '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], \n",
1276 |     "                    xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]);\n",
1277 |     "\n",
1278 |     "    for i, txt in enumerate(Top15.index):\n",
1279 |     "        ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center')\n",
1280 |     "\n",
1281 |     "    print(\"This is an example of a visualization that can be created to help understand the data. \\\n",
1282 |     "This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' \\\n",
1283 |     "2014 GDP, and the color corresponds to the continent.\")"
1284 |    ]
1285 |   }
1286 |  ],
1287 |  "metadata": {
1288 |   "anaconda-cloud": {},
1289 |   "coursera": {
1290 |    "schema_names": [
1291 |     "mooc_adswpy_1_v2_assignment3"
1292 |    ]
1293 |   },
1294 |   "kernelspec": {
1295 |    "display_name": "Python 3",
1296 |    "language": "python",
1297 |    "name": "python3"
1298 |   },
1299 |   "language_info": {
1300 |    "codemirror_mode": {
1301 |     "name": "ipython",
1302 |     "version": 3
1303 |    },
1304 |    "file_extension": ".py",
1305 |    "mimetype": "text/x-python",
1306 |    "name": "python",
1307 |    "nbconvert_exporter": "python",
1308 |    "pygments_lexer": "ipython3",
1309 |    "version": "3.7.6"
1310 |   }
1311 |  },
1312 |  "nbformat": 4,
1313 |  "nbformat_minor": 4
1314 | }
1315 | 


--------------------------------------------------------------------------------