├── Assignment_2
├── assets
│ └── NIS-PUF17-DUG.pdf
├── assignment2.ipynb
└── .ipynb_checkpoints
│ └── assignment2-checkpoint.ipynb
├── Assignment_3
├── assets
│ ├── scimagojr-3.xlsx
│ └── Energy Indicators.xls
└── assignment3.ipynb
├── README.md
├── Assignment_1
├── assets
│ └── grades.txt
├── assignment1.ipynb
└── .ipynb_checkpoints
│ └── assignment1-checkpoint.ipynb
└── Assignment_4
├── assets
├── mlb.csv
├── nba.csv
├── nhl.csv
└── nfl.csv
└── assignment4.ipynb
/Assignment_2/assets/NIS-PUF17-DUG.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_2/assets/NIS-PUF17-DUG.pdf
--------------------------------------------------------------------------------
/Assignment_3/assets/scimagojr-3.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/scimagojr-3.xlsx
--------------------------------------------------------------------------------
/Assignment_3/assets/Energy Indicators.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/Energy Indicators.xls
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction-to-Data-Science-in-Python
2 | This repository contains the course assignments for Introduction to Data Science in Python, offered on Coursera by the University of Michigan.
3 |
--------------------------------------------------------------------------------
/Assignment_1/assets/grades.txt:
--------------------------------------------------------------------------------
1 | Ronald Mayr: A
2 | Bell Kassulke: B
3 | Jacqueline Rupp: A
4 | Alexander Zeller: C
5 | Valentina Denk: C
6 | Simon Loidl: B
7 | Elias Jovanovic: B
8 | Stefanie Weninger: A
9 | Fabian Peer: C
10 | Hakim Botros: B
11 | Emilie Lorentsen: B
12 | Herman Karlsen: C
13 | Nathalie Delacruz: C
14 | Casey Hartman: C
15 | Lily Walker : A
16 | Gerard Wang: C
17 | Tony Mcdowell: C
18 | Jake Wood: B
19 | Fatemeh Akhtar: B
20 | Kim Weston: B
21 | Nicholas Beatty: A
22 | Kirsten Williams: C
23 | Vaishali Surana: C
24 | Coby Mccormack: C
25 | Yasmin Dar: B
26 | Romy Donnelly: A
27 | Viswamitra Upandhye: B
28 | Kendrick Hilpert: A
29 | Killian Kaufman: B
30 | Elwood Page: B
31 | Mukti Patel: A
32 | Emily Lesch: C
33 | Elodie Booker: B
34 | Jedd Kim: A
35 | Annabel Davies: A
36 | Adnan Chen: B
37 | Jonathan Berg: C
38 | Hank Spinka: B
39 | Agnes Schneider: C
40 | Kimberly Green: A
41 | Lola-Rose Coates: C
42 | Rose Christiansen: C
43 | Shirley Hintz: C
44 | Hannah Bayer: B
45 |
--------------------------------------------------------------------------------
/Assignment_4/assets/mlb.csv:
--------------------------------------------------------------------------------
1 | team,W,L,W-L%,GB,year,League
2 | Boston Red Sox,108,54,0.667,--,2018,MLB
3 | New York Yankees,100,62,0.617,8.0,2018,MLB
4 | Tampa Bay Rays,90,72,0.556,18.0,2018,MLB
5 | Toronto Blue Jays,73,89,0.451,35.0,2018,MLB
6 | Baltimore Orioles,47,115,0.29,61.0,2018,MLB
7 | Cleveland Indians,91,71,0.562,--,2018,MLB
8 | Minnesota Twins,78,84,0.48100000000000004,13.0,2018,MLB
9 | Detroit Tigers,64,98,0.395,27.0,2018,MLB
10 | Chicago White Sox,62,100,0.38299999999999995,29.0,2018,MLB
11 | Kansas City Royals,58,104,0.358,33.0,2018,MLB
12 | Houston Astros,103,59,0.636,--,2018,MLB
13 | Oakland Athletics,97,65,0.599,6.0,2018,MLB
14 | Seattle Mariners,89,73,0.5489999999999999,14.0,2018,MLB
15 | Los Angeles Angels,80,82,0.494,23.0,2018,MLB
16 | Texas Rangers,67,95,0.414,36.0,2018,MLB
17 | Atlanta Braves,90,72,0.556,--,2018,MLB
18 | Washington Nationals,82,80,0.506,8.0,2018,MLB
19 | Philadelphia Phillies,80,82,0.494,10.0,2018,MLB
20 | New York Mets,77,85,0.475,13.0,2018,MLB
21 | Miami Marlins,63,98,0.391,26.5,2018,MLB
22 | Milwaukee Brewers,96,67,0.589,--,2018,MLB
23 | Chicago Cubs,95,68,0.583,1.0,2018,MLB
24 | St. Louis Cardinals,88,74,0.5429999999999999,7.5,2018,MLB
25 | Pittsburgh Pirates,82,79,0.509,13.0,2018,MLB
26 | Cincinnati Reds,67,95,0.414,28.5,2018,MLB
27 | Los Angeles Dodgers,92,71,0.564,--,2018,MLB
28 | Colorado Rockies,91,72,0.5579999999999999,1.0,2018,MLB
29 | Arizona Diamondbacks,82,80,0.506,9.5,2018,MLB
30 | San Francisco Giants,73,89,0.451,18.5,2018,MLB
31 | San Diego Padres,66,96,0.40700000000000003,25.5,2018,MLB
32 | Boston Red Sox,93,69,0.574,--,2017,MLB
33 | New York Yankees,91,71,0.562,2.0,2017,MLB
34 | Tampa Bay Rays,80,82,0.494,13.0,2017,MLB
35 | Toronto Blue Jays,76,86,0.469,17.0,2017,MLB
36 | Baltimore Orioles,75,87,0.46299999999999997,18.0,2017,MLB
37 | Cleveland Indians,102,60,0.63,--,2017,MLB
38 | Minnesota Twins,85,77,0.525,17.0,2017,MLB
39 | Kansas City Royals,80,82,0.494,22.0,2017,MLB
40 | Chicago White Sox,67,95,0.414,35.0,2017,MLB
41 | Detroit Tigers,64,98,0.395,38.0,2017,MLB
42 | Houston Astros,101,61,0.623,--,2017,MLB
43 | Los Angeles Angels,80,82,0.494,21.0,2017,MLB
44 | Seattle Mariners,78,84,0.48100000000000004,23.0,2017,MLB
45 | Texas Rangers,78,84,0.48100000000000004,23.0,2017,MLB
46 | Oakland Athletics,75,87,0.46299999999999997,26.0,2017,MLB
47 | Washington Nationals,97,65,0.599,--,2017,MLB
48 | Miami Marlins,77,85,0.475,20.0,2017,MLB
49 | Atlanta Braves,72,90,0.444,25.0,2017,MLB
50 | New York Mets,70,92,0.43200000000000005,27.0,2017,MLB
51 | Philadelphia Phillies,66,96,0.40700000000000003,31.0,2017,MLB
52 | Chicago Cubs,92,70,0.568,--,2017,MLB
53 | Milwaukee Brewers,86,76,0.531,6.0,2017,MLB
54 | St. Louis Cardinals,83,79,0.512,9.0,2017,MLB
55 | Pittsburgh Pirates,75,87,0.46299999999999997,17.0,2017,MLB
56 | Cincinnati Reds,68,94,0.42,24.0,2017,MLB
57 | Los Angeles Dodgers,104,58,0.642,--,2017,MLB
58 | Arizona Diamondbacks,93,69,0.574,11.0,2017,MLB
59 | Colorado Rockies,87,75,0.537,17.0,2017,MLB
60 | San Diego Padres,71,91,0.43799999999999994,33.0,2017,MLB
61 | San Francisco Giants,64,98,0.395,40.0,2017,MLB
62 | Boston Red Sox,93,69,0.574,--,2016,MLB
63 | Baltimore Orioles,89,73,0.5489999999999999,4.0,2016,MLB
64 | Toronto Blue Jays,89,73,0.5489999999999999,4.0,2016,MLB
65 | New York Yankees,84,78,0.519,9.0,2016,MLB
66 | Tampa Bay Rays,68,94,0.42,25.0,2016,MLB
67 | Cleveland Indians,94,67,0.584,--,2016,MLB
68 | Detroit Tigers,86,75,0.534,8.0,2016,MLB
69 | Kansas City Royals,81,81,0.5,13.5,2016,MLB
70 | Chicago White Sox,78,84,0.48100000000000004,16.5,2016,MLB
71 | Minnesota Twins,59,103,0.364,35.5,2016,MLB
72 | Texas Rangers,95,67,0.586,--,2016,MLB
73 | Seattle Mariners,86,76,0.531,9.0,2016,MLB
74 | Houston Astros,84,78,0.519,11.0,2016,MLB
75 | Los Angeles Angels,74,88,0.457,21.0,2016,MLB
76 | Oakland Athletics,69,93,0.426,26.0,2016,MLB
77 | Washington Nationals,95,67,0.586,--,2016,MLB
78 | New York Mets,87,75,0.537,8.0,2016,MLB
79 | Miami Marlins,79,82,0.491,15.5,2016,MLB
80 | Philadelphia Phillies,71,91,0.43799999999999994,24.0,2016,MLB
81 | Atlanta Braves,68,93,0.42200000000000004,26.5,2016,MLB
82 | Chicago Cubs,103,58,0.64,--,2016,MLB
83 | St. Louis Cardinals,86,76,0.531,17.5,2016,MLB
84 | Pittsburgh Pirates,78,83,0.484,25.0,2016,MLB
85 | Milwaukee Brewers,73,89,0.451,30.5,2016,MLB
86 | Cincinnati Reds,68,94,0.42,35.5,2016,MLB
87 | Los Angeles Dodgers,91,71,0.562,--,2016,MLB
88 | San Francisco Giants,87,75,0.537,4.0,2016,MLB
89 | Colorado Rockies,75,87,0.46299999999999997,16.0,2016,MLB
90 | Arizona Diamondbacks,69,93,0.426,22.0,2016,MLB
91 | San Diego Padres,68,94,0.42,23.0,2016,MLB
92 | Toronto Blue Jays,93,69,0.574,--,2015,MLB
93 | New York Yankees,87,75,0.537,6.0,2015,MLB
94 | Baltimore Orioles,81,81,0.5,12.0,2015,MLB
95 | Tampa Bay Rays,80,82,0.494,13.0,2015,MLB
96 | Boston Red Sox,78,84,0.48100000000000004,15.0,2015,MLB
97 | Kansas City Royals,95,67,0.586,--,2015,MLB
98 | Minnesota Twins,83,79,0.512,12.0,2015,MLB
99 | Cleveland Indians,81,80,0.503,13.5,2015,MLB
100 | Chicago White Sox,76,86,0.469,19.0,2015,MLB
101 | Detroit Tigers,74,87,0.46,20.5,2015,MLB
102 | Texas Rangers,88,74,0.5429999999999999,--,2015,MLB
103 | Houston Astros,86,76,0.531,2.0,2015,MLB
104 | Los Angeles Angels of Anaheim,85,77,0.525,3.0,2015,MLB
105 | Seattle Mariners,76,86,0.469,12.0,2015,MLB
106 | Oakland Athletics,68,94,0.42,20.0,2015,MLB
107 | New York Mets,90,72,0.556,--,2015,MLB
108 | Washington Nationals,83,79,0.512,7.0,2015,MLB
109 | Miami Marlins,71,91,0.43799999999999994,19.0,2015,MLB
110 | Atlanta Braves,67,95,0.414,23.0,2015,MLB
111 | Philadelphia Phillies,63,99,0.389,27.0,2015,MLB
112 | St. Louis Cardinals,100,62,0.617,--,2015,MLB
113 | Pittsburgh Pirates,98,64,0.605,2.0,2015,MLB
114 | Chicago Cubs,97,65,0.599,3.0,2015,MLB
115 | Milwaukee Brewers,68,94,0.42,32.0,2015,MLB
116 | Cincinnati Reds,64,98,0.395,36.0,2015,MLB
117 | Los Angeles Dodgers,92,70,0.568,--,2015,MLB
118 | San Francisco Giants,84,78,0.519,8.0,2015,MLB
119 | Arizona Diamondbacks,79,83,0.488,13.0,2015,MLB
120 | San Diego Padres,74,88,0.457,18.0,2015,MLB
121 | Colorado Rockies,68,94,0.42,24.0,2015,MLB
122 | Baltimore Orioles,96,66,0.593,--,2014,MLB
123 | New York Yankees,84,78,0.519,12.0,2014,MLB
124 | Toronto Blue Jays,83,79,0.512,13.0,2014,MLB
125 | Tampa Bay Rays,77,85,0.475,19.0,2014,MLB
126 | Boston Red Sox,71,91,0.43799999999999994,25.0,2014,MLB
127 | Detroit Tigers,90,72,0.556,--,2014,MLB
128 | Kansas City Royals,89,73,0.5489999999999999,1.0,2014,MLB
129 | Cleveland Indians,85,77,0.525,5.0,2014,MLB
130 | Chicago White Sox,73,89,0.451,17.0,2014,MLB
131 | Minnesota Twins,70,92,0.43200000000000005,20.0,2014,MLB
132 | Los Angeles Angels of Anaheim,98,64,0.605,--,2014,MLB
133 | Oakland Athletics,88,74,0.5429999999999999,10.0,2014,MLB
134 | Seattle Mariners,87,75,0.537,11.0,2014,MLB
135 | Houston Astros,70,92,0.43200000000000005,28.0,2014,MLB
136 | Texas Rangers,67,95,0.414,31.0,2014,MLB
137 | Washington Nationals,96,66,0.593,--,2014,MLB
138 | Atlanta Braves,79,83,0.488,17.0,2014,MLB
139 | New York Mets,79,83,0.488,17.0,2014,MLB
140 | Miami Marlins,77,85,0.475,19.0,2014,MLB
141 | Philadelphia Phillies,73,89,0.451,23.0,2014,MLB
142 | St. Louis Cardinals,90,72,0.556,--,2014,MLB
143 | Pittsburgh Pirates,88,74,0.5429999999999999,2.0,2014,MLB
144 | Milwaukee Brewers,82,80,0.506,8.0,2014,MLB
145 | Cincinnati Reds,76,86,0.469,14.0,2014,MLB
146 | Chicago Cubs,73,89,0.451,17.0,2014,MLB
147 | Los Angeles Dodgers,94,68,0.58,--,2014,MLB
148 | San Francisco Giants,88,74,0.5429999999999999,6.0,2014,MLB
149 | San Diego Padres,77,85,0.475,17.0,2014,MLB
150 | Colorado Rockies,66,96,0.40700000000000003,28.0,2014,MLB
151 | Arizona Diamondbacks,64,98,0.395,30.0,2014,MLB
152 |
--------------------------------------------------------------------------------
/Assignment_1/assignment1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "a7460f3e3c42534125a0802936889559",
10 | "grade": false,
11 | "grade_id": "cell-fa48e7f1b94baa5b",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 1\n",
19 | "For this assignment you are welcome to use other regex resources, such as regex \"cheat sheets\" you find on the web.\n",
20 | "\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "deletable": false,
27 | "editable": false,
28 | "nbgrader": {
29 | "checksum": "d17f561e3c6c08092810b982d085f5be",
30 | "grade": false,
31 | "grade_id": "cell-d4da7eb9acee2a6d",
32 | "locked": true,
33 | "schema_version": 1,
34 | "solution": false
35 | }
36 | },
37 | "source": [
38 | "Before you start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. "
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {
45 | "deletable": false,
46 | "editable": false,
47 | "nbgrader": {
48 | "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1",
49 | "grade": false,
50 | "grade_id": "cell-4a96535829224b3f",
51 | "locked": true,
52 | "schema_version": 1,
53 | "solution": false
54 | }
55 | },
56 | "outputs": [],
57 | "source": [
58 | "def example_word_count():\n",
59 | " # This example question requires counting words in the example_string below.\n",
60 | " example_string = \"Amy is 5 years old\"\n",
61 | " \n",
62 | " # YOUR CODE HERE.\n",
63 | " # You should write your solution here, and return your result, you can comment out or delete the\n",
64 | " # NotImplementedError below.\n",
65 | " result = example_string.split(\" \")\n",
66 | " return len(result)\n",
67 | "\n",
68 | " #raise NotImplementedError()"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "## Part A\n",
76 | "\n",
77 | "Find a list of all of the names in the following string using regex."
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 2,
83 | "metadata": {
84 | "deletable": false,
85 | "nbgrader": {
86 | "checksum": "29bc8c161c0e246c1e3ef4820cc164f7",
87 | "grade": false,
88 | "grade_id": "names",
89 | "locked": false,
90 | "schema_version": 1,
91 | "solution": true
92 | }
93 | },
94 | "outputs": [],
95 | "source": [
96 | "import re\n",
97 | "def names():\n",
98 | " simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. \n",
99 | " Ruth and Peter, their parents, have 3 kids.\"\"\"\n",
100 | " \n",
101 | " return re.findall('[A-Z][a-z]+', simple_string)\n",
102 | "\n",
103 | " # YOUR CODE HERE\n",
104 | " # raise NotImplementedError()"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "metadata": {
111 | "deletable": false,
112 | "editable": false,
113 | "nbgrader": {
114 | "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4",
115 | "grade": true,
116 | "grade_id": "correct_names",
117 | "locked": false,
118 | "points": 1,
119 | "schema_version": 1,
120 | "solution": false
121 | }
122 | },
123 | "outputs": [],
124 | "source": [
125 | "assert len(names()) == 4, \"There are four names in the simple_string\"\n"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "deletable": false,
132 | "editable": false,
133 | "nbgrader": {
134 | "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c",
135 | "grade": false,
136 | "grade_id": "cell-ed64e3464ddd7ba7",
137 | "locked": true,
138 | "schema_version": 1,
139 | "solution": false
140 | }
141 | },
142 | "source": [
143 | "## Part B\n",
144 | "\n",
145 | "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n",
146 | "a class. Create a regex to generate a list of just those students who received a B in the course."
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 4,
152 | "metadata": {
153 | "deletable": false,
154 | "nbgrader": {
155 | "checksum": "e977a1df674e9fa684e6d172aec92824",
156 | "grade": false,
157 | "grade_id": "grades",
158 | "locked": false,
159 | "schema_version": 1,
160 | "solution": true
161 | }
162 | },
163 | "outputs": [],
164 | "source": [
165 | "import re\n",
166 | "def grades():\n",
167 | " with open (\"assets/grades.txt\", \"r\") as file:\n",
168 | " grades = file.read()\n",
169 | " return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n",
170 | " # YOUR CODE HERE\n",
171 | " #raise NotImplementedError()"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 5,
177 | "metadata": {
178 | "deletable": false,
179 | "editable": false,
180 | "nbgrader": {
181 | "checksum": "e0bcc452d60fc45259e58d3116d25477",
182 | "grade": true,
183 | "grade_id": "correct_grades",
184 | "locked": false,
185 | "points": 1,
186 | "schema_version": 1,
187 | "solution": false
188 | }
189 | },
190 | "outputs": [],
191 | "source": [
192 | "assert len(grades()) == 16\n"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {
198 | "deletable": false,
199 | "editable": false,
200 | "nbgrader": {
201 | "checksum": "36e3e2a3a3e29fa7b823d22476392320",
202 | "grade": false,
203 | "grade_id": "cell-e253518e37d33f0c",
204 | "locked": true,
205 | "schema_version": 1,
206 | "solution": false
207 | }
208 | },
209 | "source": [
210 | "## Part C\n",
211 | "\n",
212 | "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n",
213 | "* a host (e.g., '146.204.224.152') \n",
214 | "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! In this case, use '-' as the value for the username.**)\n",
215 | "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n",
216 | "* the request that was made, including its method (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n",
217 | "\n",
218 | "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n",
219 | "```\n",
220 | "example_dict = {\"host\":\"146.204.224.152\", \n",
221 | " \"user_name\":\"feest6811\", \n",
222 | " \"time\":\"21/Jun/2019:15:45:24 -0700\",\n",
223 | " \"request\":\"POST /incentivize HTTP/1.1\"}\n",
224 | "```"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 6,
230 | "metadata": {
231 | "deletable": false,
232 | "nbgrader": {
233 | "checksum": "c04017e59e48b2f4c77bf425ed84b356",
234 | "grade": false,
235 | "grade_id": "logs",
236 | "locked": false,
237 | "schema_version": 1,
238 | "solution": true
239 | }
240 | },
241 | "outputs": [],
242 | "source": [
243 | "import re\n",
244 | "def logs():\n",
245 | " with open(\"assets/logdata.txt\", \"r\") as file:\n",
246 | " logdata = file.read()\n",
247 | " req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n",
248 | " \n",
249 | " l = []\n",
250 | " for i in req:\n",
251 | " l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n",
252 | " return l\n",
253 | " # YOUR CODE HERE\n",
254 | " #raise NotImplementedError()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 7,
260 | "metadata": {
261 | "deletable": false,
262 | "editable": false,
263 | "nbgrader": {
264 | "checksum": "1fd5f2cca190d37c667fb189352540d3",
265 | "grade": true,
266 | "grade_id": "cell-correct_logs",
267 | "locked": true,
268 | "points": 1,
269 | "schema_version": 1,
270 | "solution": false
271 | }
272 | },
273 | "outputs": [],
274 | "source": [
275 | "assert len(logs()) == 979\n",
276 | "\n",
277 | "one_item={'host': '146.204.224.152',\n",
278 | " 'user_name': 'feest6811',\n",
279 | " 'time': '21/Jun/2019:15:45:24 -0700',\n",
280 | " 'request': 'POST /incentivize HTTP/1.1'}\n",
281 | "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {},
288 | "outputs": [],
289 | "source": []
290 | }
291 | ],
292 | "metadata": {
293 | "coursera": {
294 | "schema_names": [
295 | "mooc_adswpy_1_v2_assignment1"
296 | ]
297 | },
298 | "kernelspec": {
299 | "display_name": "Python 3",
300 | "language": "python",
301 | "name": "python3"
302 | },
303 | "language_info": {
304 | "codemirror_mode": {
305 | "name": "ipython",
306 | "version": 3
307 | },
308 | "file_extension": ".py",
309 | "mimetype": "text/x-python",
310 | "name": "python",
311 | "nbconvert_exporter": "python",
312 | "pygments_lexer": "ipython3",
313 | "version": "3.7.6"
314 | }
315 | },
316 | "nbformat": 4,
317 | "nbformat_minor": 4
318 | }
319 |
--------------------------------------------------------------------------------
/Assignment_1/.ipynb_checkpoints/assignment1-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "a7460f3e3c42534125a0802936889559",
10 | "grade": false,
11 | "grade_id": "cell-fa48e7f1b94baa5b",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 1\n",
19 | "For this assignment you are welcome to use other regex resources, such as regex \"cheat sheets\" you find on the web.\n",
20 | "\n"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {
26 | "deletable": false,
27 | "editable": false,
28 | "nbgrader": {
29 | "checksum": "d17f561e3c6c08092810b982d085f5be",
30 | "grade": false,
31 | "grade_id": "cell-d4da7eb9acee2a6d",
32 | "locked": true,
33 | "schema_version": 1,
34 | "solution": false
35 | }
36 | },
37 | "source": [
38 | "Before you start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. "
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {
45 | "deletable": false,
46 | "editable": false,
47 | "nbgrader": {
48 | "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1",
49 | "grade": false,
50 | "grade_id": "cell-4a96535829224b3f",
51 | "locked": true,
52 | "schema_version": 1,
53 | "solution": false
54 | }
55 | },
56 | "outputs": [],
57 | "source": [
58 | "def example_word_count():\n",
59 | " # This example question requires counting words in the example_string below.\n",
60 | " example_string = \"Amy is 5 years old\"\n",
61 | " \n",
62 | " # YOUR CODE HERE.\n",
63 | " # You should write your solution here, and return your result, you can comment out or delete the\n",
64 | " # NotImplementedError below.\n",
65 | " result = example_string.split(\" \")\n",
66 | " return len(result)\n",
67 | "\n",
68 | " #raise NotImplementedError()"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "## Part A\n",
76 | "\n",
77 | "Find a list of all of the names in the following string using regex."
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 2,
83 | "metadata": {
84 | "deletable": false,
85 | "nbgrader": {
86 | "checksum": "29bc8c161c0e246c1e3ef4820cc164f7",
87 | "grade": false,
88 | "grade_id": "names",
89 | "locked": false,
90 | "schema_version": 1,
91 | "solution": true
92 | }
93 | },
94 | "outputs": [],
95 | "source": [
96 | "import re\n",
97 | "def names():\n",
98 | " simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. \n",
99 | " Ruth and Peter, their parents, have 3 kids.\"\"\"\n",
100 | " \n",
101 | " return re.findall('[A-Z][a-z]+', simple_string)\n",
102 | "\n",
103 | " # YOUR CODE HERE\n",
104 | " # raise NotImplementedError()"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "metadata": {
111 | "deletable": false,
112 | "editable": false,
113 | "nbgrader": {
114 | "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4",
115 | "grade": true,
116 | "grade_id": "correct_names",
117 | "locked": false,
118 | "points": 1,
119 | "schema_version": 1,
120 | "solution": false
121 | }
122 | },
123 | "outputs": [],
124 | "source": [
125 | "assert len(names()) == 4, \"There are four names in the simple_string\"\n"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "deletable": false,
132 | "editable": false,
133 | "nbgrader": {
134 | "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c",
135 | "grade": false,
136 | "grade_id": "cell-ed64e3464ddd7ba7",
137 | "locked": true,
138 | "schema_version": 1,
139 | "solution": false
140 | }
141 | },
142 | "source": [
143 | "## Part B\n",
144 | "\n",
145 | "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n",
146 | "a class. Create a regex to generate a list of just those students who received a B in the course."
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 4,
152 | "metadata": {
153 | "deletable": false,
154 | "nbgrader": {
155 | "checksum": "e977a1df674e9fa684e6d172aec92824",
156 | "grade": false,
157 | "grade_id": "grades",
158 | "locked": false,
159 | "schema_version": 1,
160 | "solution": true
161 | }
162 | },
163 | "outputs": [],
164 | "source": [
165 | "import re\n",
166 | "def grades():\n",
167 | " with open (\"assets/grades.txt\", \"r\") as file:\n",
168 | " grades = file.read()\n",
169 | " return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n",
170 | " # YOUR CODE HERE\n",
171 | " #raise NotImplementedError()"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 5,
177 | "metadata": {
178 | "deletable": false,
179 | "editable": false,
180 | "nbgrader": {
181 | "checksum": "e0bcc452d60fc45259e58d3116d25477",
182 | "grade": true,
183 | "grade_id": "correct_grades",
184 | "locked": false,
185 | "points": 1,
186 | "schema_version": 1,
187 | "solution": false
188 | }
189 | },
190 | "outputs": [],
191 | "source": [
192 | "assert len(grades()) == 16\n"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {
198 | "deletable": false,
199 | "editable": false,
200 | "nbgrader": {
201 | "checksum": "36e3e2a3a3e29fa7b823d22476392320",
202 | "grade": false,
203 | "grade_id": "cell-e253518e37d33f0c",
204 | "locked": true,
205 | "schema_version": 1,
206 | "solution": false
207 | }
208 | },
209 | "source": [
210 | "## Part C\n",
211 | "\n",
212 | "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n",
213 | "* a host (e.g., '146.204.224.152') \n",
214 | "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! In this case, use '-' as the value for the username.**)\n",
215 | "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n",
216 | "* the post request type (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n",
217 | "\n",
218 | "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n",
219 | "```\n",
220 | "example_dict = {\"host\":\"146.204.224.152\", \n",
221 | " \"user_name\":\"feest6811\", \n",
222 | " \"time\":\"21/Jun/2019:15:45:24 -0700\",\n",
223 | " \"request\":\"POST /incentivize HTTP/1.1\"}\n",
224 | "```"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 6,
230 | "metadata": {
231 | "deletable": false,
232 | "nbgrader": {
233 | "checksum": "c04017e59e48b2f4c77bf425ed84b356",
234 | "grade": false,
235 | "grade_id": "logs",
236 | "locked": false,
237 | "schema_version": 1,
238 | "solution": true
239 | }
240 | },
241 | "outputs": [],
242 | "source": [
243 | "import re\n",
244 | "def logs():\n",
245 | " with open(\"assets/logdata.txt\", \"r\") as file:\n",
246 | " logdata = file.read()\n",
247 | " req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n",
248 | " \n",
249 | " l = []\n",
250 | " for i in req:\n",
251 | " l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n",
252 | " return l\n",
253 | " # YOUR CODE HERE\n",
254 | " #raise NotImplementedError()"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 7,
260 | "metadata": {
261 | "deletable": false,
262 | "editable": false,
263 | "nbgrader": {
264 | "checksum": "1fd5f2cca190d37c667fb189352540d3",
265 | "grade": true,
266 | "grade_id": "cell-correct_logs",
267 | "locked": true,
268 | "points": 1,
269 | "schema_version": 1,
270 | "solution": false
271 | }
272 | },
273 | "outputs": [],
274 | "source": [
275 | "assert len(logs()) == 979\n",
276 | "\n",
277 | "one_item={'host': '146.204.224.152',\n",
278 | " 'user_name': 'feest6811',\n",
279 | " 'time': '21/Jun/2019:15:45:24 -0700',\n",
280 | " 'request': 'POST /incentivize HTTP/1.1'}\n",
281 | "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {},
288 | "outputs": [],
289 | "source": []
290 | }
291 | ],
292 | "metadata": {
293 | "coursera": {
294 | "schema_names": [
295 | "mooc_adswpy_1_v2_assignment1"
296 | ]
297 | },
298 | "kernelspec": {
299 | "display_name": "Python 3",
300 | "language": "python",
301 | "name": "python3"
302 | },
303 | "language_info": {
304 | "codemirror_mode": {
305 | "name": "ipython",
306 | "version": 3
307 | },
308 | "file_extension": ".py",
309 | "mimetype": "text/x-python",
310 | "name": "python",
311 | "nbconvert_exporter": "python",
312 | "pygments_lexer": "ipython3",
313 | "version": "3.7.6"
314 | }
315 | },
316 | "nbformat": 4,
317 | "nbformat_minor": 4
318 | }
319 |
--------------------------------------------------------------------------------
/Assignment_4/assets/nba.csv:
--------------------------------------------------------------------------------
1 | team,W,L,W/L%,GB,PS/G,PA/G,SRS,year,League
2 | Toronto Raptors* (1),59,23,0.72,—,111.7,103.9,7.29,2018,NBA
3 | Boston Celtics* (2),55,27,0.6709999999999999,4.0,104.0,100.4,3.23,2018,NBA
4 | Philadelphia 76ers* (3),52,30,0.634,7.0,109.8,105.3,4.3,2018,NBA
5 | Cleveland Cavaliers* (4),50,32,0.61,9.0,110.9,109.9,0.59,2018,NBA
6 | Indiana Pacers* (5),48,34,0.585,11.0,105.6,104.2,1.18,2018,NBA
7 | Miami Heat* (6),44,38,0.537,15.0,103.4,102.9,0.15,2018,NBA
8 | Milwaukee Bucks* (7),44,38,0.537,15.0,106.5,106.8,-0.45,2018,NBA
9 | Washington Wizards* (8),43,39,0.524,16.0,106.6,106.0,0.53,2018,NBA
10 | Detroit Pistons (9),39,43,0.47600000000000003,20.0,103.8,103.9,-0.26,2018,NBA
11 | Charlotte Hornets (10),36,46,0.439,23.0,108.2,108.0,0.07,2018,NBA
12 | New York Knicks (11),29,53,0.354,30.0,104.5,108.0,-3.53,2018,NBA
13 | Brooklyn Nets (12),28,54,0.341,31.0,106.6,110.3,-3.67,2018,NBA
14 | Chicago Bulls (13),27,55,0.32899999999999996,32.0,102.9,110.0,-6.84,2018,NBA
15 | Orlando Magic (14),25,57,0.305,34.0,103.4,108.2,-4.92,2018,NBA
16 | Atlanta Hawks (15),24,58,0.293,35.0,103.4,108.8,-5.3,2018,NBA
17 | Houston Rockets* (1),65,17,0.7929999999999999,—,112.4,103.9,8.21,2018,NBA
18 | Golden State Warriors* (2),58,24,0.7070000000000001,7.0,113.5,107.5,5.79,2018,NBA
19 | Portland Trail Blazers* (3),49,33,0.598,16.0,105.6,103.0,2.6,2018,NBA
20 | Oklahoma City Thunder* (4),48,34,0.585,17.0,107.9,104.4,3.42,2018,NBA
21 | Utah Jazz* (5),48,34,0.585,17.0,104.1,99.8,4.47,2018,NBA
22 | New Orleans Pelicans* (6),48,34,0.585,17.0,111.7,110.4,1.48,2018,NBA
23 | San Antonio Spurs* (7),47,35,0.573,18.0,102.7,99.8,2.89,2018,NBA
24 | Minnesota Timberwolves* (8),47,35,0.573,18.0,109.5,107.3,2.35,2018,NBA
25 | Denver Nuggets (9),46,36,0.561,19.0,110.0,108.5,1.57,2018,NBA
26 | Los Angeles Clippers (10),42,40,0.512,23.0,109.0,109.0,0.15,2018,NBA
27 | Los Angeles Lakers (11),35,47,0.42700000000000005,30.0,108.1,109.6,-1.44,2018,NBA
28 | Sacramento Kings (12),27,55,0.32899999999999996,38.0,98.8,105.8,-6.6,2018,NBA
29 | Dallas Mavericks (13),24,58,0.293,41.0,102.3,105.4,-2.7,2018,NBA
30 | Memphis Grizzlies (14),22,60,0.268,43.0,99.3,105.5,-5.81,2018,NBA
31 | Phoenix Suns (15),21,61,0.256,44.0,103.9,113.3,-8.8,2018,NBA
32 | Boston Celtics* (1),53,29,0.6459999999999999,—,108.0,105.4,2.25,2017,NBA
33 | Cleveland Cavaliers* (2),51,31,0.622,2.0,110.3,107.2,2.87,2017,NBA
34 | Toronto Raptors* (3),51,31,0.622,2.0,106.9,102.6,3.65,2017,NBA
35 | Washington Wizards* (4),49,33,0.598,4.0,109.2,107.4,1.36,2017,NBA
36 | Atlanta Hawks* (5),43,39,0.524,10.0,103.2,104.0,-1.23,2017,NBA
37 | Milwaukee Bucks* (6),42,40,0.512,11.0,103.6,103.8,-0.45,2017,NBA
38 | Indiana Pacers* (7),42,40,0.512,11.0,105.1,105.3,-0.64,2017,NBA
39 | Chicago Bulls* (8),41,41,0.5,12.0,102.9,102.4,0.03,2017,NBA
40 | Miami Heat (9),41,41,0.5,12.0,103.2,102.1,0.77,2017,NBA
41 | Detroit Pistons (10),37,45,0.451,16.0,101.3,102.5,-1.29,2017,NBA
42 | Charlotte Hornets (11),36,46,0.439,17.0,104.9,104.7,-0.07,2017,NBA
43 | New York Knicks (12),31,51,0.37799999999999995,22.0,104.3,108.0,-3.87,2017,NBA
44 | Orlando Magic (13),29,53,0.354,24.0,101.1,107.6,-6.61,2017,NBA
45 | Philadelphia 76ers (14),28,54,0.341,25.0,102.4,108.1,-5.83,2017,NBA
46 | Brooklyn Nets (15),20,62,0.244,33.0,105.8,112.5,-6.74,2017,NBA
47 | Golden State Warriors* (1),67,15,0.8170000000000001,—,115.9,104.3,11.35,2017,NBA
48 | San Antonio Spurs* (2),61,21,0.7440000000000001,6.0,105.3,98.1,7.13,2017,NBA
49 | Houston Rockets* (3),55,27,0.6709999999999999,12.0,115.3,109.6,5.84,2017,NBA
50 | Los Angeles Clippers* (4),51,31,0.622,16.0,108.7,104.4,4.42,2017,NBA
51 | Utah Jazz* (5),51,31,0.622,16.0,100.7,96.8,4.0,2017,NBA
52 | Oklahoma City Thunder* (6),47,35,0.573,20.0,106.6,105.8,1.14,2017,NBA
53 | Memphis Grizzlies* (7),43,39,0.524,24.0,100.5,100.0,0.96,2017,NBA
54 | Portland Trail Blazers* (8),41,41,0.5,26.0,107.9,108.5,-0.23,2017,NBA
55 | Denver Nuggets (9),40,42,0.488,27.0,111.7,111.2,0.7,2017,NBA
56 | New Orleans Pelicans (10),34,48,0.415,33.0,104.3,106.4,-1.69,2017,NBA
57 | Dallas Mavericks (11),33,49,0.402,34.0,97.9,100.8,-2.53,2017,NBA
58 | Sacramento Kings (12),32,50,0.39,35.0,102.8,106.7,-3.29,2017,NBA
59 | Minnesota Timberwolves (13),31,51,0.37799999999999995,36.0,105.6,106.7,-0.64,2017,NBA
60 | Los Angeles Lakers (14),26,56,0.317,41.0,104.6,111.5,-6.29,2017,NBA
61 | Phoenix Suns (15),24,58,0.293,43.0,107.7,113.3,-5.14,2017,NBA
62 | Cleveland Cavaliers* (1),57,25,0.695,—,104.3,98.3,5.45,2016,NBA
63 | Toronto Raptors* (2),56,26,0.6829999999999999,1.0,102.7,98.2,4.08,2016,NBA
64 | Miami Heat* (3),48,34,0.585,9.0,100.0,98.4,1.5,2016,NBA
65 | Atlanta Hawks* (4),48,34,0.585,9.0,102.8,99.2,3.49,2016,NBA
66 | Boston Celtics* (5),48,34,0.585,9.0,105.7,102.5,2.84,2016,NBA
67 | Charlotte Hornets* (6),48,34,0.585,9.0,103.4,100.7,2.36,2016,NBA
68 | Indiana Pacers* (7),45,37,0.5489999999999999,12.0,102.2,100.5,1.62,2016,NBA
69 | Detroit Pistons* (8),44,38,0.537,13.0,102.0,101.4,0.43,2016,NBA
70 | Chicago Bulls (9),42,40,0.512,15.0,101.6,103.1,-1.46,2016,NBA
71 | Washington Wizards (10),41,41,0.5,16.0,104.1,104.6,-0.5,2016,NBA
72 | Orlando Magic (11),35,47,0.42700000000000005,22.0,102.1,103.7,-1.68,2016,NBA
73 | Milwaukee Bucks (12),33,49,0.402,24.0,99.0,103.2,-3.98,2016,NBA
74 | New York Knicks (13),32,50,0.39,25.0,98.4,101.1,-2.74,2016,NBA
75 | Brooklyn Nets (14),21,61,0.256,36.0,98.6,106.0,-7.12,2016,NBA
76 | Philadelphia 76ers (15),10,72,0.122,47.0,97.4,107.6,-9.92,2016,NBA
77 | Golden State Warriors* (1),73,9,0.89,—,114.9,104.1,10.38,2016,NBA
78 | San Antonio Spurs* (2),67,15,0.8170000000000001,6.0,103.5,92.9,10.28,2016,NBA
79 | Oklahoma City Thunder* (3),55,27,0.6709999999999999,18.0,110.2,102.9,7.09,2016,NBA
80 | Los Angeles Clippers* (4),53,29,0.6459999999999999,20.0,104.5,100.2,4.13,2016,NBA
81 | Portland Trail Blazers* (5),44,38,0.537,29.0,105.1,104.3,0.98,2016,NBA
82 | Dallas Mavericks* (6),42,40,0.512,31.0,102.3,102.6,-0.02,2016,NBA
83 | Memphis Grizzlies* (7),42,40,0.512,31.0,99.1,101.3,-2.14,2016,NBA
84 | Houston Rockets* (8),41,41,0.5,32.0,106.5,106.4,0.34,2016,NBA
85 | Utah Jazz (9),40,42,0.488,33.0,97.7,95.9,1.84,2016,NBA
86 | Sacramento Kings (10),33,49,0.402,40.0,106.6,109.1,-2.32,2016,NBA
87 | Denver Nuggets (10),33,49,0.402,40.0,101.9,105.0,-2.81,2016,NBA
88 | New Orleans Pelicans (12),30,52,0.366,43.0,102.7,106.5,-3.56,2016,NBA
89 | Minnesota Timberwolves (13),29,53,0.354,44.0,102.4,106.0,-3.38,2016,NBA
90 | Phoenix Suns (14),23,59,0.28,50.0,100.9,107.5,-6.32,2016,NBA
91 | Los Angeles Lakers (15),17,65,0.207,56.0,97.3,106.9,-8.92,2016,NBA
92 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NBA
93 | Toronto Raptors* (4),49,33,.598,—,104.0,100.9,2.45,2015,NBA
94 | Boston Celtics* (7),40,42,.488,9.0,101.4,101.2,-0.40,2015,NBA
95 | Brooklyn Nets* (8),38,44,.463,11.0,98.0,100.9,-3.13,2015,NBA
96 | Philadelphia 76ers (14),18,64,.220,31.0,92.0,101.0,-9.04,2015,NBA
97 | New York Knicks (15),17,65,.207,32.0,91.9,101.2,-9.50,2015,NBA
98 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NBA
99 | Cleveland Cavaliers* (2),53,29,.646,—,103.1,98.7,4.08,2015,NBA
100 | Chicago Bulls* (3),50,32,.610,3.0,100.8,97.8,2.54,2015,NBA
101 | Milwaukee Bucks* (6),41,41,.500,12.0,97.8,97.4,-0.09,2015,NBA
102 | Indiana Pacers (9),38,44,.463,15.0,97.3,97.0,-0.23,2015,NBA
103 | Detroit Pistons (12),32,50,.390,21.0,98.5,99.5,-1.39,2015,NBA
104 | Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2015,NBA
105 | Atlanta Hawks* (1),60,22,.732,—,102.5,97.1,4.75,2015,NBA
106 | Washington Wizards* (5),46,36,.561,14.0,98.5,97.8,0.17,2015,NBA
107 | Miami Heat (10),37,45,.451,23.0,94.7,97.3,-2.92,2015,NBA
108 | Charlotte Hornets (11),33,49,.402,27.0,94.2,97.3,-3.44,2015,NBA
109 | Orlando Magic (13),25,57,.305,35.0,95.7,101.4,-5.87,2015,NBA
110 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2015,NBA
111 | Portland Trail Blazers* (4),51,31,.622,—,102.8,98.6,4.41,2015,NBA
112 | Oklahoma City Thunder (9),45,37,.549,6.0,104.0,101.8,2.47,2015,NBA
113 | Utah Jazz (11),38,44,.463,13.0,95.1,94.9,0.71,2015,NBA
114 | Denver Nuggets (12),30,52,.366,21.0,101.5,105.0,-3.07,2015,NBA
115 | Minnesota Timberwolves (15),16,66,.195,35.0,97.8,106.5,-8.00,2015,NBA
116 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NBA
117 | Golden State Warriors* (1),67,15,.817,—,110.0,99.9,10.01,2015,NBA
118 | Los Angeles Clippers* (2),56,26,.683,11.0,106.7,100.1,6.80,2015,NBA
119 | Phoenix Suns (10),39,43,.476,28.0,102.4,103.3,-0.38,2015,NBA
120 | Sacramento Kings (13),29,53,.354,38.0,101.3,105.0,-3.07,2015,NBA
121 | Los Angeles Lakers (14),21,61,.256,46.0,98.5,105.3,-6.17,2015,NBA
122 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2015,NBA
123 | Houston Rockets* (2),56,26,.683,—,103.9,100.5,3.82,2015,NBA
124 | San Antonio Spurs* (5),55,27,.671,1.0,103.2,97.0,6.34,2015,NBA
125 | Memphis Grizzlies* (5),55,27,.671,1.0,98.3,95.1,3.62,2015,NBA
126 | Dallas Mavericks* (7),50,32,.610,6.0,105.2,102.3,3.36,2015,NBA
127 | New Orleans Pelicans* (8),45,37,.549,11.0,99.4,98.6,1.13,2015,NBA
128 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NBA
129 | Toronto Raptors* (3),48,34,.585,—,101.3,98.0,2.55,2014,NBA
130 | Brooklyn Nets* (6),44,38,.537,4.0,98.5,99.5,-1.58,2014,NBA
131 | New York Knicks (9),37,45,.451,11.0,98.6,99.4,-1.40,2014,NBA
132 | Boston Celtics (12),25,57,.305,23.0,96.2,100.7,-4.97,2014,NBA
133 | Philadelphia 76ers (14),19,63,.232,29.0,99.5,109.9,-10.66,2014,NBA
134 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NBA
135 | Indiana Pacers* (1),56,26,.683,—,96.7,92.3,3.63,2014,NBA
136 | Chicago Bulls* (4),48,34,.585,8.0,93.7,91.8,1.20,2014,NBA
137 | Cleveland Cavaliers (10),33,49,.402,23.0,98.2,101.5,-3.86,2014,NBA
138 | Detroit Pistons (11),29,53,.354,27.0,101.0,104.7,-4.13,2014,NBA
139 | Milwaukee Bucks (15),15,67,.183,41.0,95.5,103.7,-8.41,2014,NBA
140 | Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2014,NBA
141 | Miami Heat* (2),54,28,.659,—,102.2,97.4,4.15,2014,NBA
142 | Washington Wizards* (5),44,38,.537,10.0,100.7,99.4,0.48,2014,NBA
143 | Charlotte Bobcats* (7),43,39,.524,11.0,96.9,97.1,-0.89,2014,NBA
144 | Atlanta Hawks* (8),38,44,.463,16.0,101.0,101.5,-0.88,2014,NBA
145 | Orlando Magic (13),23,59,.280,31.0,96.5,102.0,-5.88,2014,NBA
146 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2014,NBA
147 | Oklahoma City Thunder* (2),59,23,.720,—,106.2,99.8,6.66,2014,NBA
148 | Portland Trail Blazers* (4),54,28,.659,5.0,106.7,102.8,4.44,2014,NBA
149 | Minnesota Timberwolves (10),40,42,.488,19.0,106.9,104.3,3.10,2014,NBA
150 | Denver Nuggets (11),36,46,.439,23.0,104.4,106.5,-1.40,2014,NBA
151 | Utah Jazz (15),25,57,.305,34.0,95.0,102.2,-6.27,2014,NBA
152 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NBA
153 | Los Angeles Clippers* (3),57,25,.695,—,107.9,101.0,7.27,2014,NBA
154 | Golden State Warriors* (6),51,31,.622,6.0,104.3,99.5,5.15,2014,NBA
155 | Phoenix Suns (9),48,34,.585,9.0,105.2,102.6,3.02,2014,NBA
156 | Sacramento Kings (13),28,54,.341,29.0,100.5,103.4,-2.08,2014,NBA
157 | Los Angeles Lakers (14),27,55,.329,30.0,103.0,109.2,-5.33,2014,NBA
158 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2014,NBA
159 | San Antonio Spurs* (1),62,20,.756,—,105.4,97.6,8.00,2014,NBA
160 | Houston Rockets* (4),54,28,.659,8.0,107.7,103.1,5.06,2014,NBA
161 | Memphis Grizzlies* (7),50,32,.610,12.0,96.1,94.6,2.18,2014,NBA
162 | Dallas Mavericks* (8),49,33,.598,13.0,104.8,102.4,2.91,2014,NBA
163 | New Orleans Pelicans (12),34,48,.415,28.0,99.7,102.4,-1.98,2014,NBA
164 |
--------------------------------------------------------------------------------
/Assignment_2/assignment2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "5a8d839ee00398fa3bd3bc58ec642beb",
10 | "grade": false,
11 | "grade_id": "cell-a839e7b47494b4c3",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 2\n",
19 | "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data user's guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). **Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file.**"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "deletable": false,
26 | "editable": false,
27 | "hideCode": false,
28 | "hidePrompt": false,
29 | "nbgrader": {
30 | "checksum": "aaa5e730f40ba21c1bc94f864bad4742",
31 | "grade": false,
32 | "grade_id": "cell-58fc2e5938733f6a",
33 | "locked": true,
34 | "schema_version": 1,
35 | "solution": false
36 | }
37 | },
38 | "source": [
39 | "## Question 1\n",
40 | "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n",
41 | "\n",
42 | "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n",
43 | "```\n",
44 | " {\"less than high school\":0.2,\n",
45 | " \"high school\":0.4,\n",
46 | " \"more than high school but not college\":0.2,\n",
47 | " \"college\":0.2}\n",
48 | "```\n"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {
55 | "deletable": false,
56 | "nbgrader": {
57 | "checksum": "0ac58deb3f5ac988c643e903cbee7f3a",
58 | "grade": false,
59 | "grade_id": "cell-eea16d020eb52ae7",
60 | "locked": false,
61 | "schema_version": 1,
62 | "solution": true
63 | }
64 | },
65 | "outputs": [],
66 | "source": [
67 | "def proportion_of_education():\n",
68 | " # your code goes here\n",
69 | " # YOUR CODE HERE\n",
70 | " import pandas as pd\n",
71 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
72 | " n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n",
73 | " li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n",
74 | " di = {}\n",
75 | " i = 0\n",
76 | " for l in li:\n",
77 | " di[l] = n[i+1] \n",
78 | " i += 1\n",
79 | " \n",
80 | " return di\n",
81 | " #raise NotImplementedError()"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 2,
87 | "metadata": {
88 | "deletable": false,
89 | "editable": false,
90 | "hideCode": false,
91 | "hidePrompt": false,
92 | "nbgrader": {
93 | "checksum": "ac5d91a24a7f72f66c25d242c3d24a50",
94 | "grade": true,
95 | "grade_id": "cell-c0eeef201366f51c",
96 | "locked": true,
97 | "points": 1,
98 | "schema_version": 1,
99 | "solution": false
100 | }
101 | },
102 | "outputs": [],
103 | "source": [
104 | "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n",
105 | "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n",
106 | "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
107 | "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
108 | "assert \"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
109 | "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "deletable": false,
116 | "editable": false,
117 | "nbgrader": {
118 | "checksum": "562b78b7b9b79580269be0a3bebf4b42",
119 | "grade": false,
120 | "grade_id": "cell-8fcbb64516283f52",
121 | "locked": true,
122 | "schema_version": 1,
123 | "solution": false
124 | }
125 | },
126 | "source": [
127 | "## Question 2\n",
128 | "\n",
129 | "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those we know did not.\n",
130 | "\n",
131 | "*This function should return a tuple in the form (use the correct numbers):*\n",
132 | "```\n",
133 | "(2.5, 0.1)\n",
134 | "```"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 3,
140 | "metadata": {
141 | "deletable": false,
142 | "nbgrader": {
143 | "checksum": "a405d639063c4a6408365479f29c95c9",
144 | "grade": false,
145 | "grade_id": "cell-77f18c512324eabb",
146 | "locked": false,
147 | "schema_version": 1,
148 | "solution": true
149 | }
150 | },
151 | "outputs": [],
152 | "source": [
153 | "def average_influenza_doses():\n",
154 | " # YOUR CODE HERE\n",
155 | " \n",
156 | " import pandas as pd\n",
157 | " import numpy as np\n",
158 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
159 | " gp1 = mb[mb['CBF_01'] == 1]\n",
160 | " gp2 = mb[mb['CBF_01'] == 2]\n",
161 | " \n",
162 | " return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n",
163 | " #raise NotImplementedError()"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 4,
169 | "metadata": {
170 | "deletable": false,
171 | "editable": false,
172 | "nbgrader": {
173 | "checksum": "19be955e97fdf7162d43fbb7c2c40951",
174 | "grade": true,
175 | "grade_id": "cell-54a3ba6cff31caa7",
176 | "locked": true,
177 | "points": 1,
178 | "schema_version": 1,
179 | "solution": false
180 | }
181 | },
182 | "outputs": [],
183 | "source": [
184 | "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "deletable": false,
191 | "editable": false,
192 | "nbgrader": {
193 | "checksum": "e10e2163f5957a0c398ef4f0b76b4efe",
194 | "grade": false,
195 | "grade_id": "cell-f63377f3c97aa7c4",
196 | "locked": true,
197 | "schema_version": 1,
198 | "solution": false
199 | }
200 | },
201 | "source": [
202 | "## Question 3\n",
203 | "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chickenpox. Return results by sex. \n",
204 | "\n",
205 | "*This function should return a dictionary in the form of (use the correct numbers):* \n",
206 | "```\n",
207 | " {\"male\":0.2,\n",
208 | " \"female\":0.4}\n",
209 | "```\n",
210 | "\n",
211 | "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 5,
217 | "metadata": {
218 | "deletable": false,
219 | "nbgrader": {
220 | "checksum": "b4d1b58acae002bc73eb0b19f95bc4af",
221 | "grade": false,
222 | "grade_id": "cell-a0a9e6fe67698006",
223 | "locked": false,
224 | "schema_version": 1,
225 | "solution": true
226 | }
227 | },
228 | "outputs": [],
229 | "source": [
230 | "def chickenpox_by_sex():\n",
231 | " # YOUR CODE HERE\n",
232 | " import pandas as pd\n",
233 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
234 | " \n",
235 | " v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n",
236 | " v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n",
237 | " v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n",
238 | " v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n",
239 | " \n",
240 | " rm = v1m.shape[0]/v2m.shape[0]\n",
241 | " rf = v1f.shape[0]/v2f.shape[0]\n",
242 | " r = [rm, rf]\n",
243 | " \n",
244 | " sex = ['male', 'female']\n",
245 | " d = {}\n",
246 | " i = 0\n",
247 | " for s in sex:\n",
248 | " d[s] = r[i]\n",
249 | " i+=1\n",
250 | " return d \n",
251 | " #raise NotImplementedError()"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 6,
257 | "metadata": {
258 | "deletable": false,
259 | "editable": false,
260 | "nbgrader": {
261 | "checksum": "1b6a113a633c55699ae478a3a9ee9c33",
262 | "grade": true,
263 | "grade_id": "cell-c4f1714db100c865",
264 | "locked": true,
265 | "points": 1,
266 | "schema_version": 1,
267 | "solution": false
268 | }
269 | },
270 | "outputs": [],
271 | "source": [
272 | "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "## Question 4\n",
280 | "A correlation is a statistical relationship between two variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had chickenpox and the number of chickenpox vaccine doses given (varicella).\n",
281 | "\n",
282 | "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses a child has been given of the varicella vaccine. A positive correlation (e.g., `corr > 0`) means that an increase in `had_chickenpox_column` (which means more no’s) would also increase the values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). If there is a negative correlation (e.g., `corr < 0`), it indicates that having had chickenpox is related to an increase in the number of vaccine doses.\n",
283 | "\n",
284 | "Also, `pval` is the probability of observing, purely by chance, a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` that is at least as strong as the one actually measured. A small `pval` means that the observed correlation is highly unlikely to have occurred by chance. In this case, `pval` should be very small (will end in `e-18` indicating a very small number).\n",
285 | "\n",
286 | "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 7,
292 | "metadata": {
293 | "deletable": false,
294 | "nbgrader": {
295 | "checksum": "3e645859949447913cd11d30eb33cb1e",
296 | "grade": false,
297 | "grade_id": "cell-8afff07f564cf79a",
298 | "locked": false,
299 | "schema_version": 1,
300 | "solution": true
301 | }
302 | },
303 | "outputs": [],
304 | "source": [
305 | "def corr_chickenpox():\n",
306 | " import scipy.stats as stats\n",
307 | " import numpy as np\n",
308 | " import pandas as pd\n",
309 | " \n",
310 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
311 | " \n",
312 | " v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n",
313 | " \n",
314 | " no_yes = v1['HAD_CPOX']\n",
315 | " \n",
316 | " est_vaccine = v1['P_NUMVRC']\n",
317 | " \n",
318 | " # this is just an example dataframe\n",
319 | " df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n",
320 | " \"num_chickenpox_vaccine_column\":est_vaccine})\n",
321 | "\n",
322 | " # here is some stub code to actually run the correlation\n",
323 | " corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n",
324 | " \n",
325 | " # just return the correlation\n",
326 | " return corr\n",
327 | "\n",
328 | " # YOUR CODE HERE\n",
329 | " #raise NotImplementedError()"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 8,
335 | "metadata": {
336 | "deletable": false,
337 | "editable": false,
338 | "nbgrader": {
339 | "checksum": "ac50ccb747b99f6bbcc76da017e66528",
340 | "grade": true,
341 | "grade_id": "cell-73408733533a29a5",
342 | "locked": true,
343 | "points": 1,
344 | "schema_version": 1,
345 | "solution": false
346 | }
347 | },
348 | "outputs": [],
349 | "source": [
350 | "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": []
359 | }
360 | ],
361 | "metadata": {
362 | "anaconda-cloud": {},
363 | "coursera": {
364 | "schema_names": [
365 | "mooc_adswpy_1_v2_assignment2"
366 | ]
367 | },
368 | "hide_code_all_hidden": false,
369 | "kernelspec": {
370 | "display_name": "Python 3",
371 | "language": "python",
372 | "name": "python3"
373 | },
374 | "language_info": {
375 | "codemirror_mode": {
376 | "name": "ipython",
377 | "version": 3
378 | },
379 | "file_extension": ".py",
380 | "mimetype": "text/x-python",
381 | "name": "python",
382 | "nbconvert_exporter": "python",
383 | "pygments_lexer": "ipython3",
384 | "version": "3.7.6"
385 | }
386 | },
387 | "nbformat": 4,
388 | "nbformat_minor": 1
389 | }
390 |
--------------------------------------------------------------------------------
/Assignment_2/.ipynb_checkpoints/assignment2-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "5a8d839ee00398fa3bd3bc58ec642beb",
10 | "grade": false,
11 | "grade_id": "cell-a839e7b47494b4c3",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 2\n",
19 | "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data user's guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). **Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file.**"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "deletable": false,
26 | "editable": false,
27 | "hideCode": false,
28 | "hidePrompt": false,
29 | "nbgrader": {
30 | "checksum": "aaa5e730f40ba21c1bc94f864bad4742",
31 | "grade": false,
32 | "grade_id": "cell-58fc2e5938733f6a",
33 | "locked": true,
34 | "schema_version": 1,
35 | "solution": false
36 | }
37 | },
38 | "source": [
39 | "## Question 1\n",
40 | "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n",
41 | "\n",
42 | "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n",
43 | "```\n",
44 | " {\"less than high school\":0.2,\n",
45 | " \"high school\":0.4,\n",
46 | " \"more than high school but not college\":0.2,\n",
47 | " \"college\":0.2}\n",
48 | "```\n"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {
55 | "deletable": false,
56 | "nbgrader": {
57 | "checksum": "0ac58deb3f5ac988c643e903cbee7f3a",
58 | "grade": false,
59 | "grade_id": "cell-eea16d020eb52ae7",
60 | "locked": false,
61 | "schema_version": 1,
62 | "solution": true
63 | }
64 | },
65 | "outputs": [],
66 | "source": [
67 | "def proportion_of_education():\n",
68 | " # your code goes here\n",
69 | " # YOUR CODE HERE\n",
70 | " import pandas as pd\n",
71 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
72 | " n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n",
73 | " li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n",
74 | " di = {}\n",
75 | " i = 0\n",
76 | " for l in li:\n",
77 | " di[l] = n[i+1] \n",
78 | " i += 1\n",
79 | " \n",
80 | " return di\n",
81 | " #raise NotImplementedError()"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 2,
87 | "metadata": {
88 | "deletable": false,
89 | "editable": false,
90 | "hideCode": false,
91 | "hidePrompt": false,
92 | "nbgrader": {
93 | "checksum": "ac5d91a24a7f72f66c25d242c3d24a50",
94 | "grade": true,
95 | "grade_id": "cell-c0eeef201366f51c",
96 | "locked": true,
97 | "points": 1,
98 | "schema_version": 1,
99 | "solution": false
100 | }
101 | },
102 | "outputs": [],
103 | "source": [
104 | "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n",
105 | "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n",
106 | "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
107 | "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
108 | "assert \"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n",
109 | "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "deletable": false,
116 | "editable": false,
117 | "nbgrader": {
118 | "checksum": "562b78b7b9b79580269be0a3bebf4b42",
119 | "grade": false,
120 | "grade_id": "cell-8fcbb64516283f52",
121 | "locked": true,
122 | "schema_version": 1,
123 | "solution": false
124 | }
125 | },
126 | "source": [
127 | "## Question 2\n",
128 | "\n",
129 | "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those we know did not.\n",
130 | "\n",
131 | "*This function should return a tuple in the form (use the correct numbers):*\n",
132 | "```\n",
133 | "(2.5, 0.1)\n",
134 | "```"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 3,
140 | "metadata": {
141 | "deletable": false,
142 | "nbgrader": {
143 | "checksum": "a405d639063c4a6408365479f29c95c9",
144 | "grade": false,
145 | "grade_id": "cell-77f18c512324eabb",
146 | "locked": false,
147 | "schema_version": 1,
148 | "solution": true
149 | }
150 | },
151 | "outputs": [],
152 | "source": [
153 | "def average_influenza_doses():\n",
154 | " # YOUR CODE HERE\n",
155 | " \n",
156 | " import pandas as pd\n",
157 | " import numpy as np\n",
158 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
159 | " gp1 = mb[mb['CBF_01'] == 1]\n",
160 | " gp2 = mb[mb['CBF_01'] == 2]\n",
161 | " \n",
162 | " return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n",
163 | " #raise NotImplementedError()"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 4,
169 | "metadata": {
170 | "deletable": false,
171 | "editable": false,
172 | "nbgrader": {
173 | "checksum": "19be955e97fdf7162d43fbb7c2c40951",
174 | "grade": true,
175 | "grade_id": "cell-54a3ba6cff31caa7",
176 | "locked": true,
177 | "points": 1,
178 | "schema_version": 1,
179 | "solution": false
180 | }
181 | },
182 | "outputs": [],
183 | "source": [
184 | "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {
190 | "deletable": false,
191 | "editable": false,
192 | "nbgrader": {
193 | "checksum": "e10e2163f5957a0c398ef4f0b76b4efe",
194 | "grade": false,
195 | "grade_id": "cell-f63377f3c97aa7c4",
196 | "locked": true,
197 | "schema_version": 1,
198 | "solution": false
199 | }
200 | },
201 | "source": [
202 | "## Question 3\n",
203 | "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chickenpox. Return results by sex. \n",
204 | "\n",
205 | "*This function should return a dictionary in the form of (use the correct numbers):* \n",
206 | "```\n",
207 | " {\"male\":0.2,\n",
208 | " \"female\":0.4}\n",
209 | "```\n",
210 | "\n",
211 | "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 5,
217 | "metadata": {
218 | "deletable": false,
219 | "nbgrader": {
220 | "checksum": "b4d1b58acae002bc73eb0b19f95bc4af",
221 | "grade": false,
222 | "grade_id": "cell-a0a9e6fe67698006",
223 | "locked": false,
224 | "schema_version": 1,
225 | "solution": true
226 | }
227 | },
228 | "outputs": [],
229 | "source": [
230 | "def chickenpox_by_sex():\n",
231 | " # YOUR CODE HERE\n",
232 | " import pandas as pd\n",
233 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
234 | " \n",
235 | " v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n",
236 | " v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n",
237 | " v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n",
238 | " v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n",
239 | " \n",
240 | " rm = v1m.shape[0]/v2m.shape[0]\n",
241 | " rf = v1f.shape[0]/v2f.shape[0]\n",
242 | " r = [rm, rf]\n",
243 | " \n",
244 | " sex = ['male', 'female']\n",
245 | " d = {}\n",
246 | " i = 0\n",
247 | " for s in sex:\n",
248 | " d[s] = r[i]\n",
249 | " i+=1\n",
250 | " return d \n",
251 | " #raise NotImplementedError()"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 6,
257 | "metadata": {
258 | "deletable": false,
259 | "editable": false,
260 | "nbgrader": {
261 | "checksum": "1b6a113a633c55699ae478a3a9ee9c33",
262 | "grade": true,
263 | "grade_id": "cell-c4f1714db100c865",
264 | "locked": true,
265 | "points": 1,
266 | "schema_version": 1,
267 | "solution": false
268 | }
269 | },
270 | "outputs": [],
271 | "source": [
272 | "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "## Question 4\n",
280 | "A correlation is a statistical relationship between two variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had chickenpox and the number of chickenpox (varicella) vaccine doses given.\n",
281 | "\n",
282 | "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses of the varicella vaccine a child has been given. A positive correlation (i.e., `corr > 0`) means that higher values of `had_chickenpox_column` (which means more no’s) tend to go together with higher values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). A negative correlation (i.e., `corr < 0`) indicates that having had chickenpox is associated with a higher number of vaccine doses.\n",
283 | "\n",
284 | "Also, `pval` is the probability of observing a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` at least as strong as the one measured if the two columns were in fact uncorrelated, i.e., purely by chance. A small `pval` means that the observed correlation is highly unlikely to have occurred by chance. In this case, `pval` should be very small (it will end in `e-18`, scientific notation for a very small number).\n",
285 | "\n",
286 | "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 7,
292 | "metadata": {
293 | "deletable": false,
294 | "nbgrader": {
295 | "checksum": "3e645859949447913cd11d30eb33cb1e",
296 | "grade": false,
297 | "grade_id": "cell-8afff07f564cf79a",
298 | "locked": false,
299 | "schema_version": 1,
300 | "solution": true
301 | }
302 | },
303 | "outputs": [],
304 | "source": [
305 | "def corr_chickenpox():\n",
306 | " import scipy.stats as stats\n",
307 | " import numpy as np\n",
308 | " import pandas as pd\n",
309 | " \n",
310 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n",
311 | " \n",
312 | " v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n",
313 | " \n",
314 | " no_yes = v1['HAD_CPOX']\n",
315 | " \n",
316 | " est_vaccine = v1['P_NUMVRC']\n",
317 | " \n",
318 | " # this is just an example dataframe\n",
319 | " df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n",
320 | " \"num_chickenpox_vaccine_column\":est_vaccine})\n",
321 | "\n",
322 | " # here is some stub code to actually run the correlation\n",
323 | " corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n",
324 | " \n",
325 | " # just return the correlation\n",
326 | " return corr\n",
327 | "\n",
328 | " # YOUR CODE HERE\n",
329 | " #raise NotImplementedError()"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 8,
335 | "metadata": {
336 | "deletable": false,
337 | "editable": false,
338 | "nbgrader": {
339 | "checksum": "ac50ccb747b99f6bbcc76da017e66528",
340 | "grade": true,
341 | "grade_id": "cell-73408733533a29a5",
342 | "locked": true,
343 | "points": 1,
344 | "schema_version": 1,
345 | "solution": false
346 | }
347 | },
348 | "outputs": [],
349 | "source": [
350 | "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": []
359 | }
360 | ],
361 | "metadata": {
362 | "anaconda-cloud": {},
363 | "coursera": {
364 | "schema_names": [
365 | "mooc_adswpy_1_v2_assignment2"
366 | ]
367 | },
368 | "hide_code_all_hidden": false,
369 | "kernelspec": {
370 | "display_name": "Python 3",
371 | "language": "python",
372 | "name": "python3"
373 | },
374 | "language_info": {
375 | "codemirror_mode": {
376 | "name": "ipython",
377 | "version": 3
378 | },
379 | "file_extension": ".py",
380 | "mimetype": "text/x-python",
381 | "name": "python",
382 | "nbconvert_exporter": "python",
383 | "pygments_lexer": "ipython3",
384 | "version": "3.7.6"
385 | }
386 | },
387 | "nbformat": 4,
388 | "nbformat_minor": 1
389 | }
390 |
--------------------------------------------------------------------------------
/Assignment_4/assets/nhl.csv:
--------------------------------------------------------------------------------
1 | team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,ROW,year,League
2 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2018,NHL
3 | Tampa Bay Lightning*,82,54,23,5,113,.689,296,236,0.66,-0.07,.634,48,2018,NHL
4 | Boston Bruins*,82,50,20,12,112,.683,270,214,0.62,-0.07,.610,47,2018,NHL
5 | Toronto Maple Leafs*,82,49,26,7,105,.640,277,232,0.49,-0.06,.567,42,2018,NHL
6 | Florida Panthers,82,44,30,8,96,.585,248,246,-0.01,-0.04,.537,41,2018,NHL
7 | Detroit Red Wings,82,30,39,13,73,.445,217,255,-0.48,-0.01,.341,25,2018,NHL
8 | Montreal Canadiens,82,29,40,13,71,.433,209,264,-0.68,0.00,.378,27,2018,NHL
9 | Ottawa Senators,82,28,43,11,67,.409,221,291,-0.85,0.00,.372,26,2018,NHL
10 | Buffalo Sabres,82,25,45,12,62,.378,199,280,-0.98,0.01,.311,24,2018,NHL
11 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2018,NHL
12 | Washington Capitals*,82,49,26,7,105,.640,259,239,0.21,-0.04,.585,46,2018,NHL
13 | Pittsburgh Penguins*,82,47,29,6,100,.610,272,250,0.23,-0.04,.573,45,2018,NHL
14 | Philadelphia Flyers*,82,42,26,14,98,.598,251,243,0.07,-0.03,.543,40,2018,NHL
15 | Columbus Blue Jackets*,82,45,30,7,97,.591,242,230,0.11,-0.04,.537,39,2018,NHL
16 | New Jersey Devils*,82,44,29,9,97,.591,248,244,0.02,-0.03,.530,39,2018,NHL
17 | Carolina Hurricanes,82,36,35,11,83,.506,228,256,-0.35,-0.01,.439,33,2018,NHL
18 | New York Islanders,82,35,37,10,80,.488,264,296,-0.40,-0.01,.427,32,2018,NHL
19 | New York Rangers,82,34,39,9,77,.470,231,268,-0.46,-0.01,.427,31,2018,NHL
20 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2018,NHL
21 | Nashville Predators*,82,53,18,11,117,.713,267,211,0.71,0.03,.652,47,2018,NHL
22 | Winnipeg Jets*,82,52,20,10,114,.695,277,218,0.74,0.02,.622,48,2018,NHL
23 | Minnesota Wild*,82,45,26,11,101,.616,253,232,0.29,0.04,.549,42,2018,NHL
24 | Colorado Avalanche*,82,43,30,9,95,.579,257,237,0.28,0.04,.518,41,2018,NHL
25 | St. Louis Blues,82,44,32,6,94,.573,226,222,0.10,0.05,.518,41,2018,NHL
26 | Dallas Stars,82,42,32,8,92,.561,235,225,0.17,0.04,.506,38,2018,NHL
27 | Chicago Blackhawks,82,33,39,10,76,.463,229,256,-0.26,0.07,.409,32,2018,NHL
28 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2018,NHL
29 | Vegas Golden Knights*,82,51,24,7,109,.665,272,228,0.52,-0.01,.616,47,2018,NHL
30 | Anaheim Ducks*,82,44,25,13,101,.616,235,216,0.24,0.01,.555,40,2018,NHL
31 | San Jose Sharks*,82,45,27,10,100,.610,252,229,0.28,0.00,.537,40,2018,NHL
32 | Los Angeles Kings*,82,45,29,8,98,.598,239,203,0.44,0.00,.543,43,2018,NHL
33 | Calgary Flames,82,37,35,10,84,.512,218,248,-0.33,0.03,.470,35,2018,NHL
34 | Edmonton Oilers,82,36,40,6,78,.476,234,263,-0.32,0.03,.415,31,2018,NHL
35 | Vancouver Canucks,82,31,40,11,73,.445,218,264,-0.51,0.05,.409,31,2018,NHL
36 | Arizona Coyotes,82,29,41,12,70,.427,208,256,-0.53,0.05,.372,27,2018,NHL
37 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL
38 | Montreal Canadiens*,82,47,26,9,103,.628,226,200,0.31,-0.01,.567,44,2017,NHL
39 | Ottawa Senators*,82,44,28,10,98,.598,212,214,-0.01,0.01,.524,38,2017,NHL
40 | Boston Bruins*,82,44,31,7,95,.579,234,212,0.27,0.00,.543,42,2017,NHL
41 | Toronto Maple Leafs*,82,40,27,15,95,.579,251,242,0.11,0.00,.530,39,2017,NHL
42 | Tampa Bay Lightning,82,42,30,10,94,.573,234,227,0.09,0.01,.506,38,2017,NHL
43 | Florida Panthers,82,35,36,11,81,.494,210,237,-0.30,0.03,.433,30,2017,NHL
44 | Detroit Red Wings,82,33,36,13,79,.482,207,244,-0.41,0.04,.348,24,2017,NHL
45 | Buffalo Sabres,82,33,37,12,78,.476,201,237,-0.41,0.03,.427,31,2017,NHL
46 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL
47 | Washington Capitals*,82,55,19,8,118,.720,263,182,0.99,0.00,.689,53,2017,NHL
48 | Pittsburgh Penguins*,82,50,21,11,111,.677,282,234,0.59,0.01,.616,46,2017,NHL
49 | Columbus Blue Jackets*,82,50,24,8,108,.659,249,195,0.68,0.02,.610,48,2017,NHL
50 | New York Rangers*,82,48,28,6,102,.622,256,220,0.47,0.03,.591,45,2017,NHL
51 | New York Islanders,82,41,29,12,94,.573,241,242,0.03,0.05,.512,39,2017,NHL
52 | Philadelphia Flyers,82,39,33,10,88,.537,219,236,-0.17,0.04,.463,32,2017,NHL
53 | Carolina Hurricanes,82,36,31,15,87,.530,215,236,-0.21,0.05,.457,33,2017,NHL
54 | New Jersey Devils,82,28,40,14,70,.427,183,244,-0.67,0.08,.341,25,2017,NHL
55 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL
56 | Chicago Blackhawks*,82,50,23,9,109,.665,244,213,0.32,-0.06,.591,46,2017,NHL
57 | Minnesota Wild*,82,49,25,8,106,.646,266,208,0.63,-0.08,.591,46,2017,NHL
58 | St. Louis Blues*,82,46,29,7,99,.604,235,218,0.17,-0.04,.561,44,2017,NHL
59 | Nashville Predators*,82,41,29,12,94,.573,240,224,0.16,-0.04,.512,39,2017,NHL
60 | Winnipeg Jets,82,40,35,7,87,.530,249,256,-0.11,-0.03,.476,37,2017,NHL
61 | Dallas Stars,82,34,37,11,79,.482,223,262,-0.48,-0.01,.421,33,2017,NHL
62 | Colorado Avalanche,82,22,56,4,48,.293,166,278,-1.32,0.05,.274,21,2017,NHL
63 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL
64 | Anaheim Ducks*,82,46,23,13,105,.640,223,200,0.24,-0.04,.561,43,2017,NHL
65 | Edmonton Oilers*,82,47,26,9,103,.628,247,212,0.37,-0.06,.579,43,2017,NHL
66 | San Jose Sharks*,82,46,29,7,99,.604,221,201,0.21,-0.03,.555,44,2017,NHL
67 | Calgary Flames*,82,45,33,4,94,.573,226,221,0.02,-0.04,.537,41,2017,NHL
68 | Los Angeles Kings,82,39,35,8,86,.524,201,205,-0.07,-0.02,.488,37,2017,NHL
69 | Arizona Coyotes,82,30,42,10,70,.427,197,260,-0.76,0.01,.341,24,2017,NHL
70 | Vancouver Canucks,82,30,43,9,69,.421,182,243,-0.73,0.01,.354,26,2017,NHL
71 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL
72 | Florida Panthers*,82,47,26,9,103,.628,239,203,0.42,-0.02,.549,40,2017,NHL
73 | Tampa Bay Lightning*,82,46,31,5,97,.591,227,201,0.30,-0.02,.561,43,2017,NHL
74 | Detroit Red Wings*,82,41,30,11,93,.567,211,224,-0.16,0.00,.518,39,2017,NHL
75 | Boston Bruins,82,42,31,9,93,.567,240,230,0.11,-0.02,.500,38,2017,NHL
76 | Ottawa Senators,82,38,35,9,85,.518,236,247,-0.13,0.01,.463,32,2017,NHL
77 | Montreal Canadiens,82,38,38,6,82,.500,221,236,-0.18,0.00,.451,33,2017,NHL
78 | Buffalo Sabres,82,35,36,11,81,.494,201,222,-0.25,0.01,.457,33,2017,NHL
79 | Toronto Maple Leafs,82,29,42,11,69,.421,198,246,-0.56,0.03,.354,23,2017,NHL
80 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL
81 | Washington Capitals*,82,56,18,8,120,.732,252,193,0.70,-0.02,.671,52,2017,NHL
82 | Pittsburgh Penguins*,82,48,26,8,104,.634,245,203,0.50,-0.01,.585,44,2017,NHL
83 | New York Rangers*,82,46,27,9,101,.616,236,217,0.24,0.01,.555,43,2017,NHL
84 | New York Islanders*,82,45,27,10,100,.610,232,216,0.19,0.00,.549,40,2017,NHL
85 | Philadelphia Flyers*,82,41,27,14,96,.585,214,218,-0.03,0.02,.530,38,2017,NHL
86 | Carolina Hurricanes,82,35,31,16,86,.524,198,226,-0.31,0.03,.445,33,2017,NHL
87 | New Jersey Devils,82,38,36,8,84,.512,184,208,-0.28,0.02,.488,36,2017,NHL
88 | Columbus Blue Jackets,82,34,40,8,76,.463,219,252,-0.38,0.03,.402,28,2017,NHL
89 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL
90 | Dallas Stars*,82,50,23,9,109,.665,267,230,0.45,-0.01,.610,48,2017,NHL
91 | St. Louis Blues*,82,49,24,9,107,.652,224,201,0.28,0.00,.591,44,2017,NHL
92 | Chicago Blackhawks*,82,47,26,9,103,.628,235,209,0.32,0.00,.579,46,2017,NHL
93 | Nashville Predators*,82,41,27,14,96,.585,228,215,0.17,0.01,.488,37,2017,NHL
94 | Minnesota Wild*,82,38,33,11,87,.530,216,206,0.13,0.01,.457,35,2017,NHL
95 | Colorado Avalanche,82,39,39,4,82,.500,216,240,-0.26,0.03,.451,35,2017,NHL
96 | Winnipeg Jets,82,35,39,8,78,.476,215,239,-0.26,0.03,.427,32,2017,NHL
97 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL
98 | Anaheim Ducks*,82,46,25,11,103,.628,218,192,0.27,-0.05,.567,43,2017,NHL
99 | Los Angeles Kings*,82,48,28,6,102,.622,225,195,0.32,-0.04,.591,46,2017,NHL
100 | San Jose Sharks*,82,46,30,6,98,.598,241,210,0.33,-0.05,.555,42,2017,NHL
101 | Arizona Coyotes,82,35,39,8,78,.476,209,245,-0.43,0.01,.427,34,2017,NHL
102 | Calgary Flames,82,35,40,7,77,.470,231,260,-0.36,-0.01,.433,33,2017,NHL
103 | Vancouver Canucks,82,31,38,13,75,.457,191,243,-0.62,0.02,.372,26,2017,NHL
104 | Edmonton Oilers,82,31,43,8,70,.427,203,245,-0.51,0.00,.372,27,2017,NHL
105 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NHL
106 | Montreal Canadiens*,82,50,22,10,110,.671,221,189,0.36,-0.03,.598,43,2015,NHL
107 | Tampa Bay Lightning*,82,50,24,8,108,.659,262,211,0.57,-0.06,.622,47,2015,NHL
108 | Detroit Red Wings*,82,43,25,14,100,.610,235,221,0.14,-0.03,.561,39,2015,NHL
109 | Ottawa Senators*,82,43,26,13,99,.604,238,215,0.25,-0.04,.530,37,2015,NHL
110 | Boston Bruins,82,41,27,14,96,.585,213,211,0.01,-0.01,.537,37,2015,NHL
111 | Florida Panthers,82,38,29,15,91,.555,206,223,-0.23,-0.02,.476,30,2015,NHL
112 | Toronto Maple Leafs,82,30,44,8,68,.415,211,262,-0.61,0.02,.366,25,2015,NHL
113 | Buffalo Sabres,82,23,51,8,54,.329,161,274,-1.33,0.05,.262,15,2015,NHL
114 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2015,NHL
115 | New York Rangers*,82,53,22,7,113,.689,252,192,0.69,-0.04,.652,49,2015,NHL
116 | Washington Capitals*,82,45,26,11,101,.616,242,203,0.44,-0.03,.543,40,2015,NHL
117 | New York Islanders*,82,47,28,7,101,.616,252,230,0.26,-0.01,.567,40,2015,NHL
118 | Pittsburgh Penguins*,82,43,27,12,98,.598,221,210,0.12,-0.01,.537,39,2015,NHL
119 | Columbus Blue Jackets,82,42,35,5,89,.543,236,250,-0.16,0.01,.470,33,2015,NHL
120 | Philadelphia Flyers,82,33,31,18,84,.512,215,234,-0.23,0.00,.451,30,2015,NHL
121 | New Jersey Devils,82,32,36,14,78,.476,181,216,-0.40,0.02,.402,27,2015,NHL
122 | Carolina Hurricanes,82,30,41,11,71,.433,188,226,-0.44,0.02,.378,25,2015,NHL
123 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NHL
124 | St. Louis Blues*,82,51,24,7,109,.665,248,201,0.59,0.02,.591,42,2015,NHL
125 | Nashville Predators*,82,47,25,10,104,.634,232,208,0.33,0.03,.573,41,2015,NHL
126 | Chicago Blackhawks*,82,48,28,6,102,.622,229,189,0.51,0.02,.549,39,2015,NHL
127 | Minnesota Wild*,82,46,28,8,100,.610,231,201,0.39,0.03,.555,42,2015,NHL
128 | Winnipeg Jets*,82,43,26,13,99,.604,230,210,0.29,0.04,.518,36,2015,NHL
129 | Dallas Stars,82,41,31,10,92,.561,261,260,0.06,0.05,.494,37,2015,NHL
130 | Colorado Avalanche,82,39,31,12,90,.549,219,227,-0.04,0.06,.439,29,2015,NHL
131 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NHL
132 | Anaheim Ducks*,82,51,24,7,109,.665,236,226,0.10,-0.03,.604,43,2015,NHL
133 | Vancouver Canucks*,82,48,29,5,101,.616,242,222,0.20,-0.05,.561,42,2015,NHL
134 | Calgary Flames*,82,45,30,7,97,.591,241,216,0.26,-0.05,.543,41,2015,NHL
135 | Los Angeles Kings,82,40,27,15,95,.579,220,205,0.16,-0.03,.524,38,2015,NHL
136 | San Jose Sharks,82,40,33,9,89,.543,228,232,-0.08,-0.03,.500,36,2015,NHL
137 | Edmonton Oilers,82,24,44,14,62,.378,198,283,-1.01,0.03,.305,19,2015,NHL
138 | Arizona Coyotes,82,24,50,8,56,.341,170,272,-1.20,0.04,.293,19,2015,NHL
139 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NHL
140 | Boston Bruins*,82,54,19,9,117,.713,261,177,0.92,-0.11,.677,51,2014,NHL
141 | Tampa Bay Lightning*,82,46,27,9,101,.616,240,215,0.25,-0.06,.549,38,2014,NHL
142 | Montreal Canadiens*,82,46,28,8,100,.610,215,204,0.08,-0.05,.543,40,2014,NHL
143 | Detroit Red Wings*,82,39,28,15,93,.567,222,230,-0.14,-0.04,.500,34,2014,NHL
144 | Ottawa Senators,82,37,31,14,88,.537,236,265,-0.37,-0.02,.451,30,2014,NHL
145 | Toronto Maple Leafs,82,38,36,8,84,.512,231,256,-0.34,-0.04,.433,29,2014,NHL
146 | Florida Panthers,82,29,45,8,66,.402,196,268,-0.87,0.01,.341,21,2014,NHL
147 | Buffalo Sabres,82,21,51,10,52,.317,157,248,-1.09,0.02,.244,14,2014,NHL
148 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2014,NHL
149 | Pittsburgh Penguins*,82,51,24,7,109,.665,249,207,0.47,-0.04,.598,44,2014,NHL
150 | New York Rangers*,82,45,31,6,96,.585,218,193,0.26,-0.04,.543,41,2014,NHL
151 | Philadelphia Flyers*,82,42,30,10,94,.573,236,235,-0.01,-0.02,.543,39,2014,NHL
152 | Columbus Blue Jackets*,82,43,32,7,93,.567,231,216,0.16,-0.03,.506,38,2014,NHL
153 | Washington Capitals,82,38,30,14,90,.549,235,240,-0.08,-0.02,.470,28,2014,NHL
154 | New Jersey Devils,82,35,29,18,88,.537,197,208,-0.15,-0.02,.506,35,2014,NHL
155 | Carolina Hurricanes,82,36,35,11,83,.506,207,230,-0.29,-0.01,.451,34,2014,NHL
156 | New York Islanders,82,34,37,11,79,.482,225,267,-0.51,0.00,.396,25,2014,NHL
157 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NHL
158 | Colorado Avalanche*,82,52,22,8,112,.683,250,220,0.40,0.04,.628,47,2014,NHL
159 | St. Louis Blues*,82,52,23,7,111,.677,248,191,0.71,0.01,.598,43,2014,NHL
160 | Chicago Blackhawks*,82,46,21,15,107,.652,267,220,0.60,0.02,.573,40,2014,NHL
161 | Minnesota Wild*,82,43,27,12,98,.598,207,206,0.07,0.05,.524,35,2014,NHL
162 | Dallas Stars*,82,40,31,11,91,.555,235,228,0.14,0.05,.494,36,2014,NHL
163 | Nashville Predators,82,38,32,12,88,.537,216,242,-0.25,0.07,.506,36,2014,NHL
164 | Winnipeg Jets,82,37,35,10,84,.512,227,237,-0.07,0.06,.439,29,2014,NHL
165 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NHL
166 | Anaheim Ducks*,82,54,20,8,116,.707,266,209,0.68,-0.01,.677,51,2014,NHL
167 | San Jose Sharks*,82,51,22,9,111,.677,249,200,0.60,0.00,.604,41,2014,NHL
168 | Los Angeles Kings*,82,46,28,8,100,.610,206,174,0.40,0.01,.549,38,2014,NHL
169 | Phoenix Coyotes,82,37,30,15,89,.543,216,231,-0.16,0.03,.457,31,2014,NHL
170 | Vancouver Canucks,82,36,35,11,83,.506,196,223,-0.30,0.03,.451,31,2014,NHL
171 | Calgary Flames,82,35,40,7,77,.470,209,241,-0.35,0.04,.402,28,2014,NHL
172 | Edmonton Oilers,82,29,44,9,67,.409,203,270,-0.75,0.07,.348,25,2014,NHL
173 |
--------------------------------------------------------------------------------
/Assignment_4/assets/nfl.csv:
--------------------------------------------------------------------------------
1 | DSRS,L,League,MoV,OSRS,PA,PD,PF,SRS,SoS,T,W,W-L%,team,year
2 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2018
3 | 2.1,5,NFL,6.9,3.1,325,111,436,5.2,-1.8,0,11,.688,New England Patriots*,2018
4 | -5.2,9,NFL,-7.1,-3.6,433,-114,319,-8.8,-1.7,0,7,.438,Miami Dolphins,2018
5 | -0.6,10,NFL,-6.6,-6.3,374,-105,269,-6.9,-0.3,0,6,.375,Buffalo Bills,2018
6 | -5.9,12,NFL,-6.8,-2.0,441,-108,333,-7.8,-1.1,0,4,.250,New York Jets,2018
7 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2018
8 | 6.4,6,NFL,6.4,0.6,287,102,389,7.0,0.6,0,10,.625,Baltimore Ravens*,2018
9 | 1.7,6,NFL,4.3,3.9,360,68,428,5.6,1.3,1,9,.594,Pittsburgh Steelers,2018
10 | 0.6,8,NFL,-2.1,-1.0,392,-33,359,-0.3,1.7,1,7,.469,Cleveland Browns,2018
11 | -3.4,10,NFL,-5.4,0.0,455,-87,368,-3.4,2.0,0,6,.375,Cincinnati Bengals,2018
12 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2018
13 | 1.4,5,NFL,5.4,2.4,316,86,402,3.8,-1.5,0,11,.688,Houston Texans*,2018
14 | -0.6,6,NFL,5.6,3.9,344,89,433,3.4,-2.2,0,10,.625,Indianapolis Colts+,2018
15 | 3.5,7,NFL,0.4,-3.2,303,7,310,0.2,-0.2,0,9,.563,Tennessee Titans,2018
16 | 4.0,11,NFL,-4.4,-8.1,316,-71,245,-4.0,0.4,0,5,.313,Jacksonville Jaguars,2018
17 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2018
18 | -3.8,4,NFL,9.0,12.6,421,144,565,8.9,-0.1,0,12,.750,Kansas City Chiefs*,2018
19 | 2.9,4,NFL,6.2,3.0,329,99,428,6.0,-0.2,0,12,.750,Los Angeles Chargers+,2018
20 | 3.1,10,NFL,-1.3,-3.6,349,-20,329,-0.5,0.7,0,6,.375,Denver Broncos,2018
21 | -4.1,12,NFL,-11.1,-5.2,467,-177,290,-9.3,1.8,0,4,.250,Oakland Raiders,2018
22 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2018
23 | 2.9,6,NFL,0.9,-1.9,324,15,339,1.1,0.2,0,10,.625,Dallas Cowboys*,2018
24 | 1.8,7,NFL,1.2,0.0,348,19,367,1.7,0.5,0,9,.563,Philadelphia Eagles+,2018
25 | 0.6,9,NFL,-4.9,-5.6,359,-78,281,-4.9,-0.1,0,7,.438,Washington Redskins,2018
26 | -2.9,11,NFL,-2.7,0.8,412,-43,369,-2.2,0.5,0,5,.313,New York Giants,2018
27 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2018
28 | 4.8,4,NFL,8.6,1.5,283,138,421,6.3,-2.3,0,12,.750,Chicago Bears*,2018
29 | 1.8,7,NFL,1.2,-1.2,341,19,360,0.6,-0.6,1,8,.531,Minnesota Vikings,2018
30 | -2.7,9,NFL,-1.5,0.0,400,-24,376,-2.7,-1.2,1,6,.406,Green Bay Packers,2018
31 | 0.3,10,NFL,-2.3,-3.3,360,-36,324,-3.0,-0.8,0,6,.375,Detroit Lions,2018
32 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2018
33 | 2.2,3,NFL,9.4,7.9,353,151,504,10.1,0.6,0,13,.813,New Orleans Saints*,2018
34 | 0.8,9,NFL,-0.4,0.1,382,-6,376,0.9,1.3,0,7,.438,Carolina Panthers,2018
35 | -2.6,9,NFL,-0.6,2.5,423,-9,414,-0.1,0.4,0,7,.438,Atlanta Falcons,2018
36 | -4.6,11,NFL,-4.3,2.0,464,-68,396,-2.6,1.7,0,5,.313,Tampa Bay Buccaneers,2018
37 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2018
38 | -1.1,3,NFL,8.9,9.5,384,143,527,8.5,-0.4,0,13,.813,Los Angeles Rams*,2018
39 | 1.5,6,NFL,5.1,3.0,347,81,428,4.5,-0.6,0,10,.625,Seattle Seahawks+,2018
40 | -3.1,12,NFL,-5.8,-2.5,435,-93,342,-5.5,0.3,0,4,.250,San Francisco 49ers,2018
41 | -1.9,13,NFL,-12.5,-9.6,425,-200,225,-11.5,1.0,0,3,.188,Arizona Cardinals,2018
42 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2017
43 | 2.6,3,NFL,10.1,6.3,296,162,458,8.9,-1.2,,13,.813,New England Patriots*,2017
44 | -1.0,7,NFL,-3.6,-3.0,359,-57,302,-4.0,-0.5,,9,.563,Buffalo Bills+,2017
45 | -2.4,10,NFL,-7.0,-3.9,393,-112,281,-6.3,0.7,,6,.375,Miami Dolphins,2017
46 | -2.1,11,NFL,-5.3,-2.9,382,-84,298,-4.9,0.3,,5,.313,New York Jets,2017
47 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2017
48 | 1.8,3,NFL,6.1,3.2,308,98,406,5.0,-1.1,,13,.813,Pittsburgh Steelers*,2017
49 | 1.2,7,NFL,5.8,2.2,303,92,395,3.4,-2.4,,9,.563,Baltimore Ravens,2017
50 | -0.9,9,NFL,-3.7,-4.1,349,-59,290,-5.0,-1.3,,7,.438,Cincinnati Bengals,2017
51 | -4.1,16,NFL,-11.0,-6.8,410,-176,234,-11.0,0.0,,0,.000,Cleveland Browns,2017
52 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2017
53 | 3.6,6,NFL,9.3,3.0,268,149,417,6.5,-2.8,,10,.625,Jacksonville Jaguars*,2017
54 | -1.5,7,NFL,-1.4,-2.0,356,-22,334,-3.5,-2.1,,9,.563,Tennessee Titans+,2017
55 | -5.6,12,NFL,-6.1,-0.8,436,-98,338,-6.4,-0.3,,4,.250,Houston Texans,2017
56 | -4.0,12,NFL,-8.8,-6.1,404,-141,263,-10.1,-1.3,,4,.250,Indianapolis Colts,2017
57 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2017
58 | -0.3,6,NFL,4.8,3.8,339,76,415,3.4,-1.3,,10,.625,Kansas City Chiefs*,2017
59 | 4.0,7,NFL,5.2,-0.3,272,83,355,3.6,-1.5,,9,.563,Los Angeles Chargers,2017
60 | -1.8,10,NFL,-4.5,-3.0,373,-72,301,-4.7,-0.2,,6,.375,Oakland Raiders,2017
61 | -2.9,11,NFL,-5.8,-3.9,382,-93,289,-6.7,-0.9,,5,.313,Denver Broncos,2017
62 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2017
63 | 2.5,3,NFL,10.1,7.0,295,162,457,9.4,-0.7,,13,.813,Philadelphia Eagles*,2017
64 | 1.2,7,NFL,1.4,0.4,332,22,354,1.6,0.2,,9,.563,Dallas Cowboys,2017
65 | -1.8,9,NFL,-2.9,0.5,388,-46,342,-1.3,1.6,,7,.438,Washington Redskins,2017
66 | -1.2,13,NFL,-8.9,-6.4,388,-142,246,-7.6,1.3,,3,.188,New York Giants,2017
67 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2017
68 | 6.8,3,NFL,8.1,2.3,252,130,382,9.1,1.0,,13,.813,Minnesota Vikings*,2017
69 | -2.5,7,NFL,2.1,5.2,376,34,410,2.7,0.6,,9,.563,Detroit Lions,2017
70 | -1.6,9,NFL,-4.0,-0.3,384,-64,320,-1.9,2.1,,7,.438,Green Bay Packers,2017
71 | 3.3,11,NFL,-3.5,-4.6,320,-56,264,-1.3,2.2,,5,.313,Chicago Bears,2017
72 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2017
73 | 2.2,5,NFL,7.6,7.0,326,122,448,9.2,1.5,,11,.688,New Orleans Saints*,2017
74 | 2.7,5,NFL,2.3,1.7,327,36,363,4.3,2.1,,11,.688,Carolina Panthers+,2017
75 | 3.2,6,NFL,2.4,1.1,315,38,353,4.3,1.9,,10,.625,Atlanta Falcons+,2017
76 | -1.7,11,NFL,-2.9,0.4,382,-47,335,-1.3,1.7,,5,.313,Tampa Bay Buccaneers,2017
77 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2017
78 | 1.0,5,NFL,9.3,8.2,329,149,478,9.2,-0.2,,11,.688,Los Angeles Rams*,2017
79 | 1.2,7,NFL,2.1,0.7,332,34,366,1.9,-0.2,,9,.563,Seattle Seahawks,2017
80 | 0.2,8,NFL,-4.1,-4.0,361,-66,295,-3.7,0.4,,8,.500,Arizona Cardinals,2017
81 | -2.1,10,NFL,-3.3,-0.8,383,-52,331,-2.9,0.4,,6,.375,San Francisco 49ers,2017
82 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2016
83 | 5.0,2,NFL,11.9,4.3,250,191,441,9.3,-2.7,0,14,.875,New England Patriots*,2016
84 | -1.8,6,NFL,-1.1,-0.6,380,-17,363,-2.4,-1.3,0,10,.625,Miami Dolphins+,2016
85 | -2.2,9,NFL,1.3,1.8,378,21,399,-0.3,-1.6,0,7,.438,Buffalo Bills,2016
86 | -3.0,11,NFL,-8.4,-5.5,409,-134,275,-8.5,-0.1,0,5,.313,New York Jets,2016
87 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2016
88 | 2.0,5,NFL,4.5,2.8,327,72,399,4.7,0.2,0,11,.688,Pittsburgh Steelers*,2016
89 | 2.6,8,NFL,1.4,-1.1,321,22,343,1.5,0.2,0,8,.500,Baltimore Ravens,2016
90 | 2.5,9,NFL,0.6,-1.5,315,10,325,1.0,0.4,1,6,.406,Cincinnati Bengals,2016
91 | -4.9,15,NFL,-11.8,-5.2,452,-188,264,-10.1,1.7,0,1,.063,Cleveland Browns,2016
92 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2016
93 | 2.7,7,NFL,-3.1,-5.3,328,-49,279,-2.6,0.4,0,9,.563,Houston Texans*,2016
94 | -1.7,7,NFL,0.2,0.7,378,3,381,-1.0,-1.2,0,9,.563,Tennessee Titans,2016
95 | -2.7,8,NFL,1.2,3.1,392,19,411,0.4,-0.8,0,8,.500,Indianapolis Colts,2016
96 | -2.3,13,NFL,-5.1,-2.7,400,-82,318,-5.0,0.2,0,3,.188,Jacksonville Jaguars,2016
97 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2016
98 | 4.4,4,NFL,4.9,1.2,311,78,389,5.6,0.7,0,12,.750,Kansas City Chiefs*,2016
99 | -0.3,4,NFL,1.9,3.5,385,31,416,3.3,1.3,0,12,.750,Oakland Raiders+,2016
100 | 6.1,7,NFL,2.3,-2.0,297,36,333,4.0,1.8,0,9,.563,Denver Broncos,2016
101 | -3.0,11,NFL,-0.8,3.0,423,-13,410,0.1,0.9,0,5,.313,San Diego Chargers,2016
102 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2016
103 | 2.9,3,NFL,7.2,4.1,306,115,421,7.0,-0.2,0,13,.813,Dallas Cowboys*,2016
104 | 5.4,5,NFL,1.6,-3.2,284,26,310,2.1,0.5,0,11,.688,New York Giants+,2016
105 | -1.3,7,NFL,0.8,3.3,383,13,396,2.0,1.2,1,8,.531,Washington Redskins,2016
106 | 2.5,9,NFL,2.3,1.3,331,36,367,3.8,1.6,0,7,.438,Philadelphia Eagles,2016
107 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2016
108 | -2.0,6,NFL,2.8,4.9,388,44,432,2.8,0.1,0,10,.625,Green Bay Packers*,2016
109 | -0.1,7,NFL,-0.8,-1.3,358,-12,346,-1.4,-0.6,0,9,.563,Detroit Lions+,2016
110 | 3.6,8,NFL,1.3,-2.6,307,20,327,0.9,-0.3,0,8,.500,Minnesota Vikings,2016
111 | -2.3,13,NFL,-7.5,-5.2,399,-120,279,-7.5,0.0,0,3,.188,Chicago Bears,2016
112 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2016
113 | -2.0,5,NFL,8.4,10.5,406,134,540,8.5,0.1,0,11,.688,Atlanta Falcons*,2016
114 | 1.3,7,NFL,-0.9,-1.5,369,-15,354,-0.2,0.7,0,9,.563,Tampa Bay Buccaneers,2016
115 | -5.3,9,NFL,0.9,6.8,454,15,469,1.5,0.6,0,7,.438,New Orleans Saints,2016
116 | -0.8,10,NFL,-2.1,-0.2,402,-33,369,-1.0,1.1,0,6,.375,Carolina Panthers,2016
117 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2016
118 | 4.5,5,NFL,3.9,-2.4,292,62,354,2.1,-1.7,1,10,.656,Seattle Seahawks*,2016
119 | -0.8,8,NFL,3.5,2.4,362,56,418,1.6,-1.9,1,7,.469,Arizona Cardinals,2016
120 | -1.6,12,NFL,-10.6,-9.5,394,-170,224,-11.1,-0.5,0,4,.250,Los Angeles Rams,2016
121 | -7.5,14,NFL,-10.7,-3.7,480,-171,309,-11.2,-0.5,0,2,.125,San Francisco 49ers,2016
122 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2015
123 | 1.7,4,NFL,9.4,5.3,315,150,465,7.0,-2.4,,12,.750,New England Patriots*,2015
124 | 2.0,6,NFL,4.6,-0.5,314,73,387,1.5,-3.0,,10,.625,New York Jets,2015
125 | -0.2,8,NFL,1.3,0.3,359,20,379,0.0,-1.2,,8,.500,Buffalo Bills,2015
126 | -2.2,10,NFL,-4.9,-4.7,389,-79,310,-6.8,-1.9,,6,.375,Miami Dolphins,2015
127 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2015
128 | 5.8,4,NFL,8.8,4.8,279,140,419,10.6,1.9,,12,.750,Cincinnati Bengals*,2015
129 | 3.6,6,NFL,6.5,5.1,319,104,423,8.7,2.2,,10,.625,Pittsburgh Steelers+,2015
130 | -1.2,11,NFL,-4.6,-0.7,401,-73,328,-1.9,2.6,,5,.313,Baltimore Ravens,2015
131 | -2.9,13,NFL,-9.6,-3.2,432,-154,278,-6.1,3.5,,3,.188,Cleveland Browns,2015
132 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2015
133 | 2.6,7,NFL,1.6,-3.3,313,26,339,-0.8,-2.4,,9,.563,Houston Texans*,2015
134 | -3.6,8,NFL,-4.7,-3.1,408,-75,333,-6.7,-2.0,,8,.500,Indianapolis Colts,2015
135 | -6.9,11,NFL,-4.5,-0.7,448,-72,376,-7.5,-3.0,,5,.313,Jacksonville Jaguars,2015
136 | -4.6,13,NFL,-7.8,-5.9,423,-124,299,-10.5,-2.8,,3,.188,Tennessee Titans,2015
137 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2015
138 | 5.5,4,NFL,3.7,0.3,296,59,355,5.8,2.1,,12,.750,Denver Broncos*,2015
139 | 5.3,5,NFL,7.4,3.7,287,118,405,9.0,1.6,,11,.688,Kansas City Chiefs+,2015
140 | -1.6,9,NFL,-2.5,1.4,399,-40,359,-0.2,2.3,,7,.438,Oakland Raiders,2015
141 | -1.1,12,NFL,-4.9,-1.5,398,-78,320,-2.6,2.2,,4,.250,San Diego Chargers,2015
142 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2015
143 | -1.8,7,NFL,0.6,-0.2,379,9,388,-1.9,-2.5,,9,.563,Washington Redskins*,2015
144 | -4.0,9,NFL,-3.3,-0.6,430,-53,377,-4.6,-1.3,,7,.438,Philadelphia Eagles,2015
145 | -6.1,10,NFL,-1.4,2.5,442,-22,420,-3.6,-2.2,,6,.375,New York Giants,2015
146 | 0.1,12,NFL,-6.2,-7.0,374,-99,275,-6.9,-0.7,,4,.250,Dallas Cowboys,2015
147 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2015
148 | 4.7,5,NFL,3.9,1.1,302,63,365,5.8,1.9,,11,.688,Minnesota Vikings*,2015
149 | 3.3,6,NFL,2.8,2.0,323,45,368,5.3,2.5,,10,.625,Green Bay Packers+,2015
150 | -1.3,9,NFL,-2.6,1.0,400,-42,358,-0.2,2.4,,7,.438,Detroit Lions,2015
151 | -1.2,10,NFL,-3.9,-0.1,397,-62,335,-1.3,2.6,,6,.375,Chicago Bears,2015
152 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2015
153 | 2.1,1,NFL,12.0,6.0,308,192,500,8.1,-3.9,,15,.938,Carolina Panthers*,2015
154 | 0.3,8,NFL,-0.4,-4.0,345,-6,339,-3.8,-3.4,,8,.500,Atlanta Falcons,2015
155 | -7.6,9,NFL,-4.3,1.1,476,-68,408,-6.6,-2.3,,7,.438,New Orleans Saints,2015
156 | -4.2,10,NFL,-4.7,-3.5,417,-75,342,-7.7,-3.0,,6,.375,Tampa Bay Buccaneers,2015
157 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2015
158 | 3.4,3,NFL,11.0,9.0,313,176,489,12.3,1.3,,13,.813,Arizona Cardinals*,2015
159 | 6.0,6,NFL,9.1,5.4,277,146,423,11.3,2.2,,10,.625,Seattle Seahawks+,2015
160 | 3.6,9,NFL,-3.1,-3.8,330,-50,280,-0.2,3.0,,7,.438,St. Louis Rams,2015
161 | 0.5,11,NFL,-9.3,-6.0,387,-149,238,-5.5,3.8,,5,.313,San Francisco 49ers,2015
162 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2014
163 | 3.5,4,NFL,9.7,7.5,313,155,468,10.9,1.3,0,12,.750,New England Patriots*,2014
164 | 5.3,7,NFL,3.4,-0.4,289,54,343,4.9,1.6,0,9,.563,Buffalo Bills,2014
165 | -0.4,8,NFL,0.9,2.9,373,15,388,2.6,1.6,0,8,.500,Miami Dolphins,2014
166 | -1.0,12,NFL,-7.4,-4.0,401,-118,283,-5.0,2.3,0,4,.250,New York Jets,2014
167 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2014
168 | -2.1,5,NFL,4.3,4.4,368,68,436,2.2,-2.0,0,11,.688,Pittsburgh Steelers*,2014
169 | 1.3,5,NFL,1.3,-0.5,344,21,365,0.7,-0.6,1,10,.656,Cincinnati Bengals+,2014
170 | 2.8,6,NFL,6.7,1.8,302,107,409,4.6,-2.1,0,10,.625,Baltimore Ravens+,2014
171 | 0.9,9,NFL,-2.4,-4.8,337,-38,299,-3.9,-1.5,0,7,.438,Cleveland Browns,2014
172 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2014
173 | -0.8,5,NFL,5.6,5.2,369,89,458,4.4,-1.1,0,11,.688,Indianapolis Colts*,2014
174 | 2.5,7,NFL,4.1,-0.8,307,65,372,1.7,-2.3,0,9,.563,Houston Texans,2014
175 | -2.7,13,NFL,-10.2,-7.8,412,-163,249,-10.5,-0.3,0,3,.188,Jacksonville Jaguars,2014
176 | -4.9,14,NFL,-11.5,-7.0,438,-184,254,-11.8,-0.3,0,2,.125,Tennessee Titans,2014
177 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2014
178 | 0.4,4,NFL,8.0,9.2,354,128,482,9.6,1.6,0,12,.750,Denver Broncos*,2014
179 | 5.7,7,NFL,4.5,0.0,281,72,353,5.7,1.2,0,9,.563,Kansas City Chiefs,2014
180 | 1.2,7,NFL,0.0,0.7,348,0,348,1.9,1.9,0,9,.563,San Diego Chargers,2014
181 | -4.7,13,NFL,-12.4,-4.3,452,-199,253,-9.0,3.4,0,3,.188,Oakland Raiders,2014
182 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2014
183 | 0.1,4,NFL,7.2,5.3,352,115,467,5.4,-1.8,0,12,.750,Dallas Cowboys*,2014
184 | -2.7,6,NFL,4.6,6.6,400,74,474,3.9,-0.7,0,10,.625,Philadelphia Eagles,2014
185 | -2.5,10,NFL,-1.3,0.8,400,-20,380,-1.7,-0.4,0,6,.375,New York Giants,2014
186 | -4.7,12,NFL,-8.6,-4.0,438,-137,301,-8.7,-0.2,0,4,.250,Washington Redskins,2014
187 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2014
188 | 0.4,4,NFL,8.6,7.9,348,138,486,8.3,-0.3,0,12,.750,Green Bay Packers*,2014
189 | 5.2,5,NFL,2.4,-3.2,282,39,321,2.1,-0.4,0,11,.688,Detroit Lions+,2014
190 | 1.1,9,NFL,-1.1,-2.8,343,-18,325,-1.7,-0.5,0,7,.438,Minnesota Vikings,2014
191 | -4.7,11,NFL,-7.7,-2.0,442,-123,319,-6.7,1.0,0,5,.313,Chicago Bears,2014
192 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2014
193 | -0.7,8,NFL,-2.2,-2.4,374,-35,339,-3.1,-0.9,1,7,.469,Carolina Panthers*,2014
194 | -4.8,9,NFL,-1.4,1.9,424,-23,401,-2.9,-1.5,0,7,.438,New Orleans Saints,2014
195 | -4.4,10,NFL,-2.3,0.6,417,-36,381,-3.8,-1.6,0,6,.375,Atlanta Falcons,2014
196 | -3.3,14,NFL,-8.3,-6.5,410,-133,277,-9.8,-1.5,0,2,.125,Tampa Bay Buccaneers,2014
197 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2014
198 | 7.1,4,NFL,8.8,2.4,254,140,394,9.5,0.8,0,12,.750,Seattle Seahawks*,2014
199 | 4.4,5,NFL,0.7,-2.4,299,11,310,2.0,1.3,0,11,.688,Arizona Cardinals+,2014
200 | 2.1,8,NFL,-2.1,-3.0,340,-34,306,-1.0,1.2,0,8,.500,San Francisco 49ers,2014
201 | 0.4,10,NFL,-1.9,-1.2,354,-30,324,-0.8,1.0,0,6,.375,St. Louis Rams,2014
202 |
--------------------------------------------------------------------------------
/Assignment_4/assignment4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "48770f8b5f5d3062d3badd51fcafc401",
10 | "grade": false,
11 | "grade_id": "cell-a6c4f74309fc2379",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 4\n",
19 | "## Description\n",
20 | "In this assignment you must read in a file of metropolitan regions and associated sports teams from [assets/wikipedia_data.html](assets/wikipedia_data.html) and answer some questions about each metropolitan region. Each of these regions may have one or more teams from the \"Big 4\": NFL (football, in [assets/nfl.csv](assets/nfl.csv)), MLB (baseball, in [assets/mlb.csv](assets/mlb.csv)), NBA (basketball, in [assets/nba.csv](assets/nba.csv)), or NHL (hockey, in [assets/nhl.csv](assets/nhl.csv)). Please keep in mind that all questions are from the perspective of the metropolitan region, and that this file is the \"source of authority\" for the location of a given sports team. Thus teams which are commonly known by a different area (e.g. \"Oakland Raiders\") need to be mapped into the metropolitan region given (e.g. San Francisco Bay Area). This will require some human data understanding outside of the data you've been given (e.g. you will have to hand-code some names, and might need to google to find out where teams are)!\n",
21 | "\n",
22 | "For each sport I would like you to answer the question: **what is the win/loss ratio's correlation with the population of the city it is in?** Win/Loss ratio refers to the number of wins over the number of wins plus the number of losses. Remember that you are to calculate the correlation with [`pearsonr`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html), so you are going to send in two ordered lists of values: the populations from the wikipedia_data.html file and the win/loss ratio for a given sport in the same order. Average the win/loss ratios for those cities which have multiple teams of a single sport. Each sport is worth an equal share (20%\\*4=80%) of the grade for this assignment. You should only use data **from year 2018** for your analysis -- this is important!\n",
23 | "\n",
24 | "## Notes\n",
25 | "\n",
26 | "1. Do not include data about the MLS or CFL in any of the work you are doing, we're only interested in the Big 4 in this assignment.\n",
27 | "2. I highly suggest that you first tackle the four correlation questions in order, as they are all similar and worth the majority of grades for this assignment. This is by design!\n",
28 | "3. It's fair game to talk with peers about high level strategy as well as the relationship between metropolitan areas and sports teams. However, do not post code solving aspects of the assignment (including things such as dictionaries mapping areas to teams, or regexes which will clean up names).\n",
29 | "4. There may be more teams than the assert statements test, remember to collapse multiple teams in one city into a single value!"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {
35 | "deletable": false,
36 | "editable": false,
37 | "nbgrader": {
38 | "checksum": "369ff9ecf0ee04640574205cbc697f94",
39 | "grade": false,
40 | "grade_id": "cell-712b2b5da63d4505",
41 | "locked": true,
42 | "schema_version": 1,
43 | "solution": false
44 | }
45 | },
46 | "source": [
47 | "## Question 1\n",
48 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NHL** using **2018** data."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 1,
54 | "metadata": {
55 | "deletable": false,
56 | "nbgrader": {
57 | "checksum": "1cac4803b02502929f5b1612d48db2b5",
58 | "grade": false,
59 | "grade_id": "cell-69b16e4386e58030",
60 | "locked": false,
61 | "schema_version": 1,
62 | "solution": true
63 | }
64 | },
65 | "outputs": [],
66 | "source": [
67 | "import pandas as pd\n",
68 | "import numpy as np\n",
69 | "import scipy.stats as stats\n",
70 | "import re\n",
71 | "\n",
72 | "\n",
73 | "\n",
74 | "def nhl_correlation():\n",
75 | "    \"\"\"Return Pearson's r between metro-area population and NHL win/loss ratio.\n",
76 | "\n",
77 | "    Reads assets/nhl.csv and the second table of assets/wikipedia_data.html,\n",
78 | "    maps every team to its metropolitan area, averages W/(W+L) over teams\n",
79 | "    that share an area, and correlates the result with the population column.\n",
80 | "\n",
81 | "    Raises AssertionError if the number of areas analysed is not 28.\n",
82 | "    \"\"\"\n",
83 | "    nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
84 | "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
85 | "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
86 | "\n",
87 | "    # Hard-coded row labels: presumably the division header rows at the top of\n",
88 | "    # nhl.csv and the areas without an NHL team -- TODO confirm against the data.\n",
89 | "    # axis is passed by keyword; pandas >= 2.0 no longer accepts it positionally.\n",
90 | "    nhl_df = nhl_df.drop([0,9,18,26], axis=0)\n",
91 | "    cities = cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50], axis=0)\n",
92 | "\n",
93 | "    # Keep only the text before the first '[' of every NHL cell.\n",
94 | "    cities['NHL'] = [cell.split('[')[0] for cell in cities['NHL']]\n",
95 | "    # Keep only the first run of characters that are not '*' in every team name.\n",
96 | "    nhl_df['team'] = [re.findall(\"[^*]+\", name)[0] for name in nhl_df['team']]\n",
97 | "\n",
98 | "    # copy() so the new column is written to an independent frame, not a slice.\n",
99 | "    nhl_df = nhl_df.head(31).copy()\n",
100 | "    nhl_df['team_ville'] = nhl_df['team'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n",
101 | "        'Boston Bruins':'Boston',\n",
102 | "        'Toronto Maple Leafs':'Toronto',\n",
103 | "        'Florida Panthers':'Miami–Fort Lauderdale',\n",
104 | "        'Detroit Red Wings':'Detroit',\n",
105 | "        'Montreal Canadiens':'Montreal',\n",
106 | "        'Ottawa Senators':'Ottawa',\n",
107 | "        'Buffalo Sabres':'Buffalo',\n",
108 | "        'Washington Capitals':'Washington, D.C.',\n",
109 | "        'Pittsburgh Penguins':'Pittsburgh',\n",
110 | "        'Philadelphia Flyers':'Philadelphia',\n",
111 | "        'Columbus Blue Jackets':'Columbus',\n",
112 | "        'New Jersey Devils':'New York City',\n",
113 | "        'Carolina Hurricanes':'Raleigh',\n",
114 | "        'New York Islanders':'New York City',\n",
115 | "        'New York Rangers':'New York City',\n",
116 | "        'Nashville Predators':'Nashville',\n",
117 | "        'Winnipeg Jets':'Winnipeg',\n",
118 | "        'Minnesota Wild':'Minneapolis–Saint Paul',\n",
119 | "        'Colorado Avalanche':'Denver',\n",
120 | "        'St. Louis Blues':'St. Louis',\n",
121 | "        'Dallas Stars':'Dallas–Fort Worth',\n",
122 | "        'Chicago Blackhawks':'Chicago',\n",
123 | "        'Vegas Golden Knights':'Las Vegas',\n",
124 | "        'Anaheim Ducks':'Los Angeles',\n",
125 | "        'San Jose Sharks':'San Francisco Bay Area',\n",
126 | "        'Los Angeles Kings':'Los Angeles',\n",
127 | "        'Calgary Flames':'Calgary',\n",
128 | "        'Edmonton Oilers':'Edmonton',\n",
129 | "        'Vancouver Canucks':'Vancouver',\n",
130 | "        'Arizona Coyotes':'Phoenix'})\n",
131 | "\n",
132 | "    df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
133 | "\n",
134 | "    df['W'] = pd.to_numeric(df['W'])\n",
135 | "    df['L'] = pd.to_numeric(df['L'])\n",
136 | "    df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n",
137 | "\n",
138 | "    # copy() again, for the same reason, before the ratio column is added.\n",
139 | "    df = df[['team','W','L','Metropolitan area','Population (2016 est.)[8]']].copy()\n",
140 | "    df['W/L'] = df['W']/(df['L']+df['W'])\n",
141 | "\n",
142 | "    # numeric_only=True skips the text column 'team', which pandas >= 2.0 refuses to average.\n",
143 | "    df = df.groupby('Metropolitan area').mean(numeric_only=True).reset_index()\n",
144 | "\n",
145 | "    # Both series are columns of the same grouped frame, so they line up row by row.\n",
146 | "    population_by_region = df['Population (2016 est.)[8]']\n",
147 | "    win_loss_by_region = df['W/L']\n",
148 | "\n",
149 | "    assert len(population_by_region) == len(win_loss_by_region), \"Q1: Your lists must be the same length\"\n",
150 | "    assert len(population_by_region) == 28, \"Q1: There should be 28 teams being analysed for NHL\"\n",
151 | "\n",
152 | "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {
159 | "deletable": false,
160 | "editable": false,
161 | "nbgrader": {
162 | "checksum": "52a581df513c71153e105b93764cda4b",
163 | "grade": true,
164 | "grade_id": "cell-ebe0b2dfe1067e63",
165 | "locked": true,
166 | "points": 20,
167 | "schema_version": 1,
168 | "solution": false
169 | }
170 | },
171 | "outputs": [],
172 | "source": []
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {
177 | "deletable": false,
178 | "editable": false,
179 | "nbgrader": {
180 | "checksum": "988912cae4968d81473f46d783e79c16",
181 | "grade": false,
182 | "grade_id": "cell-cb964e690298b71d",
183 | "locked": true,
184 | "schema_version": 1,
185 | "solution": false
186 | }
187 | },
188 | "source": [
189 | "## Question 2\n",
190 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NBA** using **2018** data."
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 2,
196 | "metadata": {
197 | "deletable": false,
198 | "nbgrader": {
199 | "checksum": "9394222aafc8ccab0a228098ba0d6010",
200 | "grade": false,
201 | "grade_id": "cell-5a5f21279e3d3572",
202 | "locked": false,
203 | "schema_version": 1,
204 | "solution": true
205 | }
206 | },
207 | "outputs": [],
208 | "source": [
209 | "import pandas as pd\n",
210 | "import numpy as np\n",
211 | "import scipy.stats as stats\n",
212 | "import re\n",
213 | "\n",
214 | "\n",
215 | "\n",
216 | "def nba_correlation():\n",
217 | "    \"\"\"Return Pearson's r between metro-area population and NBA win/loss ratio.\n",
218 | "\n",
219 | "    Reads assets/nba.csv and the second table of assets/wikipedia_data.html,\n",
220 | "    maps every team to its metropolitan area, averages W/(W+L) over teams\n",
221 | "    that share an area, and correlates the result with the population column.\n",
222 | "\n",
223 | "    Raises AssertionError if the number of areas analysed is not 28.\n",
224 | "    \"\"\"\n",
225 | "    nba_df=pd.read_csv(\"assets/nba.csv\")\n",
226 | "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
227 | "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
228 | "\n",
229 | "    # Hard-coded row labels: presumably the areas without an NBA team --\n",
230 | "    # TODO confirm against wikipedia_data.html.\n",
231 | "    # axis is passed by keyword; pandas >= 2.0 no longer accepts it positionally.\n",
232 | "    cities = cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50], axis=0)\n",
233 | "\n",
234 | "    # Normalise team names in three steps:\n",
235 | "    #   1. keep the text before the first '*',\n",
236 | "    nba_df['team'] = [name.split('*')[0] for name in nba_df['team']]\n",
237 | "    #   2. keep the text before the first '(',\n",
238 | "    nba_df['team'] = [name.split('(')[0] for name in nba_df['team']]\n",
239 | "    #   3. strip trailing whitespace.\n",
240 | "    nba_df['team'] = [name.rstrip() for name in nba_df['team']]\n",
241 | "\n",
242 | "    # copy() so the new column is written to an independent frame, not a slice.\n",
243 | "    nba_df = nba_df.head(30).copy()\n",
244 | "\n",
245 | "    # Hand-coded lookup from team name to metropolitan area.\n",
246 | "    nba_df['team_ville'] = nba_df['team'].map({'Toronto Raptors':'Toronto',\n",
247 | "        'Boston Celtics':'Boston',\n",
248 | "        'Philadelphia 76ers':'Philadelphia',\n",
249 | "        'Cleveland Cavaliers':'Cleveland',\n",
250 | "        'Indiana Pacers':'Indianapolis',\n",
251 | "        'Miami Heat':'Miami–Fort Lauderdale',\n",
252 | "        'Milwaukee Bucks':'Milwaukee',\n",
253 | "        'Washington Wizards':'Washington, D.C.',\n",
254 | "        'Detroit Pistons':'Detroit',\n",
255 | "        'Charlotte Hornets':'Charlotte',\n",
256 | "        'New York Knicks':'New York City',\n",
257 | "        'Brooklyn Nets':'New York City',\n",
258 | "        'Chicago Bulls':'Chicago',\n",
259 | "        'Orlando Magic':'Orlando',\n",
260 | "        'Atlanta Hawks':'Atlanta',\n",
261 | "        'Houston Rockets':'Houston',\n",
262 | "        'Golden State Warriors':'San Francisco Bay Area',\n",
263 | "        'Portland Trail Blazers':'Portland',\n",
264 | "        'Oklahoma City Thunder':'Oklahoma City',\n",
265 | "        'Utah Jazz':'Salt Lake City',\n",
266 | "        'New Orleans Pelicans':'New Orleans',\n",
267 | "        'San Antonio Spurs':'San Antonio',\n",
268 | "        'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n",
269 | "        'Denver Nuggets':'Denver',\n",
270 | "        'Los Angeles Clippers':'Los Angeles',\n",
271 | "        'Los Angeles Lakers':'Los Angeles',\n",
272 | "        'Sacramento Kings':'Sacramento',\n",
273 | "        'Dallas Mavericks':'Dallas–Fort Worth',\n",
274 | "        'Memphis Grizzlies':'Memphis',\n",
275 | "        'Phoenix Suns':'Phoenix'})\n",
276 | "\n",
277 | "    df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
278 | "\n",
279 | "    df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
280 | "    df2['W'] = pd.to_numeric(df2['W'])\n",
281 | "    df2['L'] = pd.to_numeric(df2['L'])\n",
282 | "    df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n",
283 | "\n",
284 | "    # copy() again, for the same reason, before the ratio column is added.\n",
285 | "    df2 = df2[['team','W','L','W/L%','Metropolitan area','Population (2016 est.)[8]']].copy()\n",
286 | "    df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n",
287 | "\n",
288 | "    # numeric_only=True skips the text column 'team', which pandas >= 2.0 refuses to average.\n",
289 | "    df2 = df2.groupby('Metropolitan area').mean(numeric_only=True).reset_index()\n",
290 | "\n",
291 | "    # Both series are columns of the same grouped frame, so they line up row by row.\n",
292 | "    population_by_region = df2['Population (2016 est.)[8]']\n",
293 | "    win_loss_by_region = df2['W/L']\n",
294 | "\n",
295 | "    assert len(population_by_region) == len(win_loss_by_region), \"Q2: Your lists must be the same length\"\n",
296 | "    assert len(population_by_region) == 28, \"Q2: There should be 28 teams being analysed for NBA\"\n",
297 | "\n",
298 | "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {
305 | "deletable": false,
306 | "editable": false,
307 | "nbgrader": {
308 | "checksum": "bbdeb8eb22f525a34c10dc8798324e42",
309 | "grade": true,
310 | "grade_id": "cell-e573b2b4a282b470",
311 | "locked": true,
312 | "points": 20,
313 | "schema_version": 1,
314 | "solution": false
315 | }
316 | },
317 | "outputs": [],
318 | "source": []
319 | },
320 | {
321 | "cell_type": "markdown",
322 | "metadata": {
323 | "deletable": false,
324 | "editable": false,
325 | "nbgrader": {
326 | "checksum": "1a1a5809f675ca033086422007cd73bd",
327 | "grade": false,
328 | "grade_id": "cell-96e15e4335df78f4",
329 | "locked": true,
330 | "schema_version": 1,
331 | "solution": false
332 | }
333 | },
334 | "source": [
335 | "## Question 3\n",
336 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **MLB** using **2018** data."
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 3,
342 | "metadata": {
343 | "deletable": false,
344 | "nbgrader": {
345 | "checksum": "27e8c0da6c9fa0dffc10488314335b6c",
346 | "grade": false,
347 | "grade_id": "cell-33b00fc3f3467b0c",
348 | "locked": false,
349 | "schema_version": 1,
350 | "solution": true
351 | }
352 | },
353 | "outputs": [],
354 | "source": [
355 | "import pandas as pd\n",
356 | "import numpy as np\n",
357 | "import scipy.stats as stats\n",
358 | "import re\n",
359 | "\n",
360 | "\n",
361 | "def mlb_correlation():\n",
362 | "    \"\"\"Return Pearson's r between metro-area population and MLB win/loss ratio.\n",
363 | "\n",
364 | "    Teams from assets/mlb.csv are mapped to the areas in the second table of\n",
365 | "    assets/wikipedia_data.html and W/(W+L) is averaged per area.\n",
366 | "    Raises AssertionError if the number of areas analysed is not 26.\n",
367 | "    \"\"\"\n",
368 | "    mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
369 | "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
370 | "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
371 | "\n",
372 | "    # Hard-coded row labels (presumably the areas without an MLB team -- TODO confirm).\n",
373 | "    # axis is passed by keyword; pandas >= 2.0 no longer accepts it positionally.\n",
374 | "    cities = cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50], axis=0)\n",
375 | "    # copy() so the new column is written to an independent frame, not a slice.\n",
376 | "    mlb_df = mlb_df.head(30).copy()\n",
377 | "    mlb_df['team_ville'] = mlb_df['team'].map({'Boston Red Sox':'Boston',\n",
378 | "        'New York Yankees':'New York City',\n",
379 | "        'Tampa Bay Rays':'Tampa Bay Area',\n",
380 | "        'Toronto Blue Jays':'Toronto',\n",
381 | "        'Baltimore Orioles':'Baltimore',\n",
382 | "        'Cleveland Indians':'Cleveland',\n",
383 | "        'Minnesota Twins':'Minneapolis–Saint Paul',\n",
384 | "        'Detroit Tigers':'Detroit',\n",
385 | "        'Chicago White Sox':'Chicago',\n",
386 | "        'Kansas City Royals':'Kansas City',\n",
387 | "        'Houston Astros':'Houston',\n",
388 | "        'Oakland Athletics':'San Francisco Bay Area',\n",
389 | "        'Seattle Mariners':'Seattle',\n",
390 | "        'Los Angeles Angels':'Los Angeles',\n",
391 | "        'Texas Rangers':'Dallas–Fort Worth',\n",
392 | "        'Atlanta Braves':'Atlanta',\n",
393 | "        'Washington Nationals':'Washington, D.C.',\n",
394 | "        'Philadelphia Phillies':'Philadelphia',\n",
395 | "        'New York Mets':'New York City',\n",
396 | "        'Miami Marlins':'Miami–Fort Lauderdale',\n",
397 | "        'Milwaukee Brewers':'Milwaukee',\n",
398 | "        'Chicago Cubs':'Chicago',\n",
399 | "        'St. Louis Cardinals':'St. Louis',\n",
400 | "        'Pittsburgh Pirates':'Pittsburgh',\n",
401 | "        'Cincinnati Reds':'Cincinnati',\n",
402 | "        'Los Angeles Dodgers':'Los Angeles',\n",
403 | "        'Colorado Rockies':'Denver',\n",
404 | "        'Arizona Diamondbacks':'Phoenix',\n",
405 | "        'San Francisco Giants':'San Francisco Bay Area',\n",
406 | "        'San Diego Padres':'San Diego'})\n",
407 | "\n",
408 | "    df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
409 | "    df3['W'] = pd.to_numeric(df3['W'])\n",
410 | "    df3['L'] = pd.to_numeric(df3['L'])\n",
411 | "    df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n",
412 | "    df3 = df3[['team','W','L','Metropolitan area','Population (2016 est.)[8]']].copy()\n",
413 | "    df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n",
414 | "    # numeric_only=True skips the text column 'team', which pandas >= 2.0 refuses to average.\n",
415 | "    df3 = df3.groupby('Metropolitan area').mean(numeric_only=True).reset_index()\n",
416 | "\n",
417 | "    population_by_region = df3['Population (2016 est.)[8]']\n",
418 | "    win_loss_by_region = df3['W/L']\n",
419 | "\n",
420 | "    assert len(population_by_region) == len(win_loss_by_region), \"Q3: Your lists must be the same length\"\n",
421 | "    assert len(population_by_region) == 26, \"Q3: There should be 26 teams being analysed for MLB\"\n",
422 | "\n",
423 | "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": null,
429 | "metadata": {
430 | "deletable": false,
431 | "editable": false,
432 | "nbgrader": {
433 | "checksum": "cda33b094ba19ccc37a481e0dd29e0bc",
434 | "grade": true,
435 | "grade_id": "cell-764d4476f425c5a2",
436 | "locked": true,
437 | "points": 20,
438 | "schema_version": 1,
439 | "solution": false
440 | }
441 | },
442 | "outputs": [],
443 | "source": []
444 | },
445 | {
446 | "cell_type": "markdown",
447 | "metadata": {
448 | "deletable": false,
449 | "editable": false,
450 | "nbgrader": {
451 | "checksum": "6977a6da9ed6d8b7a0b7e37bbeda709b",
452 | "grade": false,
453 | "grade_id": "cell-793df6c04dfb126e",
454 | "locked": true,
455 | "schema_version": 1,
456 | "solution": false
457 | }
458 | },
459 | "source": [
460 | "## Question 4\n",
461 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NFL** using **2018** data."
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 4,
467 | "metadata": {
468 | "deletable": false,
469 | "nbgrader": {
470 | "checksum": "c4914ad1e119278ec2bd567c52640b66",
471 | "grade": false,
472 | "grade_id": "cell-8ccebc209aeec8d9",
473 | "locked": false,
474 | "schema_version": 1,
475 | "solution": true
476 | }
477 | },
478 | "outputs": [],
479 | "source": [
480 | "import pandas as pd\n",
481 | "import numpy as np\n",
482 | "import scipy.stats as stats\n",
483 | "import re\n",
484 | "\n",
485 | "\n",
486 | "\n",
487 | "def nfl_correlation():\n",
488 | "    \"\"\"Return Pearson's r between metro-area population and NFL win/loss ratio.\n",
489 | "\n",
490 | "    Reads assets/nfl.csv and the second table of assets/wikipedia_data.html,\n",
491 | "    maps every team to its metropolitan area, averages W/(W+L) over teams\n",
492 | "    that share an area, and correlates the result with the population column.\n",
493 | "\n",
494 | "    Raises AssertionError if the number of areas analysed is not 29.\n",
495 | "    \"\"\"\n",
496 | "    nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
497 | "    cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
498 | "    cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
499 | "\n",
500 | "    # Hard-coded row labels: presumably the division header rows (such as 'AFC East')\n",
501 | "    # at the top of nfl.csv and the areas without an NFL team -- TODO confirm.\n",
502 | "    # axis is passed by keyword; pandas >= 2.0 no longer accepts it positionally.\n",
503 | "    nfl_df = nfl_df.drop([0,5,10,15,20,25,30,35], axis=0)\n",
504 | "    cities = cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50], axis=0)\n",
505 | "\n",
506 | "    # Keep only the text before the first '*' and before the first '+' of every team name.\n",
507 | "    nfl_df['team'] = [name.split('*')[0].split('+')[0] for name in nfl_df['team']]\n",
508 | "\n",
509 | "    # copy() so the new column is written to an independent frame, not a slice.\n",
510 | "    nfl_df = nfl_df.head(32).copy()\n",
511 | "\n",
512 | "    # Hand-coded lookup from team name to metropolitan area.\n",
513 | "    nfl_df['team_ville'] = nfl_df['team'].map({'New England Patriots':'Boston',\n",
514 | "        'Miami Dolphins':'Miami–Fort Lauderdale',\n",
515 | "        'Buffalo Bills':'Buffalo',\n",
516 | "        'New York Jets':'New York City',\n",
517 | "        'Baltimore Ravens':'Baltimore',\n",
518 | "        'Pittsburgh Steelers':'Pittsburgh',\n",
519 | "        'Cleveland Browns':'Cleveland',\n",
520 | "        'Cincinnati Bengals':'Cincinnati',\n",
521 | "        'Houston Texans':'Houston',\n",
522 | "        'Indianapolis Colts':'Indianapolis',\n",
523 | "        'Tennessee Titans':'Nashville',\n",
524 | "        'Jacksonville Jaguars':'Jacksonville',\n",
525 | "        'Kansas City Chiefs':'Kansas City',\n",
526 | "        'Los Angeles Chargers':'Los Angeles',\n",
527 | "        'Denver Broncos':'Denver',\n",
528 | "        'Oakland Raiders':'San Francisco Bay Area',\n",
529 | "        'Dallas Cowboys':'Dallas–Fort Worth',\n",
530 | "        'Philadelphia Eagles':'Philadelphia',\n",
531 | "        'Washington Redskins':'Washington, D.C.',\n",
532 | "        'New York Giants':'New York City',\n",
533 | "        'Chicago Bears':'Chicago',\n",
534 | "        'Minnesota Vikings':'Minneapolis–Saint Paul',\n",
535 | "        'Green Bay Packers':'Green Bay',\n",
536 | "        'Detroit Lions':'Detroit',\n",
537 | "        'New Orleans Saints':'New Orleans',\n",
538 | "        'Carolina Panthers':'Charlotte',\n",
539 | "        'Atlanta Falcons':'Atlanta',\n",
540 | "        'Tampa Bay Buccaneers':'Tampa Bay Area',\n",
541 | "        'Los Angeles Rams':'Los Angeles',\n",
542 | "        'Seattle Seahawks':'Seattle',\n",
543 | "        'San Francisco 49ers':'San Francisco Bay Area',\n",
544 | "        'Arizona Cardinals':'Phoenix'})\n",
545 | "\n",
546 | "    df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
547 | "\n",
548 | "    df4['W'] = pd.to_numeric(df4['W'])\n",
549 | "    df4['L'] = pd.to_numeric(df4['L'])\n",
550 | "    df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n",
551 | "\n",
552 | "    # copy() again, for the same reason, before the ratio column is added.\n",
553 | "    df4 = df4[['team','W','L','Metropolitan area','Population (2016 est.)[8]']].copy()\n",
554 | "    df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n",
555 | "\n",
556 | "    # numeric_only=True skips the text column 'team', which pandas >= 2.0 refuses to average.\n",
557 | "    df4 = df4.groupby('Metropolitan area').mean(numeric_only=True).reset_index()\n",
558 | "\n",
559 | "    # Both series are columns of the same grouped frame, so they line up row by row.\n",
560 | "    population_by_region = df4['Population (2016 est.)[8]']\n",
561 | "    win_loss_by_region = df4['W/L']\n",
562 | "\n",
563 | "    assert len(population_by_region) == len(win_loss_by_region), \"Q4: Your lists must be the same length\"\n",
564 | "    assert len(population_by_region) == 29, \"Q4: There should be 29 teams being analysed for NFL\"\n",
565 | "\n",
566 | "    return stats.pearsonr(population_by_region, win_loss_by_region)[0]"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": null,
572 | "metadata": {
573 | "deletable": false,
574 | "editable": false,
575 | "nbgrader": {
576 | "checksum": "e9415d6399aa49e3a1a60813afdefa3b",
577 | "grade": true,
578 | "grade_id": "cell-de7b148b9554dbda",
579 | "locked": true,
580 | "points": 20,
581 | "schema_version": 1,
582 | "solution": false
583 | }
584 | },
585 | "outputs": [],
586 | "source": []
587 | },
588 | {
589 | "cell_type": "markdown",
590 | "metadata": {
591 | "deletable": false,
592 | "editable": false,
593 | "nbgrader": {
594 | "checksum": "b02d5cd3273f561e4ae939bb2a41740c",
595 | "grade": false,
596 | "grade_id": "cell-97b49d8639e908c4",
597 | "locked": true,
598 | "schema_version": 1,
599 | "solution": false
600 | }
601 | },
602 | "source": [
603 | "## Question 5\n",
604 | "In this question I would like you to explore the hypothesis that **given that an area has two sports teams in different sports, those teams will perform the same within their respective sports**. How I would like to see this explored is with a series of paired t-tests (so use [`ttest_rel`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html)) between all pairs of sports. Are there any sports where we can reject the null hypothesis? Again, average values where a sport has multiple teams in one region. Remember, you will only be including, for each sport, cities which have teams engaged in that sport; drop others as appropriate. This question is worth 20% of the grade for this assignment."
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 6,
610 | "metadata": {
611 | "deletable": false,
612 | "nbgrader": {
613 | "checksum": "6d78c961eb66f8d8c81f06d33ae8f393",
614 | "grade": false,
615 | "grade_id": "cell-92f25f44b8d1179f",
616 | "locked": false,
617 | "schema_version": 1,
618 | "solution": true
619 | }
620 | },
621 | "outputs": [],
622 | "source": [
623 | "import pandas as pd\n",
624 | "import numpy as np\n",
625 | "import scipy.stats as stats\n",
626 | "import re\n",
627 | "\n",
628 | "#mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
629 | "#nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
630 | "#nba_df=pd.read_csv(\"assets/nba.csv\")\n",
631 | "#nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
632 | "#cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
633 | "#cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
634 | "\n",
635 | "def nhl_correla(): \n",
636 | " # YOUR CODE HERE\n",
637 | " #raise NotImplementedError()\n",
638 | " \n",
639 | " nhl_df=pd.read_csv(\"assets/nhl.csv\")\n",
640 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
641 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
642 | " \n",
643 | " nhl_df.drop([0,9,18,26],0,inplace=True)\n",
644 | " cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50],0,inplace=True)\n",
645 | "\n",
646 | " l= []\n",
647 | " for i in cities['NHL']:\n",
648 | " i=i.split('[')\n",
649 | " l.append(i[0])\n",
650 | " cities['NHL'] = l\n",
651 | "\n",
652 | " li = []\n",
653 | " for i in nhl_df['team']:\n",
654 | " i = re.findall(\"[^*]+\", i)\n",
655 | " li.append(i[0])\n",
656 | " nhl_df['team'] = li\n",
657 | "\n",
658 | " nhl_df = nhl_df.head(31)\n",
659 | "\n",
660 | " nhl_df['team_ville'] = nhl_df['team']\n",
661 | " nhl_df['team_ville'] = nhl_df['team_ville'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n",
662 | " 'Boston Bruins':'Boston',\n",
663 | " 'Toronto Maple Leafs':'Toronto',\n",
664 | " 'Florida Panthers':'Miami–Fort Lauderdale',\n",
665 | " 'Detroit Red Wings':'Detroit',\n",
666 | " 'Montreal Canadiens':'Montreal',\n",
667 | " 'Ottawa Senators':'Ottawa',\n",
668 | " 'Buffalo Sabres':'Buffalo',\n",
669 | " 'Washington Capitals':'Washington, D.C.',\n",
670 | " 'Pittsburgh Penguins':'Pittsburgh',\n",
671 | " 'Philadelphia Flyers':'Philadelphia',\n",
672 | " 'Columbus Blue Jackets':'Columbus',\n",
673 | " 'New Jersey Devils':'New York City',\n",
674 | " 'Carolina Hurricanes':'Raleigh',\n",
675 | " 'New York Islanders':'New York City',\n",
676 | " 'New York Rangers':'New York City',\n",
677 | " 'Nashville Predators':'Nashville',\n",
678 | " 'Winnipeg Jets':'Winnipeg',\n",
679 | " 'Minnesota Wild':'Minneapolis–Saint Paul',\n",
680 | " 'Colorado Avalanche':'Denver',\n",
681 | " 'St. Louis Blues':'St. Louis',\n",
682 | " 'Dallas Stars':'Dallas–Fort Worth',\n",
683 | " 'Chicago Blackhawks':'Chicago',\n",
684 | " 'Vegas Golden Knights':'Las Vegas',\n",
685 | " 'Anaheim Ducks':'Los Angeles',\n",
686 | " 'San Jose Sharks':'San Francisco Bay Area',\n",
687 | " 'Los Angeles Kings':'Los Angeles',\n",
688 | " 'Calgary Flames':'Calgary',\n",
689 | " 'Edmonton Oilers':'Edmonton',\n",
690 | " 'Vancouver Canucks':'Vancouver',\n",
691 | " 'Arizona Coyotes':'Phoenix'})\n",
692 | "\n",
693 | " df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
694 | "\n",
695 | " df['W'] = pd.to_numeric(df['W'])\n",
696 | " df['L'] = pd.to_numeric(df['L'])\n",
697 | " df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n",
698 | "\n",
699 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
700 | "\n",
701 | " df = df[he]\n",
702 | "\n",
703 | " df['W/L'] = df['W']/(df['L']+df['W'])\n",
704 | "\n",
705 | " df = df.groupby('Metropolitan area').mean().reset_index()\n",
706 | " \n",
707 | " return df\n",
708 | "\n",
709 | "def nba_correla():\n",
710 | " # YOUR CODE HERE\n",
711 | " #raise NotImplementedError()\n",
712 | " \n",
713 | " nba_df=pd.read_csv(\"assets/nba.csv\")\n",
714 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
715 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
716 | " \n",
717 | " cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50],0,inplace=True)\n",
718 | " \n",
719 | " l1 = []\n",
720 | " for i in nba_df['team']:\n",
721 | " #i=i.rstrip()\n",
722 | " i=i.split('*')\n",
723 | " l1.append(i[0])\n",
724 | " nba_df['team'] = l1\n",
725 | " \n",
726 | " l2 = []\n",
727 | " for i in nba_df['team']:\n",
728 | " i=i.split('(')\n",
729 | " l2.append(i[0])\n",
730 | " nba_df['team'] = l2\n",
731 | " \n",
732 | " l3 = []\n",
733 | " for i in nba_df['team']:\n",
734 | " i=i.rstrip()\n",
735 | " l3.append(i)\n",
736 | " nba_df['team'] = l3\n",
737 | " \n",
738 | " \n",
739 | " \n",
740 | " nba_df = nba_df.head(30)\n",
741 | "\n",
742 | " nba_df['team_ville'] = nba_df['team']\n",
743 | " nba_df['team_ville'] = nba_df['team_ville'].map({'Toronto Raptors':'Toronto',\n",
744 | " 'Boston Celtics':'Boston',\n",
745 | " 'Philadelphia 76ers':'Philadelphia',\n",
746 | " 'Cleveland Cavaliers':'Cleveland',\n",
747 | " 'Indiana Pacers':'Indianapolis',\n",
748 | " 'Miami Heat':'Miami–Fort Lauderdale',\n",
749 | " 'Milwaukee Bucks':'Milwaukee',\n",
750 | " 'Washington Wizards':'Washington, D.C.',\n",
751 | " 'Detroit Pistons':'Detroit',\n",
752 | " 'Charlotte Hornets':'Charlotte',\n",
753 | " 'New York Knicks':'New York City',\n",
754 | " 'Brooklyn Nets':'New York City',\n",
755 | " 'Chicago Bulls':'Chicago',\n",
756 | " 'Orlando Magic':'Orlando',\n",
757 | " 'Atlanta Hawks':'Atlanta',\n",
758 | " 'Houston Rockets':'Houston',\n",
759 | " 'Golden State Warriors':'San Francisco Bay Area',\n",
760 | " 'Portland Trail Blazers':'Portland',\n",
761 | " 'Oklahoma City Thunder':'Oklahoma City',\n",
762 | " 'Utah Jazz':'Salt Lake City',\n",
763 | " 'New Orleans Pelicans':'New Orleans',\n",
764 | " 'San Antonio Spurs':'San Antonio',\n",
765 | " 'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n",
766 | " 'Denver Nuggets':'Denver',\n",
767 | " 'Los Angeles Clippers':'Los Angeles',\n",
768 | " 'Los Angeles Lakers':'Los Angeles',\n",
769 | " 'Sacramento Kings':'Sacramento',\n",
770 | " 'Dallas Mavericks':'Dallas–Fort Worth',\n",
771 | " 'Memphis Grizzlies':'Memphis',\n",
772 | " 'Phoenix Suns':'Phoenix'})\n",
773 | " \n",
774 | " df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
775 | " \n",
776 | " df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
777 | " df2['W'] = pd.to_numeric(df2['W'])\n",
778 | " df2['L'] = pd.to_numeric(df2['L'])\n",
779 | " df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n",
780 | " he = ['team','W','L','W/L%','Metropolitan area','Population (2016 est.)[8]']\n",
781 | " df2 = df2[he]\n",
782 | " df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n",
783 | " df2 = df2.groupby('Metropolitan area').mean().reset_index()\n",
784 | " \n",
785 | " return df2\n",
786 | "\n",
787 | "def mlb_correla(): \n",
788 | " # YOUR CODE HERE\n",
789 | " #raise NotImplementedError()\n",
790 | " \n",
791 | " mlb_df=pd.read_csv(\"assets/mlb.csv\")\n",
792 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
793 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
794 | " \n",
795 | " cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50],0,inplace=True)\n",
796 | " \n",
797 | " mlb_df = mlb_df.head(30)\n",
798 | " \n",
799 | " mlb_df['team_ville'] = mlb_df['team']\n",
800 | " mlb_df['team_ville'] = mlb_df['team_ville'].map({'Boston Red Sox':'Boston',\n",
801 | " 'New York Yankees':'New York City',\n",
802 | " 'Tampa Bay Rays':'Tampa Bay Area',\n",
803 | " 'Toronto Blue Jays':'Toronto',\n",
804 | " 'Baltimore Orioles':'Baltimore',\n",
805 | " 'Cleveland Indians':'Cleveland',\n",
806 | " 'Minnesota Twins':'Minneapolis–Saint Paul',\n",
807 | " 'Detroit Tigers':'Detroit',\n",
808 | " 'Chicago White Sox':'Chicago',\n",
809 | " 'Kansas City Royals':'Kansas City',\n",
810 | " 'Houston Astros':'Houston',\n",
811 | " 'Oakland Athletics':'San Francisco Bay Area',\n",
812 | " 'Seattle Mariners':'Seattle',\n",
813 | " 'Los Angeles Angels':'Los Angeles',\n",
814 | " 'Texas Rangers':'Dallas–Fort Worth',\n",
815 | " 'Atlanta Braves':'Atlanta',\n",
816 | " 'Washington Nationals':'Washington, D.C.',\n",
817 | " 'Philadelphia Phillies':'Philadelphia',\n",
818 | " 'New York Mets':'New York City',\n",
819 | " 'Miami Marlins':'Miami–Fort Lauderdale',\n",
820 | " 'Milwaukee Brewers':'Milwaukee',\n",
821 | " 'Chicago Cubs':'Chicago',\n",
822 | " 'St. Louis Cardinals':'St. Louis',\n",
823 | " 'Pittsburgh Pirates':'Pittsburgh',\n",
824 | " 'Cincinnati Reds':'Cincinnati',\n",
825 | " 'Los Angeles Dodgers':'Los Angeles',\n",
826 | " 'Colorado Rockies':'Denver',\n",
827 | " 'Arizona Diamondbacks':'Phoenix',\n",
828 | " 'San Francisco Giants':'San Francisco Bay Area',\n",
829 | " 'San Diego Padres':'San Diego'})\n",
830 | " \n",
831 | " df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
832 | " \n",
833 | " #df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n",
834 | " df3['W'] = pd.to_numeric(df3['W'])\n",
835 | " df3['L'] = pd.to_numeric(df3['L'])\n",
836 | " df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n",
837 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
838 | " df3 = df3[he]\n",
839 | " df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n",
840 | " df3 = df3.groupby('Metropolitan area').mean().reset_index()\n",
841 | " \n",
842 | " return df3\n",
843 | "\n",
844 | "def nfl_correla(): \n",
845 | " # YOUR CODE HERE\n",
846 | " #raise NotImplementedError()\n",
847 | " \n",
848 | " nfl_df=pd.read_csv(\"assets/nfl.csv\")\n",
849 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n",
850 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n",
851 | " \n",
852 | " nfl_df.drop([0,5,10,15,20,25,30,35],0,inplace=True)\n",
853 | " \n",
854 | " cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50],0,inplace=True)\n",
855 | " \n",
856 | " l1 = []\n",
857 | " for i in nfl_df['team']:\n",
858 | " #i=i.rstrip()\n",
859 | " i=i.split('*')\n",
860 | " l1.append(i[0])\n",
861 | " nfl_df['team'] = l1\n",
862 | " \n",
863 | " l2 = []\n",
864 | " for i in nfl_df['team']:\n",
865 | " i=i.split('+')\n",
866 | " l2.append(i[0])\n",
867 | " nfl_df['team'] = l2\n",
868 | " \n",
869 | " nfl_df = nfl_df.head(32)\n",
870 | " \n",
871 | " nfl_df['team_ville'] = nfl_df['team']\n",
872 | " nfl_df['team_ville'] = nfl_df['team_ville'].map({'New England Patriots':'Boston',\n",
873 | " 'Miami Dolphins':'Miami–Fort Lauderdale',\n",
874 | " 'Buffalo Bills':'Buffalo',\n",
875 | " 'New York Jets':'New York City',\n",
876 | " 'Baltimore Ravens':'Baltimore',\n",
877 | " 'Pittsburgh Steelers':'Pittsburgh',\n",
878 | " 'Cleveland Browns':'Cleveland',\n",
879 | " 'Cincinnati Bengals':'Cincinnati',\n",
880 | " 'Houston Texans':'Houston',\n",
881 | " 'Indianapolis Colts':'Indianapolis',\n",
882 | " 'Tennessee Titans':'Nashville',\n",
883 | " 'Jacksonville Jaguars':'Jacksonville',\n",
884 | " 'Kansas City Chiefs':'Kansas City',\n",
885 | " 'Los Angeles Chargers':'Los Angeles',\n",
886 | " 'Denver Broncos':'Denver',\n",
887 | " 'Oakland Raiders':'San Francisco Bay Area',\n",
888 | " 'Dallas Cowboys':'Dallas–Fort Worth',\n",
889 | " 'Philadelphia Eagles':'Philadelphia',\n",
890 | " 'Washington Redskins':'Washington, D.C.',\n",
891 | " 'New York Giants':'New York City',\n",
892 | " 'Chicago Bears':'Chicago',\n",
893 | " 'Minnesota Vikings':'Minneapolis–Saint Paul',\n",
894 | " 'Green Bay Packers':'Green Bay',\n",
895 | " 'Detroit Lions':'Detroit',\n",
896 | " 'New Orleans Saints':'New Orleans',\n",
897 | " 'Carolina Panthers':'Charlotte',\n",
898 | " 'Atlanta Falcons':'Atlanta',\n",
899 | " 'Tampa Bay Buccaneers':'Tampa Bay Area',\n",
900 | " 'Los Angeles Rams':'Los Angeles',\n",
901 | " 'Seattle Seahawks':'Seattle',\n",
902 | " 'San Francisco 49ers':'San Francisco Bay Area',\n",
903 | " 'Arizona Cardinals':'Phoenix'}) \n",
904 | " \n",
905 | " df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n",
906 | " \n",
907 | " \n",
908 | " df4['W'] = pd.to_numeric(df4['W'])\n",
909 | " df4['L'] = pd.to_numeric(df4['L'])\n",
910 | " df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n",
911 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n",
912 | " df4 = df4[he]\n",
913 | " df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n",
914 | " df4 = df4.groupby('Metropolitan area').mean().reset_index()\n",
915 | " \n",
916 | " return df4\n",
917 | "\n",
918 | "\n",
919 | "\n",
920 | "\n",
921 | "\n",
922 | "def sports_team_performance():\n",
923 | " # YOUR CODE HERE\n",
924 | " #raise NotImplementedError()\n",
925 | " \n",
926 | " nfl = nfl_correla()\n",
927 | " nba = nba_correla()\n",
928 | " mlb = mlb_correla()\n",
929 | " nhl = nhl_correla()\n",
930 | "\n",
931 | " nba_nfl = pd.merge(nba,nfl, on='Metropolitan area')\n",
932 | " pval_nba_nfl = stats.ttest_rel(nba_nfl['W/L_x'],nba_nfl['W/L_y'])[1]\n",
933 | " nba_nhl = pd.merge(nba,nhl, on='Metropolitan area')\n",
934 | " pval_nba_nhl = stats.ttest_rel(nba_nhl['W/L_x'],nba_nhl['W/L_y'])[1]\n",
935 | " mlb_nfl = pd.merge(mlb,nfl, on='Metropolitan area')\n",
936 | " pval_mlb_nfl = stats.ttest_rel(mlb_nfl['W/L_x'],mlb_nfl['W/L_y'])[1]\n",
937 | " mlb_nhl = pd.merge(mlb,nhl, on='Metropolitan area')\n",
938 | " pval_mlb_nhl = stats.ttest_rel(mlb_nhl['W/L_x'],mlb_nhl['W/L_y'])[1]\n",
939 | " mlb_nba = pd.merge(mlb,nba, on='Metropolitan area')\n",
940 | " pval_mlb_nba = stats.ttest_rel(mlb_nba['W/L_x'],mlb_nba['W/L_y'])[1]\n",
941 | " nhl_nfl = pd.merge(nhl,nfl, on='Metropolitan area')\n",
942 | " pval_nhl_nfl = stats.ttest_rel(nhl_nfl['W/L_x'],nhl_nfl['W/L_y'])[1]\n",
943 | " \n",
944 | " pv = {'NFL': {\"NFL\": np.nan, 'NBA': pval_nba_nfl, 'NHL': pval_nhl_nfl, 'MLB': pval_mlb_nfl},\n",
945 | " 'NBA': {\"NFL\": pval_nba_nfl, 'NBA': np.nan, 'NHL': pval_nba_nhl, 'MLB': pval_mlb_nba},\n",
946 | " 'NHL': {\"NFL\": pval_nhl_nfl, 'NBA': pval_nba_nhl, 'NHL': np.nan, 'MLB': pval_mlb_nhl},\n",
947 | " 'MLB': {\"NFL\": pval_mlb_nfl, 'NBA': pval_mlb_nba, 'NHL': pval_mlb_nhl, 'MLB': np.nan}\n",
948 | " }\n",
949 | "\n",
950 | " \n",
951 | " # Note: p_values is a full dataframe, so df.loc[\"NFL\",\"NBA\"] should be the same as df.loc[\"NBA\",\"NFL\"] and\n",
952 | " # df.loc[\"NFL\",\"NFL\"] should return np.nan\n",
953 | " #sports = ['NFL', 'NBA', 'NHL', 'MLB']\n",
954 | " #p_values = pd.DataFrame({k:np.nan for k in sports}, index=sports)\n",
955 | " p_values = pd.DataFrame(pv)\n",
956 | " \n",
957 | " assert abs(p_values.loc[\"NBA\", \"NHL\"] - 0.02) <= 1e-2, \"The NBA-NHL p-value should be around 0.02\"\n",
958 | " assert abs(p_values.loc[\"MLB\", \"NFL\"] - 0.80) <= 1e-2, \"The MLB-NFL p-value should be around 0.80\"\n",
959 | " return p_values"
960 | ]
961 | },
962 | {
963 | "cell_type": "code",
964 | "execution_count": null,
965 | "metadata": {
966 | "deletable": false,
967 | "editable": false,
968 | "nbgrader": {
969 | "checksum": "2a596ab421a45cc01168d10e8fbb8f89",
970 | "grade": true,
971 | "grade_id": "cell-fb4b9cb5ff4570a6",
972 | "locked": true,
973 | "points": 20,
974 | "schema_version": 1,
975 | "solution": false
976 | }
977 | },
978 | "outputs": [],
979 | "source": []
980 | }
981 | ],
982 | "metadata": {
983 | "coursera": {
984 | "schema_names": [
985 | "mooc_adswpy_1_v2_assignment4"
986 | ]
987 | },
988 | "kernelspec": {
989 | "display_name": "Python 3",
990 | "language": "python",
991 | "name": "python3"
992 | },
993 | "language_info": {
994 | "codemirror_mode": {
995 | "name": "ipython",
996 | "version": 3
997 | },
998 | "file_extension": ".py",
999 | "mimetype": "text/x-python",
1000 | "name": "python",
1001 | "nbconvert_exporter": "python",
1002 | "pygments_lexer": "ipython3",
1003 | "version": "3.7.3"
1004 | }
1005 | },
1006 | "nbformat": 4,
1007 | "nbformat_minor": 4
1008 | }
1009 |
--------------------------------------------------------------------------------
/Assignment_3/assignment3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": false,
7 | "editable": false,
8 | "nbgrader": {
9 | "checksum": "ab80976c194c2c1bfebb7f3a12fc4e58",
10 | "grade": false,
11 | "grade_id": "cell-018440ed2f1b6a62",
12 | "locked": true,
13 | "schema_version": 1,
14 | "solution": false
15 | }
16 | },
17 | "source": [
18 | "# Assignment 3\n",
19 | "All questions are weighted the same in this assignment. This assignment requires more individual learning than the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. All questions are worth the same number of points except question 1 which is worth 17% of the assignment grade.\n",
20 | "\n",
21 | "**Note**: Questions 2-13 rely on your question 1 answer."
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "import numpy as np\n",
32 | "import re\n",
33 | "\n",
34 | "# Filter all warnings. If you would like to see the warnings, please comment the two lines below.\n",
35 | "import warnings\n",
36 | "warnings.filterwarnings('ignore')"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "deletable": false,
43 | "editable": false,
44 | "nbgrader": {
45 | "checksum": "68063b8b0783f3d8122b516e0cce5f45",
46 | "grade": false,
47 | "grade_id": "cell-7e5190c7ff1f2e42",
48 | "locked": true,
49 | "schema_version": 1,
50 | "solution": false
51 | }
52 | },
53 | "source": [
54 | "### Question 1\n",
55 | "Load the energy data from the file `assets/Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](assets/Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **Energy**.\n",
56 | "\n",
57 | "Keep in mind that this is an Excel file, and not a comma separated values file. Also, make sure to exclude the footer and header information from the datafile. The first two columns are unnecessary, so you should get rid of them, and you should change the column labels so that the columns are:\n",
58 | "\n",
59 | "`['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']`\n",
60 | "\n",
61 | "Convert `Energy Supply` to gigajoules (**Note: there are 1,000,000 gigajoules in a petajoule**). For all countries which have missing data (e.g. data with \"...\") make sure this is reflected as `np.NaN` values.\n",
62 | "\n",
63 | "Rename the following list of countries (for use in later questions):\n",
64 | "\n",
65 | "```\"Republic of Korea\": \"South Korea\",\n",
66 | "\"United States of America\": \"United States\",\n",
67 | "\"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
68 | "\"China, Hong Kong Special Administrative Region\": \"Hong Kong\"```\n",
69 | "\n",
70 | "There are also several countries with numbers and/or parentheses in their names. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`. `'Switzerland17'` should be `'Switzerland'`.\n",
71 | "\n",
72 | "Next, load the GDP data from the file `assets/world_bank.csv`, which is a csv containing countries' GDP from 1960 to 2015 from [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. \n",
73 | "\n",
74 | "Make sure to skip the header, and rename the following list of countries:\n",
75 | "\n",
76 | "```\"Korea, Rep.\": \"South Korea\", \n",
77 | "\"Iran, Islamic Rep.\": \"Iran\",\n",
78 | "\"Hong Kong SAR, China\": \"Hong Kong\"```\n",
79 | "\n",
80 | "Finally, load the [Scimago Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `assets/scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. Call this DataFrame **ScimEn**.\n",
81 | "\n",
82 | "Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). \n",
83 | "\n",
84 | "The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',\n",
85 | " 'Citations per document', 'H index', 'Energy Supply',\n",
86 | " 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008',\n",
87 | " '2009', '2010', '2011', '2012', '2013', '2014', '2015'].\n",
88 | "\n",
89 | "*This function should return a DataFrame with 20 columns and 15 entries, and the rows of the DataFrame should be sorted by \"Rank\".*"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 2,
95 | "metadata": {
96 | "deletable": false,
97 | "nbgrader": {
98 | "checksum": "57e040f07954f979910eddc0f489ffe5",
99 | "grade": false,
100 | "grade_id": "cell-bce4d6f2ecdd1297",
101 | "locked": false,
102 | "schema_version": 1,
103 | "solution": true
104 | }
105 | },
106 | "outputs": [],
107 | "source": [
108 | "def answer_one():\n",
109 | " # YOUR CODE HERE\n",
110 | " Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n",
111 | " Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'],inplace=True)\n",
112 | " Energy.drop(Energy.index[0:17],0,inplace=True)\n",
113 | " Energy.drop(Energy.index[227:],0,inplace=True)\n",
114 | " Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable' }, inplace=True )\n",
115 | " Energy.replace({'...':np.nan}, inplace= True)\n",
116 | " Energy['Energy Supply'] = Energy['Energy Supply']*1000000\n",
117 | " \n",
118 | " l= []\n",
119 | " for i in Energy['Country']:\n",
120 | " i=i.split(' (')\n",
121 | " l.append(i[0])\n",
122 | " Energy['Country'] = l\n",
123 | " \n",
124 | " li = []\n",
125 | " for i in Energy['Country']:\n",
126 | " i = re.findall(\"[^0-9]+\", i)\n",
127 | " li.append(i[0])\n",
128 | " Energy['Country'] = li\n",
129 | " \n",
130 | " Energy.replace({\"Republic of Korea\": \"South Korea\",\n",
131 | " \"United States of America\": \"United States\",\n",
132 | " \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
133 | " \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"}, inplace= True)\n",
134 | " \n",
135 | " GDP = pd.read_csv(\"assets/world_bank.csv\")\n",
136 | " GDP.drop(GDP.index[0:3],0,inplace=True)\n",
137 | " GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"}, inplace=True)\n",
138 | " \n",
139 | " il = GDP.iloc[0]\n",
140 | " di = {}\n",
141 | " i = 0\n",
142 | " for d in GDP.columns:\n",
143 | " if type(il[i]) == np.float64:\n",
144 | " di[d] = str(int(il[i]))\n",
145 | " else:\n",
146 | " di[d] = il[i]\n",
147 | " i += 1\n",
148 | " \n",
149 | " GDP.rename(columns=di, inplace=True)\n",
150 | " GDP.drop(GDP.index[0:1],0,inplace=True)\n",
151 | " GDP.rename(columns={'Country Name': 'Country'}, inplace=True)\n",
152 | " \n",
153 | " ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n",
154 | " \n",
155 | " j1 = pd.merge(ScimEn,Energy)\n",
156 | " j2 = pd.merge(j1, GDP)\n",
157 | " j2.set_index('Country', inplace = True)\n",
158 | " j2 = j2[0:15]\n",
159 | " j2.drop(j2.columns[[np.arange(10,59)]], axis='columns', inplace = True)\n",
160 | " \n",
161 | " return j2\n",
162 | " \n",
163 | " #raise NotImplementedError()"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 3,
169 | "metadata": {
170 | "deletable": false,
171 | "editable": false,
172 | "nbgrader": {
173 | "checksum": "7bcc18b325d2935427ac2566cddd3661",
174 | "grade": true,
175 | "grade_id": "cell-780b5a4da845dbc3",
176 | "locked": true,
177 | "points": 5,
178 | "schema_version": 1,
179 | "solution": false
180 | }
181 | },
182 | "outputs": [],
183 | "source": [
184 | "assert type(answer_one()) == pd.DataFrame, \"Q1: You should return a DataFrame!\"\n",
185 | "\n",
186 | "assert answer_one().shape == (15,20), \"Q1: Your DataFrame should have 20 columns and 15 entries!\"\n"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 4,
192 | "metadata": {
193 | "deletable": false,
194 | "editable": false,
195 | "nbgrader": {
196 | "checksum": "e241830bcf3f63326b4c9cdf50be8f86",
197 | "grade": true,
198 | "grade_id": "cell-74b5f0b971379f64",
199 | "locked": true,
200 | "points": 10,
201 | "schema_version": 1,
202 | "solution": false
203 | }
204 | },
205 | "outputs": [],
206 | "source": [
207 | "# Cell for autograder.\n"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {
213 | "deletable": false,
214 | "editable": false,
215 | "nbgrader": {
216 | "checksum": "596280cd22ed98c5540580c62954ec2f",
217 | "grade": false,
218 | "grade_id": "cell-babe0ff2a1fc6b17",
219 | "locked": true,
220 | "schema_version": 1,
221 | "solution": false
222 | }
223 | },
224 | "source": [
225 | "### Question 2\n",
226 | "The previous question joined three datasets then reduced this to just the top 15 entries. When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose?\n",
227 | "\n",
228 | "*This function should return a single number.*"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 5,
234 | "metadata": {
235 | "deletable": false,
236 | "editable": false,
237 | "nbgrader": {
238 | "checksum": "c9a34da81c80126fd23ae2eac33f98f8",
239 | "grade": false,
240 | "grade_id": "cell-96f84e7b693bef63",
241 | "locked": true,
242 | "schema_version": 1,
243 | "solution": false
244 | }
245 | },
246 | "outputs": [
247 | {
248 | "data": {
249 | "text/html": [
250 | "\n"
257 | ],
258 | "text/plain": [
259 | ""
260 | ]
261 | },
262 | "metadata": {},
263 | "output_type": "display_data"
264 | }
265 | ],
266 | "source": [
267 | "%%HTML\n",
268 | ""
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 6,
280 | "metadata": {
281 | "deletable": false,
282 | "nbgrader": {
283 | "checksum": "aeeb01fb73054269dd7b818d0945e2f7",
284 | "grade": false,
285 | "grade_id": "cell-b0c3202c000aada4",
286 | "locked": false,
287 | "schema_version": 1,
288 | "solution": true
289 | }
290 | },
291 | "outputs": [],
292 | "source": [
293 | "def answer_two():\n",
294 | " # YOUR CODE HERE\n",
295 | " \n",
296 | " Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n",
297 | " Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'],inplace=True)\n",
298 | " Energy.drop(Energy.index[0:17],0,inplace=True)\n",
299 | " Energy.drop(Energy.index[227:],0,inplace=True)\n",
300 | " Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable' }, inplace=True )\n",
301 | " Energy.replace({'...':np.nan}, inplace= True)\n",
302 | " Energy['Energy Supply'] = Energy['Energy Supply']*1000000\n",
303 | " \n",
304 | " l= []\n",
305 | " for i in Energy['Country']:\n",
306 | " i=i.split(' (')\n",
307 | " l.append(i[0])\n",
308 | " Energy['Country'] = l\n",
309 | " \n",
310 | " li = []\n",
311 | " for i in Energy['Country']:\n",
312 | " i = re.findall(\"[^0-9]+\", i)\n",
313 | " li.append(i[0])\n",
314 | " Energy['Country'] = li\n",
315 | " \n",
316 | " Energy.replace({\"Republic of Korea\": \"South Korea\",\n",
317 | " \"United States of America\": \"United States\",\n",
318 | " \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n",
319 | " \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"}, inplace= True)\n",
320 | " \n",
321 | " GDP = pd.read_csv(\"assets/world_bank.csv\")\n",
322 | " GDP.drop(GDP.index[0:3],0,inplace=True)\n",
323 | " GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"}, inplace=True)\n",
324 | " \n",
325 | " il = GDP.iloc[0]\n",
326 | " di = {}\n",
327 | " i = 0\n",
328 | " for d in GDP.columns:\n",
329 | " if type(il[i]) == np.float64:\n",
330 | " di[d] = str(int(il[i]))\n",
331 | " else:\n",
332 | " di[d] = il[i]\n",
333 | " i += 1\n",
334 | " \n",
335 | " GDP.rename(columns=di, inplace=True)\n",
336 | " GDP.drop(GDP.index[0:1],0,inplace=True)\n",
337 | " GDP.rename(columns={'Country Name': 'Country'}, inplace=True)\n",
338 | " \n",
339 | " ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n",
340 | " \n",
341 | " ji = pd.merge(ScimEn,Energy)\n",
342 | " ji = pd.merge(ji, GDP)\n",
343 | " ji.set_index('Country', inplace = True)\n",
344 | " \n",
345 | " j1 = pd.merge(ScimEn,Energy, how=\"outer\")\n",
346 | " j2 = pd.merge(j1, GDP, how=\"outer\")\n",
347 | " j2.set_index('Country', inplace = True)\n",
348 | " \n",
349 | " diff = j2.shape[0] - ji.shape[0]\n",
350 | " \n",
351 | " return diff\n",
352 | " #raise NotImplementedError()"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 7,
358 | "metadata": {
359 | "deletable": false,
360 | "editable": false,
361 | "nbgrader": {
362 | "checksum": "19759b4a6c03f34b647f66d343952619",
363 | "grade": true,
364 | "grade_id": "cell-be24cfcaa87ab071",
365 | "locked": true,
366 | "points": 6.66,
367 | "schema_version": 1,
368 | "solution": false
369 | }
370 | },
371 | "outputs": [],
372 | "source": [
373 | "assert type(answer_two()) == int, \"Q2: You should return an int number!\"\n"
374 | ]
375 | },
376 | {
377 | "cell_type": "markdown",
378 | "metadata": {
379 | "deletable": false,
380 | "editable": false,
381 | "nbgrader": {
382 | "checksum": "5af1b4f99cd383263130f4c00442a133",
383 | "grade": false,
384 | "grade_id": "cell-2e54816014e48c18",
385 | "locked": true,
386 | "schema_version": 1,
387 | "solution": false
388 | }
389 | },
390 | "source": [
391 | "### Question 3\n",
392 | "What are the top 15 countries for average GDP over the last 10 years?\n",
393 | "\n",
394 | "*This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.*"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 8,
400 | "metadata": {
401 | "deletable": false,
402 | "nbgrader": {
403 | "checksum": "a3490fd71a46cecfa3da698e006fe729",
404 | "grade": false,
405 | "grade_id": "cell-8c3d74335c0d489a",
406 | "locked": false,
407 | "schema_version": 1,
408 | "solution": true
409 | }
410 | },
411 | "outputs": [],
412 | "source": [
413 | "def answer_three():\n",
414 | "    \"\"\"Average GDP of each Top15 country over the ten GDP columns, largest first.\n",
415 | "\n",
416 | "    Returns a Series named `avgGDP`, indexed like answer_one().\n",
417 | "    \"\"\"\n",
418 | "    Top15 = answer_one()  # build the merged frame once instead of twice\n",
419 | "    # Columns 10-19 are selected by position; presumably the years 2006-2015 -- confirm against answer_one().\n",
420 | "    avgGDP = Top15.iloc[:, 10:20].mean(axis=1).sort_values(ascending=False)\n",
421 | "    return avgGDP.rename('avgGDP')"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 9,
427 | "metadata": {
428 | "deletable": false,
429 | "editable": false,
430 | "nbgrader": {
431 | "checksum": "2f9c90ee07138f94c027c5d2f907ab13",
432 | "grade": true,
433 | "grade_id": "cell-aaaa11ef7d26f4cf",
434 | "locked": true,
435 | "points": 6.66,
436 | "schema_version": 1,
437 | "solution": false
438 | }
439 | },
440 | "outputs": [],
441 | "source": [
442 | "assert type(answer_three()) == pd.Series, \"Q3: You should return a Series!\"\n"
443 | ]
444 | },
445 | {
446 | "cell_type": "markdown",
447 | "metadata": {
448 | "deletable": false,
449 | "editable": false,
450 | "locked": true
451 | },
452 | "source": [
453 | "### Question 4\n",
454 | "By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP?\n",
455 | "\n",
456 | "*This function should return a single number.*"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 30,
462 | "metadata": {
463 | "deletable": false,
464 | "nbgrader": {
465 | "checksum": "768a19bcc8adc6991fe5c757e95ba784",
466 | "grade": false,
467 | "grade_id": "cell-7f77d099e3e0bbee",
468 | "locked": false,
469 | "schema_version": 1,
470 | "solution": true
471 | }
472 | },
473 | "outputs": [],
474 | "source": [
475 | "def answer_four():\n",
476 | "    \"\"\"GDP change from 2006 to 2015 for the country with the 6th largest average GDP.\n",
477 | "\n",
478 | "    Returns a single number: that country's '2015' value minus its '2006' value.\n",
479 | "    \"\"\"\n",
480 | "    Top15 = answer_one()  # build the merged frame once and reuse it\n",
481 | "    # Derive the country from the data, not a hard-coded name; assumes '2006'..'2015' are the adjacent GDP columns.\n",
482 | "    sixth = Top15.loc[:, '2006':'2015'].mean(axis=1).sort_values(ascending=False).index[5]\n",
483 | "    return Top15.loc[sixth, '2015'] - Top15.loc[sixth, '2006']"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": 11,
489 | "metadata": {
490 | "deletable": false,
491 | "editable": false,
492 | "nbgrader": {
493 | "checksum": "a7770c49cdfac4fa6368dfe8b39e6474",
494 | "grade": true,
495 | "grade_id": "cell-564dd8e5e24b0f83",
496 | "locked": true,
497 | "points": 6.66,
498 | "schema_version": 1,
499 | "solution": false
500 | }
501 | },
502 | "outputs": [],
503 | "source": [
504 | "# Cell for autograder.\n"
505 | ]
506 | },
507 | {
508 | "cell_type": "markdown",
509 | "metadata": {
510 | "deletable": false,
511 | "editable": false,
512 | "nbgrader": {
513 | "checksum": "ed6dbc94ff1b6268873413fee12741cd",
514 | "grade": false,
515 | "grade_id": "cell-617669111e38ca15",
516 | "locked": true,
517 | "schema_version": 1,
518 | "solution": false
519 | }
520 | },
521 | "source": [
522 | "### Question 5\n",
523 | "What is the mean energy supply per capita?\n",
524 | "\n",
525 | "*This function should return a single number.*"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 12,
531 | "metadata": {
532 | "deletable": false,
533 | "nbgrader": {
534 | "checksum": "cfd61a1735889e7ef20692ca0d28ddcb",
535 | "grade": false,
536 | "grade_id": "cell-58e79d558e982eef",
537 | "locked": false,
538 | "schema_version": 1,
539 | "solution": true
540 | }
541 | },
542 | "outputs": [],
543 | "source": [
544 | "def answer_five():\n",
545 | "    \"\"\"Return the mean of the 'Energy Supply per Capita' column of answer_one().\"\"\"\n",
546 | "    Top15 = answer_one()\n",
547 | "    per_capita = Top15['Energy Supply per Capita']\n",
548 | "    # Series.mean() collapses the column to a single number.\n",
549 | "    return per_capita.mean()"
550 | ]
551 | },
552 | {
553 | "cell_type": "code",
554 | "execution_count": 13,
555 | "metadata": {
556 | "deletable": false,
557 | "editable": false,
558 | "nbgrader": {
559 | "checksum": "9d61bf22656baeecc77f63d54448590e",
560 | "grade": true,
561 | "grade_id": "cell-30cc66180851638c",
562 | "locked": true,
563 | "points": 6.66,
564 | "schema_version": 1,
565 | "solution": false
566 | }
567 | },
568 | "outputs": [],
569 | "source": [
570 | "# Cell for autograder.\n"
571 | ]
572 | },
573 | {
574 | "cell_type": "markdown",
575 | "metadata": {
576 | "deletable": false,
577 | "editable": false,
578 | "nbgrader": {
579 | "checksum": "2c7a163ae96f56317756456b0d9d695b",
580 | "grade": false,
581 | "grade_id": "cell-5c11ddd12fd71b3f",
582 | "locked": true,
583 | "schema_version": 1,
584 | "solution": false
585 | }
586 | },
587 | "source": [
588 | "### Question 6\n",
589 | "What country has the maximum % Renewable and what is the percentage?\n",
590 | "\n",
591 | "*This function should return a tuple with the name of the country and the percentage.*"
592 | ]
593 | },
594 | {
595 | "cell_type": "code",
596 | "execution_count": 14,
597 | "metadata": {
598 | "deletable": false,
599 | "nbgrader": {
600 | "checksum": "f8657f18c77eb0f752bca3cc48561da3",
601 | "grade": false,
602 | "grade_id": "cell-b6824b78e74619f9",
603 | "locked": false,
604 | "schema_version": 1,
605 | "solution": true
606 | }
607 | },
608 | "outputs": [],
609 | "source": [
610 | "def answer_six():\n",
611 | "    \"\"\"Return (country, percentage) for the largest '% Renewable' value.\"\"\"\n",
612 | "    renewable = answer_one()['% Renewable']  # build the frame once, not three times\n",
613 | "    # idxmax gives the first index label holding the maximum.\n",
614 | "    country = renewable.idxmax()\n",
615 | "    return country, renewable.max()"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 15,
621 | "metadata": {
622 | "deletable": false,
623 | "editable": false,
624 | "nbgrader": {
625 | "checksum": "f8b28b0a824a3b76a6244c1273648ccd",
626 | "grade": true,
627 | "grade_id": "cell-2bd201c5c7bdd80f",
628 | "locked": true,
629 | "points": 6.66,
630 | "schema_version": 1,
631 | "solution": false
632 | }
633 | },
634 | "outputs": [],
635 | "source": [
636 | "assert type(answer_six()) == tuple, \"Q6: You should return a tuple!\"\n",
637 | "\n",
638 | "assert type(answer_six()[0]) == str, \"Q6: The first element in your result should be the name of the country!\"\n"
639 | ]
640 | },
641 | {
642 | "cell_type": "markdown",
643 | "metadata": {
644 | "deletable": false,
645 | "editable": false,
646 | "nbgrader": {
647 | "checksum": "a7b561a486a28ee4ba80a40715617c6d",
648 | "grade": false,
649 | "grade_id": "cell-ddf52a85ad3d5a11",
650 | "locked": true,
651 | "schema_version": 1,
652 | "solution": false
653 | }
654 | },
655 | "source": [
656 | "### Question 7\n",
657 | "Create a new column that is the ratio of Self-Citations to Total Citations. \n",
658 | "What is the maximum value for this new column, and what country has the highest ratio?\n",
659 | "\n",
660 | "*This function should return a tuple with the name of the country and the ratio.*"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": 16,
666 | "metadata": {
667 | "deletable": false,
668 | "nbgrader": {
669 | "checksum": "e4b1cc5e3deefd24be992fbee18d0e74",
670 | "grade": false,
671 | "grade_id": "cell-a4f39737f38aa53c",
672 | "locked": false,
673 | "schema_version": 1,
674 | "solution": true
675 | }
676 | },
677 | "outputs": [],
678 | "source": [
679 | "def answer_seven():\n",
680 | "    \"\"\"Return (country, ratio) for the highest Self-citations / Citations ratio.\"\"\"\n",
681 | "    Top15 = answer_one()  # build the frame once, not three times\n",
682 | "    Top15['ratio'] = Top15['Self-citations'] / Top15['Citations']\n",
683 | "    # idxmax gives the first index label holding the maximum.\n",
684 | "    country = Top15['ratio'].idxmax()\n",
685 | "    return country, Top15['ratio'].max()"
686 | ]
687 | },
688 | {
689 | "cell_type": "code",
690 | "execution_count": 17,
691 | "metadata": {
692 | "deletable": false,
693 | "editable": false,
694 | "nbgrader": {
695 | "checksum": "ca448b3a16b65a3a08533cac736cc4d9",
696 | "grade": true,
697 | "grade_id": "cell-b7a163e9231b88c9",
698 | "locked": true,
699 | "points": 6.66,
700 | "schema_version": 1,
701 | "solution": false
702 | }
703 | },
704 | "outputs": [],
705 | "source": [
706 | "assert type(answer_seven()) == tuple, \"Q7: You should return a tuple!\"\n",
707 | "\n",
708 | "assert type(answer_seven()[0]) == str, \"Q7: The first element in your result should be the name of the country!\"\n"
709 | ]
710 | },
711 | {
712 | "cell_type": "markdown",
713 | "metadata": {
714 | "deletable": false,
715 | "editable": false,
716 | "nbgrader": {
717 | "checksum": "7be7b86ee7467539dd88746818c78c0e",
718 | "grade": false,
719 | "grade_id": "cell-5c89296ab6f94218",
720 | "locked": true,
721 | "schema_version": 1,
722 | "solution": false
723 | }
724 | },
725 | "source": [
726 | "### Question 8\n",
727 | "\n",
728 | "Create a column that estimates the population using Energy Supply and Energy Supply per capita. \n",
729 | "What is the third most populous country according to this estimate?\n",
730 | "\n",
731 | "*This function should return the name of the country*"
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "execution_count": 18,
737 | "metadata": {
738 | "deletable": false,
739 | "nbgrader": {
740 | "checksum": "9d733b2abf089b1931e2e792ff51d488",
741 | "grade": false,
742 | "grade_id": "cell-9ca58137846b84d6",
743 | "locked": false,
744 | "schema_version": 1,
745 | "solution": true
746 | }
747 | },
748 | "outputs": [],
749 | "source": [
750 | "def answer_eight():\n",
751 | "    \"\"\"Return the name of the third most populous country by the energy-based estimate.\"\"\"\n",
752 | "    Top15 = answer_one()\n",
753 | "    # Estimated population = total energy supply / energy supply per person.\n",
754 | "    Top15['pop'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
755 | "    ranked = Top15['pop'].sort_values(ascending=False)\n",
756 | "    # Read the label by position; integer keys on a label-indexed Series are deprecated.\n",
757 | "    return ranked.index[2]"
758 | ]
759 | },
760 | {
761 | "cell_type": "code",
762 | "execution_count": 19,
763 | "metadata": {
764 | "deletable": false,
765 | "editable": false,
766 | "nbgrader": {
767 | "checksum": "ba2ad50cf8198767b0bd2f75b8d97e87",
768 | "grade": true,
769 | "grade_id": "cell-3f3620c88df08b20",
770 | "locked": true,
771 | "points": 0,
772 | "schema_version": 1,
773 | "solution": false
774 | }
775 | },
776 | "outputs": [],
777 | "source": [
778 | "assert type(answer_eight()) == str, \"Q8: You should return the name of the country!\"\n"
779 | ]
780 | },
781 | {
782 | "cell_type": "markdown",
783 | "metadata": {
784 | "deletable": false,
785 | "editable": false,
786 | "nbgrader": {
787 | "checksum": "164cba98164a1045db7de10dd37115c8",
788 | "grade": false,
789 | "grade_id": "cell-2065207e66e5ec01",
790 | "locked": true,
791 | "schema_version": 1,
792 | "solution": false
793 | }
794 | },
795 | "source": [
796 | "### Question 9\n",
797 | "Create a column that estimates the number of citable documents per person. \n",
798 | "What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method, (Pearson's correlation).\n",
799 | "\n",
800 | "*This function should return a single number.*\n",
801 | "\n",
802 | "*(Optional: Use the built-in function `plot9()` to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)*"
803 | ]
804 | },
805 | {
806 | "cell_type": "code",
807 | "execution_count": 20,
808 | "metadata": {
809 | "deletable": false,
810 | "nbgrader": {
811 | "checksum": "94e06c4c3a9618b94dbb0e86913b546c",
812 | "grade": false,
813 | "grade_id": "cell-033679ea456bfb9d",
814 | "locked": false,
815 | "schema_version": 1,
816 | "solution": true
817 | }
818 | },
819 | "outputs": [],
820 | "source": [
821 | "def answer_nine():\n",
822 | "    \"\"\"Pearson correlation of citable documents per capita with energy supply per capita.\"\"\"\n",
823 | "    Top15 = answer_one()\n",
824 | "    energy_per_capita = Top15['Energy Supply per Capita']\n",
825 | "    # Estimated population = total energy supply / energy supply per person.\n",
826 | "    population = Top15['Energy Supply'] / energy_per_capita\n",
827 | "    docs_per_capita = Top15['Citable documents'] / population\n",
828 | "    # Series.corr uses Pearson's method by default.\n",
829 | "    return docs_per_capita.corr(energy_per_capita)"
830 | ]
831 | },
832 | {
833 | "cell_type": "code",
834 | "execution_count": 21,
835 | "metadata": {
836 | "deletable": false,
837 | "editable": false,
838 | "nbgrader": {
839 | "checksum": "01a146bbcca0fa9c9c13e71ab52e710f",
840 | "grade": false,
841 | "grade_id": "cell-644824f6c708bf80",
842 | "locked": true,
843 | "schema_version": 1,
844 | "solution": false
845 | }
846 | },
847 | "outputs": [],
848 | "source": [
849 | "def plot9():\n",
850 | " import matplotlib as plt\n",
851 | " %matplotlib inline\n",
852 | " \n",
853 | " Top15 = answer_one()\n",
854 | " Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
855 | " Top15['Citable docs per Capita'] = Top15['Citable documents'] / Top15['PopEst']\n",
856 | " Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006])"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "execution_count": 22,
862 | "metadata": {
863 | "deletable": false,
864 | "editable": false,
865 | "nbgrader": {
866 | "checksum": "8dced1dde88b6877f89bdec482870476",
867 | "grade": true,
868 | "grade_id": "cell-3cb5c699065a4a20",
869 | "locked": true,
870 | "points": 6.66,
871 | "schema_version": 1,
872 | "solution": false
873 | }
874 | },
875 | "outputs": [],
876 | "source": [
877 | "assert answer_nine() >= -1. and answer_nine() <= 1., \"Q9: A valid correlation should between -1 to 1!\"\n"
878 | ]
879 | },
880 | {
881 | "cell_type": "markdown",
882 | "metadata": {
883 | "deletable": false,
884 | "editable": false,
885 | "nbgrader": {
886 | "checksum": "8af5ffad89be1e5c6292438724d6f8d5",
887 | "grade": false,
888 | "grade_id": "cell-ad09765e29b91157",
889 | "locked": true,
890 | "schema_version": 1,
891 | "solution": false
892 | }
893 | },
894 | "source": [
895 | "### Question 10\n",
896 | "Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median.\n",
897 | "\n",
898 | "*This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.*"
899 | ]
900 | },
901 | {
902 | "cell_type": "code",
903 | "execution_count": 23,
904 | "metadata": {
905 | "deletable": false,
906 | "nbgrader": {
907 | "checksum": "340c06bd50a9a027a2190674cfb981b9",
908 | "grade": false,
909 | "grade_id": "cell-0fdf60e64bf1a4f9",
910 | "locked": false,
911 | "schema_version": 1,
912 | "solution": true
913 | }
914 | },
915 | "outputs": [],
916 | "source": [
917 | "def answer_ten():\n",
918 | "    \"\"\"Flag each Top15 country by whether its '% Renewable' reaches the median.\n",
919 | "\n",
920 | "    Returns a Series named `HighRenew`, indexed by country in ascending 'Rank'\n",
921 | "    order: 1 at or above the median of the 15 countries, 0 below it.\n",
922 | "    \"\"\"\n",
923 | "    Top15 = answer_one().sort_values('Rank')\n",
924 | "    renewable = Top15['% Renewable']\n",
925 | "    # A single vectorized comparison is used instead of a per-row loop writing\n",
926 | "    # through chained indexing (Top15['HighRenew'].iloc[j] = ...); such writes\n",
927 | "    # raise SettingWithCopyWarning and are lost under pandas copy-on-write.\n",
928 | "    flags = (renewable >= renewable.median()).astype(int)\n",
929 | "    Top15['HighRenew'] = flags\n",
930 | "    return Top15['HighRenew']"
931 | ]
932 | },
933 | {
934 | "cell_type": "code",
935 | "execution_count": 24,
936 | "metadata": {
937 | "deletable": false,
938 | "editable": false,
939 | "nbgrader": {
940 | "checksum": "f624e6996eca5796eaf27fb4d0593175",
941 | "grade": true,
942 | "grade_id": "cell-b29a631fd9a7730f",
943 | "locked": true,
944 | "points": 6.66,
945 | "schema_version": 1,
946 | "solution": false
947 | }
948 | },
949 | "outputs": [],
950 | "source": [
951 | "assert type(answer_ten()) == pd.Series, \"Q10: You should return a Series!\"\n"
952 | ]
953 | },
954 | {
955 | "cell_type": "markdown",
956 | "metadata": {
957 | "deletable": false,
958 | "editable": false,
959 | "nbgrader": {
960 | "checksum": "52f682e7066791c34cd3b2402855cbf5",
961 | "grade": false,
962 | "grade_id": "cell-677c51ba711c3af7",
963 | "locked": true,
964 | "schema_version": 1,
965 | "solution": false
966 | }
967 | },
968 | "source": [
969 | "### Question 11\n",
970 | "Use the following dictionary to group the Countries by Continent, then create a DataFrame that displays the sample size (the number of countries in each continent bin), and the sum, mean, and std deviation for the estimated population of each country.\n",
971 | "\n",
972 | "```python\n",
973 | "ContinentDict = {'China':'Asia', \n",
974 | " 'United States':'North America', \n",
975 | " 'Japan':'Asia', \n",
976 | " 'United Kingdom':'Europe', \n",
977 | " 'Russian Federation':'Europe', \n",
978 | " 'Canada':'North America', \n",
979 | " 'Germany':'Europe', \n",
980 | " 'India':'Asia',\n",
981 | " 'France':'Europe', \n",
982 | " 'South Korea':'Asia', \n",
983 | " 'Italy':'Europe', \n",
984 | " 'Spain':'Europe', \n",
985 | " 'Iran':'Asia',\n",
986 | " 'Australia':'Australia', \n",
987 | " 'Brazil':'South America'}\n",
988 | "```\n",
989 | "\n",
990 | "*This function should return a DataFrame with index named Continent `['Asia', 'Australia', 'Europe', 'North America', 'South America']` and columns `['size', 'sum', 'mean', 'std']`*"
991 | ]
992 | },
993 | {
994 | "cell_type": "code",
995 | "execution_count": 25,
996 | "metadata": {
997 | "deletable": false,
998 | "nbgrader": {
999 | "checksum": "b55846bc20cd01b0acbcb776504a766d",
1000 | "grade": false,
1001 | "grade_id": "cell-a5e0c0df27304f98",
1002 | "locked": false,
1003 | "schema_version": 1,
1004 | "solution": true
1005 | }
1006 | },
1007 | "outputs": [],
1008 | "source": [
1009 | "def answer_eleven():\n",
1010 | "    \"\"\"Summarise the estimated population of the Top15 countries per continent.\n",
1011 | "\n",
1012 | "    Returns a DataFrame indexed by `Continent` with the columns\n",
1013 | "    ['size', 'sum', 'mean', 'std'], computed over the population estimate\n",
1014 | "    (Energy Supply / Energy Supply per Capita).\n",
1015 | "    \"\"\"\n",
1016 | "    ContinentDict = {'China':'Asia', \n",
1017 | "                     'United States':'North America', \n",
1018 | "                     'Japan':'Asia', \n",
1019 | "                     'United Kingdom':'Europe', \n",
1020 | "                     'Russian Federation':'Europe', \n",
1021 | "                     'Canada':'North America', \n",
1022 | "                     'Germany':'Europe', \n",
1023 | "                     'India':'Asia',\n",
1024 | "                     'France':'Europe', \n",
1025 | "                     'South Korea':'Asia', \n",
1026 | "                     'Italy':'Europe', \n",
1027 | "                     'Spain':'Europe', \n",
1028 | "                     'Iran':'Asia',\n",
1029 | "                     'Australia':'Australia', \n",
1030 | "                     'Brazil':'South America'}\n",
1031 | "\n",
1032 | "    Top15 = answer_one()\n",
1033 | "    Top15['pop'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
1034 | "    # Map each country to its continent by name. Copying the dict values row\n",
1035 | "    # by row would only be right while the frame's row order matches the\n",
1036 | "    # dict's insertion order, and chained .iloc writes are lost under pandas\n",
1037 | "    # copy-on-write.\n",
1038 | "    Top15['Continent'] = Top15.index.map(ContinentDict)\n",
1039 | "    # One agg call yields the four statistics in the requested column order,\n",
1040 | "    # with the sorted continent names as the index (named 'Continent').\n",
1041 | "    summary = Top15.groupby('Continent')['pop'].agg(['size', 'sum', 'mean', 'std'])\n",
1042 | "    return summary"
1043 | ]
1044 | },
1045 | {
1046 | "cell_type": "code",
1047 | "execution_count": 26,
1048 | "metadata": {
1049 | "deletable": false,
1050 | "editable": false,
1051 | "nbgrader": {
1052 | "checksum": "233318097d9c94fdc87395c967da14c4",
1053 | "grade": true,
1054 | "grade_id": "cell-18d1a07971b25743",
1055 | "locked": true,
1056 | "points": 6.66,
1057 | "schema_version": 1,
1058 | "solution": false
1059 | }
1060 | },
1061 | "outputs": [],
1062 | "source": [
1063 | "assert type(answer_eleven()) == pd.DataFrame, \"Q11: You should return a DataFrame!\"\n",
1064 | "\n",
1065 | "assert answer_eleven().shape[0] == 5, \"Q11: Wrong row numbers!\"\n",
1066 | "\n",
1067 | "assert answer_eleven().shape[1] == 4, \"Q11: Wrong column numbers!\"\n"
1068 | ]
1069 | },
1070 | {
1071 | "cell_type": "markdown",
1072 | "metadata": {
1073 | "deletable": false,
1074 | "editable": false,
1075 | "nbgrader": {
1076 | "checksum": "78d9dbb8ff6e0a1ac1e0d16e026a7d98",
1077 | "grade": false,
1078 | "grade_id": "cell-fa26f5c1eac39c6c",
1079 | "locked": true,
1080 | "schema_version": 1,
1081 | "solution": false
1082 | }
1083 | },
1084 | "source": [
1085 | "### Question 12\n",
1086 | "Cut % Renewable into 5 bins. Group Top15 by the Continent, as well as these new % Renewable bins. How many countries are in each of these groups?\n",
1087 | "\n",
1088 | "*This function should return a Series with a MultiIndex of `Continent`, then the bins for `% Renewable`. Do not include groups with no countries.*"
1089 | ]
1090 | },
1091 | {
1092 | "cell_type": "code",
1093 | "execution_count": 27,
1094 | "metadata": {
1095 | "deletable": false,
1096 | "nbgrader": {
1097 | "checksum": "27eb27ec7a3347530174f7047288a881",
1098 | "grade": false,
1099 | "grade_id": "cell-2ecd9a4076abd8f0",
1100 | "locked": false,
1101 | "schema_version": 1,
1102 | "solution": true
1103 | }
1104 | },
1105 | "outputs": [],
1106 | "source": [
1107 | "def answer_twelve():\n",
1108 | "    \"\"\"Count the Top15 countries per continent and '% Renewable' bin.\n",
1109 | "\n",
1110 | "    Returns a Series with a MultiIndex of (Continent, % Renewable bin);\n",
1111 | "    combinations that contain no country are left out.\n",
1112 | "    \"\"\"\n",
1113 | "    ContinentDict = {'China':'Asia', \n",
1114 | "                     'United States':'North America', \n",
1115 | "                     'Japan':'Asia', \n",
1116 | "                     'United Kingdom':'Europe', \n",
1117 | "                     'Russian Federation':'Europe', \n",
1118 | "                     'Canada':'North America', \n",
1119 | "                     'Germany':'Europe', \n",
1120 | "                     'India':'Asia',\n",
1121 | "                     'France':'Europe', \n",
1122 | "                     'South Korea':'Asia', \n",
1123 | "                     'Italy':'Europe', \n",
1124 | "                     'Spain':'Europe', \n",
1125 | "                     'Iran':'Asia',\n",
1126 | "                     'Australia':'Australia', \n",
1127 | "                     'Brazil':'South America'}\n",
1128 | "\n",
1129 | "    Top15 = answer_one()\n",
1130 | "    # Map countries to continents by name rather than by row position.\n",
1131 | "    Top15['Continent'] = Top15.index.map(ContinentDict)\n",
1132 | "    Top15['% Renewable'] = pd.cut(Top15['% Renewable'], bins=5)\n",
1133 | "    # observed=True keeps only bin/continent pairs that occur; a categorical key can otherwise add empty groups.\n",
1134 | "    return Top15.groupby(['Continent', '% Renewable'], observed=True).size()"
1135 | ]
1136 | },
1137 | {
1138 | "cell_type": "code",
1139 | "execution_count": null,
1140 | "metadata": {
1141 | "deletable": false,
1142 | "editable": false,
1143 | "nbgrader": {
1144 | "checksum": "79ed0cf577c7941dc357efd8bf1c5d26",
1145 | "grade": true,
1146 | "grade_id": "cell-6c665602d6babab9",
1147 | "locked": true,
1148 | "points": 6.66,
1149 | "schema_version": 1,
1150 | "solution": false
1151 | }
1152 | },
1153 | "outputs": [],
1154 | "source": [
1155 | "assert type(answer_twelve()) == pd.Series, \"Q12: You should return a Series!\"\n",
1156 | "\n",
1157 | "assert len(answer_twelve()) == 9, \"Q12: Wrong result numbers!\"\n"
1158 | ]
1159 | },
1160 | {
1161 | "cell_type": "markdown",
1162 | "metadata": {
1163 | "deletable": false,
1164 | "editable": false,
1165 | "nbgrader": {
1166 | "checksum": "bdfd9b1bb897304b6337fdc47a05967c",
1167 | "grade": false,
1168 | "grade_id": "cell-4209a10d8f208739",
1169 | "locked": true,
1170 | "schema_version": 1,
1171 | "solution": false
1172 | }
1173 | },
1174 | "source": [
1175 | "### Question 13\n",
1176 | "Convert the Population Estimate series to a string with thousands separator (using commas). Use all significant digits (do not round the results).\n",
1177 | "\n",
1178 | "e.g. 12345678.90 -> 12,345,678.90\n",
1179 | "\n",
1180 | "*This function should return a series `PopEst` whose index is the country name and whose values are the population estimate string*"
1181 | ]
1182 | },
1183 | {
1184 | "cell_type": "code",
1185 | "execution_count": 28,
1186 | "metadata": {
1187 | "deletable": false,
1188 | "nbgrader": {
1189 | "checksum": "1efd09964334b7d6100d81d4b3ead3e9",
1190 | "grade": false,
1191 | "grade_id": "cell-58eb0ee0921d93fb",
1192 | "locked": false,
1193 | "schema_version": 1,
1194 | "solution": true
1195 | }
1196 | },
1197 | "outputs": [],
1198 | "source": [
1199 | "def answer_thirteen():\n",
1200 | "    \"\"\"Return the population estimate as comma-separated strings, indexed by country.\"\"\"\n",
1201 | "    Top15 = answer_one()\n",
1202 | "    # Estimated population = total energy supply / energy supply per person.\n",
1203 | "    estimate = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n",
1204 | "    # '{:,}' inserts thousands separators without rounding the value.\n",
1205 | "    PopEst = estimate.apply(lambda value: '{:,}'.format(value))\n",
1206 | "    return PopEst.rename('PopEst')"
1207 | ]
1208 | },
1209 | {
1210 | "cell_type": "code",
1211 | "execution_count": 29,
1212 | "metadata": {
1213 | "deletable": false,
1214 | "editable": false,
1215 | "nbgrader": {
1216 | "checksum": "e014781df77c7edab2a181d2d943be8f",
1217 | "grade": true,
1218 | "grade_id": "cell-10fee7228cf973f6",
1219 | "locked": true,
1220 | "points": 6.74,
1221 | "schema_version": 1,
1222 | "solution": false
1223 | }
1224 | },
1225 | "outputs": [],
1226 | "source": [
1227 | "assert type(answer_thirteen()) == pd.Series, \"Q13: You should return a Series!\"\n",
1228 | "\n",
1229 | "assert len(answer_thirteen()) == 15, \"Q13: Wrong result numbers!\"\n"
1230 | ]
1231 | },
1232 | {
1233 | "cell_type": "markdown",
1234 | "metadata": {
1235 | "deletable": false,
1236 | "editable": false,
1237 | "nbgrader": {
1238 | "checksum": "61562b9b667bd5efbcec0dcd7becbfaa",
1239 | "grade": false,
1240 | "grade_id": "cell-998b62d4f390ef15",
1241 | "locked": true,
1242 | "schema_version": 1,
1243 | "solution": false
1244 | }
1245 | },
1246 | "source": [
1247 | "### Optional\n",
1248 | "\n",
1249 | "Use the built in function `plot_optional()` to see an example visualization."
1250 | ]
1251 | },
1252 | {
1253 | "cell_type": "code",
1254 | "execution_count": null,
1255 | "metadata": {
1256 | "deletable": false,
1257 | "editable": false,
1258 | "nbgrader": {
1259 | "checksum": "479786c97cb5f34d07231c6d7c602a47",
1260 | "grade": false,
1261 | "grade_id": "cell-741fd55ea57cd40a",
1262 | "locked": true,
1263 | "schema_version": 1,
1264 | "solution": false
1265 | }
1266 | },
1267 | "outputs": [],
1268 | "source": [
1269 | "def plot_optional():\n",
1270 | " import matplotlib as plt\n",
1271 | " %matplotlib inline\n",
1272 | " Top15 = answer_one()\n",
1273 | " ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', \n",
1274 | " c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c',\n",
1275 | " '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], \n",
1276 | " xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]);\n",
1277 | "\n",
1278 | " for i, txt in enumerate(Top15.index):\n",
1279 | " ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center')\n",
1280 | "\n",
1281 | " print(\"This is an example of a visualization that can be created to help understand the data. \\\n",
1282 | "This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' \\\n",
1283 | "2014 GDP, and the color corresponds to the continent.\")"
1284 | ]
1285 | }
1286 | ],
1287 | "metadata": {
1288 | "anaconda-cloud": {},
1289 | "coursera": {
1290 | "schema_names": [
1291 | "mooc_adswpy_1_v2_assignment3"
1292 | ]
1293 | },
1294 | "kernelspec": {
1295 | "display_name": "Python 3",
1296 | "language": "python",
1297 | "name": "python3"
1298 | },
1299 | "language_info": {
1300 | "codemirror_mode": {
1301 | "name": "ipython",
1302 | "version": 3
1303 | },
1304 | "file_extension": ".py",
1305 | "mimetype": "text/x-python",
1306 | "name": "python",
1307 | "nbconvert_exporter": "python",
1308 | "pygments_lexer": "ipython3",
1309 | "version": "3.7.6"
1310 | }
1311 | },
1312 | "nbformat": 4,
1313 | "nbformat_minor": 4
1314 | }
1315 |
--------------------------------------------------------------------------------