├── Assignment_2 ├── assets │ └── NIS-PUF17-DUG.pdf ├── assignment2.ipynb └── .ipynb_checkpoints │ └── assignment2-checkpoint.ipynb ├── Assignment_3 ├── assets │ ├── scimagojr-3.xlsx │ └── Energy Indicators.xls └── assignment3.ipynb ├── README.md ├── Assignment_1 ├── assets │ └── grades.txt ├── assignment1.ipynb └── .ipynb_checkpoints │ └── assignment1-checkpoint.ipynb └── Assignment_4 ├── assets ├── mlb.csv ├── nba.csv ├── nhl.csv └── nfl.csv └── assignment4.ipynb /Assignment_2/assets/NIS-PUF17-DUG.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_2/assets/NIS-PUF17-DUG.pdf -------------------------------------------------------------------------------- /Assignment_3/assets/scimagojr-3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/scimagojr-3.xlsx -------------------------------------------------------------------------------- /Assignment_3/assets/Energy Indicators.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tchagau/Introduction-to-Data-Science-in-Python/HEAD/Assignment_3/assets/Energy Indicators.xls -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction-to-Data-Science-in-Python 2 | This repository contains the course assignments for Introduction to Data Science in Python, offered on Coursera by the University of Michigan. 3 | -------------------------------------------------------------------------------- /Assignment_1/assets/grades.txt: -------------------------------------------------------------------------------- 1 | Ronald Mayr: A 2 | Bell Kassulke: B 3 | Jacqueline Rupp: A 4 | Alexander 
Zeller: C 5 | Valentina Denk: C 6 | Simon Loidl: B 7 | Elias Jovanovic: B 8 | Stefanie Weninger: A 9 | Fabian Peer: C 10 | Hakim Botros: B 11 | Emilie Lorentsen: B 12 | Herman Karlsen: C 13 | Nathalie Delacruz: C 14 | Casey Hartman: C 15 | Lily Walker : A 16 | Gerard Wang: C 17 | Tony Mcdowell: C 18 | Jake Wood: B 19 | Fatemeh Akhtar: B 20 | Kim Weston: B 21 | Nicholas Beatty: A 22 | Kirsten Williams: C 23 | Vaishali Surana: C 24 | Coby Mccormack: C 25 | Yasmin Dar: B 26 | Romy Donnelly: A 27 | Viswamitra Upandhye: B 28 | Kendrick Hilpert: A 29 | Killian Kaufman: B 30 | Elwood Page: B 31 | Mukti Patel: A 32 | Emily Lesch: C 33 | Elodie Booker: B 34 | Jedd Kim: A 35 | Annabel Davies: A 36 | Adnan Chen: B 37 | Jonathan Berg: C 38 | Hank Spinka: B 39 | Agnes Schneider: C 40 | Kimberly Green: A 41 | Lola-Rose Coates: C 42 | Rose Christiansen: C 43 | Shirley Hintz: C 44 | Hannah Bayer: B 45 | -------------------------------------------------------------------------------- /Assignment_4/assets/mlb.csv: -------------------------------------------------------------------------------- 1 | team,W,L,W-L%,GB,year,League 2 | Boston Red Sox,108,54,0.667,--,2018,MLB 3 | New York Yankees,100,62,0.617,8.0,2018,MLB 4 | Tampa Bay Rays,90,72,0.556,18.0,2018,MLB 5 | Toronto Blue Jays,73,89,0.451,35.0,2018,MLB 6 | Baltimore Orioles,47,115,0.29,61.0,2018,MLB 7 | Cleveland Indians,91,71,0.562,--,2018,MLB 8 | Minnesota Twins,78,84,0.48100000000000004,13.0,2018,MLB 9 | Detroit Tigers,64,98,0.395,27.0,2018,MLB 10 | Chicago White Sox,62,100,0.38299999999999995,29.0,2018,MLB 11 | Kansas City Royals,58,104,0.358,33.0,2018,MLB 12 | Houston Astros,103,59,0.636,--,2018,MLB 13 | Oakland Athletics,97,65,0.599,6.0,2018,MLB 14 | Seattle Mariners,89,73,0.5489999999999999,14.0,2018,MLB 15 | Los Angeles Angels,80,82,0.494,23.0,2018,MLB 16 | Texas Rangers,67,95,0.414,36.0,2018,MLB 17 | Atlanta Braves,90,72,0.556,--,2018,MLB 18 | Washington Nationals,82,80,0.506,8.0,2018,MLB 19 | Philadelphia 
Phillies,80,82,0.494,10.0,2018,MLB 20 | New York Mets,77,85,0.475,13.0,2018,MLB 21 | Miami Marlins,63,98,0.391,26.5,2018,MLB 22 | Milwaukee Brewers,96,67,0.589,--,2018,MLB 23 | Chicago Cubs,95,68,0.583,1.0,2018,MLB 24 | St. Louis Cardinals,88,74,0.5429999999999999,7.5,2018,MLB 25 | Pittsburgh Pirates,82,79,0.509,13.0,2018,MLB 26 | Cincinnati Reds,67,95,0.414,28.5,2018,MLB 27 | Los Angeles Dodgers,92,71,0.564,--,2018,MLB 28 | Colorado Rockies,91,72,0.5579999999999999,1.0,2018,MLB 29 | Arizona Diamondbacks,82,80,0.506,9.5,2018,MLB 30 | San Francisco Giants,73,89,0.451,18.5,2018,MLB 31 | San Diego Padres,66,96,0.40700000000000003,25.5,2018,MLB 32 | Boston Red Sox,93,69,0.574,--,2017,MLB 33 | New York Yankees,91,71,0.562,2.0,2017,MLB 34 | Tampa Bay Rays,80,82,0.494,13.0,2017,MLB 35 | Toronto Blue Jays,76,86,0.469,17.0,2017,MLB 36 | Baltimore Orioles,75,87,0.46299999999999997,18.0,2017,MLB 37 | Cleveland Indians,102,60,0.63,--,2017,MLB 38 | Minnesota Twins,85,77,0.525,17.0,2017,MLB 39 | Kansas City Royals,80,82,0.494,22.0,2017,MLB 40 | Chicago White Sox,67,95,0.414,35.0,2017,MLB 41 | Detroit Tigers,64,98,0.395,38.0,2017,MLB 42 | Houston Astros,101,61,0.623,--,2017,MLB 43 | Los Angeles Angels,80,82,0.494,21.0,2017,MLB 44 | Seattle Mariners,78,84,0.48100000000000004,23.0,2017,MLB 45 | Texas Rangers,78,84,0.48100000000000004,23.0,2017,MLB 46 | Oakland Athletics,75,87,0.46299999999999997,26.0,2017,MLB 47 | Washington Nationals,97,65,0.599,--,2017,MLB 48 | Miami Marlins,77,85,0.475,20.0,2017,MLB 49 | Atlanta Braves,72,90,0.444,25.0,2017,MLB 50 | New York Mets,70,92,0.43200000000000005,27.0,2017,MLB 51 | Philadelphia Phillies,66,96,0.40700000000000003,31.0,2017,MLB 52 | Chicago Cubs,92,70,0.568,--,2017,MLB 53 | Milwaukee Brewers,86,76,0.531,6.0,2017,MLB 54 | St. 
Louis Cardinals,83,79,0.512,9.0,2017,MLB 55 | Pittsburgh Pirates,75,87,0.46299999999999997,17.0,2017,MLB 56 | Cincinnati Reds,68,94,0.42,24.0,2017,MLB 57 | Los Angeles Dodgers,104,58,0.642,--,2017,MLB 58 | Arizona Diamondbacks,93,69,0.574,11.0,2017,MLB 59 | Colorado Rockies,87,75,0.537,17.0,2017,MLB 60 | San Diego Padres,71,91,0.43799999999999994,33.0,2017,MLB 61 | San Francisco Giants,64,98,0.395,40.0,2017,MLB 62 | Boston Red Sox,93,69,0.574,--,2016,MLB 63 | Baltimore Orioles,89,73,0.5489999999999999,4.0,2016,MLB 64 | Toronto Blue Jays,89,73,0.5489999999999999,4.0,2016,MLB 65 | New York Yankees,84,78,0.519,9.0,2016,MLB 66 | Tampa Bay Rays,68,94,0.42,25.0,2016,MLB 67 | Cleveland Indians,94,67,0.584,--,2016,MLB 68 | Detroit Tigers,86,75,0.534,8.0,2016,MLB 69 | Kansas City Royals,81,81,0.5,13.5,2016,MLB 70 | Chicago White Sox,78,84,0.48100000000000004,16.5,2016,MLB 71 | Minnesota Twins,59,103,0.364,35.5,2016,MLB 72 | Texas Rangers,95,67,0.586,--,2016,MLB 73 | Seattle Mariners,86,76,0.531,9.0,2016,MLB 74 | Houston Astros,84,78,0.519,11.0,2016,MLB 75 | Los Angeles Angels,74,88,0.457,21.0,2016,MLB 76 | Oakland Athletics,69,93,0.426,26.0,2016,MLB 77 | Washington Nationals,95,67,0.586,--,2016,MLB 78 | New York Mets,87,75,0.537,8.0,2016,MLB 79 | Miami Marlins,79,82,0.491,15.5,2016,MLB 80 | Philadelphia Phillies,71,91,0.43799999999999994,24.0,2016,MLB 81 | Atlanta Braves,68,93,0.42200000000000004,26.5,2016,MLB 82 | Chicago Cubs,103,58,0.64,--,2016,MLB 83 | St. 
Louis Cardinals,86,76,0.531,17.5,2016,MLB 84 | Pittsburgh Pirates,78,83,0.484,25.0,2016,MLB 85 | Milwaukee Brewers,73,89,0.451,30.5,2016,MLB 86 | Cincinnati Reds,68,94,0.42,35.5,2016,MLB 87 | Los Angeles Dodgers,91,71,0.562,--,2016,MLB 88 | San Francisco Giants,87,75,0.537,4.0,2016,MLB 89 | Colorado Rockies,75,87,0.46299999999999997,16.0,2016,MLB 90 | Arizona Diamondbacks,69,93,0.426,22.0,2016,MLB 91 | San Diego Padres,68,94,0.42,23.0,2016,MLB 92 | Toronto Blue Jays,93,69,0.574,--,2015,MLB 93 | New York Yankees,87,75,0.537,6.0,2015,MLB 94 | Baltimore Orioles,81,81,0.5,12.0,2015,MLB 95 | Tampa Bay Rays,80,82,0.494,13.0,2015,MLB 96 | Boston Red Sox,78,84,0.48100000000000004,15.0,2015,MLB 97 | Kansas City Royals,95,67,0.586,--,2015,MLB 98 | Minnesota Twins,83,79,0.512,12.0,2015,MLB 99 | Cleveland Indians,81,80,0.503,13.5,2015,MLB 100 | Chicago White Sox,76,86,0.469,19.0,2015,MLB 101 | Detroit Tigers,74,87,0.46,20.5,2015,MLB 102 | Texas Rangers,88,74,0.5429999999999999,--,2015,MLB 103 | Houston Astros,86,76,0.531,2.0,2015,MLB 104 | Los Angeles Angels of Anaheim,85,77,0.525,3.0,2015,MLB 105 | Seattle Mariners,76,86,0.469,12.0,2015,MLB 106 | Oakland Athletics,68,94,0.42,20.0,2015,MLB 107 | New York Mets,90,72,0.556,--,2015,MLB 108 | Washington Nationals,83,79,0.512,7.0,2015,MLB 109 | Miami Marlins,71,91,0.43799999999999994,19.0,2015,MLB 110 | Atlanta Braves,67,95,0.414,23.0,2015,MLB 111 | Philadelphia Phillies,63,99,0.389,27.0,2015,MLB 112 | St. 
Louis Cardinals,100,62,0.617,--,2015,MLB 113 | Pittsburgh Pirates,98,64,0.605,2.0,2015,MLB 114 | Chicago Cubs,97,65,0.599,3.0,2015,MLB 115 | Milwaukee Brewers,68,94,0.42,32.0,2015,MLB 116 | Cincinnati Reds,64,98,0.395,36.0,2015,MLB 117 | Los Angeles Dodgers,92,70,0.568,--,2015,MLB 118 | San Francisco Giants,84,78,0.519,8.0,2015,MLB 119 | Arizona Diamondbacks,79,83,0.488,13.0,2015,MLB 120 | San Diego Padres,74,88,0.457,18.0,2015,MLB 121 | Colorado Rockies,68,94,0.42,24.0,2015,MLB 122 | Baltimore Orioles,96,66,0.593,--,2014,MLB 123 | New York Yankees,84,78,0.519,12.0,2014,MLB 124 | Toronto Blue Jays,83,79,0.512,13.0,2014,MLB 125 | Tampa Bay Rays,77,85,0.475,19.0,2014,MLB 126 | Boston Red Sox,71,91,0.43799999999999994,25.0,2014,MLB 127 | Detroit Tigers,90,72,0.556,--,2014,MLB 128 | Kansas City Royals,89,73,0.5489999999999999,1.0,2014,MLB 129 | Cleveland Indians,85,77,0.525,5.0,2014,MLB 130 | Chicago White Sox,73,89,0.451,17.0,2014,MLB 131 | Minnesota Twins,70,92,0.43200000000000005,20.0,2014,MLB 132 | Los Angeles Angels of Anaheim,98,64,0.605,--,2014,MLB 133 | Oakland Athletics,88,74,0.5429999999999999,10.0,2014,MLB 134 | Seattle Mariners,87,75,0.537,11.0,2014,MLB 135 | Houston Astros,70,92,0.43200000000000005,28.0,2014,MLB 136 | Texas Rangers,67,95,0.414,31.0,2014,MLB 137 | Washington Nationals,96,66,0.593,--,2014,MLB 138 | Atlanta Braves,79,83,0.488,17.0,2014,MLB 139 | New York Mets,79,83,0.488,17.0,2014,MLB 140 | Miami Marlins,77,85,0.475,19.0,2014,MLB 141 | Philadelphia Phillies,73,89,0.451,23.0,2014,MLB 142 | St. 
Louis Cardinals,90,72,0.556,--,2014,MLB 143 | Pittsburgh Pirates,88,74,0.5429999999999999,2.0,2014,MLB 144 | Milwaukee Brewers,82,80,0.506,8.0,2014,MLB 145 | Cincinnati Reds,76,86,0.469,14.0,2014,MLB 146 | Chicago Cubs,73,89,0.451,17.0,2014,MLB 147 | Los Angeles Dodgers,94,68,0.58,--,2014,MLB 148 | San Francisco Giants,88,74,0.5429999999999999,6.0,2014,MLB 149 | San Diego Padres,77,85,0.475,17.0,2014,MLB 150 | Colorado Rockies,66,96,0.40700000000000003,28.0,2014,MLB 151 | Arizona Diamondbacks,64,98,0.395,30.0,2014,MLB 152 | -------------------------------------------------------------------------------- /Assignment_1/assignment1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "a7460f3e3c42534125a0802936889559", 10 | "grade": false, 11 | "grade_id": "cell-fa48e7f1b94baa5b", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | "source": [ 18 | "# Assignment 1\n", 19 | "For this assignment you are welcomed to use other regex resources such a regex \"cheat sheets\" you find on the web.\n", 20 | "\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "deletable": false, 27 | "editable": false, 28 | "nbgrader": { 29 | "checksum": "d17f561e3c6c08092810b982d085f5be", 30 | "grade": false, 31 | "grade_id": "cell-d4da7eb9acee2a6d", 32 | "locked": true, 33 | "schema_version": 1, 34 | "solution": false 35 | } 36 | }, 37 | "source": [ 38 | "Before start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. 
" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": { 45 | "deletable": false, 46 | "editable": false, 47 | "nbgrader": { 48 | "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1", 49 | "grade": false, 50 | "grade_id": "cell-4a96535829224b3f", 51 | "locked": true, 52 | "schema_version": 1, 53 | "solution": false 54 | } 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def example_word_count():\n", 59 | " # This example question requires counting words in the example_string below.\n", 60 | " example_string = \"Amy is 5 years old\"\n", 61 | " \n", 62 | " # YOUR CODE HERE.\n", 63 | " # You should write your solution here, and return your result, you can comment out or delete the\n", 64 | " # NotImplementedError below.\n", 65 | " result = example_string.split(\" \")\n", 66 | " return len(result)\n", 67 | "\n", 68 | " #raise NotImplementedError()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Part A\n", 76 | "\n", 77 | "Find a list of all of the names in the following string using regex." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 2, 83 | "metadata": { 84 | "deletable": false, 85 | "nbgrader": { 86 | "checksum": "29bc8c161c0e246c1e3ef4820cc164f7", 87 | "grade": false, 88 | "grade_id": "names", 89 | "locked": false, 90 | "schema_version": 1, 91 | "solution": true 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "import re\n", 97 | "def names():\n", 98 | " simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. 
\n", 99 | " Ruth and Peter, their parents, have 3 kids.\"\"\"\n", 100 | " \n", 101 | " return re.findall('[A-Z][a-z]+', simple_string)\n", 102 | "\n", 103 | " # YOUR CODE HERE\n", 104 | " # raise NotImplementedError()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "metadata": { 111 | "deletable": false, 112 | "editable": false, 113 | "nbgrader": { 114 | "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4", 115 | "grade": true, 116 | "grade_id": "correct_names", 117 | "locked": false, 118 | "points": 1, 119 | "schema_version": 1, 120 | "solution": false 121 | } 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "assert len(names()) == 4, \"There are four names in the simple_string\"\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "deletable": false, 132 | "editable": false, 133 | "nbgrader": { 134 | "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c", 135 | "grade": false, 136 | "grade_id": "cell-ed64e3464ddd7ba7", 137 | "locked": true, 138 | "schema_version": 1, 139 | "solution": false 140 | } 141 | }, 142 | "source": [ 143 | "## Part B\n", 144 | "\n", 145 | "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n", 146 | "a class. Create a regex to generate a list of just those students who received a B in the course." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "metadata": { 153 | "deletable": false, 154 | "nbgrader": { 155 | "checksum": "e977a1df674e9fa684e6d172aec92824", 156 | "grade": false, 157 | "grade_id": "grades", 158 | "locked": false, 159 | "schema_version": 1, 160 | "solution": true 161 | } 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "import re\n", 166 | "def grades():\n", 167 | " with open (\"assets/grades.txt\", \"r\") as file:\n", 168 | " grades = file.read()\n", 169 | " return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n", 170 | " # YOUR CODE HERE\n", 171 | " #raise NotImplementedError()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 5, 177 | "metadata": { 178 | "deletable": false, 179 | "editable": false, 180 | "nbgrader": { 181 | "checksum": "e0bcc452d60fc45259e58d3116d25477", 182 | "grade": true, 183 | "grade_id": "correct_grades", 184 | "locked": false, 185 | "points": 1, 186 | "schema_version": 1, 187 | "solution": false 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "assert len(grades()) == 16\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "deletable": false, 199 | "editable": false, 200 | "nbgrader": { 201 | "checksum": "36e3e2a3a3e29fa7b823d22476392320", 202 | "grade": false, 203 | "grade_id": "cell-e253518e37d33f0c", 204 | "locked": true, 205 | "schema_version": 1, 206 | "solution": false 207 | } 208 | }, 209 | "source": [ 210 | "## Part C\n", 211 | "\n", 212 | "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n", 213 | "* a host (e.g., '146.204.224.152') \n", 214 | "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! 
In this case, use '-' as the value for the username.**)\n", 215 | "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n", 216 | "* the post request type (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n", 217 | "\n", 218 | "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n", 219 | "```\n", 220 | "example_dict = {\"host\":\"146.204.224.152\", \n", 221 | " \"user_name\":\"feest6811\", \n", 222 | " \"time\":\"21/Jun/2019:15:45:24 -0700\",\n", 223 | " \"request\":\"POST /incentivize HTTP/1.1\"}\n", 224 | "```" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": { 231 | "deletable": false, 232 | "nbgrader": { 233 | "checksum": "c04017e59e48b2f4c77bf425ed84b356", 234 | "grade": false, 235 | "grade_id": "logs", 236 | "locked": false, 237 | "schema_version": 1, 238 | "solution": true 239 | } 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "import re\n", 244 | "def logs():\n", 245 | " with open(\"assets/logdata.txt\", \"r\") as file:\n", 246 | " logdata = file.read()\n", 247 | " req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n", 248 | " \n", 249 | " l = []\n", 250 | " for i in req:\n", 251 | " l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n", 252 | " return l\n", 253 | " # YOUR CODE HERE\n", 254 | " #raise NotImplementedError()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 7, 260 | "metadata": { 261 | "deletable": false, 262 | "editable": false, 263 | "nbgrader": { 264 | "checksum": "1fd5f2cca190d37c667fb189352540d3", 265 | "grade": true, 266 | "grade_id": "cell-correct_logs", 267 | "locked": true, 268 | "points": 1, 269 | "schema_version": 1, 270 | "solution": false 271 | } 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "assert len(logs()) == 979\n", 276 | "\n", 277 | 
"one_item={'host': '146.204.224.152',\n", 278 | " 'user_name': 'feest6811',\n", 279 | " 'time': '21/Jun/2019:15:45:24 -0700',\n", 280 | " 'request': 'POST /incentivize HTTP/1.1'}\n", 281 | "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "coursera": { 294 | "schema_names": [ 295 | "mooc_adswpy_1_v2_assignment1" 296 | ] 297 | }, 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.7.6" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 4 318 | } 319 | -------------------------------------------------------------------------------- /Assignment_1/.ipynb_checkpoints/assignment1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "a7460f3e3c42534125a0802936889559", 10 | "grade": false, 11 | "grade_id": "cell-fa48e7f1b94baa5b", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | "source": [ 18 | "# Assignment 1\n", 19 | "For this assignment you are welcomed to use other regex resources such a regex \"cheat sheets\" you find on the web.\n", 20 | "\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "deletable": false, 27 | "editable": false, 28 | "nbgrader": { 29 | "checksum": "d17f561e3c6c08092810b982d085f5be", 30 
| "grade": false, 31 | "grade_id": "cell-d4da7eb9acee2a6d", 32 | "locked": true, 33 | "schema_version": 1, 34 | "solution": false 35 | } 36 | }, 37 | "source": [ 38 | "Before start working on the problems, here is a small example to help you understand how to write your own answers. In short, the solution should be written within the function body given, and the final result should be returned. Then the autograder will try to call the function and validate your returned result accordingly. " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": { 45 | "deletable": false, 46 | "editable": false, 47 | "nbgrader": { 48 | "checksum": "7eeb5e7d0f0e0137caed9f3b5cb925b1", 49 | "grade": false, 50 | "grade_id": "cell-4a96535829224b3f", 51 | "locked": true, 52 | "schema_version": 1, 53 | "solution": false 54 | } 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def example_word_count():\n", 59 | " # This example question requires counting words in the example_string below.\n", 60 | " example_string = \"Amy is 5 years old\"\n", 61 | " \n", 62 | " # YOUR CODE HERE.\n", 63 | " # You should write your solution here, and return your result, you can comment out or delete the\n", 64 | " # NotImplementedError below.\n", 65 | " result = example_string.split(\" \")\n", 66 | " return len(result)\n", 67 | "\n", 68 | " #raise NotImplementedError()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Part A\n", 76 | "\n", 77 | "Find a list of all of the names in the following string using regex." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 2, 83 | "metadata": { 84 | "deletable": false, 85 | "nbgrader": { 86 | "checksum": "29bc8c161c0e246c1e3ef4820cc164f7", 87 | "grade": false, 88 | "grade_id": "names", 89 | "locked": false, 90 | "schema_version": 1, 91 | "solution": true 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "import re\n", 97 | "def names():\n", 98 | " simple_string = \"\"\"Amy is 5 years old, and her sister Mary is 2 years old. \n", 99 | " Ruth and Peter, their parents, have 3 kids.\"\"\"\n", 100 | " \n", 101 | " return re.findall('[A-Z][a-z]+', simple_string)\n", 102 | "\n", 103 | " # YOUR CODE HERE\n", 104 | " # raise NotImplementedError()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "metadata": { 111 | "deletable": false, 112 | "editable": false, 113 | "nbgrader": { 114 | "checksum": "ed5c09ac57f7d98130d5abc557f6d6c4", 115 | "grade": true, 116 | "grade_id": "correct_names", 117 | "locked": false, 118 | "points": 1, 119 | "schema_version": 1, 120 | "solution": false 121 | } 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "assert len(names()) == 4, \"There are four names in the simple_string\"\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "deletable": false, 132 | "editable": false, 133 | "nbgrader": { 134 | "checksum": "77b3d100c47e9e41d98f82dfeb7eba9c", 135 | "grade": false, 136 | "grade_id": "cell-ed64e3464ddd7ba7", 137 | "locked": true, 138 | "schema_version": 1, 139 | "solution": false 140 | } 141 | }, 142 | "source": [ 143 | "## Part B\n", 144 | "\n", 145 | "The dataset file in [assets/grades.txt](assets/grades.txt) contains a line separated list of people with their grade in \n", 146 | "a class. Create a regex to generate a list of just those students who received a B in the course." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "metadata": { 153 | "deletable": false, 154 | "nbgrader": { 155 | "checksum": "e977a1df674e9fa684e6d172aec92824", 156 | "grade": false, 157 | "grade_id": "grades", 158 | "locked": false, 159 | "schema_version": 1, 160 | "solution": true 161 | } 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "import re\n", 166 | "def grades():\n", 167 | " with open (\"assets/grades.txt\", \"r\") as file:\n", 168 | " grades = file.read()\n", 169 | " return re.findall('([A-Z]\\S+ [A-Z]\\S+): B', grades)\n", 170 | " # YOUR CODE HERE\n", 171 | " #raise NotImplementedError()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 5, 177 | "metadata": { 178 | "deletable": false, 179 | "editable": false, 180 | "nbgrader": { 181 | "checksum": "e0bcc452d60fc45259e58d3116d25477", 182 | "grade": true, 183 | "grade_id": "correct_grades", 184 | "locked": false, 185 | "points": 1, 186 | "schema_version": 1, 187 | "solution": false 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "assert len(grades()) == 16\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "deletable": false, 199 | "editable": false, 200 | "nbgrader": { 201 | "checksum": "36e3e2a3a3e29fa7b823d22476392320", 202 | "grade": false, 203 | "grade_id": "cell-e253518e37d33f0c", 204 | "locked": true, 205 | "schema_version": 1, 206 | "solution": false 207 | } 208 | }, 209 | "source": [ 210 | "## Part C\n", 211 | "\n", 212 | "Consider the standard web log file in [assets/logdata.txt](assets/logdata.txt). This file records the access a user makes when visiting a web page (like this one!). Each line of the log has the following items:\n", 213 | "* a host (e.g., '146.204.224.152') \n", 214 | "* a user_name (e.g., 'feest6811' **note: sometimes the user name is missing! 
In this case, use '-' as the value for the username.**)\n", 215 | "* the time a request was made (e.g., '21/Jun/2019:15:45:24 -0700')\n", 216 | "* the post request type (e.g., 'POST /incentivize HTTP/1.1' **note: not everything is a POST!**)\n", 217 | "\n", 218 | "Your task is to convert this into a list of dictionaries, where each dictionary looks like the following:\n", 219 | "```\n", 220 | "example_dict = {\"host\":\"146.204.224.152\", \n", 221 | " \"user_name\":\"feest6811\", \n", 222 | " \"time\":\"21/Jun/2019:15:45:24 -0700\",\n", 223 | " \"request\":\"POST /incentivize HTTP/1.1\"}\n", 224 | "```" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 6, 230 | "metadata": { 231 | "deletable": false, 232 | "nbgrader": { 233 | "checksum": "c04017e59e48b2f4c77bf425ed84b356", 234 | "grade": false, 235 | "grade_id": "logs", 236 | "locked": false, 237 | "schema_version": 1, 238 | "solution": true 239 | } 240 | }, 241 | "outputs": [], 242 | "source": [ 243 | "import re\n", 244 | "def logs():\n", 245 | " with open(\"assets/logdata.txt\", \"r\") as file:\n", 246 | " logdata = file.read()\n", 247 | " req = re.findall('(\\d+[.]\\d+[.]\\d+[.]\\d+) - ([a-z-]\\S*) [\\[](\\S+ -0700)\\] \"([A-Z]\\S+ \\/\\S+ HTTP\\/[0-9.]+)',logdata)\n", 248 | " \n", 249 | " l = []\n", 250 | " for i in req:\n", 251 | " l.append({'host': i[0],'user_name': i[1],'time': i[2],'request': i[3]})\n", 252 | " return l\n", 253 | " # YOUR CODE HERE\n", 254 | " #raise NotImplementedError()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 7, 260 | "metadata": { 261 | "deletable": false, 262 | "editable": false, 263 | "nbgrader": { 264 | "checksum": "1fd5f2cca190d37c667fb189352540d3", 265 | "grade": true, 266 | "grade_id": "cell-correct_logs", 267 | "locked": true, 268 | "points": 1, 269 | "schema_version": 1, 270 | "solution": false 271 | } 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "assert len(logs()) == 979\n", 276 | "\n", 277 | 
"one_item={'host': '146.204.224.152',\n", 278 | " 'user_name': 'feest6811',\n", 279 | " 'time': '21/Jun/2019:15:45:24 -0700',\n", 280 | " 'request': 'POST /incentivize HTTP/1.1'}\n", 281 | "assert one_item in logs(), \"Sorry, this item should be in the log results, check your formating\"\n" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "coursera": { 294 | "schema_names": [ 295 | "mooc_adswpy_1_v2_assignment1" 296 | ] 297 | }, 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.7.6" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 4 318 | } 319 | -------------------------------------------------------------------------------- /Assignment_4/assets/nba.csv: -------------------------------------------------------------------------------- 1 | team,W,L,W/L%,GB,PS/G,PA/G,SRS,year,League 2 | Toronto Raptors* (1),59,23,0.72,—,111.7,103.9,7.29,2018,NBA 3 | Boston Celtics* (2),55,27,0.6709999999999999,4.0,104.0,100.4,3.23,2018,NBA 4 | Philadelphia 76ers* (3),52,30,0.634,7.0,109.8,105.3,4.3,2018,NBA 5 | Cleveland Cavaliers* (4),50,32,0.61,9.0,110.9,109.9,0.59,2018,NBA 6 | Indiana Pacers* (5),48,34,0.585,11.0,105.6,104.2,1.18,2018,NBA 7 | Miami Heat* (6),44,38,0.537,15.0,103.4,102.9,0.15,2018,NBA 8 | Milwaukee Bucks* (7),44,38,0.537,15.0,106.5,106.8,-0.45,2018,NBA 9 | Washington Wizards* (8),43,39,0.524,16.0,106.6,106.0,0.53,2018,NBA 10 | Detroit Pistons (9),39,43,0.47600000000000003,20.0,103.8,103.9,-0.26,2018,NBA 11 | Charlotte Hornets 
(10),36,46,0.439,23.0,108.2,108.0,0.07,2018,NBA 12 | New York Knicks (11),29,53,0.354,30.0,104.5,108.0,-3.53,2018,NBA 13 | Brooklyn Nets (12),28,54,0.341,31.0,106.6,110.3,-3.67,2018,NBA 14 | Chicago Bulls (13),27,55,0.32899999999999996,32.0,102.9,110.0,-6.84,2018,NBA 15 | Orlando Magic (14),25,57,0.305,34.0,103.4,108.2,-4.92,2018,NBA 16 | Atlanta Hawks (15),24,58,0.293,35.0,103.4,108.8,-5.3,2018,NBA 17 | Houston Rockets* (1),65,17,0.7929999999999999,—,112.4,103.9,8.21,2018,NBA 18 | Golden State Warriors* (2),58,24,0.7070000000000001,7.0,113.5,107.5,5.79,2018,NBA 19 | Portland Trail Blazers* (3),49,33,0.598,16.0,105.6,103.0,2.6,2018,NBA 20 | Oklahoma City Thunder* (4),48,34,0.585,17.0,107.9,104.4,3.42,2018,NBA 21 | Utah Jazz* (5),48,34,0.585,17.0,104.1,99.8,4.47,2018,NBA 22 | New Orleans Pelicans* (6),48,34,0.585,17.0,111.7,110.4,1.48,2018,NBA 23 | San Antonio Spurs* (7),47,35,0.573,18.0,102.7,99.8,2.89,2018,NBA 24 | Minnesota Timberwolves* (8),47,35,0.573,18.0,109.5,107.3,2.35,2018,NBA 25 | Denver Nuggets (9),46,36,0.561,19.0,110.0,108.5,1.57,2018,NBA 26 | Los Angeles Clippers (10),42,40,0.512,23.0,109.0,109.0,0.15,2018,NBA 27 | Los Angeles Lakers (11),35,47,0.42700000000000005,30.0,108.1,109.6,-1.44,2018,NBA 28 | Sacramento Kings (12),27,55,0.32899999999999996,38.0,98.8,105.8,-6.6,2018,NBA 29 | Dallas Mavericks (13),24,58,0.293,41.0,102.3,105.4,-2.7,2018,NBA 30 | Memphis Grizzlies (14),22,60,0.268,43.0,99.3,105.5,-5.81,2018,NBA 31 | Phoenix Suns (15),21,61,0.256,44.0,103.9,113.3,-8.8,2018,NBA 32 | Boston Celtics* (1),53,29,0.6459999999999999,—,108.0,105.4,2.25,2017,NBA 33 | Cleveland Cavaliers* (2),51,31,0.622,2.0,110.3,107.2,2.87,2017,NBA 34 | Toronto Raptors* (3),51,31,0.622,2.0,106.9,102.6,3.65,2017,NBA 35 | Washington Wizards* (4),49,33,0.598,4.0,109.2,107.4,1.36,2017,NBA 36 | Atlanta Hawks* (5),43,39,0.524,10.0,103.2,104.0,-1.23,2017,NBA 37 | Milwaukee Bucks* (6),42,40,0.512,11.0,103.6,103.8,-0.45,2017,NBA 38 | Indiana Pacers* 
(7),42,40,0.512,11.0,105.1,105.3,-0.64,2017,NBA 39 | Chicago Bulls* (8),41,41,0.5,12.0,102.9,102.4,0.03,2017,NBA 40 | Miami Heat (9),41,41,0.5,12.0,103.2,102.1,0.77,2017,NBA 41 | Detroit Pistons (10),37,45,0.451,16.0,101.3,102.5,-1.29,2017,NBA 42 | Charlotte Hornets (11),36,46,0.439,17.0,104.9,104.7,-0.07,2017,NBA 43 | New York Knicks (12),31,51,0.37799999999999995,22.0,104.3,108.0,-3.87,2017,NBA 44 | Orlando Magic (13),29,53,0.354,24.0,101.1,107.6,-6.61,2017,NBA 45 | Philadelphia 76ers (14),28,54,0.341,25.0,102.4,108.1,-5.83,2017,NBA 46 | Brooklyn Nets (15),20,62,0.244,33.0,105.8,112.5,-6.74,2017,NBA 47 | Golden State Warriors* (1),67,15,0.8170000000000001,—,115.9,104.3,11.35,2017,NBA 48 | San Antonio Spurs* (2),61,21,0.7440000000000001,6.0,105.3,98.1,7.13,2017,NBA 49 | Houston Rockets* (3),55,27,0.6709999999999999,12.0,115.3,109.6,5.84,2017,NBA 50 | Los Angeles Clippers* (4),51,31,0.622,16.0,108.7,104.4,4.42,2017,NBA 51 | Utah Jazz* (5),51,31,0.622,16.0,100.7,96.8,4.0,2017,NBA 52 | Oklahoma City Thunder* (6),47,35,0.573,20.0,106.6,105.8,1.14,2017,NBA 53 | Memphis Grizzlies* (7),43,39,0.524,24.0,100.5,100.0,0.96,2017,NBA 54 | Portland Trail Blazers* (8),41,41,0.5,26.0,107.9,108.5,-0.23,2017,NBA 55 | Denver Nuggets (9),40,42,0.488,27.0,111.7,111.2,0.7,2017,NBA 56 | New Orleans Pelicans (10),34,48,0.415,33.0,104.3,106.4,-1.69,2017,NBA 57 | Dallas Mavericks (11),33,49,0.402,34.0,97.9,100.8,-2.53,2017,NBA 58 | Sacramento Kings (12),32,50,0.39,35.0,102.8,106.7,-3.29,2017,NBA 59 | Minnesota Timberwolves (13),31,51,0.37799999999999995,36.0,105.6,106.7,-0.64,2017,NBA 60 | Los Angeles Lakers (14),26,56,0.317,41.0,104.6,111.5,-6.29,2017,NBA 61 | Phoenix Suns (15),24,58,0.293,43.0,107.7,113.3,-5.14,2017,NBA 62 | Cleveland Cavaliers* (1),57,25,0.695,—,104.3,98.3,5.45,2016,NBA 63 | Toronto Raptors* (2),56,26,0.6829999999999999,1.0,102.7,98.2,4.08,2016,NBA 64 | Miami Heat* (3),48,34,0.585,9.0,100.0,98.4,1.5,2016,NBA 65 | Atlanta Hawks* 
(4),48,34,0.585,9.0,102.8,99.2,3.49,2016,NBA 66 | Boston Celtics* (5),48,34,0.585,9.0,105.7,102.5,2.84,2016,NBA 67 | Charlotte Hornets* (6),48,34,0.585,9.0,103.4,100.7,2.36,2016,NBA 68 | Indiana Pacers* (7),45,37,0.5489999999999999,12.0,102.2,100.5,1.62,2016,NBA 69 | Detroit Pistons* (8),44,38,0.537,13.0,102.0,101.4,0.43,2016,NBA 70 | Chicago Bulls (9),42,40,0.512,15.0,101.6,103.1,-1.46,2016,NBA 71 | Washington Wizards (10),41,41,0.5,16.0,104.1,104.6,-0.5,2016,NBA 72 | Orlando Magic (11),35,47,0.42700000000000005,22.0,102.1,103.7,-1.68,2016,NBA 73 | Milwaukee Bucks (12),33,49,0.402,24.0,99.0,103.2,-3.98,2016,NBA 74 | New York Knicks (13),32,50,0.39,25.0,98.4,101.1,-2.74,2016,NBA 75 | Brooklyn Nets (14),21,61,0.256,36.0,98.6,106.0,-7.12,2016,NBA 76 | Philadelphia 76ers (15),10,72,0.122,47.0,97.4,107.6,-9.92,2016,NBA 77 | Golden State Warriors* (1),73,9,0.89,—,114.9,104.1,10.38,2016,NBA 78 | San Antonio Spurs* (2),67,15,0.8170000000000001,6.0,103.5,92.9,10.28,2016,NBA 79 | Oklahoma City Thunder* (3),55,27,0.6709999999999999,18.0,110.2,102.9,7.09,2016,NBA 80 | Los Angeles Clippers* (4),53,29,0.6459999999999999,20.0,104.5,100.2,4.13,2016,NBA 81 | Portland Trail Blazers* (5),44,38,0.537,29.0,105.1,104.3,0.98,2016,NBA 82 | Dallas Mavericks* (6),42,40,0.512,31.0,102.3,102.6,-0.02,2016,NBA 83 | Memphis Grizzlies* (7),42,40,0.512,31.0,99.1,101.3,-2.14,2016,NBA 84 | Houston Rockets* (8),41,41,0.5,32.0,106.5,106.4,0.34,2016,NBA 85 | Utah Jazz (9),40,42,0.488,33.0,97.7,95.9,1.84,2016,NBA 86 | Sacramento Kings (10),33,49,0.402,40.0,106.6,109.1,-2.32,2016,NBA 87 | Denver Nuggets (10),33,49,0.402,40.0,101.9,105.0,-2.81,2016,NBA 88 | New Orleans Pelicans (12),30,52,0.366,43.0,102.7,106.5,-3.56,2016,NBA 89 | Minnesota Timberwolves (13),29,53,0.354,44.0,102.4,106.0,-3.38,2016,NBA 90 | Phoenix Suns (14),23,59,0.28,50.0,100.9,107.5,-6.32,2016,NBA 91 | Los Angeles Lakers (15),17,65,0.207,56.0,97.3,106.9,-8.92,2016,NBA 92 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic 
Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NBA 93 | Toronto Raptors* (4),49,33,.598,—,104.0,100.9,2.45,2015,NBA 94 | Boston Celtics* (7),40,42,.488,9.0,101.4,101.2,-0.40,2015,NBA 95 | Brooklyn Nets* (8),38,44,.463,11.0,98.0,100.9,-3.13,2015,NBA 96 | Philadelphia 76ers (14),18,64,.220,31.0,92.0,101.0,-9.04,2015,NBA 97 | New York Knicks (15),17,65,.207,32.0,91.9,101.2,-9.50,2015,NBA 98 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NBA 99 | Cleveland Cavaliers* (2),53,29,.646,—,103.1,98.7,4.08,2015,NBA 100 | Chicago Bulls* (3),50,32,.610,3.0,100.8,97.8,2.54,2015,NBA 101 | Milwaukee Bucks* (6),41,41,.500,12.0,97.8,97.4,-0.09,2015,NBA 102 | Indiana Pacers (9),38,44,.463,15.0,97.3,97.0,-0.23,2015,NBA 103 | Detroit Pistons (12),32,50,.390,21.0,98.5,99.5,-1.39,2015,NBA 104 | Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2015,NBA 105 | Atlanta Hawks* (1),60,22,.732,—,102.5,97.1,4.75,2015,NBA 106 | Washington Wizards* (5),46,36,.561,14.0,98.5,97.8,0.17,2015,NBA 107 | Miami Heat (10),37,45,.451,23.0,94.7,97.3,-2.92,2015,NBA 108 | Charlotte Hornets (11),33,49,.402,27.0,94.2,97.3,-3.44,2015,NBA 109 | Orlando Magic (13),25,57,.305,35.0,95.7,101.4,-5.87,2015,NBA 110 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2015,NBA 111 | Portland Trail Blazers* (4),51,31,.622,—,102.8,98.6,4.41,2015,NBA 112 | Oklahoma City Thunder (9),45,37,.549,6.0,104.0,101.8,2.47,2015,NBA 113 | Utah Jazz (11),38,44,.463,13.0,95.1,94.9,0.71,2015,NBA 114 | Denver Nuggets (12),30,52,.366,21.0,101.5,105.0,-3.07,2015,NBA 115 | Minnesota Timberwolves (15),16,66,.195,35.0,97.8,106.5,-8.00,2015,NBA 116 | Pacific Division,Pacific Division,Pacific Division,Pacific 
Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NBA 117 | Golden State Warriors* (1),67,15,.817,—,110.0,99.9,10.01,2015,NBA 118 | Los Angeles Clippers* (2),56,26,.683,11.0,106.7,100.1,6.80,2015,NBA 119 | Phoenix Suns (10),39,43,.476,28.0,102.4,103.3,-0.38,2015,NBA 120 | Sacramento Kings (13),29,53,.354,38.0,101.3,105.0,-3.07,2015,NBA 121 | Los Angeles Lakers (14),21,61,.256,46.0,98.5,105.3,-6.17,2015,NBA 122 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2015,NBA 123 | Houston Rockets* (2),56,26,.683,—,103.9,100.5,3.82,2015,NBA 124 | San Antonio Spurs* (5),55,27,.671,1.0,103.2,97.0,6.34,2015,NBA 125 | Memphis Grizzlies* (5),55,27,.671,1.0,98.3,95.1,3.62,2015,NBA 126 | Dallas Mavericks* (7),50,32,.610,6.0,105.2,102.3,3.36,2015,NBA 127 | New Orleans Pelicans* (8),45,37,.549,11.0,99.4,98.6,1.13,2015,NBA 128 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NBA 129 | Toronto Raptors* (3),48,34,.585,—,101.3,98.0,2.55,2014,NBA 130 | Brooklyn Nets* (6),44,38,.537,4.0,98.5,99.5,-1.58,2014,NBA 131 | New York Knicks (9),37,45,.451,11.0,98.6,99.4,-1.40,2014,NBA 132 | Boston Celtics (12),25,57,.305,23.0,96.2,100.7,-4.97,2014,NBA 133 | Philadelphia 76ers (14),19,63,.232,29.0,99.5,109.9,-10.66,2014,NBA 134 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NBA 135 | Indiana Pacers* (1),56,26,.683,—,96.7,92.3,3.63,2014,NBA 136 | Chicago Bulls* (4),48,34,.585,8.0,93.7,91.8,1.20,2014,NBA 137 | Cleveland Cavaliers (10),33,49,.402,23.0,98.2,101.5,-3.86,2014,NBA 138 | Detroit Pistons (11),29,53,.354,27.0,101.0,104.7,-4.13,2014,NBA 139 | Milwaukee Bucks (15),15,67,.183,41.0,95.5,103.7,-8.41,2014,NBA 140 | Southeast Division,Southeast Division,Southeast 
Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,Southeast Division,2014,NBA 141 | Miami Heat* (2),54,28,.659,—,102.2,97.4,4.15,2014,NBA 142 | Washington Wizards* (5),44,38,.537,10.0,100.7,99.4,0.48,2014,NBA 143 | Charlotte Bobcats* (7),43,39,.524,11.0,96.9,97.1,-0.89,2014,NBA 144 | Atlanta Hawks* (8),38,44,.463,16.0,101.0,101.5,-0.88,2014,NBA 145 | Orlando Magic (13),23,59,.280,31.0,96.5,102.0,-5.88,2014,NBA 146 | Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,Northwest Division,2014,NBA 147 | Oklahoma City Thunder* (2),59,23,.720,—,106.2,99.8,6.66,2014,NBA 148 | Portland Trail Blazers* (4),54,28,.659,5.0,106.7,102.8,4.44,2014,NBA 149 | Minnesota Timberwolves (10),40,42,.488,19.0,106.9,104.3,3.10,2014,NBA 150 | Denver Nuggets (11),36,46,.439,23.0,104.4,106.5,-1.40,2014,NBA 151 | Utah Jazz (15),25,57,.305,34.0,95.0,102.2,-6.27,2014,NBA 152 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NBA 153 | Los Angeles Clippers* (3),57,25,.695,—,107.9,101.0,7.27,2014,NBA 154 | Golden State Warriors* (6),51,31,.622,6.0,104.3,99.5,5.15,2014,NBA 155 | Phoenix Suns (9),48,34,.585,9.0,105.2,102.6,3.02,2014,NBA 156 | Sacramento Kings (13),28,54,.341,29.0,100.5,103.4,-2.08,2014,NBA 157 | Los Angeles Lakers (14),27,55,.329,30.0,103.0,109.2,-5.33,2014,NBA 158 | Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,Southwest Division,2014,NBA 159 | San Antonio Spurs* (1),62,20,.756,—,105.4,97.6,8.00,2014,NBA 160 | Houston Rockets* (4),54,28,.659,8.0,107.7,103.1,5.06,2014,NBA 161 | Memphis Grizzlies* (7),50,32,.610,12.0,96.1,94.6,2.18,2014,NBA 162 | Dallas Mavericks* (8),49,33,.598,13.0,104.8,102.4,2.91,2014,NBA 163 | New Orleans Pelicans (12),34,48,.415,28.0,99.7,102.4,-1.98,2014,NBA 164 | 
-------------------------------------------------------------------------------- /Assignment_2/assignment2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "5a8d839ee00398fa3bd3bc58ec642beb", 10 | "grade": false, 11 | "grade_id": "cell-a839e7b47494b4c3", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | "source": [ 18 | "# Assignment 2\n", 19 | "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data users guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). **Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file).**" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "deletable": false, 26 | "editable": false, 27 | "hideCode": false, 28 | "hidePrompt": false, 29 | "nbgrader": { 30 | "checksum": "aaa5e730f40ba21c1bc94f864bad4742", 31 | "grade": false, 32 | "grade_id": "cell-58fc2e5938733f6a", 33 | "locked": true, 34 | "schema_version": 1, 35 | "solution": false 36 | } 37 | }, 38 | "source": [ 39 | "## Question 1\n", 40 | "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n", 41 | "\n", 42 | "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n", 43 | "```\n", 44 | " {\"less than high school\":0.2,\n", 45 | " \"high 
school\":0.4,\n", 46 | " \"more than high school but not college\":0.2,\n", 47 | " \"college\":0.2}\n", 48 | "```\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 1, 54 | "metadata": { 55 | "deletable": false, 56 | "nbgrader": { 57 | "checksum": "0ac58deb3f5ac988c643e903cbee7f3a", 58 | "grade": false, 59 | "grade_id": "cell-eea16d020eb52ae7", 60 | "locked": false, 61 | "schema_version": 1, 62 | "solution": true 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def proportion_of_education():\n", 68 | " # your code goes here\n", 69 | " # YOUR CODE HERE\n", 70 | " import pandas as pd\n", 71 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 72 | " n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n", 73 | " li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n", 74 | " di = {}\n", 75 | " i = 0\n", 76 | " for l in li:\n", 77 | " di[l] = n[i+1] \n", 78 | " i += 1\n", 79 | " \n", 80 | " return di\n", 81 | " #raise NotImplementedError()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 2, 87 | "metadata": { 88 | "deletable": false, 89 | "editable": false, 90 | "hideCode": false, 91 | "hidePrompt": false, 92 | "nbgrader": { 93 | "checksum": "ac5d91a24a7f72f66c25d242c3d24a50", 94 | "grade": true, 95 | "grade_id": "cell-c0eeef201366f51c", 96 | "locked": true, 97 | "points": 1, 98 | "schema_version": 1, 99 | "solution": false 100 | } 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n", 105 | "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n", 106 | "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 107 | "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 108 | "assert 
\"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 109 | "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "deletable": false, 116 | "editable": false, 117 | "nbgrader": { 118 | "checksum": "562b78b7b9b79580269be0a3bebf4b42", 119 | "grade": false, 120 | "grade_id": "cell-8fcbb64516283f52", 121 | "locked": true, 122 | "schema_version": 1, 123 | "solution": false 124 | } 125 | }, 126 | "source": [ 127 | "## Question 2\n", 128 | "\n", 129 | "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those who know did not.\n", 130 | "\n", 131 | "*This function should return a tuple in the form (use the correct numbers:*\n", 132 | "```\n", 133 | "(2.5, 0.1)\n", 134 | "```" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": { 141 | "deletable": false, 142 | "nbgrader": { 143 | "checksum": "a405d639063c4a6408365479f29c95c9", 144 | "grade": false, 145 | "grade_id": "cell-77f18c512324eabb", 146 | "locked": false, 147 | "schema_version": 1, 148 | "solution": true 149 | } 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "def average_influenza_doses():\n", 154 | " # YOUR CODE HERE\n", 155 | " \n", 156 | " import pandas as pd\n", 157 | " import numpy as np\n", 158 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 159 | " gp1 = mb[mb['CBF_01'] == 1]\n", 160 | " gp2 = mb[mb['CBF_01'] == 2]\n", 161 | " \n", 162 | " return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n", 163 | " #raise NotImplementedError()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | 
"execution_count": 4, 169 | "metadata": { 170 | "deletable": false, 171 | "editable": false, 172 | "nbgrader": { 173 | "checksum": "19be955e97fdf7162d43fbb7c2c40951", 174 | "grade": true, 175 | "grade_id": "cell-54a3ba6cff31caa7", 176 | "locked": true, 177 | "points": 1, 178 | "schema_version": 1, 179 | "solution": false 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": { 190 | "deletable": false, 191 | "editable": false, 192 | "nbgrader": { 193 | "checksum": "e10e2163f5957a0c398ef4f0b76b4efe", 194 | "grade": false, 195 | "grade_id": "cell-f63377f3c97aa7c4", 196 | "locked": true, 197 | "schema_version": 1, 198 | "solution": false 199 | } 200 | }, 201 | "source": [ 202 | "## Question 3\n", 203 | "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chicken pox. Return results by sex. \n", 204 | "\n", 205 | "*This function should return a dictionary in the form of (use the correct numbers):* \n", 206 | "```\n", 207 | " {\"male\":0.2,\n", 208 | " \"female\":0.4}\n", 209 | "```\n", 210 | "\n", 211 | "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`." 
212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "deletable": false, 219 | "nbgrader": { 220 | "checksum": "b4d1b58acae002bc73eb0b19f95bc4af", 221 | "grade": false, 222 | "grade_id": "cell-a0a9e6fe67698006", 223 | "locked": false, 224 | "schema_version": 1, 225 | "solution": true 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "def chickenpox_by_sex():\n", 231 | " # YOUR CODE HERE\n", 232 | " import pandas as pd\n", 233 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 234 | " \n", 235 | " v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n", 236 | " v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n", 237 | " v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n", 238 | " v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n", 239 | " \n", 240 | " rm = v1m.shape[0]/v2m.shape[0]\n", 241 | " rf = v1f.shape[0]/v2f.shape[0]\n", 242 | " r = [rm, rf]\n", 243 | " \n", 244 | " sex = ['male', 'female']\n", 245 | " d = {}\n", 246 | " i = 0\n", 247 | " for s in sex:\n", 248 | " d[s] = r[i]\n", 249 | " i+=1\n", 250 | " return d \n", 251 | " #raise NotImplementedError()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 6, 257 | "metadata": { 258 | "deletable": false, 259 | "editable": false, 260 | "nbgrader": { 261 | "checksum": "1b6a113a633c55699ae478a3a9ee9c33", 262 | "grade": true, 263 | "grade_id": "cell-c4f1714db100c865", 264 | "locked": true, 265 | "points": 1, 266 | "schema_version": 1, 267 | "solution": false 268 | } 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Question 4\n", 280 | "A correlation is a statistical relationship between two 
variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had chickenpox and the number of chickenpox (varicella) vaccine doses given.\n", 281 | "\n", 282 | "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses a child has been given of the varicella vaccine. A positive correlation (e.g., `corr > 0`) means that an increase in `had_chickenpox_column` (which means more no’s) is associated with an increase in the values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). If there is a negative correlation (e.g., `corr < 0`), it indicates that having had chickenpox is related to an increase in the number of vaccine doses.\n", 283 | "\n", 284 | "Also, `pval` is the probability of observing, purely by chance, a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` that is at least as strong as the one actually measured. A small `pval` means that the observed correlation is highly unlikely to occur by chance. In this case, `pval` should be very small (will end in `e-18` indicating a very small number).\n", 285 | "\n", 286 | "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?"
287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 7, 292 | "metadata": { 293 | "deletable": false, 294 | "nbgrader": { 295 | "checksum": "3e645859949447913cd11d30eb33cb1e", 296 | "grade": false, 297 | "grade_id": "cell-8afff07f564cf79a", 298 | "locked": false, 299 | "schema_version": 1, 300 | "solution": true 301 | } 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "def corr_chickenpox():\n", 306 | " import scipy.stats as stats\n", 307 | " import numpy as np\n", 308 | " import pandas as pd\n", 309 | " \n", 310 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 311 | " \n", 312 | " v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n", 313 | " \n", 314 | " no_yes = v1['HAD_CPOX']\n", 315 | " \n", 316 | " est_vaccine = v1['P_NUMVRC']\n", 317 | " \n", 318 | " # this is just an example dataframe\n", 319 | " df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n", 320 | " \"num_chickenpox_vaccine_column\":est_vaccine})\n", 321 | "\n", 322 | " # here is some stub code to actually run the correlation\n", 323 | " corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n", 324 | " \n", 325 | " # just return the correlation\n", 326 | " return corr\n", 327 | "\n", 328 | " # YOUR CODE HERE\n", 329 | " #raise NotImplementedError()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 8, 335 | "metadata": { 336 | "deletable": false, 337 | "editable": false, 338 | "nbgrader": { 339 | "checksum": "ac50ccb747b99f6bbcc76da017e66528", 340 | "grade": true, 341 | "grade_id": "cell-73408733533a29a5", 342 | "locked": true, 343 | "points": 1, 344 | "schema_version": 1, 345 | "solution": false 346 | } 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [] 359 | } 
360 | ], 361 | "metadata": { 362 | "anaconda-cloud": {}, 363 | "coursera": { 364 | "schema_names": [ 365 | "mooc_adswpy_1_v2_assignment2" 366 | ] 367 | }, 368 | "hide_code_all_hidden": false, 369 | "kernelspec": { 370 | "display_name": "Python 3", 371 | "language": "python", 372 | "name": "python3" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.7.6" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 1 389 | } 390 | -------------------------------------------------------------------------------- /Assignment_2/.ipynb_checkpoints/assignment2-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "5a8d839ee00398fa3bd3bc58ec642beb", 10 | "grade": false, 11 | "grade_id": "cell-a839e7b47494b4c3", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | "source": [ 18 | "# Assignment 2\n", 19 | "For this assignment you'll be looking at 2017 data on immunizations from the CDC. Your datafile for this assignment is in [assets/NISPUF17.csv](assets/NISPUF17.csv). A data users guide for this, which you'll need to map the variables in the data to the questions being asked, is available at [assets/NIS-PUF17-DUG.pdf](assets/NIS-PUF17-DUG.pdf). 
**Note: you may have to go to your Jupyter tree (click on the Coursera image) and navigate to the assignment 2 assets folder to see this PDF file).**" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "deletable": false, 26 | "editable": false, 27 | "hideCode": false, 28 | "hidePrompt": false, 29 | "nbgrader": { 30 | "checksum": "aaa5e730f40ba21c1bc94f864bad4742", 31 | "grade": false, 32 | "grade_id": "cell-58fc2e5938733f6a", 33 | "locked": true, 34 | "schema_version": 1, 35 | "solution": false 36 | } 37 | }, 38 | "source": [ 39 | "## Question 1\n", 40 | "Write a function called `proportion_of_education` which returns the proportion of children in the dataset who had a mother with the education levels equal to less than high school (<12), high school (12), more than high school but not a college graduate (>12) and college degree.\n", 41 | "\n", 42 | "*This function should return a dictionary in the form of (use the correct numbers, do not round numbers):* \n", 43 | "```\n", 44 | " {\"less than high school\":0.2,\n", 45 | " \"high school\":0.4,\n", 46 | " \"more than high school but not college\":0.2,\n", 47 | " \"college\":0.2}\n", 48 | "```\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 1, 54 | "metadata": { 55 | "deletable": false, 56 | "nbgrader": { 57 | "checksum": "0ac58deb3f5ac988c643e903cbee7f3a", 58 | "grade": false, 59 | "grade_id": "cell-eea16d020eb52ae7", 60 | "locked": false, 61 | "schema_version": 1, 62 | "solution": true 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def proportion_of_education():\n", 68 | " # your code goes here\n", 69 | " # YOUR CODE HERE\n", 70 | " import pandas as pd\n", 71 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 72 | " n = mb['EDUC1'].value_counts()/mb['EDUC1'].shape[0]\n", 73 | " li = ['less than high school', 'high school', 'more than high school but not college', 'college']\n", 74 | " di = {}\n", 75 | " i = 0\n", 76 | " for l in li:\n", 77 | " di[l] = 
n[i+1] \n", 78 | " i += 1\n", 79 | " \n", 80 | " return di\n", 81 | " #raise NotImplementedError()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 2, 87 | "metadata": { 88 | "deletable": false, 89 | "editable": false, 90 | "hideCode": false, 91 | "hidePrompt": false, 92 | "nbgrader": { 93 | "checksum": "ac5d91a24a7f72f66c25d242c3d24a50", 94 | "grade": true, 95 | "grade_id": "cell-c0eeef201366f51c", 96 | "locked": true, 97 | "points": 1, 98 | "schema_version": 1, 99 | "solution": false 100 | } 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "assert type(proportion_of_education())==type({}), \"You must return a dictionary.\"\n", 105 | "assert len(proportion_of_education()) == 4, \"You have not returned a dictionary with four items in it.\"\n", 106 | "assert \"less than high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 107 | "assert \"high school\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 108 | "assert \"more than high school but not college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n", 109 | "assert \"college\" in proportion_of_education().keys(), \"You have not returned a dictionary with the correct keys.\"\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "deletable": false, 116 | "editable": false, 117 | "nbgrader": { 118 | "checksum": "562b78b7b9b79580269be0a3bebf4b42", 119 | "grade": false, 120 | "grade_id": "cell-8fcbb64516283f52", 121 | "locked": true, 122 | "schema_version": 1, 123 | "solution": false 124 | } 125 | }, 126 | "source": [ 127 | "## Question 2\n", 128 | "\n", 129 | "Let's explore the relationship between being fed breastmilk as a child and getting a seasonal influenza vaccine from a healthcare provider. 
Return a tuple of the average number of influenza vaccines for those children we know received breastmilk as a child and those who know did not.\n", 130 | "\n", 131 | "*This function should return a tuple in the form (use the correct numbers:*\n", 132 | "```\n", 133 | "(2.5, 0.1)\n", 134 | "```" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": { 141 | "deletable": false, 142 | "nbgrader": { 143 | "checksum": "a405d639063c4a6408365479f29c95c9", 144 | "grade": false, 145 | "grade_id": "cell-77f18c512324eabb", 146 | "locked": false, 147 | "schema_version": 1, 148 | "solution": true 149 | } 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "def average_influenza_doses():\n", 154 | " # YOUR CODE HERE\n", 155 | " \n", 156 | " import pandas as pd\n", 157 | " import numpy as np\n", 158 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 159 | " gp1 = mb[mb['CBF_01'] == 1]\n", 160 | " gp2 = mb[mb['CBF_01'] == 2]\n", 161 | " \n", 162 | " return np.mean(gp1['P_NUMFLU']), np.mean(gp2['P_NUMFLU'])\n", 163 | " #raise NotImplementedError()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 4, 169 | "metadata": { 170 | "deletable": false, 171 | "editable": false, 172 | "nbgrader": { 173 | "checksum": "19be955e97fdf7162d43fbb7c2c40951", 174 | "grade": true, 175 | "grade_id": "cell-54a3ba6cff31caa7", 176 | "locked": true, 177 | "points": 1, 178 | "schema_version": 1, 179 | "solution": false 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "assert len(average_influenza_doses())==2, \"Return two values in a tuple, the first for yes and the second for no.\"\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": { 190 | "deletable": false, 191 | "editable": false, 192 | "nbgrader": { 193 | "checksum": "e10e2163f5957a0c398ef4f0b76b4efe", 194 | "grade": false, 195 | "grade_id": "cell-f63377f3c97aa7c4", 196 | "locked": true, 197 | "schema_version": 1, 198 | "solution": false 199 
| } 200 | }, 201 | "source": [ 202 | "## Question 3\n", 203 | "It would be interesting to see if there is any evidence of a link between vaccine effectiveness and sex of the child. Calculate the ratio of the number of children who contracted chickenpox but were vaccinated against it (at least one varicella dose) versus those who were vaccinated but did not contract chicken pox. Return results by sex. \n", 204 | "\n", 205 | "*This function should return a dictionary in the form of (use the correct numbers):* \n", 206 | "```\n", 207 | " {\"male\":0.2,\n", 208 | " \"female\":0.4}\n", 209 | "```\n", 210 | "\n", 211 | "Note: To aid in verification, the `chickenpox_by_sex()['female']` value the autograder is looking for starts with the digits `0.0077`." 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "deletable": false, 219 | "nbgrader": { 220 | "checksum": "b4d1b58acae002bc73eb0b19f95bc4af", 221 | "grade": false, 222 | "grade_id": "cell-a0a9e6fe67698006", 223 | "locked": false, 224 | "schema_version": 1, 225 | "solution": true 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "def chickenpox_by_sex():\n", 231 | " # YOUR CODE HERE\n", 232 | " import pandas as pd\n", 233 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 234 | " \n", 235 | " v1m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 1)]\n", 236 | " v1f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 1) & (mb['SEX'] == 2)]\n", 237 | " v2m = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 1)]\n", 238 | " v2f = mb[(mb['P_NUMVRC'] >=1) & (mb['HAD_CPOX'] == 2) & (mb['SEX'] == 2)]\n", 239 | " \n", 240 | " rm = v1m.shape[0]/v2m.shape[0]\n", 241 | " rf = v1f.shape[0]/v2f.shape[0]\n", 242 | " r = [rm, rf]\n", 243 | " \n", 244 | " sex = ['male', 'female']\n", 245 | " d = {}\n", 246 | " i = 0\n", 247 | " for s in sex:\n", 248 | " d[s] = r[i]\n", 249 | " i+=1\n", 250 | " return d \n", 251 | " #raise NotImplementedError()" 
252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 6, 257 | "metadata": { 258 | "deletable": false, 259 | "editable": false, 260 | "nbgrader": { 261 | "checksum": "1b6a113a633c55699ae478a3a9ee9c33", 262 | "grade": true, 263 | "grade_id": "cell-c4f1714db100c865", 264 | "locked": true, 265 | "points": 1, 266 | "schema_version": 1, 267 | "solution": false 268 | } 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "assert len(chickenpox_by_sex())==2, \"Return a dictionary with two items, the first for males and the second for females.\"\n" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Question 4\n", 280 | "A correlation is a statistical relationship between two variables. If we wanted to know if vaccines work, we might look at the correlation between the use of the vaccine and whether it results in prevention of the infection or disease [1]. In this question, you are to see if there is a correlation between having had the chicken pox and the number of chickenpox vaccine doses given (varicella).\n", 281 | "\n", 282 | "Some notes on interpreting the answer. The `had_chickenpox_column` is either `1` (for yes) or `2` (for no), and the `num_chickenpox_vaccine_column` is the number of doses a child has been given of the varicella vaccine. A positive correlation (e.g., `corr > 0`) means that an increase in `had_chickenpox_column` (which means more no’s) would also increase the values of `num_chickenpox_vaccine_column` (which means more doses of vaccine). If there is a negative correlation (e.g., `corr < 0`), it indicates that having had chickenpox is related to an increase in the number of vaccine doses.\n", 283 | "\n", 284 | "Also, `pval` is the probability that a correlation between `had_chickenpox_column` and `num_chickenpox_vaccine_column` at least as strong as the one observed would occur by chance alone.
A small `pval` means that the observed correlation is highly unlikely to occur by chance. In this case, `pval` should be very small (will end in `e-18` indicating a very small number).\n", 285 | "\n", 286 | "[1] This isn’t really the full picture, since we are not looking at when the dose was given. It’s possible that children had chickenpox and then their parents went to get them the vaccine. Does this dataset have the data we would need to investigate the timing of the dose?" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 7, 292 | "metadata": { 293 | "deletable": false, 294 | "nbgrader": { 295 | "checksum": "3e645859949447913cd11d30eb33cb1e", 296 | "grade": false, 297 | "grade_id": "cell-8afff07f564cf79a", 298 | "locked": false, 299 | "schema_version": 1, 300 | "solution": true 301 | } 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "def corr_chickenpox():\n", 306 | " import scipy.stats as stats\n", 307 | " import numpy as np\n", 308 | " import pandas as pd\n", 309 | " \n", 310 | " mb = pd.read_csv(\"assets/NISPUF17.csv\")\n", 311 | " \n", 312 | " v1 = mb[(mb['P_NUMVRC'] >=0) & (mb['HAD_CPOX'] <= 2)]\n", 313 | " \n", 314 | " no_yes = v1['HAD_CPOX']\n", 315 | " \n", 316 | " est_vaccine = v1['P_NUMVRC']\n", 317 | " \n", 318 | " # this is just an example dataframe\n", 319 | " df=pd.DataFrame({\"had_chickenpox_column\":no_yes,\n", 320 | " \"num_chickenpox_vaccine_column\":est_vaccine})\n", 321 | "\n", 322 | " # here is some stub code to actually run the correlation\n", 323 | " corr, pval=stats.pearsonr(df[\"had_chickenpox_column\"],df[\"num_chickenpox_vaccine_column\"])\n", 324 | " \n", 325 | " # just return the correlation\n", 326 | " return corr\n", 327 | "\n", 328 | " # YOUR CODE HERE\n", 329 | " #raise NotImplementedError()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 8, 335 | "metadata": { 336 | "deletable": false, 337 | "editable": false, 338 | "nbgrader": { 339 | "checksum": 
"ac50ccb747b99f6bbcc76da017e66528", 340 | "grade": true, 341 | "grade_id": "cell-73408733533a29a5", 342 | "locked": true, 343 | "points": 1, 344 | "schema_version": 1, 345 | "solution": false 346 | } 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "assert -1<=corr_chickenpox()<=1, \"You must return a float number between -1.0 and 1.0.\"\n" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [] 359 | } 360 | ], 361 | "metadata": { 362 | "anaconda-cloud": {}, 363 | "coursera": { 364 | "schema_names": [ 365 | "mooc_adswpy_1_v2_assignment2" 366 | ] 367 | }, 368 | "hide_code_all_hidden": false, 369 | "kernelspec": { 370 | "display_name": "Python 3", 371 | "language": "python", 372 | "name": "python3" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.7.6" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 1 389 | } 390 | -------------------------------------------------------------------------------- /Assignment_4/assets/nhl.csv: -------------------------------------------------------------------------------- 1 | team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,ROW,year,League 2 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2018,NHL 3 | Tampa Bay Lightning*,82,54,23,5,113,.689,296,236,0.66,-0.07,.634,48,2018,NHL 4 | Boston Bruins*,82,50,20,12,112,.683,270,214,0.62,-0.07,.610,47,2018,NHL 5 | Toronto Maple Leafs*,82,49,26,7,105,.640,277,232,0.49,-0.06,.567,42,2018,NHL 6 | Florida Panthers,82,44,30,8,96,.585,248,246,-0.01,-0.04,.537,41,2018,NHL 
7 | Detroit Red Wings,82,30,39,13,73,.445,217,255,-0.48,-0.01,.341,25,2018,NHL 8 | Montreal Canadiens,82,29,40,13,71,.433,209,264,-0.68,0.00,.378,27,2018,NHL 9 | Ottawa Senators,82,28,43,11,67,.409,221,291,-0.85,0.00,.372,26,2018,NHL 10 | Buffalo Sabres,82,25,45,12,62,.378,199,280,-0.98,0.01,.311,24,2018,NHL 11 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2018,NHL 12 | Washington Capitals*,82,49,26,7,105,.640,259,239,0.21,-0.04,.585,46,2018,NHL 13 | Pittsburgh Penguins*,82,47,29,6,100,.610,272,250,0.23,-0.04,.573,45,2018,NHL 14 | Philadelphia Flyers*,82,42,26,14,98,.598,251,243,0.07,-0.03,.543,40,2018,NHL 15 | Columbus Blue Jackets*,82,45,30,7,97,.591,242,230,0.11,-0.04,.537,39,2018,NHL 16 | New Jersey Devils*,82,44,29,9,97,.591,248,244,0.02,-0.03,.530,39,2018,NHL 17 | Carolina Hurricanes,82,36,35,11,83,.506,228,256,-0.35,-0.01,.439,33,2018,NHL 18 | New York Islanders,82,35,37,10,80,.488,264,296,-0.40,-0.01,.427,32,2018,NHL 19 | New York Rangers,82,34,39,9,77,.470,231,268,-0.46,-0.01,.427,31,2018,NHL 20 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2018,NHL 21 | Nashville Predators*,82,53,18,11,117,.713,267,211,0.71,0.03,.652,47,2018,NHL 22 | Winnipeg Jets*,82,52,20,10,114,.695,277,218,0.74,0.02,.622,48,2018,NHL 23 | Minnesota Wild*,82,45,26,11,101,.616,253,232,0.29,0.04,.549,42,2018,NHL 24 | Colorado Avalanche*,82,43,30,9,95,.579,257,237,0.28,0.04,.518,41,2018,NHL 25 | St. 
Louis Blues,82,44,32,6,94,.573,226,222,0.10,0.05,.518,41,2018,NHL 26 | Dallas Stars,82,42,32,8,92,.561,235,225,0.17,0.04,.506,38,2018,NHL 27 | Chicago Blackhawks,82,33,39,10,76,.463,229,256,-0.26,0.07,.409,32,2018,NHL 28 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2018,NHL 29 | Vegas Golden Knights*,82,51,24,7,109,.665,272,228,0.52,-0.01,.616,47,2018,NHL 30 | Anaheim Ducks*,82,44,25,13,101,.616,235,216,0.24,0.01,.555,40,2018,NHL 31 | San Jose Sharks*,82,45,27,10,100,.610,252,229,0.28,0.00,.537,40,2018,NHL 32 | Los Angeles Kings*,82,45,29,8,98,.598,239,203,0.44,0.00,.543,43,2018,NHL 33 | Calgary Flames,82,37,35,10,84,.512,218,248,-0.33,0.03,.470,35,2018,NHL 34 | Edmonton Oilers,82,36,40,6,78,.476,234,263,-0.32,0.03,.415,31,2018,NHL 35 | Vancouver Canucks,82,31,40,11,73,.445,218,264,-0.51,0.05,.409,31,2018,NHL 36 | Arizona Coyotes,82,29,41,12,70,.427,208,256,-0.53,0.05,.372,27,2018,NHL 37 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL 38 | Montreal Canadiens*,82,47,26,9,103,.628,226,200,0.31,-0.01,.567,44,2017,NHL 39 | Ottawa Senators*,82,44,28,10,98,.598,212,214,-0.01,0.01,.524,38,2017,NHL 40 | Boston Bruins*,82,44,31,7,95,.579,234,212,0.27,0.00,.543,42,2017,NHL 41 | Toronto Maple Leafs*,82,40,27,15,95,.579,251,242,0.11,0.00,.530,39,2017,NHL 42 | Tampa Bay Lightning,82,42,30,10,94,.573,234,227,0.09,0.01,.506,38,2017,NHL 43 | Florida Panthers,82,35,36,11,81,.494,210,237,-0.30,0.03,.433,30,2017,NHL 44 | Detroit Red Wings,82,33,36,13,79,.482,207,244,-0.41,0.04,.348,24,2017,NHL 45 | Buffalo Sabres,82,33,37,12,78,.476,201,237,-0.41,0.03,.427,31,2017,NHL 46 | Metropolitan Division,Metropolitan 
Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL 47 | Washington Capitals*,82,55,19,8,118,.720,263,182,0.99,0.00,.689,53,2017,NHL 48 | Pittsburgh Penguins*,82,50,21,11,111,.677,282,234,0.59,0.01,.616,46,2017,NHL 49 | Columbus Blue Jackets*,82,50,24,8,108,.659,249,195,0.68,0.02,.610,48,2017,NHL 50 | New York Rangers*,82,48,28,6,102,.622,256,220,0.47,0.03,.591,45,2017,NHL 51 | New York Islanders,82,41,29,12,94,.573,241,242,0.03,0.05,.512,39,2017,NHL 52 | Philadelphia Flyers,82,39,33,10,88,.537,219,236,-0.17,0.04,.463,32,2017,NHL 53 | Carolina Hurricanes,82,36,31,15,87,.530,215,236,-0.21,0.05,.457,33,2017,NHL 54 | New Jersey Devils,82,28,40,14,70,.427,183,244,-0.67,0.08,.341,25,2017,NHL 55 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL 56 | Chicago Blackhawks*,82,50,23,9,109,.665,244,213,0.32,-0.06,.591,46,2017,NHL 57 | Minnesota Wild*,82,49,25,8,106,.646,266,208,0.63,-0.08,.591,46,2017,NHL 58 | St. 
Louis Blues*,82,46,29,7,99,.604,235,218,0.17,-0.04,.561,44,2017,NHL 59 | Nashville Predators*,82,41,29,12,94,.573,240,224,0.16,-0.04,.512,39,2017,NHL 60 | Winnipeg Jets,82,40,35,7,87,.530,249,256,-0.11,-0.03,.476,37,2017,NHL 61 | Dallas Stars,82,34,37,11,79,.482,223,262,-0.48,-0.01,.421,33,2017,NHL 62 | Colorado Avalanche,82,22,56,4,48,.293,166,278,-1.32,0.05,.274,21,2017,NHL 63 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL 64 | Anaheim Ducks*,82,46,23,13,105,.640,223,200,0.24,-0.04,.561,43,2017,NHL 65 | Edmonton Oilers*,82,47,26,9,103,.628,247,212,0.37,-0.06,.579,43,2017,NHL 66 | San Jose Sharks*,82,46,29,7,99,.604,221,201,0.21,-0.03,.555,44,2017,NHL 67 | Calgary Flames*,82,45,33,4,94,.573,226,221,0.02,-0.04,.537,41,2017,NHL 68 | Los Angeles Kings,82,39,35,8,86,.524,201,205,-0.07,-0.02,.488,37,2017,NHL 69 | Arizona Coyotes,82,30,42,10,70,.427,197,260,-0.76,0.01,.341,24,2017,NHL 70 | Vancouver Canucks,82,30,43,9,69,.421,182,243,-0.73,0.01,.354,26,2017,NHL 71 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2017,NHL 72 | Florida Panthers*,82,47,26,9,103,.628,239,203,0.42,-0.02,.549,40,2017,NHL 73 | Tampa Bay Lightning*,82,46,31,5,97,.591,227,201,0.30,-0.02,.561,43,2017,NHL 74 | Detroit Red Wings*,82,41,30,11,93,.567,211,224,-0.16,0.00,.518,39,2017,NHL 75 | Boston Bruins,82,42,31,9,93,.567,240,230,0.11,-0.02,.500,38,2017,NHL 76 | Ottawa Senators,82,38,35,9,85,.518,236,247,-0.13,0.01,.463,32,2017,NHL 77 | Montreal Canadiens,82,38,38,6,82,.500,221,236,-0.18,0.00,.451,33,2017,NHL 78 | Buffalo Sabres,82,35,36,11,81,.494,201,222,-0.25,0.01,.457,33,2017,NHL 79 | Toronto Maple 
Leafs,82,29,42,11,69,.421,198,246,-0.56,0.03,.354,23,2017,NHL 80 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2017,NHL 81 | Washington Capitals*,82,56,18,8,120,.732,252,193,0.70,-0.02,.671,52,2017,NHL 82 | Pittsburgh Penguins*,82,48,26,8,104,.634,245,203,0.50,-0.01,.585,44,2017,NHL 83 | New York Rangers*,82,46,27,9,101,.616,236,217,0.24,0.01,.555,43,2017,NHL 84 | New York Islanders*,82,45,27,10,100,.610,232,216,0.19,0.00,.549,40,2017,NHL 85 | Philadelphia Flyers*,82,41,27,14,96,.585,214,218,-0.03,0.02,.530,38,2017,NHL 86 | Carolina Hurricanes,82,35,31,16,86,.524,198,226,-0.31,0.03,.445,33,2017,NHL 87 | New Jersey Devils,82,38,36,8,84,.512,184,208,-0.28,0.02,.488,36,2017,NHL 88 | Columbus Blue Jackets,82,34,40,8,76,.463,219,252,-0.38,0.03,.402,28,2017,NHL 89 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2017,NHL 90 | Dallas Stars*,82,50,23,9,109,.665,267,230,0.45,-0.01,.610,48,2017,NHL 91 | St. 
Louis Blues*,82,49,24,9,107,.652,224,201,0.28,0.00,.591,44,2017,NHL 92 | Chicago Blackhawks*,82,47,26,9,103,.628,235,209,0.32,0.00,.579,46,2017,NHL 93 | Nashville Predators*,82,41,27,14,96,.585,228,215,0.17,0.01,.488,37,2017,NHL 94 | Minnesota Wild*,82,38,33,11,87,.530,216,206,0.13,0.01,.457,35,2017,NHL 95 | Colorado Avalanche,82,39,39,4,82,.500,216,240,-0.26,0.03,.451,35,2017,NHL 96 | Winnipeg Jets,82,35,39,8,78,.476,215,239,-0.26,0.03,.427,32,2017,NHL 97 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2017,NHL 98 | Anaheim Ducks*,82,46,25,11,103,.628,218,192,0.27,-0.05,.567,43,2017,NHL 99 | Los Angeles Kings*,82,48,28,6,102,.622,225,195,0.32,-0.04,.591,46,2017,NHL 100 | San Jose Sharks*,82,46,30,6,98,.598,241,210,0.33,-0.05,.555,42,2017,NHL 101 | Arizona Coyotes,82,35,39,8,78,.476,209,245,-0.43,0.01,.427,34,2017,NHL 102 | Calgary Flames,82,35,40,7,77,.470,231,260,-0.36,-0.01,.433,33,2017,NHL 103 | Vancouver Canucks,82,31,38,13,75,.457,191,243,-0.62,0.02,.372,26,2017,NHL 104 | Edmonton Oilers,82,31,43,8,70,.427,203,245,-0.51,0.00,.372,27,2017,NHL 105 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2015,NHL 106 | Montreal Canadiens*,82,50,22,10,110,.671,221,189,0.36,-0.03,.598,43,2015,NHL 107 | Tampa Bay Lightning*,82,50,24,8,108,.659,262,211,0.57,-0.06,.622,47,2015,NHL 108 | Detroit Red Wings*,82,43,25,14,100,.610,235,221,0.14,-0.03,.561,39,2015,NHL 109 | Ottawa Senators*,82,43,26,13,99,.604,238,215,0.25,-0.04,.530,37,2015,NHL 110 | Boston Bruins,82,41,27,14,96,.585,213,211,0.01,-0.01,.537,37,2015,NHL 111 | Florida Panthers,82,38,29,15,91,.555,206,223,-0.23,-0.02,.476,30,2015,NHL 112 | Toronto Maple 
Leafs,82,30,44,8,68,.415,211,262,-0.61,0.02,.366,25,2015,NHL 113 | Buffalo Sabres,82,23,51,8,54,.329,161,274,-1.33,0.05,.262,15,2015,NHL 114 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2015,NHL 115 | New York Rangers*,82,53,22,7,113,.689,252,192,0.69,-0.04,.652,49,2015,NHL 116 | Washington Capitals*,82,45,26,11,101,.616,242,203,0.44,-0.03,.543,40,2015,NHL 117 | New York Islanders*,82,47,28,7,101,.616,252,230,0.26,-0.01,.567,40,2015,NHL 118 | Pittsburgh Penguins*,82,43,27,12,98,.598,221,210,0.12,-0.01,.537,39,2015,NHL 119 | Columbus Blue Jackets,82,42,35,5,89,.543,236,250,-0.16,0.01,.470,33,2015,NHL 120 | Philadelphia Flyers,82,33,31,18,84,.512,215,234,-0.23,0.00,.451,30,2015,NHL 121 | New Jersey Devils,82,32,36,14,78,.476,181,216,-0.40,0.02,.402,27,2015,NHL 122 | Carolina Hurricanes,82,30,41,11,71,.433,188,226,-0.44,0.02,.378,25,2015,NHL 123 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2015,NHL 124 | St. 
Louis Blues*,82,51,24,7,109,.665,248,201,0.59,0.02,.591,42,2015,NHL 125 | Nashville Predators*,82,47,25,10,104,.634,232,208,0.33,0.03,.573,41,2015,NHL 126 | Chicago Blackhawks*,82,48,28,6,102,.622,229,189,0.51,0.02,.549,39,2015,NHL 127 | Minnesota Wild*,82,46,28,8,100,.610,231,201,0.39,0.03,.555,42,2015,NHL 128 | Winnipeg Jets*,82,43,26,13,99,.604,230,210,0.29,0.04,.518,36,2015,NHL 129 | Dallas Stars,82,41,31,10,92,.561,261,260,0.06,0.05,.494,37,2015,NHL 130 | Colorado Avalanche,82,39,31,12,90,.549,219,227,-0.04,0.06,.439,29,2015,NHL 131 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2015,NHL 132 | Anaheim Ducks*,82,51,24,7,109,.665,236,226,0.10,-0.03,.604,43,2015,NHL 133 | Vancouver Canucks*,82,48,29,5,101,.616,242,222,0.20,-0.05,.561,42,2015,NHL 134 | Calgary Flames*,82,45,30,7,97,.591,241,216,0.26,-0.05,.543,41,2015,NHL 135 | Los Angeles Kings,82,40,27,15,95,.579,220,205,0.16,-0.03,.524,38,2015,NHL 136 | San Jose Sharks,82,40,33,9,89,.543,228,232,-0.08,-0.03,.500,36,2015,NHL 137 | Edmonton Oilers,82,24,44,14,62,.378,198,283,-1.01,0.03,.305,19,2015,NHL 138 | Arizona Coyotes,82,24,50,8,56,.341,170,272,-1.20,0.04,.293,19,2015,NHL 139 | Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,Atlantic Division,2014,NHL 140 | Boston Bruins*,82,54,19,9,117,.713,261,177,0.92,-0.11,.677,51,2014,NHL 141 | Tampa Bay Lightning*,82,46,27,9,101,.616,240,215,0.25,-0.06,.549,38,2014,NHL 142 | Montreal Canadiens*,82,46,28,8,100,.610,215,204,0.08,-0.05,.543,40,2014,NHL 143 | Detroit Red Wings*,82,39,28,15,93,.567,222,230,-0.14,-0.04,.500,34,2014,NHL 144 | Ottawa Senators,82,37,31,14,88,.537,236,265,-0.37,-0.02,.451,30,2014,NHL 145 | Toronto Maple 
Leafs,82,38,36,8,84,.512,231,256,-0.34,-0.04,.433,29,2014,NHL 146 | Florida Panthers,82,29,45,8,66,.402,196,268,-0.87,0.01,.341,21,2014,NHL 147 | Buffalo Sabres,82,21,51,10,52,.317,157,248,-1.09,0.02,.244,14,2014,NHL 148 | Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,Metropolitan Division,2014,NHL 149 | Pittsburgh Penguins*,82,51,24,7,109,.665,249,207,0.47,-0.04,.598,44,2014,NHL 150 | New York Rangers*,82,45,31,6,96,.585,218,193,0.26,-0.04,.543,41,2014,NHL 151 | Philadelphia Flyers*,82,42,30,10,94,.573,236,235,-0.01,-0.02,.543,39,2014,NHL 152 | Columbus Blue Jackets*,82,43,32,7,93,.567,231,216,0.16,-0.03,.506,38,2014,NHL 153 | Washington Capitals,82,38,30,14,90,.549,235,240,-0.08,-0.02,.470,28,2014,NHL 154 | New Jersey Devils,82,35,29,18,88,.537,197,208,-0.15,-0.02,.506,35,2014,NHL 155 | Carolina Hurricanes,82,36,35,11,83,.506,207,230,-0.29,-0.01,.451,34,2014,NHL 156 | New York Islanders,82,34,37,11,79,.482,225,267,-0.51,0.00,.396,25,2014,NHL 157 | Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2014,NHL 158 | Colorado Avalanche*,82,52,22,8,112,.683,250,220,0.40,0.04,.628,47,2014,NHL 159 | St. 
Louis Blues*,82,52,23,7,111,.677,248,191,0.71,0.01,.598,43,2014,NHL 160 | Chicago Blackhawks*,82,46,21,15,107,.652,267,220,0.60,0.02,.573,40,2014,NHL 161 | Minnesota Wild*,82,43,27,12,98,.598,207,206,0.07,0.05,.524,35,2014,NHL 162 | Dallas Stars*,82,40,31,11,91,.555,235,228,0.14,0.05,.494,36,2014,NHL 163 | Nashville Predators,82,38,32,12,88,.537,216,242,-0.25,0.07,.506,36,2014,NHL 164 | Winnipeg Jets,82,37,35,10,84,.512,227,237,-0.07,0.06,.439,29,2014,NHL 165 | Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,Pacific Division,2014,NHL 166 | Anaheim Ducks*,82,54,20,8,116,.707,266,209,0.68,-0.01,.677,51,2014,NHL 167 | San Jose Sharks*,82,51,22,9,111,.677,249,200,0.60,0.00,.604,41,2014,NHL 168 | Los Angeles Kings*,82,46,28,8,100,.610,206,174,0.40,0.01,.549,38,2014,NHL 169 | Phoenix Coyotes,82,37,30,15,89,.543,216,231,-0.16,0.03,.457,31,2014,NHL 170 | Vancouver Canucks,82,36,35,11,83,.506,196,223,-0.30,0.03,.451,31,2014,NHL 171 | Calgary Flames,82,35,40,7,77,.470,209,241,-0.35,0.04,.402,28,2014,NHL 172 | Edmonton Oilers,82,29,44,9,67,.409,203,270,-0.75,0.07,.348,25,2014,NHL 173 | -------------------------------------------------------------------------------- /Assignment_4/assets/nfl.csv: -------------------------------------------------------------------------------- 1 | DSRS,L,League,MoV,OSRS,PA,PD,PF,SRS,SoS,T,W,W-L%,team,year 2 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2018 3 | 2.1,5,NFL,6.9,3.1,325,111,436,5.2,-1.8,0,11,.688,New England Patriots*,2018 4 | -5.2,9,NFL,-7.1,-3.6,433,-114,319,-8.8,-1.7,0,7,.438,Miami Dolphins,2018 5 | -0.6,10,NFL,-6.6,-6.3,374,-105,269,-6.9,-0.3,0,6,.375,Buffalo Bills,2018 6 | -5.9,12,NFL,-6.8,-2.0,441,-108,333,-7.8,-1.1,0,4,.250,New York Jets,2018 7 | AFC North,AFC North,NFL,AFC North,AFC North,AFC 
North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2018 8 | 6.4,6,NFL,6.4,0.6,287,102,389,7.0,0.6,0,10,.625,Baltimore Ravens*,2018 9 | 1.7,6,NFL,4.3,3.9,360,68,428,5.6,1.3,1,9,.594,Pittsburgh Steelers,2018 10 | 0.6,8,NFL,-2.1,-1.0,392,-33,359,-0.3,1.7,1,7,.469,Cleveland Browns,2018 11 | -3.4,10,NFL,-5.4,0.0,455,-87,368,-3.4,2.0,0,6,.375,Cincinnati Bengals,2018 12 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2018 13 | 1.4,5,NFL,5.4,2.4,316,86,402,3.8,-1.5,0,11,.688,Houston Texans*,2018 14 | -0.6,6,NFL,5.6,3.9,344,89,433,3.4,-2.2,0,10,.625,Indianapolis Colts+,2018 15 | 3.5,7,NFL,0.4,-3.2,303,7,310,0.2,-0.2,0,9,.563,Tennessee Titans,2018 16 | 4.0,11,NFL,-4.4,-8.1,316,-71,245,-4.0,0.4,0,5,.313,Jacksonville Jaguars,2018 17 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2018 18 | -3.8,4,NFL,9.0,12.6,421,144,565,8.9,-0.1,0,12,.750,Kansas City Chiefs*,2018 19 | 2.9,4,NFL,6.2,3.0,329,99,428,6.0,-0.2,0,12,.750,Los Angeles Chargers+,2018 20 | 3.1,10,NFL,-1.3,-3.6,349,-20,329,-0.5,0.7,0,6,.375,Denver Broncos,2018 21 | -4.1,12,NFL,-11.1,-5.2,467,-177,290,-9.3,1.8,0,4,.250,Oakland Raiders,2018 22 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2018 23 | 2.9,6,NFL,0.9,-1.9,324,15,339,1.1,0.2,0,10,.625,Dallas Cowboys*,2018 24 | 1.8,7,NFL,1.2,0.0,348,19,367,1.7,0.5,0,9,.563,Philadelphia Eagles+,2018 25 | 0.6,9,NFL,-4.9,-5.6,359,-78,281,-4.9,-0.1,0,7,.438,Washington Redskins,2018 26 | -2.9,11,NFL,-2.7,0.8,412,-43,369,-2.2,0.5,0,5,.313,New York Giants,2018 27 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2018 28 | 4.8,4,NFL,8.6,1.5,283,138,421,6.3,-2.3,0,12,.750,Chicago Bears*,2018 29 | 
1.8,7,NFL,1.2,-1.2,341,19,360,0.6,-0.6,1,8,.531,Minnesota Vikings,2018 30 | -2.7,9,NFL,-1.5,0.0,400,-24,376,-2.7,-1.2,1,6,.406,Green Bay Packers,2018 31 | 0.3,10,NFL,-2.3,-3.3,360,-36,324,-3.0,-0.8,0,6,.375,Detroit Lions,2018 32 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2018 33 | 2.2,3,NFL,9.4,7.9,353,151,504,10.1,0.6,0,13,.813,New Orleans Saints*,2018 34 | 0.8,9,NFL,-0.4,0.1,382,-6,376,0.9,1.3,0,7,.438,Carolina Panthers,2018 35 | -2.6,9,NFL,-0.6,2.5,423,-9,414,-0.1,0.4,0,7,.438,Atlanta Falcons,2018 36 | -4.6,11,NFL,-4.3,2.0,464,-68,396,-2.6,1.7,0,5,.313,Tampa Bay Buccaneers,2018 37 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2018 38 | -1.1,3,NFL,8.9,9.5,384,143,527,8.5,-0.4,0,13,.813,Los Angeles Rams*,2018 39 | 1.5,6,NFL,5.1,3.0,347,81,428,4.5,-0.6,0,10,.625,Seattle Seahawks+,2018 40 | -3.1,12,NFL,-5.8,-2.5,435,-93,342,-5.5,0.3,0,4,.250,San Francisco 49ers,2018 41 | -1.9,13,NFL,-12.5,-9.6,425,-200,225,-11.5,1.0,0,3,.188,Arizona Cardinals,2018 42 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2017 43 | 2.6,3,NFL,10.1,6.3,296,162,458,8.9,-1.2,,13,.813,New England Patriots*,2017 44 | -1.0,7,NFL,-3.6,-3.0,359,-57,302,-4.0,-0.5,,9,.563,Buffalo Bills+,2017 45 | -2.4,10,NFL,-7.0,-3.9,393,-112,281,-6.3,0.7,,6,.375,Miami Dolphins,2017 46 | -2.1,11,NFL,-5.3,-2.9,382,-84,298,-4.9,0.3,,5,.313,New York Jets,2017 47 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2017 48 | 1.8,3,NFL,6.1,3.2,308,98,406,5.0,-1.1,,13,.813,Pittsburgh Steelers*,2017 49 | 1.2,7,NFL,5.8,2.2,303,92,395,3.4,-2.4,,9,.563,Baltimore Ravens,2017 50 | -0.9,9,NFL,-3.7,-4.1,349,-59,290,-5.0,-1.3,,7,.438,Cincinnati Bengals,2017 51 | -4.1,16,NFL,-11.0,-6.8,410,-176,234,-11.0,0.0,,0,.000,Cleveland 
Browns,2017 52 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2017 53 | 3.6,6,NFL,9.3,3.0,268,149,417,6.5,-2.8,,10,.625,Jacksonville Jaguars*,2017 54 | -1.5,7,NFL,-1.4,-2.0,356,-22,334,-3.5,-2.1,,9,.563,Tennessee Titans+,2017 55 | -5.6,12,NFL,-6.1,-0.8,436,-98,338,-6.4,-0.3,,4,.250,Houston Texans,2017 56 | -4.0,12,NFL,-8.8,-6.1,404,-141,263,-10.1,-1.3,,4,.250,Indianapolis Colts,2017 57 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2017 58 | -0.3,6,NFL,4.8,3.8,339,76,415,3.4,-1.3,,10,.625,Kansas City Chiefs*,2017 59 | 4.0,7,NFL,5.2,-0.3,272,83,355,3.6,-1.5,,9,.563,Los Angeles Chargers,2017 60 | -1.8,10,NFL,-4.5,-3.0,373,-72,301,-4.7,-0.2,,6,.375,Oakland Raiders,2017 61 | -2.9,11,NFL,-5.8,-3.9,382,-93,289,-6.7,-0.9,,5,.313,Denver Broncos,2017 62 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2017 63 | 2.5,3,NFL,10.1,7.0,295,162,457,9.4,-0.7,,13,.813,Philadelphia Eagles*,2017 64 | 1.2,7,NFL,1.4,0.4,332,22,354,1.6,0.2,,9,.563,Dallas Cowboys,2017 65 | -1.8,9,NFL,-2.9,0.5,388,-46,342,-1.3,1.6,,7,.438,Washington Redskins,2017 66 | -1.2,13,NFL,-8.9,-6.4,388,-142,246,-7.6,1.3,,3,.188,New York Giants,2017 67 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2017 68 | 6.8,3,NFL,8.1,2.3,252,130,382,9.1,1.0,,13,.813,Minnesota Vikings*,2017 69 | -2.5,7,NFL,2.1,5.2,376,34,410,2.7,0.6,,9,.563,Detroit Lions,2017 70 | -1.6,9,NFL,-4.0,-0.3,384,-64,320,-1.9,2.1,,7,.438,Green Bay Packers,2017 71 | 3.3,11,NFL,-3.5,-4.6,320,-56,264,-1.3,2.2,,5,.313,Chicago Bears,2017 72 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2017 73 | 2.2,5,NFL,7.6,7.0,326,122,448,9.2,1.5,,11,.688,New Orleans Saints*,2017 74 | 
2.7,5,NFL,2.3,1.7,327,36,363,4.3,2.1,,11,.688,Carolina Panthers+,2017 75 | 3.2,6,NFL,2.4,1.1,315,38,353,4.3,1.9,,10,.625,Atlanta Falcons+,2017 76 | -1.7,11,NFL,-2.9,0.4,382,-47,335,-1.3,1.7,,5,.313,Tampa Bay Buccaneers,2017 77 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2017 78 | 1.0,5,NFL,9.3,8.2,329,149,478,9.2,-0.2,,11,.688,Los Angeles Rams*,2017 79 | 1.2,7,NFL,2.1,0.7,332,34,366,1.9,-0.2,,9,.563,Seattle Seahawks,2017 80 | 0.2,8,NFL,-4.1,-4.0,361,-66,295,-3.7,0.4,,8,.500,Arizona Cardinals,2017 81 | -2.1,10,NFL,-3.3,-0.8,383,-52,331,-2.9,0.4,,6,.375,San Francisco 49ers,2017 82 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2016 83 | 5.0,2,NFL,11.9,4.3,250,191,441,9.3,-2.7,0,14,.875,New England Patriots*,2016 84 | -1.8,6,NFL,-1.1,-0.6,380,-17,363,-2.4,-1.3,0,10,.625,Miami Dolphins+,2016 85 | -2.2,9,NFL,1.3,1.8,378,21,399,-0.3,-1.6,0,7,.438,Buffalo Bills,2016 86 | -3.0,11,NFL,-8.4,-5.5,409,-134,275,-8.5,-0.1,0,5,.313,New York Jets,2016 87 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2016 88 | 2.0,5,NFL,4.5,2.8,327,72,399,4.7,0.2,0,11,.688,Pittsburgh Steelers*,2016 89 | 2.6,8,NFL,1.4,-1.1,321,22,343,1.5,0.2,0,8,.500,Baltimore Ravens,2016 90 | 2.5,9,NFL,0.6,-1.5,315,10,325,1.0,0.4,1,6,.406,Cincinnati Bengals,2016 91 | -4.9,15,NFL,-11.8,-5.2,452,-188,264,-10.1,1.7,0,1,.063,Cleveland Browns,2016 92 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2016 93 | 2.7,7,NFL,-3.1,-5.3,328,-49,279,-2.6,0.4,0,9,.563,Houston Texans*,2016 94 | -1.7,7,NFL,0.2,0.7,378,3,381,-1.0,-1.2,0,9,.563,Tennessee Titans,2016 95 | -2.7,8,NFL,1.2,3.1,392,19,411,0.4,-0.8,0,8,.500,Indianapolis Colts,2016 96 | -2.3,13,NFL,-5.1,-2.7,400,-82,318,-5.0,0.2,0,3,.188,Jacksonville 
Jaguars,2016 97 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2016 98 | 4.4,4,NFL,4.9,1.2,311,78,389,5.6,0.7,0,12,.750,Kansas City Chiefs*,2016 99 | -0.3,4,NFL,1.9,3.5,385,31,416,3.3,1.3,0,12,.750,Oakland Raiders+,2016 100 | 6.1,7,NFL,2.3,-2.0,297,36,333,4.0,1.8,0,9,.563,Denver Broncos,2016 101 | -3.0,11,NFL,-0.8,3.0,423,-13,410,0.1,0.9,0,5,.313,San Diego Chargers,2016 102 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,2016 103 | 2.9,3,NFL,7.2,4.1,306,115,421,7.0,-0.2,0,13,.813,Dallas Cowboys*,2016 104 | 5.4,5,NFL,1.6,-3.2,284,26,310,2.1,0.5,0,11,.688,New York Giants+,2016 105 | -1.3,7,NFL,0.8,3.3,383,13,396,2.0,1.2,1,8,.531,Washington Redskins,2016 106 | 2.5,9,NFL,2.3,1.3,331,36,367,3.8,1.6,0,7,.438,Philadelphia Eagles,2016 107 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2016 108 | -2.0,6,NFL,2.8,4.9,388,44,432,2.8,0.1,0,10,.625,Green Bay Packers*,2016 109 | -0.1,7,NFL,-0.8,-1.3,358,-12,346,-1.4,-0.6,0,9,.563,Detroit Lions+,2016 110 | 3.6,8,NFL,1.3,-2.6,307,20,327,0.9,-0.3,0,8,.500,Minnesota Vikings,2016 111 | -2.3,13,NFL,-7.5,-5.2,399,-120,279,-7.5,0.0,0,3,.188,Chicago Bears,2016 112 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2016 113 | -2.0,5,NFL,8.4,10.5,406,134,540,8.5,0.1,0,11,.688,Atlanta Falcons*,2016 114 | 1.3,7,NFL,-0.9,-1.5,369,-15,354,-0.2,0.7,0,9,.563,Tampa Bay Buccaneers,2016 115 | -5.3,9,NFL,0.9,6.8,454,15,469,1.5,0.6,0,7,.438,New Orleans Saints,2016 116 | -0.8,10,NFL,-2.1,-0.2,402,-33,369,-1.0,1.1,0,6,.375,Carolina Panthers,2016 117 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2016 118 | 
4.5,5,NFL,3.9,-2.4,292,62,354,2.1,-1.7,1,10,.656,Seattle Seahawks*,2016 119 | -0.8,8,NFL,3.5,2.4,362,56,418,1.6,-1.9,1,7,.469,Arizona Cardinals,2016 120 | -1.6,12,NFL,-10.6,-9.5,394,-170,224,-11.1,-0.5,0,4,.250,Los Angeles Rams,2016 121 | -7.5,14,NFL,-10.7,-3.7,480,-171,309,-11.2,-0.5,0,2,.125,San Francisco 49ers,2016 122 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,,AFC East,AFC East,AFC East,2015 123 | 1.7,4,NFL,9.4,5.3,315,150,465,7.0,-2.4,,12,.750,New England Patriots*,2015 124 | 2.0,6,NFL,4.6,-0.5,314,73,387,1.5,-3.0,,10,.625,New York Jets,2015 125 | -0.2,8,NFL,1.3,0.3,359,20,379,0.0,-1.2,,8,.500,Buffalo Bills,2015 126 | -2.2,10,NFL,-4.9,-4.7,389,-79,310,-6.8,-1.9,,6,.375,Miami Dolphins,2015 127 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,,AFC North,AFC North,AFC North,2015 128 | 5.8,4,NFL,8.8,4.8,279,140,419,10.6,1.9,,12,.750,Cincinnati Bengals*,2015 129 | 3.6,6,NFL,6.5,5.1,319,104,423,8.7,2.2,,10,.625,Pittsburgh Steelers+,2015 130 | -1.2,11,NFL,-4.6,-0.7,401,-73,328,-1.9,2.6,,5,.313,Baltimore Ravens,2015 131 | -2.9,13,NFL,-9.6,-3.2,432,-154,278,-6.1,3.5,,3,.188,Cleveland Browns,2015 132 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,,AFC South,AFC South,AFC South,2015 133 | 2.6,7,NFL,1.6,-3.3,313,26,339,-0.8,-2.4,,9,.563,Houston Texans*,2015 134 | -3.6,8,NFL,-4.7,-3.1,408,-75,333,-6.7,-2.0,,8,.500,Indianapolis Colts,2015 135 | -6.9,11,NFL,-4.5,-0.7,448,-72,376,-7.5,-3.0,,5,.313,Jacksonville Jaguars,2015 136 | -4.6,13,NFL,-7.8,-5.9,423,-124,299,-10.5,-2.8,,3,.188,Tennessee Titans,2015 137 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,,AFC West,AFC West,AFC West,2015 138 | 5.5,4,NFL,3.7,0.3,296,59,355,5.8,2.1,,12,.750,Denver Broncos*,2015 139 | 5.3,5,NFL,7.4,3.7,287,118,405,9.0,1.6,,11,.688,Kansas City Chiefs+,2015 140 | -1.6,9,NFL,-2.5,1.4,399,-40,359,-0.2,2.3,,7,.438,Oakland 
Raiders,2015 141 | -1.1,12,NFL,-4.9,-1.5,398,-78,320,-2.6,2.2,,4,.250,San Diego Chargers,2015 142 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,,NFC East,NFC East,NFC East,2015 143 | -1.8,7,NFL,0.6,-0.2,379,9,388,-1.9,-2.5,,9,.563,Washington Redskins*,2015 144 | -4.0,9,NFL,-3.3,-0.6,430,-53,377,-4.6,-1.3,,7,.438,Philadelphia Eagles,2015 145 | -6.1,10,NFL,-1.4,2.5,442,-22,420,-3.6,-2.2,,6,.375,New York Giants,2015 146 | 0.1,12,NFL,-6.2,-7.0,374,-99,275,-6.9,-0.7,,4,.250,Dallas Cowboys,2015 147 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,,NFC North,NFC North,NFC North,2015 148 | 4.7,5,NFL,3.9,1.1,302,63,365,5.8,1.9,,11,.688,Minnesota Vikings*,2015 149 | 3.3,6,NFL,2.8,2.0,323,45,368,5.3,2.5,,10,.625,Green Bay Packers+,2015 150 | -1.3,9,NFL,-2.6,1.0,400,-42,358,-0.2,2.4,,7,.438,Detroit Lions,2015 151 | -1.2,10,NFL,-3.9,-0.1,397,-62,335,-1.3,2.6,,6,.375,Chicago Bears,2015 152 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,,NFC South,NFC South,NFC South,2015 153 | 2.1,1,NFL,12.0,6.0,308,192,500,8.1,-3.9,,15,.938,Carolina Panthers*,2015 154 | 0.3,8,NFL,-0.4,-4.0,345,-6,339,-3.8,-3.4,,8,.500,Atlanta Falcons,2015 155 | -7.6,9,NFL,-4.3,1.1,476,-68,408,-6.6,-2.3,,7,.438,New Orleans Saints,2015 156 | -4.2,10,NFL,-4.7,-3.5,417,-75,342,-7.7,-3.0,,6,.375,Tampa Bay Buccaneers,2015 157 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,,NFC West,NFC West,NFC West,2015 158 | 3.4,3,NFL,11.0,9.0,313,176,489,12.3,1.3,,13,.813,Arizona Cardinals*,2015 159 | 6.0,6,NFL,9.1,5.4,277,146,423,11.3,2.2,,10,.625,Seattle Seahawks+,2015 160 | 3.6,9,NFL,-3.1,-3.8,330,-50,280,-0.2,3.0,,7,.438,St. 
Louis Rams,2015 161 | 0.5,11,NFL,-9.3,-6.0,387,-149,238,-5.5,3.8,,5,.313,San Francisco 49ers,2015 162 | AFC East,AFC East,NFL,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,AFC East,2014 163 | 3.5,4,NFL,9.7,7.5,313,155,468,10.9,1.3,0,12,.750,New England Patriots*,2014 164 | 5.3,7,NFL,3.4,-0.4,289,54,343,4.9,1.6,0,9,.563,Buffalo Bills,2014 165 | -0.4,8,NFL,0.9,2.9,373,15,388,2.6,1.6,0,8,.500,Miami Dolphins,2014 166 | -1.0,12,NFL,-7.4,-4.0,401,-118,283,-5.0,2.3,0,4,.250,New York Jets,2014 167 | AFC North,AFC North,NFL,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,AFC North,2014 168 | -2.1,5,NFL,4.3,4.4,368,68,436,2.2,-2.0,0,11,.688,Pittsburgh Steelers*,2014 169 | 1.3,5,NFL,1.3,-0.5,344,21,365,0.7,-0.6,1,10,.656,Cincinnati Bengals+,2014 170 | 2.8,6,NFL,6.7,1.8,302,107,409,4.6,-2.1,0,10,.625,Baltimore Ravens+,2014 171 | 0.9,9,NFL,-2.4,-4.8,337,-38,299,-3.9,-1.5,0,7,.438,Cleveland Browns,2014 172 | AFC South,AFC South,NFL,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,AFC South,2014 173 | -0.8,5,NFL,5.6,5.2,369,89,458,4.4,-1.1,0,11,.688,Indianapolis Colts*,2014 174 | 2.5,7,NFL,4.1,-0.8,307,65,372,1.7,-2.3,0,9,.563,Houston Texans,2014 175 | -2.7,13,NFL,-10.2,-7.8,412,-163,249,-10.5,-0.3,0,3,.188,Jacksonville Jaguars,2014 176 | -4.9,14,NFL,-11.5,-7.0,438,-184,254,-11.8,-0.3,0,2,.125,Tennessee Titans,2014 177 | AFC West,AFC West,NFL,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,AFC West,2014 178 | 0.4,4,NFL,8.0,9.2,354,128,482,9.6,1.6,0,12,.750,Denver Broncos*,2014 179 | 5.7,7,NFL,4.5,0.0,281,72,353,5.7,1.2,0,9,.563,Kansas City Chiefs,2014 180 | 1.2,7,NFL,0.0,0.7,348,0,348,1.9,1.9,0,9,.563,San Diego Chargers,2014 181 | -4.7,13,NFL,-12.4,-4.3,452,-199,253,-9.0,3.4,0,3,.188,Oakland Raiders,2014 182 | NFC East,NFC East,NFL,NFC East,NFC East,NFC East,NFC East,NFC East,NFC East,NFC 
East,NFC East,NFC East,NFC East,NFC East,2014 183 | 0.1,4,NFL,7.2,5.3,352,115,467,5.4,-1.8,0,12,.750,Dallas Cowboys*,2014 184 | -2.7,6,NFL,4.6,6.6,400,74,474,3.9,-0.7,0,10,.625,Philadelphia Eagles,2014 185 | -2.5,10,NFL,-1.3,0.8,400,-20,380,-1.7,-0.4,0,6,.375,New York Giants,2014 186 | -4.7,12,NFL,-8.6,-4.0,438,-137,301,-8.7,-0.2,0,4,.250,Washington Redskins,2014 187 | NFC North,NFC North,NFL,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,NFC North,2014 188 | 0.4,4,NFL,8.6,7.9,348,138,486,8.3,-0.3,0,12,.750,Green Bay Packers*,2014 189 | 5.2,5,NFL,2.4,-3.2,282,39,321,2.1,-0.4,0,11,.688,Detroit Lions+,2014 190 | 1.1,9,NFL,-1.1,-2.8,343,-18,325,-1.7,-0.5,0,7,.438,Minnesota Vikings,2014 191 | -4.7,11,NFL,-7.7,-2.0,442,-123,319,-6.7,1.0,0,5,.313,Chicago Bears,2014 192 | NFC South,NFC South,NFL,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,NFC South,2014 193 | -0.7,8,NFL,-2.2,-2.4,374,-35,339,-3.1,-0.9,1,7,.469,Carolina Panthers*,2014 194 | -4.8,9,NFL,-1.4,1.9,424,-23,401,-2.9,-1.5,0,7,.438,New Orleans Saints,2014 195 | -4.4,10,NFL,-2.3,0.6,417,-36,381,-3.8,-1.6,0,6,.375,Atlanta Falcons,2014 196 | -3.3,14,NFL,-8.3,-6.5,410,-133,277,-9.8,-1.5,0,2,.125,Tampa Bay Buccaneers,2014 197 | NFC West,NFC West,NFL,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2014 198 | 7.1,4,NFL,8.8,2.4,254,140,394,9.5,0.8,0,12,.750,Seattle Seahawks*,2014 199 | 4.4,5,NFL,0.7,-2.4,299,11,310,2.0,1.3,0,11,.688,Arizona Cardinals+,2014 200 | 2.1,8,NFL,-2.1,-3.0,340,-34,306,-1.0,1.2,0,8,.500,San Francisco 49ers,2014 201 | 0.4,10,NFL,-1.9,-1.2,354,-30,324,-0.8,1.0,0,6,.375,St. 
Louis Rams,2014 202 | -------------------------------------------------------------------------------- /Assignment_4/assignment4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "48770f8b5f5d3062d3badd51fcafc401", 10 | "grade": false, 11 | "grade_id": "cell-a6c4f74309fc2379", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | "source": [ 18 | "# Assignment 4\n", 19 | "## Description\n", 20 | "In this assignment you must read in a file of metropolitan regions and associated sports teams from [assets/wikipedia_data.html](assets/wikipedia_data.html) and answer some questions about each metropolitan region. Each of these regions may have one or more teams from the \"Big 4\": NFL (football, in [assets/nfl.csv](assets/nfl.csv)), MLB (baseball, in [assets/mlb.csv](assets/mlb.csv)), NBA (basketball, in [assets/nba.csv](assets/nba.csv)), or NHL (hockey, in [assets/nhl.csv](assets/nhl.csv)). Please keep in mind that all questions are from the perspective of the metropolitan region, and that this file is the \"source of authority\" for the location of a given sports team. Thus teams which are commonly known by a different area (e.g. \"Oakland Raiders\") need to be mapped into the metropolitan region given (e.g. San Francisco Bay Area). This will require some human data understanding outside of the data you've been given (e.g. you will have to hand-code some names, and might need to google to find out where teams are)!\n", 21 | "\n", 22 | "For each sport I would like you to answer the question: **what is the win/loss ratio's correlation with the population of the city it is in?** Win/Loss ratio refers to the number of wins over the number of wins plus the number of losses. 
Remember that to calculate the correlation with [`pearsonr`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html), you are going to send in two ordered lists of values: the populations from the wikipedia_data.html file and the win/loss ratio for a given sport, in the same order. Average the win/loss ratios for those cities which have multiple teams of a single sport. Each sport is worth an equal share (20%\\*4=80%) of the grade for this assignment. You should only use data **from year 2018** for your analysis -- this is important!\n", 23 | "\n", 24 | "## Notes\n", 25 | "\n", 26 | "1. Do not include data about the MLS or CFL in any of the work you are doing; we're only interested in the Big 4 in this assignment.\n", 27 | "2. I highly suggest that you first tackle the four correlation questions in order, as they are all similar and worth the majority of grades for this assignment. This is by design!\n", 28 | "3. It's fair game to talk with peers about high level strategy as well as the relationship between metropolitan areas and sports teams. However, do not post code solving aspects of the assignment (such as dictionaries mapping areas to teams, or regexes which will clean up names).\n", 29 | "4. There may be more teams than the assert statements test; remember to collapse multiple teams in one city into a single value!" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "deletable": false, 36 | "editable": false, 37 | "nbgrader": { 38 | "checksum": "369ff9ecf0ee04640574205cbc697f94", 39 | "grade": false, 40 | "grade_id": "cell-712b2b5da63d4505", 41 | "locked": true, 42 | "schema_version": 1, 43 | "solution": false 44 | } 45 | }, 46 | "source": [ 47 | "## Question 1\n", 48 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NHL** using **2018** data." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 1, 54 | "metadata": { 55 | "deletable": false, 56 | "nbgrader": { 57 | "checksum": "1cac4803b02502929f5b1612d48db2b5", 58 | "grade": false, 59 | "grade_id": "cell-69b16e4386e58030", 60 | "locked": false, 61 | "schema_version": 1, 62 | "solution": true 63 | } 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import pandas as pd\n", 68 | "import numpy as np\n", 69 | "import scipy.stats as stats\n", 70 | "import re\n", 71 | "\n", 72 | "\n", 73 | "\n", 74 | "def nhl_correlation(): \n", 75 | " # YOUR CODE HERE\n", 76 | " #raise NotImplementedError()\n", 77 | " \n", 78 | " nhl_df=pd.read_csv(\"assets/nhl.csv\")\n", 79 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 80 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 81 | " \n", 82 | " nhl_df.drop([0,9,18,26],0,inplace=True)\n", 83 | " cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50],0,inplace=True)\n", 84 | "\n", 85 | " l= []\n", 86 | " for i in cities['NHL']:\n", 87 | " i=i.split('[')\n", 88 | " l.append(i[0])\n", 89 | " cities['NHL'] = l\n", 90 | "\n", 91 | " li = []\n", 92 | " for i in nhl_df['team']:\n", 93 | " i = re.findall(\"[^*]+\", i)\n", 94 | " li.append(i[0])\n", 95 | " nhl_df['team'] = li\n", 96 | "\n", 97 | " nhl_df = nhl_df.head(31)\n", 98 | "\n", 99 | " nhl_df['team_ville'] = nhl_df['team']\n", 100 | " nhl_df['team_ville'] = nhl_df['team_ville'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n", 101 | " 'Boston Bruins':'Boston',\n", 102 | " 'Toronto Maple Leafs':'Toronto',\n", 103 | " 'Florida Panthers':'Miami–Fort Lauderdale',\n", 104 | " 'Detroit Red Wings':'Detroit',\n", 105 | " 'Montreal Canadiens':'Montreal',\n", 106 | " 'Ottawa Senators':'Ottawa',\n", 107 | " 'Buffalo Sabres':'Buffalo',\n", 108 | " 'Washington Capitals':'Washington, D.C.',\n", 109 | " 'Pittsburgh Penguins':'Pittsburgh',\n", 110 | " 'Philadelphia Flyers':'Philadelphia',\n", 111 | " 'Columbus Blue 
Jackets':'Columbus',\n", 112 | " 'New Jersey Devils':'New York City',\n", 113 | " 'Carolina Hurricanes':'Raleigh',\n", 114 | " 'New York Islanders':'New York City',\n", 115 | " 'New York Rangers':'New York City',\n", 116 | " 'Nashville Predators':'Nashville',\n", 117 | " 'Winnipeg Jets':'Winnipeg',\n", 118 | " 'Minnesota Wild':'Minneapolis–Saint Paul',\n", 119 | " 'Colorado Avalanche':'Denver',\n", 120 | " 'St. Louis Blues':'St. Louis',\n", 121 | " 'Dallas Stars':'Dallas–Fort Worth',\n", 122 | " 'Chicago Blackhawks':'Chicago',\n", 123 | " 'Vegas Golden Knights':'Las Vegas',\n", 124 | " 'Anaheim Ducks':'Los Angeles',\n", 125 | " 'San Jose Sharks':'San Francisco Bay Area',\n", 126 | " 'Los Angeles Kings':'Los Angeles',\n", 127 | " 'Calgary Flames':'Calgary',\n", 128 | " 'Edmonton Oilers':'Edmonton',\n", 129 | " 'Vancouver Canucks':'Vancouver',\n", 130 | " 'Arizona Coyotes':'Phoenix'})\n", 131 | "\n", 132 | " df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 133 | "\n", 134 | " df['W'] = pd.to_numeric(df['W'])\n", 135 | " df['L'] = pd.to_numeric(df['L'])\n", 136 | " df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n", 137 | "\n", 138 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 139 | "\n", 140 | " df = df[he]\n", 141 | "\n", 142 | " df['W/L'] = df['W']/(df['L']+df['W'])\n", 143 | "\n", 144 | " df = df.groupby('Metropolitan area').mean().reset_index()\n", 145 | " \n", 146 | " population_by_region = df['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n", 147 | " win_loss_by_region = df['W/L'] # pass in win/loss ratio from nhl_df in the same order as cities[\"Metropolitan area\"]\n", 148 | "\n", 149 | " assert len(population_by_region) == len(win_loss_by_region), \"Q1: Your lists must be the same length\"\n", 150 | " assert len(population_by_region) == 28, \"Q1: There should be 28 teams being analysed for NHL\"\n", 151 | " \n", 152 | " 
return stats.pearsonr(population_by_region, win_loss_by_region)[0]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "deletable": false, 160 | "editable": false, 161 | "nbgrader": { 162 | "checksum": "52a581df513c71153e105b93764cda4b", 163 | "grade": true, 164 | "grade_id": "cell-ebe0b2dfe1067e63", 165 | "locked": true, 166 | "points": 20, 167 | "schema_version": 1, 168 | "solution": false 169 | } 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": { 177 | "deletable": false, 178 | "editable": false, 179 | "nbgrader": { 180 | "checksum": "988912cae4968d81473f46d783e79c16", 181 | "grade": false, 182 | "grade_id": "cell-cb964e690298b71d", 183 | "locked": true, 184 | "schema_version": 1, 185 | "solution": false 186 | } 187 | }, 188 | "source": [ 189 | "## Question 2\n", 190 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NBA** using **2018** data." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 2, 196 | "metadata": { 197 | "deletable": false, 198 | "nbgrader": { 199 | "checksum": "9394222aafc8ccab0a228098ba0d6010", 200 | "grade": false, 201 | "grade_id": "cell-5a5f21279e3d3572", 202 | "locked": false, 203 | "schema_version": 1, 204 | "solution": true 205 | } 206 | }, 207 | "outputs": [], 208 | "source": [ 209 | "import pandas as pd\n", 210 | "import numpy as np\n", 211 | "import scipy.stats as stats\n", 212 | "import re\n", 213 | "\n", 214 | "\n", 215 | "\n", 216 | "def nba_correlation():\n", 217 | " # YOUR CODE HERE\n", 218 | " #raise NotImplementedError()\n", 219 | " \n", 220 | " nba_df=pd.read_csv(\"assets/nba.csv\")\n", 221 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 222 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 223 | " \n", 224 | " cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50],0,inplace=True)\n", 225 | " \n", 226 | " l1 = []\n", 227 | " for i in nba_df['team']:\n", 228 | " #i=i.rstrip()\n", 229 | " i=i.split('*')\n", 230 | " l1.append(i[0])\n", 231 | " nba_df['team'] = l1\n", 232 | " \n", 233 | " l2 = []\n", 234 | " for i in nba_df['team']:\n", 235 | " i=i.split('(')\n", 236 | " l2.append(i[0])\n", 237 | " nba_df['team'] = l2\n", 238 | " \n", 239 | " l3 = []\n", 240 | " for i in nba_df['team']:\n", 241 | " i=i.rstrip()\n", 242 | " l3.append(i)\n", 243 | " nba_df['team'] = l3\n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " nba_df = nba_df.head(30)\n", 248 | "\n", 249 | " nba_df['team_ville'] = nba_df['team']\n", 250 | " nba_df['team_ville'] = nba_df['team_ville'].map({'Toronto Raptors':'Toronto',\n", 251 | " 'Boston Celtics':'Boston',\n", 252 | " 'Philadelphia 76ers':'Philadelphia',\n", 253 | " 'Cleveland Cavaliers':'Cleveland',\n", 254 | " 'Indiana Pacers':'Indianapolis',\n", 255 | " 'Miami Heat':'Miami–Fort Lauderdale',\n", 256 | " 'Milwaukee Bucks':'Milwaukee',\n", 257 | " 'Washington Wizards':'Washington, 
D.C.',\n", 258 | " 'Detroit Pistons':'Detroit',\n", 259 | " 'Charlotte Hornets':'Charlotte',\n", 260 | " 'New York Knicks':'New York City',\n", 261 | " 'Brooklyn Nets':'New York City',\n", 262 | " 'Chicago Bulls':'Chicago',\n", 263 | " 'Orlando Magic':'Orlando',\n", 264 | " 'Atlanta Hawks':'Atlanta',\n", 265 | " 'Houston Rockets':'Houston',\n", 266 | " 'Golden State Warriors':'San Francisco Bay Area',\n", 267 | " 'Portland Trail Blazers':'Portland',\n", 268 | " 'Oklahoma City Thunder':'Oklahoma City',\n", 269 | " 'Utah Jazz':'Salt Lake City',\n", 270 | " 'New Orleans Pelicans':'New Orleans',\n", 271 | " 'San Antonio Spurs':'San Antonio',\n", 272 | " 'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n", 273 | " 'Denver Nuggets':'Denver',\n", 274 | " 'Los Angeles Clippers':'Los Angeles',\n", 275 | " 'Los Angeles Lakers':'Los Angeles',\n", 276 | " 'Sacramento Kings':'Sacramento',\n", 277 | " 'Dallas Mavericks':'Dallas–Fort Worth',\n", 278 | " 'Memphis Grizzlies':'Memphis',\n", 279 | " 'Phoenix Suns':'Phoenix'})\n", 280 | " \n", 281 | " df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 282 | " \n", 283 | " df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n", 284 | " df2['W'] = pd.to_numeric(df2['W'])\n", 285 | " df2['L'] = pd.to_numeric(df2['L'])\n", 286 | " df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n", 287 | " he = ['team','W','L','W/L%','Metropolitan area','Population (2016 est.)[8]']\n", 288 | " df2 = df2[he]\n", 289 | " df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n", 290 | " df2 = df2.groupby('Metropolitan area').mean().reset_index()\n", 291 | " \n", 292 | " population_by_region = df2['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n", 293 | " win_loss_by_region = df2['W/L'] # pass in win/loss ratio from nba_df in the same order as cities[\"Metropolitan area\"]\n", 294 | "\n", 295 | " assert len(population_by_region) == len(win_loss_by_region), \"Q2: Your lists 
must be the same length\"\n", 296 | " assert len(population_by_region) == 28, \"Q2: There should be 28 teams being analysed for NBA\"\n", 297 | "\n", 298 | " return stats.pearsonr(population_by_region, win_loss_by_region)[0]" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "deletable": false, 306 | "editable": false, 307 | "nbgrader": { 308 | "checksum": "bbdeb8eb22f525a34c10dc8798324e42", 309 | "grade": true, 310 | "grade_id": "cell-e573b2b4a282b470", 311 | "locked": true, 312 | "points": 20, 313 | "schema_version": 1, 314 | "solution": false 315 | } 316 | }, 317 | "outputs": [], 318 | "source": [] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": { 323 | "deletable": false, 324 | "editable": false, 325 | "nbgrader": { 326 | "checksum": "1a1a5809f675ca033086422007cd73bd", 327 | "grade": false, 328 | "grade_id": "cell-96e15e4335df78f4", 329 | "locked": true, 330 | "schema_version": 1, 331 | "solution": false 332 | } 333 | }, 334 | "source": [ 335 | "## Question 3\n", 336 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **MLB** using **2018** data." 
337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 3, 342 | "metadata": { 343 | "deletable": false, 344 | "nbgrader": { 345 | "checksum": "27e8c0da6c9fa0dffc10488314335b6c", 346 | "grade": false, 347 | "grade_id": "cell-33b00fc3f3467b0c", 348 | "locked": false, 349 | "schema_version": 1, 350 | "solution": true 351 | } 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "import pandas as pd\n", 356 | "import numpy as np\n", 357 | "import scipy.stats as stats\n", 358 | "import re\n", 359 | "\n", 360 | "\n", 361 | "def mlb_correlation(): \n", 362 | " # YOUR CODE HERE\n", 363 | " #raise NotImplementedError()\n", 364 | " \n", 365 | " mlb_df=pd.read_csv(\"assets/mlb.csv\")\n", 366 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 367 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 368 | " \n", 369 | " cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50],0,inplace=True)\n", 370 | " \n", 371 | " mlb_df = mlb_df.head(30)\n", 372 | " \n", 373 | " mlb_df['team_ville'] = mlb_df['team']\n", 374 | " mlb_df['team_ville'] = mlb_df['team_ville'].map({'Boston Red Sox':'Boston',\n", 375 | " 'New York Yankees':'New York City',\n", 376 | " 'Tampa Bay Rays':'Tampa Bay Area',\n", 377 | " 'Toronto Blue Jays':'Toronto',\n", 378 | " 'Baltimore Orioles':'Baltimore',\n", 379 | " 'Cleveland Indians':'Cleveland',\n", 380 | " 'Minnesota Twins':'Minneapolis–Saint Paul',\n", 381 | " 'Detroit Tigers':'Detroit',\n", 382 | " 'Chicago White Sox':'Chicago',\n", 383 | " 'Kansas City Royals':'Kansas City',\n", 384 | " 'Houston Astros':'Houston',\n", 385 | " 'Oakland Athletics':'San Francisco Bay Area',\n", 386 | " 'Seattle Mariners':'Seattle',\n", 387 | " 'Los Angeles Angels':'Los Angeles',\n", 388 | " 'Texas Rangers':'Dallas–Fort Worth',\n", 389 | " 'Atlanta Braves':'Atlanta',\n", 390 | " 'Washington Nationals':'Washington, D.C.',\n", 391 | " 'Philadelphia Phillies':'Philadelphia',\n", 392 | " 'New York Mets':'New York City',\n", 
393 | " 'Miami Marlins':'Miami–Fort Lauderdale',\n", 394 | " 'Milwaukee Brewers':'Milwaukee',\n", 395 | " 'Chicago Cubs':'Chicago',\n", 396 | " 'St. Louis Cardinals':'St. Louis',\n", 397 | " 'Pittsburgh Pirates':'Pittsburgh',\n", 398 | " 'Cincinnati Reds':'Cincinnati',\n", 399 | " 'Los Angeles Dodgers':'Los Angeles',\n", 400 | " 'Colorado Rockies':'Denver',\n", 401 | " 'Arizona Diamondbacks':'Phoenix',\n", 402 | " 'San Francisco Giants':'San Francisco Bay Area',\n", 403 | " 'San Diego Padres':'San Diego'})\n", 404 | " \n", 405 | " df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 406 | " \n", 407 | " #df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n", 408 | " df3['W'] = pd.to_numeric(df3['W'])\n", 409 | " df3['L'] = pd.to_numeric(df3['L'])\n", 410 | " df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n", 411 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 412 | " df3 = df3[he]\n", 413 | " df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n", 414 | " df3 = df3.groupby('Metropolitan area').mean().reset_index()\n", 415 | " \n", 416 | " \n", 417 | " population_by_region = df3['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n", 418 | " win_loss_by_region = df3['W/L'] # pass in win/loss ratio from mlb_df in the same order as cities[\"Metropolitan area\"]\n", 419 | "\n", 420 | " assert len(population_by_region) == len(win_loss_by_region), \"Q3: Your lists must be the same length\"\n", 421 | " assert len(population_by_region) == 26, \"Q3: There should be 26 teams being analysed for MLB\"\n", 422 | "\n", 423 | " return stats.pearsonr(population_by_region, win_loss_by_region)[0]" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "deletable": false, 431 | "editable": false, 432 | "nbgrader": { 433 | "checksum": "cda33b094ba19ccc37a481e0dd29e0bc", 434 | "grade": true, 435 | "grade_id": 
"cell-764d4476f425c5a2", 436 | "locked": true, 437 | "points": 20, 438 | "schema_version": 1, 439 | "solution": false 440 | } 441 | }, 442 | "outputs": [], 443 | "source": [] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": { 448 | "deletable": false, 449 | "editable": false, 450 | "nbgrader": { 451 | "checksum": "6977a6da9ed6d8b7a0b7e37bbeda709b", 452 | "grade": false, 453 | "grade_id": "cell-793df6c04dfb126e", 454 | "locked": true, 455 | "schema_version": 1, 456 | "solution": false 457 | } 458 | }, 459 | "source": [ 460 | "## Question 4\n", 461 | "For this question, calculate the win/loss ratio's correlation with the population of the city it is in for the **NFL** using **2018** data." 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 4, 467 | "metadata": { 468 | "deletable": false, 469 | "nbgrader": { 470 | "checksum": "c4914ad1e119278ec2bd567c52640b66", 471 | "grade": false, 472 | "grade_id": "cell-8ccebc209aeec8d9", 473 | "locked": false, 474 | "schema_version": 1, 475 | "solution": true 476 | } 477 | }, 478 | "outputs": [], 479 | "source": [ 480 | "import pandas as pd\n", 481 | "import numpy as np\n", 482 | "import scipy.stats as stats\n", 483 | "import re\n", 484 | "\n", 485 | "\n", 486 | "\n", 487 | "def nfl_correlation(): \n", 488 | " # YOUR CODE HERE\n", 489 | " #raise NotImplementedError()\n", 490 | " \n", 491 | " nfl_df=pd.read_csv(\"assets/nfl.csv\")\n", 492 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 493 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 494 | " \n", 495 | " nfl_df.drop([0,5,10,15,20,25,30,35],0,inplace=True)\n", 496 | " \n", 497 | " cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50],0,inplace=True)\n", 498 | " \n", 499 | " l1 = []\n", 500 | " for i in nfl_df['team']:\n", 501 | " #i=i.rstrip()\n", 502 | " i=i.split('*')\n", 503 | " l1.append(i[0])\n", 504 | " nfl_df['team'] = l1\n", 505 | " \n", 506 | " l2 = []\n", 507 | " for i in 
nfl_df['team']:\n", 508 | " i=i.split('+')\n", 509 | " l2.append(i[0])\n", 510 | " nfl_df['team'] = l2\n", 511 | " \n", 512 | " nfl_df = nfl_df.head(32)\n", 513 | " \n", 514 | " nfl_df['team_ville'] = nfl_df['team']\n", 515 | " nfl_df['team_ville'] = nfl_df['team_ville'].map({'New England Patriots':'Boston',\n", 516 | " 'Miami Dolphins':'Miami–Fort Lauderdale',\n", 517 | " 'Buffalo Bills':'Buffalo',\n", 518 | " 'New York Jets':'New York City',\n", 519 | " 'Baltimore Ravens':'Baltimore',\n", 520 | " 'Pittsburgh Steelers':'Pittsburgh',\n", 521 | " 'Cleveland Browns':'Cleveland',\n", 522 | " 'Cincinnati Bengals':'Cincinnati',\n", 523 | " 'Houston Texans':'Houston',\n", 524 | " 'Indianapolis Colts':'Indianapolis',\n", 525 | " 'Tennessee Titans':'Nashville',\n", 526 | " 'Jacksonville Jaguars':'Jacksonville',\n", 527 | " 'Kansas City Chiefs':'Kansas City',\n", 528 | " 'Los Angeles Chargers':'Los Angeles',\n", 529 | " 'Denver Broncos':'Denver',\n", 530 | " 'Oakland Raiders':'San Francisco Bay Area',\n", 531 | " 'Dallas Cowboys':'Dallas–Fort Worth',\n", 532 | " 'Philadelphia Eagles':'Philadelphia',\n", 533 | " 'Washington Redskins':'Washington, D.C.',\n", 534 | " 'New York Giants':'New York City',\n", 535 | " 'Chicago Bears':'Chicago',\n", 536 | " 'Minnesota Vikings':'Minneapolis–Saint Paul',\n", 537 | " 'Green Bay Packers':'Green Bay',\n", 538 | " 'Detroit Lions':'Detroit',\n", 539 | " 'New Orleans Saints':'New Orleans',\n", 540 | " 'Carolina Panthers':'Charlotte',\n", 541 | " 'Atlanta Falcons':'Atlanta',\n", 542 | " 'Tampa Bay Buccaneers':'Tampa Bay Area',\n", 543 | " 'Los Angeles Rams':'Los Angeles',\n", 544 | " 'Seattle Seahawks':'Seattle',\n", 545 | " 'San Francisco 49ers':'San Francisco Bay Area',\n", 546 | " 'Arizona Cardinals':'Phoenix'}) \n", 547 | " \n", 548 | " df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 549 | " \n", 550 | " \n", 551 | " df4['W'] = pd.to_numeric(df4['W'])\n", 552 | " df4['L'] = 
pd.to_numeric(df4['L'])\n", 553 | " df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n", 554 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 555 | " df4 = df4[he]\n", 556 | " df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n", 557 | " df4 = df4.groupby('Metropolitan area').mean().reset_index()\n", 558 | "\n", 559 | " \n", 560 | " population_by_region = df4['Population (2016 est.)[8]'] # pass in metropolitan area population from cities\n", 561 | " win_loss_by_region = df4['W/L'] # pass in win/loss ratio from nfl_df in the same order as cities[\"Metropolitan area\"]\n", 562 | "\n", 563 | " assert len(population_by_region) == len(win_loss_by_region), \"Q4: Your lists must be the same length\"\n", 564 | " assert len(population_by_region) == 29, \"Q4: There should be 29 teams being analysed for NFL\"\n", 565 | "\n", 566 | " return stats.pearsonr(population_by_region, win_loss_by_region)[0]" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "deletable": false, 574 | "editable": false, 575 | "nbgrader": { 576 | "checksum": "e9415d6399aa49e3a1a60813afdefa3b", 577 | "grade": true, 578 | "grade_id": "cell-de7b148b9554dbda", 579 | "locked": true, 580 | "points": 20, 581 | "schema_version": 1, 582 | "solution": false 583 | } 584 | }, 585 | "outputs": [], 586 | "source": [] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": { 591 | "deletable": false, 592 | "editable": false, 593 | "nbgrader": { 594 | "checksum": "b02d5cd3273f561e4ae939bb2a41740c", 595 | "grade": false, 596 | "grade_id": "cell-97b49d8639e908c4", 597 | "locked": true, 598 | "schema_version": 1, 599 | "solution": false 600 | } 601 | }, 602 | "source": [ 603 | "## Question 5\n", 604 | "In this question I would like you to explore the hypothesis that **given that an area has two sports teams in different sports, those teams will perform the same within their respective sports**. 
How I would like to see this explored is with a series of paired t-tests (so use [`ttest_rel`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html)) between all pairs of sports. Are there any sports where we can reject the null hypothesis? Again, average values where a sport has multiple teams in one region. Remember, you will only be including, for each sport, cities which have teams engaged in that sport, drop others as appropriate. This question is worth 20% of the grade for this assignment." 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 6, 610 | "metadata": { 611 | "deletable": false, 612 | "nbgrader": { 613 | "checksum": "6d78c961eb66f8d8c81f06d33ae8f393", 614 | "grade": false, 615 | "grade_id": "cell-92f25f44b8d1179f", 616 | "locked": false, 617 | "schema_version": 1, 618 | "solution": true 619 | } 620 | }, 621 | "outputs": [], 622 | "source": [ 623 | "import pandas as pd\n", 624 | "import numpy as np\n", 625 | "import scipy.stats as stats\n", 626 | "import re\n", 627 | "\n", 628 | "#mlb_df=pd.read_csv(\"assets/mlb.csv\")\n", 629 | "#nhl_df=pd.read_csv(\"assets/nhl.csv\")\n", 630 | "#nba_df=pd.read_csv(\"assets/nba.csv\")\n", 631 | "#nfl_df=pd.read_csv(\"assets/nfl.csv\")\n", 632 | "#cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 633 | "#cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 634 | "\n", 635 | "def nhl_correla(): \n", 636 | " # YOUR CODE HERE\n", 637 | " #raise NotImplementedError()\n", 638 | " \n", 639 | " nhl_df=pd.read_csv(\"assets/nhl.csv\")\n", 640 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 641 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 642 | " \n", 643 | " nhl_df.drop([0,9,18,26],0,inplace=True)\n", 644 | " cities.drop([14,15,18,19,20,21,23,24,25,27,28,32,33,38,40,41,42,44,45,46,48,49,50],0,inplace=True)\n", 645 | "\n", 646 | " l= []\n", 647 | " for i in cities['NHL']:\n", 648 | " i=i.split('[')\n", 649 | " l.append(i[0])\n", 650 | " cities['NHL'] = l\n", 
651 | "\n", 652 | " li = []\n", 653 | " for i in nhl_df['team']:\n", 654 | " i = re.findall(\"[^*]+\", i)\n", 655 | " li.append(i[0])\n", 656 | " nhl_df['team'] = li\n", 657 | "\n", 658 | " nhl_df = nhl_df.head(31)\n", 659 | "\n", 660 | " nhl_df['team_ville'] = nhl_df['team']\n", 661 | " nhl_df['team_ville'] = nhl_df['team_ville'].map({'Tampa Bay Lightning':'Tampa Bay Area',\n", 662 | " 'Boston Bruins':'Boston',\n", 663 | " 'Toronto Maple Leafs':'Toronto',\n", 664 | " 'Florida Panthers':'Miami–Fort Lauderdale',\n", 665 | " 'Detroit Red Wings':'Detroit',\n", 666 | " 'Montreal Canadiens':'Montreal',\n", 667 | " 'Ottawa Senators':'Ottawa',\n", 668 | " 'Buffalo Sabres':'Buffalo',\n", 669 | " 'Washington Capitals':'Washington, D.C.',\n", 670 | " 'Pittsburgh Penguins':'Pittsburgh',\n", 671 | " 'Philadelphia Flyers':'Philadelphia',\n", 672 | " 'Columbus Blue Jackets':'Columbus',\n", 673 | " 'New Jersey Devils':'New York City',\n", 674 | " 'Carolina Hurricanes':'Raleigh',\n", 675 | " 'New York Islanders':'New York City',\n", 676 | " 'New York Rangers':'New York City',\n", 677 | " 'Nashville Predators':'Nashville',\n", 678 | " 'Winnipeg Jets':'Winnipeg',\n", 679 | " 'Minnesota Wild':'Minneapolis–Saint Paul',\n", 680 | " 'Colorado Avalanche':'Denver',\n", 681 | " 'St. Louis Blues':'St. 
Louis',\n", 682 | " 'Dallas Stars':'Dallas–Fort Worth',\n", 683 | " 'Chicago Blackhawks':'Chicago',\n", 684 | " 'Vegas Golden Knights':'Las Vegas',\n", 685 | " 'Anaheim Ducks':'Los Angeles',\n", 686 | " 'San Jose Sharks':'San Francisco Bay Area',\n", 687 | " 'Los Angeles Kings':'Los Angeles',\n", 688 | " 'Calgary Flames':'Calgary',\n", 689 | " 'Edmonton Oilers':'Edmonton',\n", 690 | " 'Vancouver Canucks':'Vancouver',\n", 691 | " 'Arizona Coyotes':'Phoenix'})\n", 692 | "\n", 693 | " df = pd.merge(nhl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 694 | "\n", 695 | " df['W'] = pd.to_numeric(df['W'])\n", 696 | " df['L'] = pd.to_numeric(df['L'])\n", 697 | " df['Population (2016 est.)[8]'] = pd.to_numeric(df['Population (2016 est.)[8]'])\n", 698 | "\n", 699 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 700 | "\n", 701 | " df = df[he]\n", 702 | "\n", 703 | " df['W/L'] = df['W']/(df['L']+df['W'])\n", 704 | "\n", 705 | " df = df.groupby('Metropolitan area').mean().reset_index()\n", 706 | " \n", 707 | " return df\n", 708 | "\n", 709 | "def nba_correla():\n", 710 | " # YOUR CODE HERE\n", 711 | " #raise NotImplementedError()\n", 712 | " \n", 713 | " nba_df=pd.read_csv(\"assets/nba.csv\")\n", 714 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 715 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 716 | " \n", 717 | " cities.drop([16,17,19,20,21,22,23,26,29,30,31,34,35,36,37,39,40,43,44,47,48,49,50],0,inplace=True)\n", 718 | " \n", 719 | " l1 = []\n", 720 | " for i in nba_df['team']:\n", 721 | " #i=i.rstrip()\n", 722 | " i=i.split('*')\n", 723 | " l1.append(i[0])\n", 724 | " nba_df['team'] = l1\n", 725 | " \n", 726 | " l2 = []\n", 727 | " for i in nba_df['team']:\n", 728 | " i=i.split('(')\n", 729 | " l2.append(i[0])\n", 730 | " nba_df['team'] = l2\n", 731 | " \n", 732 | " l3 = []\n", 733 | " for i in nba_df['team']:\n", 734 | " i=i.rstrip()\n", 735 | " l3.append(i)\n", 736 | " nba_df['team'] = l3\n", 737 | " 
\n", 738 | " \n", 739 | " \n", 740 | " nba_df = nba_df.head(30)\n", 741 | "\n", 742 | " nba_df['team_ville'] = nba_df['team']\n", 743 | " nba_df['team_ville'] = nba_df['team_ville'].map({'Toronto Raptors':'Toronto',\n", 744 | " 'Boston Celtics':'Boston',\n", 745 | " 'Philadelphia 76ers':'Philadelphia',\n", 746 | " 'Cleveland Cavaliers':'Cleveland',\n", 747 | " 'Indiana Pacers':'Indianapolis',\n", 748 | " 'Miami Heat':'Miami–Fort Lauderdale',\n", 749 | " 'Milwaukee Bucks':'Milwaukee',\n", 750 | " 'Washington Wizards':'Washington, D.C.',\n", 751 | " 'Detroit Pistons':'Detroit',\n", 752 | " 'Charlotte Hornets':'Charlotte',\n", 753 | " 'New York Knicks':'New York City',\n", 754 | " 'Brooklyn Nets':'New York City',\n", 755 | " 'Chicago Bulls':'Chicago',\n", 756 | " 'Orlando Magic':'Orlando',\n", 757 | " 'Atlanta Hawks':'Atlanta',\n", 758 | " 'Houston Rockets':'Houston',\n", 759 | " 'Golden State Warriors':'San Francisco Bay Area',\n", 760 | " 'Portland Trail Blazers':'Portland',\n", 761 | " 'Oklahoma City Thunder':'Oklahoma City',\n", 762 | " 'Utah Jazz':'Salt Lake City',\n", 763 | " 'New Orleans Pelicans':'New Orleans',\n", 764 | " 'San Antonio Spurs':'San Antonio',\n", 765 | " 'Minnesota Timberwolves':'Minneapolis–Saint Paul',\n", 766 | " 'Denver Nuggets':'Denver',\n", 767 | " 'Los Angeles Clippers':'Los Angeles',\n", 768 | " 'Los Angeles Lakers':'Los Angeles',\n", 769 | " 'Sacramento Kings':'Sacramento',\n", 770 | " 'Dallas Mavericks':'Dallas–Fort Worth',\n", 771 | " 'Memphis Grizzlies':'Memphis',\n", 772 | " 'Phoenix Suns':'Phoenix'})\n", 773 | " \n", 774 | " df2 = pd.merge(nba_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 775 | " \n", 776 | " df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n", 777 | " df2['W'] = pd.to_numeric(df2['W'])\n", 778 | " df2['L'] = pd.to_numeric(df2['L'])\n", 779 | " df2['Population (2016 est.)[8]'] = pd.to_numeric(df2['Population (2016 est.)[8]'])\n", 780 | " he = ['team','W','L','W/L%','Metropolitan area','Population 
(2016 est.)[8]']\n", 781 | " df2 = df2[he]\n", 782 | " df2['W/L'] = df2['W']/(df2['L']+df2['W'])\n", 783 | " df2 = df2.groupby('Metropolitan area').mean().reset_index()\n", 784 | " \n", 785 | " return df2\n", 786 | "\n", 787 | "def mlb_correla(): \n", 788 | " # YOUR CODE HERE\n", 789 | " #raise NotImplementedError()\n", 790 | " \n", 791 | " mlb_df=pd.read_csv(\"assets/mlb.csv\")\n", 792 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 793 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 794 | " \n", 795 | " cities.drop([24,25,26,28,29,30,31,32,33,34,35,36,37,38,39,41,42,43,44,45,46,47,48,49,50],0,inplace=True)\n", 796 | " \n", 797 | " mlb_df = mlb_df.head(30)\n", 798 | " \n", 799 | " mlb_df['team_ville'] = mlb_df['team']\n", 800 | " mlb_df['team_ville'] = mlb_df['team_ville'].map({'Boston Red Sox':'Boston',\n", 801 | " 'New York Yankees':'New York City',\n", 802 | " 'Tampa Bay Rays':'Tampa Bay Area',\n", 803 | " 'Toronto Blue Jays':'Toronto',\n", 804 | " 'Baltimore Orioles':'Baltimore',\n", 805 | " 'Cleveland Indians':'Cleveland',\n", 806 | " 'Minnesota Twins':'Minneapolis–Saint Paul',\n", 807 | " 'Detroit Tigers':'Detroit',\n", 808 | " 'Chicago White Sox':'Chicago',\n", 809 | " 'Kansas City Royals':'Kansas City',\n", 810 | " 'Houston Astros':'Houston',\n", 811 | " 'Oakland Athletics':'San Francisco Bay Area',\n", 812 | " 'Seattle Mariners':'Seattle',\n", 813 | " 'Los Angeles Angels':'Los Angeles',\n", 814 | " 'Texas Rangers':'Dallas–Fort Worth',\n", 815 | " 'Atlanta Braves':'Atlanta',\n", 816 | " 'Washington Nationals':'Washington, D.C.',\n", 817 | " 'Philadelphia Phillies':'Philadelphia',\n", 818 | " 'New York Mets':'New York City',\n", 819 | " 'Miami Marlins':'Miami–Fort Lauderdale',\n", 820 | " 'Milwaukee Brewers':'Milwaukee',\n", 821 | " 'Chicago Cubs':'Chicago',\n", 822 | " 'St. Louis Cardinals':'St. 
Louis',\n", 823 | " 'Pittsburgh Pirates':'Pittsburgh',\n", 824 | " 'Cincinnati Reds':'Cincinnati',\n", 825 | " 'Los Angeles Dodgers':'Los Angeles',\n", 826 | " 'Colorado Rockies':'Denver',\n", 827 | " 'Arizona Diamondbacks':'Phoenix',\n", 828 | " 'San Francisco Giants':'San Francisco Bay Area',\n", 829 | " 'San Diego Padres':'San Diego'})\n", 830 | " \n", 831 | " df3 = pd.merge(mlb_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 832 | " \n", 833 | " #df2['W/L%'] = pd.to_numeric(df2['W/L%'])\n", 834 | " df3['W'] = pd.to_numeric(df3['W'])\n", 835 | " df3['L'] = pd.to_numeric(df3['L'])\n", 836 | " df3['Population (2016 est.)[8]'] = pd.to_numeric(df3['Population (2016 est.)[8]'])\n", 837 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 838 | " df3 = df3[he]\n", 839 | " df3['W/L'] = df3['W']/(df3['L']+df3['W'])\n", 840 | " df3 = df3.groupby('Metropolitan area').mean().reset_index()\n", 841 | " \n", 842 | " return df3\n", 843 | "\n", 844 | "def nfl_correla(): \n", 845 | " # YOUR CODE HERE\n", 846 | " #raise NotImplementedError()\n", 847 | " \n", 848 | " nfl_df=pd.read_csv(\"assets/nfl.csv\")\n", 849 | " cities=pd.read_html(\"assets/wikipedia_data.html\")[1]\n", 850 | " cities=cities.iloc[:-1,[0,3,5,6,7,8]]\n", 851 | " \n", 852 | " nfl_df.drop([0,5,10,15,20,25,30,35],0,inplace=True)\n", 853 | " \n", 854 | " cities.drop([13,22,27,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50],0,inplace=True)\n", 855 | " \n", 856 | " l1 = []\n", 857 | " for i in nfl_df['team']:\n", 858 | " #i=i.rstrip()\n", 859 | " i=i.split('*')\n", 860 | " l1.append(i[0])\n", 861 | " nfl_df['team'] = l1\n", 862 | " \n", 863 | " l2 = []\n", 864 | " for i in nfl_df['team']:\n", 865 | " i=i.split('+')\n", 866 | " l2.append(i[0])\n", 867 | " nfl_df['team'] = l2\n", 868 | " \n", 869 | " nfl_df = nfl_df.head(32)\n", 870 | " \n", 871 | " nfl_df['team_ville'] = nfl_df['team']\n", 872 | " nfl_df['team_ville'] = nfl_df['team_ville'].map({'New England 
Patriots':'Boston',\n", 873 | " 'Miami Dolphins':'Miami–Fort Lauderdale',\n", 874 | " 'Buffalo Bills':'Buffalo',\n", 875 | " 'New York Jets':'New York City',\n", 876 | " 'Baltimore Ravens':'Baltimore',\n", 877 | " 'Pittsburgh Steelers':'Pittsburgh',\n", 878 | " 'Cleveland Browns':'Cleveland',\n", 879 | " 'Cincinnati Bengals':'Cincinnati',\n", 880 | " 'Houston Texans':'Houston',\n", 881 | " 'Indianapolis Colts':'Indianapolis',\n", 882 | " 'Tennessee Titans':'Nashville',\n", 883 | " 'Jacksonville Jaguars':'Jacksonville',\n", 884 | " 'Kansas City Chiefs':'Kansas City',\n", 885 | " 'Los Angeles Chargers':'Los Angeles',\n", 886 | " 'Denver Broncos':'Denver',\n", 887 | " 'Oakland Raiders':'San Francisco Bay Area',\n", 888 | " 'Dallas Cowboys':'Dallas–Fort Worth',\n", 889 | " 'Philadelphia Eagles':'Philadelphia',\n", 890 | " 'Washington Redskins':'Washington, D.C.',\n", 891 | " 'New York Giants':'New York City',\n", 892 | " 'Chicago Bears':'Chicago',\n", 893 | " 'Minnesota Vikings':'Minneapolis–Saint Paul',\n", 894 | " 'Green Bay Packers':'Green Bay',\n", 895 | " 'Detroit Lions':'Detroit',\n", 896 | " 'New Orleans Saints':'New Orleans',\n", 897 | " 'Carolina Panthers':'Charlotte',\n", 898 | " 'Atlanta Falcons':'Atlanta',\n", 899 | " 'Tampa Bay Buccaneers':'Tampa Bay Area',\n", 900 | " 'Los Angeles Rams':'Los Angeles',\n", 901 | " 'Seattle Seahawks':'Seattle',\n", 902 | " 'San Francisco 49ers':'San Francisco Bay Area',\n", 903 | " 'Arizona Cardinals':'Phoenix'}) \n", 904 | " \n", 905 | " df4 = pd.merge(nfl_df,cities, left_on= \"team_ville\", right_on= \"Metropolitan area\")\n", 906 | " \n", 907 | " \n", 908 | " df4['W'] = pd.to_numeric(df4['W'])\n", 909 | " df4['L'] = pd.to_numeric(df4['L'])\n", 910 | " df4['Population (2016 est.)[8]'] = pd.to_numeric(df4['Population (2016 est.)[8]'])\n", 911 | " he = ['team','W','L','Metropolitan area','Population (2016 est.)[8]']\n", 912 | " df4 = df4[he]\n", 913 | " df4['W/L'] = df4['W']/(df4['L']+df4['W'])\n", 914 | " df4 = 
df4.groupby('Metropolitan area').mean().reset_index()\n", 915 | " \n", 916 | " return df4\n", 917 | "\n", 918 | "\n", 919 | "\n", 920 | "\n", 921 | "\n", 922 | "def sports_team_performance():\n", 923 | " # YOUR CODE HERE\n", 924 | " #raise NotImplementedError()\n", 925 | " \n", 926 | " nfl = nfl_correla()\n", 927 | " nba = nba_correla()\n", 928 | " mlb = mlb_correla()\n", 929 | " nhl = nhl_correla()\n", 930 | "\n", 931 | " nba_nfl = pd.merge(nba,nfl, on='Metropolitan area')\n", 932 | " pval_nba_nfl = stats.ttest_rel(nba_nfl['W/L_x'],nba_nfl['W/L_y'])[1]\n", 933 | " nba_nhl = pd.merge(nba,nhl, on='Metropolitan area')\n", 934 | " pval_nba_nhl = stats.ttest_rel(nba_nhl['W/L_x'],nba_nhl['W/L_y'])[1]\n", 935 | " mlb_nfl = pd.merge(mlb,nfl, on='Metropolitan area')\n", 936 | " pval_mlb_nfl = stats.ttest_rel(mlb_nfl['W/L_x'],mlb_nfl['W/L_y'])[1]\n", 937 | " mlb_nhl = pd.merge(mlb,nhl, on='Metropolitan area')\n", 938 | " pval_mlb_nhl = stats.ttest_rel(mlb_nhl['W/L_x'],mlb_nhl['W/L_y'])[1]\n", 939 | " mlb_nba = pd.merge(mlb,nba, on='Metropolitan area')\n", 940 | " pval_mlb_nba = stats.ttest_rel(mlb_nba['W/L_x'],mlb_nba['W/L_y'])[1]\n", 941 | " nhl_nfl = pd.merge(nhl,nfl, on='Metropolitan area')\n", 942 | " pval_nhl_nfl = stats.ttest_rel(nhl_nfl['W/L_x'],nhl_nfl['W/L_y'])[1]\n", 943 | " \n", 944 | " pv = {'NFL': {\"NFL\": np.nan, 'NBA': pval_nba_nfl, 'NHL': pval_nhl_nfl, 'MLB': pval_mlb_nfl},\n", 945 | " 'NBA': {\"NFL\": pval_nba_nfl, 'NBA': np.nan, 'NHL': pval_nba_nhl, 'MLB': pval_mlb_nba},\n", 946 | " 'NHL': {\"NFL\": pval_nhl_nfl, 'NBA': pval_nba_nhl, 'NHL': np.nan, 'MLB': pval_mlb_nhl},\n", 947 | " 'MLB': {\"NFL\": pval_mlb_nfl, 'NBA': pval_mlb_nba, 'NHL': pval_mlb_nhl, 'MLB': np.nan}\n", 948 | " }\n", 949 | "\n", 950 | " \n", 951 | " # Note: p_values is a full dataframe, so df.loc[\"NFL\",\"NBA\"] should be the same as df.loc[\"NBA\",\"NFL\"] and\n", 952 | " # df.loc[\"NFL\",\"NFL\"] should return np.nan\n", 953 | " #sports = ['NFL', 'NBA', 'NHL', 'MLB']\n", 954 | " 
#p_values = pd.DataFrame({k:np.nan for k in sports}, index=sports)\n", 955 | " p_values = pd.DataFrame(pv)\n", 956 | " \n", 957 | " assert abs(p_values.loc[\"NBA\", \"NHL\"] - 0.02) <= 1e-2, \"The NBA-NHL p-value should be around 0.02\"\n", 958 | " assert abs(p_values.loc[\"MLB\", \"NFL\"] - 0.80) <= 1e-2, \"The MLB-NFL p-value should be around 0.80\"\n", 959 | " return p_values" 960 | ] 961 | }, 962 | { 963 | "cell_type": "code", 964 | "execution_count": null, 965 | "metadata": { 966 | "deletable": false, 967 | "editable": false, 968 | "nbgrader": { 969 | "checksum": "2a596ab421a45cc01168d10e8fbb8f89", 970 | "grade": true, 971 | "grade_id": "cell-fb4b9cb5ff4570a6", 972 | "locked": true, 973 | "points": 20, 974 | "schema_version": 1, 975 | "solution": false 976 | } 977 | }, 978 | "outputs": [], 979 | "source": [] 980 | } 981 | ], 982 | "metadata": { 983 | "coursera": { 984 | "schema_names": [ 985 | "mooc_adswpy_1_v2_assignment4" 986 | ] 987 | }, 988 | "kernelspec": { 989 | "display_name": "Python 3", 990 | "language": "python", 991 | "name": "python3" 992 | }, 993 | "language_info": { 994 | "codemirror_mode": { 995 | "name": "ipython", 996 | "version": 3 997 | }, 998 | "file_extension": ".py", 999 | "mimetype": "text/x-python", 1000 | "name": "python", 1001 | "nbconvert_exporter": "python", 1002 | "pygments_lexer": "ipython3", 1003 | "version": "3.7.3" 1004 | } 1005 | }, 1006 | "nbformat": 4, 1007 | "nbformat_minor": 4 1008 | } 1009 | -------------------------------------------------------------------------------- /Assignment_3/assignment3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": false, 7 | "editable": false, 8 | "nbgrader": { 9 | "checksum": "ab80976c194c2c1bfebb7f3a12fc4e58", 10 | "grade": false, 11 | "grade_id": "cell-018440ed2f1b6a62", 12 | "locked": true, 13 | "schema_version": 1, 14 | "solution": false 15 | } 16 | }, 17 | 
"source": [ 18 | "# Assignment 3\n", 19 | "All questions are weighted the same in this assignment. This assignment requires more individual learning than the last one did - you are encouraged to check out the [pandas documentation](http://pandas.pydata.org/pandas-docs/stable/) to find functions or methods you might not have used yet, or ask questions on [Stack Overflow](http://stackoverflow.com/) and tag them as pandas and python related. All questions are worth the same number of points except question 1 which is worth 17% of the assignment grade.\n", 20 | "\n", 21 | "**Note**: Questions 2-13 rely on your question 1 answer." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "import re\n", 33 | "\n", 34 | "# Filter all warnings. If you would like to see the warnings, please comment out the two lines below.\n", 35 | "import warnings\n", 36 | "warnings.filterwarnings('ignore')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "deletable": false, 43 | "editable": false, 44 | "nbgrader": { 45 | "checksum": "68063b8b0783f3d8122b516e0cce5f45", 46 | "grade": false, 47 | "grade_id": "cell-7e5190c7ff1f2e42", 48 | "locked": true, 49 | "schema_version": 1, 50 | "solution": false 51 | } 52 | }, 53 | "source": [ 54 | "### Question 1\n", 55 | "Load the energy data from the file `assets/Energy Indicators.xls`, which is a list of indicators of [energy supply and renewable electricity production](assets/Energy%20Indicators.xls) from the [United Nations](http://unstats.un.org/unsd/environment/excel_file_tables/2013/Energy%20Indicators.xls) for the year 2013, and should be put into a DataFrame with the variable name of **Energy**.\n", 56 | "\n", 57 | "Keep in mind that this is an Excel file, and not a comma separated values file. Also, make sure to exclude the footer and header information from the datafile. 
The first two columns are unnecessary, so you should get rid of them, and you should change the column labels so that the columns are:\n", 58 | "\n", 59 | "`['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable']`\n", 60 | "\n", 61 | "Convert `Energy Supply` to gigajoules (**Note: there are 1,000,000 gigajoules in a petajoule**). For all countries which have missing data (e.g. data with \"...\") make sure this is reflected as `np.NaN` values.\n", 62 | "\n", 63 | "Rename the following list of countries (for use in later questions):\n", 64 | "\n", 65 | "```\"Republic of Korea\": \"South Korea\",\n", 66 | "\"United States of America\": \"United States\",\n", 67 | "\"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n", 68 | "\"China, Hong Kong Special Administrative Region\": \"Hong Kong\"```\n", 69 | "\n", 70 | "There are also several countries with numbers and/or parentheses in their names. Be sure to remove these, e.g. `'Bolivia (Plurinational State of)'` should be `'Bolivia'`. `'Switzerland17'` should be `'Switzerland'`.\n", 71 | "\n", 72 | "Next, load the GDP data from the file `assets/world_bank.csv`, which is a csv containing countries' GDP from 1960 to 2015 from [World Bank](http://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Call this DataFrame **GDP**. \n", 73 | "\n", 74 | "Make sure to skip the header, and rename the following list of countries:\n", 75 | "\n", 76 | "```\"Korea, Rep.\": \"South Korea\", \n", 77 | "\"Iran, Islamic Rep.\": \"Iran\",\n", 78 | "\"Hong Kong SAR, China\": \"Hong Kong\"```\n", 79 | "\n", 80 | "Finally, load the [Scimago Journal and Country Rank data for Energy Engineering and Power Technology](http://www.scimagojr.com/countryrank.php?category=2102) from the file `assets/scimagojr-3.xlsx`, which ranks countries based on their journal contributions in the aforementioned area. 
Call this DataFrame **ScimEn**.\n", 81 | "\n", 82 | "Join the three datasets: GDP, Energy, and ScimEn into a new dataset (using the intersection of country names). Use only the last 10 years (2006-2015) of GDP data and only the top 15 countries by Scimagojr 'Rank' (Rank 1 through 15). \n", 83 | "\n", 84 | "The index of this DataFrame should be the name of the country, and the columns should be ['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',\n", 85 | " 'Citations per document', 'H index', 'Energy Supply',\n", 86 | " 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008',\n", 87 | " '2009', '2010', '2011', '2012', '2013', '2014', '2015'].\n", 88 | "\n", 89 | "*This function should return a DataFrame with 20 columns and 15 entries, and the rows of the DataFrame should be sorted by \"Rank\".*" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 2, 95 | "metadata": { 96 | "deletable": false, 97 | "nbgrader": { 98 | "checksum": "57e040f07954f979910eddc0f489ffe5", 99 | "grade": false, 100 | "grade_id": "cell-bce4d6f2ecdd1297", 101 | "locked": false, 102 | "schema_version": 1, 103 | "solution": true 104 | } 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "def answer_one():\n", 109 | " # YOUR CODE HERE\n", 110 | " Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n", 111 | " Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'],inplace=True)\n", 112 | " Energy.drop(Energy.index[0:17],0,inplace=True)\n", 113 | " Energy.drop(Energy.index[227:],0,inplace=True)\n", 114 | " Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable' }, inplace=True )\n", 115 | " Energy.replace({'...':np.nan}, inplace= True)\n", 116 | " Energy['Energy Supply'] = Energy['Energy Supply']*1000000\n", 117 | " \n", 118 | " l= []\n", 119 | " for i in Energy['Country']:\n", 120 | " i=i.split(' (')\n", 121 | " l.append(i[0])\n", 122 | " 
Energy['Country'] = l\n", 123 | " \n", 124 | " li = []\n", 125 | " for i in Energy['Country']:\n", 126 | " i = re.findall(\"[^0-9]+\", i)\n", 127 | " li.append(i[0])\n", 128 | " Energy['Country'] = li\n", 129 | " \n", 130 | " Energy.replace({\"Republic of Korea\": \"South Korea\",\n", 131 | " \"United States of America\": \"United States\",\n", 132 | " \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n", 133 | " \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"}, inplace= True)\n", 134 | " \n", 135 | " GDP = pd.read_csv(\"assets/world_bank.csv\")\n", 136 | " GDP.drop(GDP.index[0:3],0,inplace=True)\n", 137 | " GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"}, inplace=True)\n", 138 | " \n", 139 | " il = GDP.iloc[0]\n", 140 | " di = {}\n", 141 | " i = 0\n", 142 | " for d in GDP.columns:\n", 143 | " if type(il[i]) == np.float64:\n", 144 | " di[d] = str(int(il[i]))\n", 145 | " else:\n", 146 | " di[d] = il[i]\n", 147 | " i += 1\n", 148 | " \n", 149 | " GDP.rename(columns=di, inplace=True)\n", 150 | " GDP.drop(GDP.index[0:1],0,inplace=True)\n", 151 | " GDP.rename(columns={'Country Name': 'Country'}, inplace=True)\n", 152 | " \n", 153 | " ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n", 154 | " \n", 155 | " j1 = pd.merge(ScimEn,Energy)\n", 156 | " j2 = pd.merge(j1, GDP)\n", 157 | " j2.set_index('Country', inplace = True)\n", 158 | " j2 = j2[0:15]\n", 159 | " j2.drop(j2.columns[[np.arange(10,59)]], axis='columns', inplace = True)\n", 160 | " \n", 161 | " return j2\n", 162 | " \n", 163 | " #raise NotImplementedError()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 3, 169 | "metadata": { 170 | "deletable": false, 171 | "editable": false, 172 | "nbgrader": { 173 | "checksum": "7bcc18b325d2935427ac2566cddd3661", 174 | "grade": true, 175 | "grade_id": "cell-780b5a4da845dbc3", 176 | "locked": true, 177 | "points": 5, 178 | 
"schema_version": 1, 179 | "solution": false 180 | } 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "assert type(answer_one()) == pd.DataFrame, \"Q1: You should return a DataFrame!\"\n", 185 | "\n", 186 | "assert answer_one().shape == (15,20), \"Q1: Your DataFrame should have 20 columns and 15 entries!\"\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 4, 192 | "metadata": { 193 | "deletable": false, 194 | "editable": false, 195 | "nbgrader": { 196 | "checksum": "e241830bcf3f63326b4c9cdf50be8f86", 197 | "grade": true, 198 | "grade_id": "cell-74b5f0b971379f64", 199 | "locked": true, 200 | "points": 10, 201 | "schema_version": 1, 202 | "solution": false 203 | } 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "# Cell for autograder.\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "deletable": false, 214 | "editable": false, 215 | "nbgrader": { 216 | "checksum": "596280cd22ed98c5540580c62954ec2f", 217 | "grade": false, 218 | "grade_id": "cell-babe0ff2a1fc6b17", 219 | "locked": true, 220 | "schema_version": 1, 221 | "solution": false 222 | } 223 | }, 224 | "source": [ 225 | "### Question 2\n", 226 | "The previous question joined three datasets then reduced this to just the top 15 entries. 
When you joined the datasets, but before you reduced this to the top 15 items, how many entries did you lose?\n", 227 | "\n", 228 | "*This function should return a single number.*" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 5, 234 | "metadata": { 235 | "deletable": false, 236 | "editable": false, 237 | "nbgrader": { 238 | "checksum": "c9a34da81c80126fd23ae2eac33f98f8", 239 | "grade": false, 240 | "grade_id": "cell-96f84e7b693bef63", 241 | "locked": true, 242 | "schema_version": 1, 243 | "solution": false 244 | } 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/html": [ 250 | "\n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " Everything but this!\n", 256 | "\n" 257 | ], 258 | "text/plain": [ 259 | "" 260 | ] 261 | }, 262 | "metadata": {}, 263 | "output_type": "display_data" 264 | } 265 | ], 266 | "source": [ 267 | "%%HTML\n", 268 | "\n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " Everything but this!\n", 274 | "" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 6, 280 | "metadata": { 281 | "deletable": false, 282 | "nbgrader": { 283 | "checksum": "aeeb01fb73054269dd7b818d0945e2f7", 284 | "grade": false, 285 | "grade_id": "cell-b0c3202c000aada4", 286 | "locked": false, 287 | "schema_version": 1, 288 | "solution": true 289 | } 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "def answer_two():\n", 294 | " # YOUR CODE HERE\n", 295 | " \n", 296 | " Energy = pd.read_excel(\"assets/Energy Indicators.xls\")\n", 297 | " Energy.drop(columns=['Unnamed: 0', 'Unnamed: 1'],inplace=True)\n", 298 | " Energy.drop(Energy.index[0:17],0,inplace=True)\n", 299 | " Energy.drop(Energy.index[227:],0,inplace=True)\n", 300 | " Energy.rename(columns={'Unnamed: 2': 'Country', 'Unnamed: 3': 'Energy Supply', 'Unnamed: 4': 'Energy Supply per Capita', 'Unnamed: 5': '% Renewable' }, inplace=True )\n", 301 | " Energy.replace({'...':np.nan}, inplace= True)\n", 302 | " Energy['Energy 
Supply'] = Energy['Energy Supply']*1000000\n", 303 | " \n", 304 | " l= []\n", 305 | " for i in Energy['Country']:\n", 306 | " i=i.split(' (')\n", 307 | " l.append(i[0])\n", 308 | " Energy['Country'] = l\n", 309 | " \n", 310 | " li = []\n", 311 | " for i in Energy['Country']:\n", 312 | " i = re.findall(\"[^0-9]+\", i)\n", 313 | " li.append(i[0])\n", 314 | " Energy['Country'] = li\n", 315 | " \n", 316 | " Energy.replace({\"Republic of Korea\": \"South Korea\",\n", 317 | " \"United States of America\": \"United States\",\n", 318 | " \"United Kingdom of Great Britain and Northern Ireland\": \"United Kingdom\",\n", 319 | " \"China, Hong Kong Special Administrative Region\": \"Hong Kong\"}, inplace= True)\n", 320 | " \n", 321 | " GDP = pd.read_csv(\"assets/world_bank.csv\")\n", 322 | " GDP.drop(GDP.index[0:3],0,inplace=True)\n", 323 | " GDP.replace({\"Korea, Rep.\": \"South Korea\", \"Iran, Islamic Rep.\": \"Iran\", \"Hong Kong SAR, China\": \"Hong Kong\"}, inplace=True)\n", 324 | " \n", 325 | " il = GDP.iloc[0]\n", 326 | " di = {}\n", 327 | " i = 0\n", 328 | " for d in GDP.columns:\n", 329 | " if type(il[i]) == np.float64:\n", 330 | " di[d] = str(int(il[i]))\n", 331 | " else:\n", 332 | " di[d] = il[i]\n", 333 | " i += 1\n", 334 | " \n", 335 | " GDP.rename(columns=di, inplace=True)\n", 336 | " GDP.drop(GDP.index[0:1],0,inplace=True)\n", 337 | " GDP.rename(columns={'Country Name': 'Country'}, inplace=True)\n", 338 | " \n", 339 | " ScimEn = pd.read_excel(\"assets/scimagojr-3.xlsx\")\n", 340 | " \n", 341 | " ji = pd.merge(ScimEn,Energy)\n", 342 | " ji = pd.merge(ji, GDP)\n", 343 | " ji.set_index('Country', inplace = True)\n", 344 | " \n", 345 | " j1 = pd.merge(ScimEn,Energy, how=\"outer\")\n", 346 | " j2 = pd.merge(j1, GDP, how=\"outer\")\n", 347 | " j2.set_index('Country', inplace = True)\n", 348 | " \n", 349 | " diff = j2.shape[0] - ji.shape[0]\n", 350 | " \n", 351 | " return diff\n", 352 | " #raise NotImplementedError()" 353 | ] 354 | }, 355 | { 356 | "cell_type": 
"code", 357 | "execution_count": 7, 358 | "metadata": { 359 | "deletable": false, 360 | "editable": false, 361 | "nbgrader": { 362 | "checksum": "19759b4a6c03f34b647f66d343952619", 363 | "grade": true, 364 | "grade_id": "cell-be24cfcaa87ab071", 365 | "locked": true, 366 | "points": 6.66, 367 | "schema_version": 1, 368 | "solution": false 369 | } 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "assert type(answer_two()) == int, \"Q2: You should return an int number!\"\n" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": { 379 | "deletable": false, 380 | "editable": false, 381 | "nbgrader": { 382 | "checksum": "5af1b4f99cd383263130f4c00442a133", 383 | "grade": false, 384 | "grade_id": "cell-2e54816014e48c18", 385 | "locked": true, 386 | "schema_version": 1, 387 | "solution": false 388 | } 389 | }, 390 | "source": [ 391 | "### Question 3\n", 392 | "What are the top 15 countries for average GDP over the last 10 years?\n", 393 | "\n", 394 | "*This function should return a Series named `avgGDP` with 15 countries and their average GDP sorted in descending order.*" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 8, 400 | "metadata": { 401 | "deletable": false, 402 | "nbgrader": { 403 | "checksum": "a3490fd71a46cecfa3da698e006fe729", 404 | "grade": false, 405 | "grade_id": "cell-8c3d74335c0d489a", 406 | "locked": false, 407 | "schema_version": 1, 408 | "solution": true 409 | } 410 | }, 411 | "outputs": [], 412 | "source": [ 413 | "def answer_three():\n", 414 | " # YOUR CODE HERE\n", 415 | " ng = np.arange(10,20)\n", 416 | " dat = answer_one().columns[[ng]]\n", 417 | " avgGDP = answer_one()[dat].mean(axis=1).sort_values(ascending=False)\n", 418 | " \n", 419 | " \n", 420 | " return avgGDP\n", 421 | " #raise NotImplementedError()" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 9, 427 | "metadata": { 428 | "deletable": false, 429 | "editable": false, 430 | "nbgrader": { 431 | "checksum": 
"2f9c90ee07138f94c027c5d2f907ab13", 432 | "grade": true, 433 | "grade_id": "cell-aaaa11ef7d26f4cf", 434 | "locked": true, 435 | "points": 6.66, 436 | "schema_version": 1, 437 | "solution": false 438 | } 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "assert type(answer_three()) == pd.Series, \"Q3: You should return a Series!\"\n" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": { 448 | "deletable": false, 449 | "editable": false, 450 | "locked": true 451 | }, 452 | "source": [ 453 | "### Question 4\n", 454 | "By how much had the GDP changed over the 10 year span for the country with the 6th largest average GDP?\n", 455 | "\n", 456 | "*This function should return a single number.*" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 30, 462 | "metadata": { 463 | "deletable": false, 464 | "nbgrader": { 465 | "checksum": "768a19bcc8adc6991fe5c757e95ba784", 466 | "grade": false, 467 | "grade_id": "cell-7f77d099e3e0bbee", 468 | "locked": false, 469 | "schema_version": 1, 470 | "solution": true 471 | } 472 | }, 473 | "outputs": [], 474 | "source": [ 475 | "def answer_four():\n", 476 | " # YOUR CODE HERE\n", 477 | " \n", 478 | " pg = answer_one().loc['United Kingdom', ['2006']]['2006']\n", 479 | " dg = answer_one().loc['United Kingdom', ['2015']]['2015']\n", 480 | " dkd = dg - pg\n", 481 | " \n", 482 | " return dkd\n", 483 | " #raise NotImplementedError()" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 11, 489 | "metadata": { 490 | "deletable": false, 491 | "editable": false, 492 | "nbgrader": { 493 | "checksum": "a7770c49cdfac4fa6368dfe8b39e6474", 494 | "grade": true, 495 | "grade_id": "cell-564dd8e5e24b0f83", 496 | "locked": true, 497 | "points": 6.66, 498 | "schema_version": 1, 499 | "solution": false 500 | } 501 | }, 502 | "outputs": [], 503 | "source": [ 504 | "# Cell for autograder.\n" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": { 510 | "deletable": 
false, 511 | "editable": false, 512 | "nbgrader": { 513 | "checksum": "ed6dbc94ff1b6268873413fee12741cd", 514 | "grade": false, 515 | "grade_id": "cell-617669111e38ca15", 516 | "locked": true, 517 | "schema_version": 1, 518 | "solution": false 519 | } 520 | }, 521 | "source": [ 522 | "### Question 5\n", 523 | "What is the mean energy supply per capita?\n", 524 | "\n", 525 | "*This function should return a single number.*" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 12, 531 | "metadata": { 532 | "deletable": false, 533 | "nbgrader": { 534 | "checksum": "cfd61a1735889e7ef20692ca0d28ddcb", 535 | "grade": false, 536 | "grade_id": "cell-58e79d558e982eef", 537 | "locked": false, 538 | "schema_version": 1, 539 | "solution": true 540 | } 541 | }, 542 | "outputs": [], 543 | "source": [ 544 | "def answer_five():\n", 545 | " # YOUR CODE HERE\n", 546 | " mpc = answer_one()['Energy Supply per Capita'].mean()\n", 547 | " \n", 548 | " return mpc\n", 549 | " #raise NotImplementedError()" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": 13, 555 | "metadata": { 556 | "deletable": false, 557 | "editable": false, 558 | "nbgrader": { 559 | "checksum": "9d61bf22656baeecc77f63d54448590e", 560 | "grade": true, 561 | "grade_id": "cell-30cc66180851638c", 562 | "locked": true, 563 | "points": 6.66, 564 | "schema_version": 1, 565 | "solution": false 566 | } 567 | }, 568 | "outputs": [], 569 | "source": [ 570 | "# Cell for autograder.\n" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": { 576 | "deletable": false, 577 | "editable": false, 578 | "nbgrader": { 579 | "checksum": "2c7a163ae96f56317756456b0d9d695b", 580 | "grade": false, 581 | "grade_id": "cell-5c11ddd12fd71b3f", 582 | "locked": true, 583 | "schema_version": 1, 584 | "solution": false 585 | } 586 | }, 587 | "source": [ 588 | "### Question 6\n", 589 | "What country has the maximum % Renewable and what is the percentage?\n", 590 | "\n", 591 | "*This 
def answer_six():
    """Question 6: country with the maximum '% Renewable' and that percentage.

    Returns:
        tuple: (index label of the row holding the maximum, maximum value).
        NOTE(review): the index of answer_one() is presumably the country
        name (the autograder asserts the first element is a str) - confirm
        against answer_one(), which is defined outside this view.
    """
    # Build the frame once; the previous version called answer_one() three
    # times for a single answer.
    renewable = answer_one()['% Renewable']
    # idxmax returns the label of the first occurrence of the maximum, which
    # is what the boolean-mask lookup `index[col == col.max()][0]` selected.
    return renewable.idxmax(), renewable.max()
def answer_seven():
    """Question 7: highest ratio of self-citations to total citations.

    Computes 'Self-citations' / 'Citations' row by row on the frame returned
    by answer_one() (defined outside this view).

    Returns:
        tuple: (index label of the row with the largest ratio, that ratio).
    """
    # One call instead of three: every call to answer_one() rebuilds the frame.
    top15 = answer_one()
    ratio = top15['Self-citations'] / top15['Citations']
    # idxmax yields the first label holding the maximum, matching the old
    # `index[ratio == ratio.max()][0]` lookup.
    return ratio.idxmax(), ratio.max()
def answer_eight():
    """Question 8: third most populous country by estimated population.

    The population estimate is 'Energy Supply' / 'Energy Supply per Capita',
    computed on the frame returned by answer_one() (defined outside this view).

    Returns:
        The index label at the third position after sorting the estimate in
        descending order.

    Raises:
        IndexError: if the frame has fewer than three rows.
    """
    top15 = answer_one()
    pop_est = top15['Energy Supply'] / top15['Energy Supply per Capita']
    # Take the label by position from the sorted index. The previous
    # `sort_values(...)[2]` used an integer key on a label-indexed Series
    # (deprecated positional fallback) and then re-found the label through a
    # float equality test, which could return the wrong row on ties.
    return pop_est.sort_values(ascending=False).index[2]
estimates the number of citable documents per person. \n", 798 | "What is the correlation between the number of citable documents per capita and the energy supply per capita? Use the `.corr()` method, (Pearson's correlation).\n", 799 | "\n", 800 | "*This function should return a single number.*\n", 801 | "\n", 802 | "*(Optional: Use the built-in function `plot9()` to visualize the relationship between Energy Supply per Capita vs. Citable docs per Capita)*" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 20, 808 | "metadata": { 809 | "deletable": false, 810 | "nbgrader": { 811 | "checksum": "94e06c4c3a9618b94dbb0e86913b546c", 812 | "grade": false, 813 | "grade_id": "cell-033679ea456bfb9d", 814 | "locked": false, 815 | "schema_version": 1, 816 | "solution": true 817 | } 818 | }, 819 | "outputs": [], 820 | "source": [ 821 | "def answer_nine():\n", 822 | " # YOUR CODE HERE\n", 823 | " \n", 824 | " Top15 = answer_one()\n", 825 | " Top15 = Top15.assign(pop = Top15['Energy Supply']/Top15['Energy Supply per Capita'])\n", 826 | " Top15 = Top15.assign(Citable_docs_per_Capita = Top15['Citable documents'] / Top15['pop'])\n", 827 | " corre = Top15['Citable_docs_per_Capita'].corr(Top15['Energy Supply per Capita'])\n", 828 | " return corre\n", 829 | " #raise NotImplementedError()" 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": 21, 835 | "metadata": { 836 | "deletable": false, 837 | "editable": false, 838 | "nbgrader": { 839 | "checksum": "01a146bbcca0fa9c9c13e71ab52e710f", 840 | "grade": false, 841 | "grade_id": "cell-644824f6c708bf80", 842 | "locked": true, 843 | "schema_version": 1, 844 | "solution": false 845 | } 846 | }, 847 | "outputs": [], 848 | "source": [ 849 | "def plot9():\n", 850 | " import matplotlib as plt\n", 851 | " %matplotlib inline\n", 852 | " \n", 853 | " Top15 = answer_one()\n", 854 | " Top15['PopEst'] = Top15['Energy Supply'] / Top15['Energy Supply per Capita']\n", 855 | " Top15['Citable docs per 
Capita'] = Top15['Citable documents'] / Top15['PopEst']\n", 856 | " Top15.plot(x='Citable docs per Capita', y='Energy Supply per Capita', kind='scatter', xlim=[0, 0.0006])" 857 | ] 858 | }, 859 | { 860 | "cell_type": "code", 861 | "execution_count": 22, 862 | "metadata": { 863 | "deletable": false, 864 | "editable": false, 865 | "nbgrader": { 866 | "checksum": "8dced1dde88b6877f89bdec482870476", 867 | "grade": true, 868 | "grade_id": "cell-3cb5c699065a4a20", 869 | "locked": true, 870 | "points": 6.66, 871 | "schema_version": 1, 872 | "solution": false 873 | } 874 | }, 875 | "outputs": [], 876 | "source": [ 877 | "assert answer_nine() >= -1. and answer_nine() <= 1., \"Q9: A valid correlation should between -1 to 1!\"\n" 878 | ] 879 | }, 880 | { 881 | "cell_type": "markdown", 882 | "metadata": { 883 | "deletable": false, 884 | "editable": false, 885 | "nbgrader": { 886 | "checksum": "8af5ffad89be1e5c6292438724d6f8d5", 887 | "grade": false, 888 | "grade_id": "cell-ad09765e29b91157", 889 | "locked": true, 890 | "schema_version": 1, 891 | "solution": false 892 | } 893 | }, 894 | "source": [ 895 | "### Question 10\n", 896 | "Create a new column with a 1 if the country's % Renewable value is at or above the median for all countries in the top 15, and a 0 if the country's % Renewable value is below the median.\n", 897 | "\n", 898 | "*This function should return a series named `HighRenew` whose index is the country name sorted in ascending order of rank.*" 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": 23, 904 | "metadata": { 905 | "deletable": false, 906 | "nbgrader": { 907 | "checksum": "340c06bd50a9a027a2190674cfb981b9", 908 | "grade": false, 909 | "grade_id": "cell-0fdf60e64bf1a4f9", 910 | "locked": false, 911 | "schema_version": 1, 912 | "solution": true 913 | } 914 | }, 915 | "outputs": [], 916 | "source": [ 917 | "def answer_ten():\n", 918 | " # YOUR CODE HERE\n", 919 | " \n", 920 | " Top15 = answer_one()\n", 921 | " Top15['HighRenew'] = 
1\n", 922 | " j = 0\n", 923 | " for i in Top15['% Renewable']:\n", 924 | " if i >= Top15['% Renewable'].median():\n", 925 | " Top15['HighRenew'].iloc[j] = 1\n", 926 | " else:\n", 927 | " Top15['HighRenew'].iloc[j] = 0\n", 928 | " j+=1\n", 929 | " return Top15['HighRenew']\n", 930 | " #raise NotImplementedError()" 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "execution_count": 24, 936 | "metadata": { 937 | "deletable": false, 938 | "editable": false, 939 | "nbgrader": { 940 | "checksum": "f624e6996eca5796eaf27fb4d0593175", 941 | "grade": true, 942 | "grade_id": "cell-b29a631fd9a7730f", 943 | "locked": true, 944 | "points": 6.66, 945 | "schema_version": 1, 946 | "solution": false 947 | } 948 | }, 949 | "outputs": [], 950 | "source": [ 951 | "assert type(answer_ten()) == pd.Series, \"Q10: You should return a Series!\"\n" 952 | ] 953 | }, 954 | { 955 | "cell_type": "markdown", 956 | "metadata": { 957 | "deletable": false, 958 | "editable": false, 959 | "nbgrader": { 960 | "checksum": "52f682e7066791c34cd3b2402855cbf5", 961 | "grade": false, 962 | "grade_id": "cell-677c51ba711c3af7", 963 | "locked": true, 964 | "schema_version": 1, 965 | "solution": false 966 | } 967 | }, 968 | "source": [ 969 | "### Question 11\n", 970 | "Use the following dictionary to group the Countries by Continent, then create a DataFrame that displays the sample size (the number of countries in each continent bin), and the sum, mean, and std deviation for the estimated population of each country.\n", 971 | "\n", 972 | "```python\n", 973 | "ContinentDict = {'China':'Asia', \n", 974 | " 'United States':'North America', \n", 975 | " 'Japan':'Asia', \n", 976 | " 'United Kingdom':'Europe', \n", 977 | " 'Russian Federation':'Europe', \n", 978 | " 'Canada':'North America', \n", 979 | " 'Germany':'Europe', \n", 980 | " 'India':'Asia',\n", 981 | " 'France':'Europe', \n", 982 | " 'South Korea':'Asia', \n", 983 | " 'Italy':'Europe', \n", 984 | " 'Spain':'Europe', \n", 985 | " 'Iran':'Asia',\n", 
def answer_eleven():
    """Question 11: population-estimate statistics per continent.

    Maps every index label of the frame from answer_one() (defined outside
    this view; presumably indexed by country name) to a continent through
    ContinentDict, estimates the population as
    'Energy Supply' / 'Energy Supply per Capita', and aggregates per continent.

    Returns:
        pandas.DataFrame indexed by 'Continent' with columns
        ['size', 'sum', 'mean', 'std'] describing the estimated population.
        Labels missing from ContinentDict map to NaN and are left out of the
        grouping.
    """
    ContinentDict = {'China': 'Asia',
                     'United States': 'North America',
                     'Japan': 'Asia',
                     'United Kingdom': 'Europe',
                     'Russian Federation': 'Europe',
                     'Canada': 'North America',
                     'Germany': 'Europe',
                     'India': 'Asia',
                     'France': 'Europe',
                     'South Korea': 'Asia',
                     'Italy': 'Europe',
                     'Spain': 'Europe',
                     'Iran': 'Asia',
                     'Australia': 'Australia',
                     'Brazil': 'South America'}

    top15 = answer_one()
    pop_est = top15['Energy Supply'] / top15['Energy Supply per Capita']
    # Look continents up by country label. The previous version copied
    # dict.values() row by row, which is only correct when the frame's row
    # order matches the dict's insertion order, and it wrote through chained
    # `.iloc` assignment, which is not guaranteed to update the frame.
    continents = top15.index.to_series().map(ContinentDict).rename('Continent')
    return pop_est.groupby(continents).agg(['size', 'sum', 'mean', 'std'])
def answer_twelve():
    """Question 12: number of countries per (continent, % Renewable bin).

    '% Renewable' from the frame returned by answer_one() (defined outside
    this view) is cut into 5 equal-width bins; rows are grouped by continent
    (looked up in ContinentDict by index label) and by bin.

    Returns:
        pandas.Series of group sizes with a MultiIndex named
        ['Continent', '% Renewable']. Only combinations that actually occur
        are included.
    """
    ContinentDict = {'China': 'Asia',
                     'United States': 'North America',
                     'Japan': 'Asia',
                     'United Kingdom': 'Europe',
                     'Russian Federation': 'Europe',
                     'Canada': 'North America',
                     'Germany': 'Europe',
                     'India': 'Asia',
                     'France': 'Europe',
                     'South Korea': 'Asia',
                     'Italy': 'Europe',
                     'Spain': 'Europe',
                     'Iran': 'Asia',
                     'Australia': 'Australia',
                     'Brazil': 'South America'}

    top15 = answer_one()
    # Map by country label instead of copying dict.values() positionally via
    # chained `.iloc` assignment, which depended on row order and is not
    # guaranteed to write back to the frame.
    continents = top15.index.to_series().map(ContinentDict).rename('Continent')
    renewable_bins = pd.cut(top15['% Renewable'], bins=5)
    # The bins are categorical; observed=True keeps empty
    # (continent, bin) combinations out of the result.
    return top15.groupby([continents, renewable_bins], observed=True).size()
def answer_thirteen():
    """Question 13: population estimate rendered with thousands separators.

    The estimate is 'Energy Supply' / 'Energy Supply per Capita' on the frame
    returned by answer_one() (defined outside this view). Each value is
    formatted with the ',' format spec, so no rounding is applied.

    Returns:
        pandas.Series named 'PopEst' of strings, indexed like the frame.
    """
    frame = answer_one()
    frame['pop'] = frame['Energy Supply'] / frame['Energy Supply per Capita']
    with_commas = frame['pop'].map(lambda value: format(value, ','))
    frame['PopEst'] = with_commas
    return frame['PopEst']
"cell-10fee7228cf973f6", 1219 | "locked": true, 1220 | "points": 6.74, 1221 | "schema_version": 1, 1222 | "solution": false 1223 | } 1224 | }, 1225 | "outputs": [], 1226 | "source": [ 1227 | "assert type(answer_thirteen()) == pd.Series, \"Q13: You should return a Series!\"\n", 1228 | "\n", 1229 | "assert len(answer_thirteen()) == 15, \"Q13: Wrong result numbers!\"\n" 1230 | ] 1231 | }, 1232 | { 1233 | "cell_type": "markdown", 1234 | "metadata": { 1235 | "deletable": false, 1236 | "editable": false, 1237 | "nbgrader": { 1238 | "checksum": "61562b9b667bd5efbcec0dcd7becbfaa", 1239 | "grade": false, 1240 | "grade_id": "cell-998b62d4f390ef15", 1241 | "locked": true, 1242 | "schema_version": 1, 1243 | "solution": false 1244 | } 1245 | }, 1246 | "source": [ 1247 | "### Optional\n", 1248 | "\n", 1249 | "Use the built in function `plot_optional()` to see an example visualization." 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "code", 1254 | "execution_count": null, 1255 | "metadata": { 1256 | "deletable": false, 1257 | "editable": false, 1258 | "nbgrader": { 1259 | "checksum": "479786c97cb5f34d07231c6d7c602a47", 1260 | "grade": false, 1261 | "grade_id": "cell-741fd55ea57cd40a", 1262 | "locked": true, 1263 | "schema_version": 1, 1264 | "solution": false 1265 | } 1266 | }, 1267 | "outputs": [], 1268 | "source": [ 1269 | "def plot_optional():\n", 1270 | " import matplotlib as plt\n", 1271 | " %matplotlib inline\n", 1272 | " Top15 = answer_one()\n", 1273 | " ax = Top15.plot(x='Rank', y='% Renewable', kind='scatter', \n", 1274 | " c=['#e41a1c','#377eb8','#e41a1c','#4daf4a','#4daf4a','#377eb8','#4daf4a','#e41a1c',\n", 1275 | " '#4daf4a','#e41a1c','#4daf4a','#4daf4a','#e41a1c','#dede00','#ff7f00'], \n", 1276 | " xticks=range(1,16), s=6*Top15['2014']/10**10, alpha=.75, figsize=[16,6]);\n", 1277 | "\n", 1278 | " for i, txt in enumerate(Top15.index):\n", 1279 | " ax.annotate(txt, [Top15['Rank'][i], Top15['% Renewable'][i]], ha='center')\n", 1280 | "\n", 1281 | " print(\"This is an 
example of a visualization that can be created to help understand the data. \\\n", 1282 | "This is a bubble chart showing % Renewable vs. Rank. The size of the bubble corresponds to the countries' \\\n", 1283 | "2014 GDP, and the color corresponds to the continent.\")" 1284 | ] 1285 | } 1286 | ], 1287 | "metadata": { 1288 | "anaconda-cloud": {}, 1289 | "coursera": { 1290 | "schema_names": [ 1291 | "mooc_adswpy_1_v2_assignment3" 1292 | ] 1293 | }, 1294 | "kernelspec": { 1295 | "display_name": "Python 3", 1296 | "language": "python", 1297 | "name": "python3" 1298 | }, 1299 | "language_info": { 1300 | "codemirror_mode": { 1301 | "name": "ipython", 1302 | "version": 3 1303 | }, 1304 | "file_extension": ".py", 1305 | "mimetype": "text/x-python", 1306 | "name": "python", 1307 | "nbconvert_exporter": "python", 1308 | "pygments_lexer": "ipython3", 1309 | "version": "3.7.6" 1310 | } 1311 | }, 1312 | "nbformat": 4, 1313 | "nbformat_minor": 4 1314 | } 1315 | --------------------------------------------------------------------------------