├── Creating_Series_and_DataFrames └── Pokemon │ └── Exercises.ipynb ├── Data Input and Output.ipynb ├── DataFrames.ipynb ├── Ecommerce Purchases ├── Ecommerce Purchases Exercise .ipynb ├── Groupby.ipynb ├── Introduction to Pandas.ipynb ├── Merging, Joining, and Concatenating .ipynb ├── Missing Data.ipynb ├── Operations.ipynb ├── Pandas DataFrame Notes.pdf ├── Pandas_Cheat_Sheet.pdf ├── README.md ├── Series.ipynb ├── apply ├── US_Crime_Rates_1960_2014.csv ├── alcohol.ipynb ├── crime.ipynb └── student-mat.csv ├── data ├── chiptole.ipynb └── occupation.ipynb ├── deleting ├── Iris │ ├── Exercises.ipynb │ ├── Exercises_with_solutions_and_code.ipynb │ └── Solutions.ipynb └── Wine │ ├── Exercises.ipynb │ ├── Exercises_code_and_solutions.ipynb │ └── Solutions.ipynb ├── filter and sorting ├── army.ipynb ├── chipotle.ipynb └── euro12.ipynb ├── grouping ├── alcohol.ipynb ├── occupation.ipynb └── regiment.ipynb ├── merge ├── cars.ipynb ├── cars1.csv ├── cars2.csv ├── fictious name.ipynb └── housing market.ipynb ├── stats └── baby name.ipynb └── time series ├── Apple_Stock ├── Exercises.ipynb ├── Solutions.ipynb └── appl_1980_2014.csv ├── Getting_Financial_Data ├── Exercises.ipynb └── Solutions.ipynb └── Investor_Flow_of_Funds_US ├── Exercises.ipynb └── Solutions.ipynb /Creating_Series_and_DataFrames/Pokemon/Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pokemon" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will create the data.\n", 17 | "\n", 18 | "\n", 19 | "\n", 20 | "### Step 1. 
Import the necessary libraries" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 2. Create a data dictionary that looks like the DataFrame below" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "data = {\"evolution\":[\"Ivysaur\",\"Charmeleon\",\"Wartortle\",\"Metapod\"],\n", 47 | " \"hp\": [45,39,44,45],\n", 48 | " \"name\":['Bulbasaur','Charmander','Squirtle','Caterpie'],\n", 49 | " \"pokedex\":['yes','no','yes','no'],\n", 50 | " \"type\":['grass','fire','water','bug']}" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 3. Assign it to a variable called poke" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/html": [ 68 | "
\n", 69 | "\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | "
evolutionhpnamepokedextype
0Ivysaur45Bulbasauryesgrass
1Charmeleon39Charmandernofire
2Wartortle44Squirtleyeswater
3Metapod45Caterpienobug
\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " evolution hp name pokedex type\n", 132 | "0 Ivysaur 45 Bulbasaur yes grass\n", 133 | "1 Charmeleon 39 Charmander no fire\n", 134 | "2 Wartortle 44 Squirtle yes water\n", 135 | "3 Metapod 45 Caterpie no bug" 136 | ] 137 | }, 138 | "execution_count": 12, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "poke = pd.DataFrame(data)\n", 145 | "poke.head()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### Step 4. Oops... it seems the DataFrame columns are in alphabetical order. Reorder the columns as name, type, hp, evolution, pokedex" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 23, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/html": [ 163 | "
\n", 164 | "\n", 177 | "\n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | "
nametypehpevolutionpokedex
0Bulbasaurgrass45Ivysauryes
1Charmanderfire39Charmeleonno
2Squirtlewater44Wartortleyes
3Caterpiebug45Metapodno
\n", 223 | "
" 224 | ], 225 | "text/plain": [ 226 | " name type hp evolution pokedex\n", 227 | "0 Bulbasaur grass 45 Ivysaur yes\n", 228 | "1 Charmander fire 39 Charmeleon no\n", 229 | "2 Squirtle water 44 Wartortle yes\n", 230 | "3 Caterpie bug 45 Metapod no" 231 | ] 232 | }, 233 | "execution_count": 23, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "poke = poke.reindex(columns=['name','type','hp','evolution','pokedex'])\n", 240 | "poke.head()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### Step 5. Add another column called place, and insert what you have in mind." 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 25, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/html": [ 258 | "
\n", 259 | "\n", 272 | "\n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | "
nametypehpevolutionpokedexplace
0Bulbasaurgrass45Ivysauryesnew city
1Charmanderfire39Charmeleonnopalace
2Squirtlewater44Wartortleyesplay ground
3Caterpiebug45Metapodnoswing
\n", 323 | "
" 324 | ], 325 | "text/plain": [ 326 | " name type hp evolution pokedex place\n", 327 | "0 Bulbasaur grass 45 Ivysaur yes new city\n", 328 | "1 Charmander fire 39 Charmeleon no palace\n", 329 | "2 Squirtle water 44 Wartortle yes play ground\n", 330 | "3 Caterpie bug 45 Metapod no swing" 331 | ] 332 | }, 333 | "execution_count": 25, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "poke['place']=['new city','palace','play ground','swing']\n", 340 | "poke" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "### Step 6. Present the type of each column" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 27, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "\n", 360 | "RangeIndex: 4 entries, 0 to 3\n", 361 | "Data columns (total 6 columns):\n", 362 | "name 4 non-null object\n", 363 | "type 4 non-null object\n", 364 | "hp 4 non-null int64\n", 365 | "evolution 4 non-null object\n", 366 | "pokedex 4 non-null object\n", 367 | "place 4 non-null object\n", 368 | "dtypes: int64(1), object(5)\n", 369 | "memory usage: 272.0+ bytes\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "poke.info()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "### BONUS: Create your own question and answer it." 
382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": { 388 | "collapsed": true 389 | }, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.5" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 1 415 | } 416 | -------------------------------------------------------------------------------- /Ecommerce Purchases Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Ecommerce Purchases Exercise\n", 12 | "\n", 13 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. For the most part, the questions get progressively harder.\n", 14 | "\n", 15 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 16 | "\n", 17 | "Also note that all of these questions can be answered with one line of code.\n", 18 | "____\n", 19 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. 
**" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 81, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "import pandas as pd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "ecom = pd.read_csv(\"Ecommerce Purchases\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "**Check the head of the DataFrame.**" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 5, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/html": [ 56 | "
\n", 57 | "\n", 70 | "\n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 178 | "
" 179 | ], 180 | "text/plain": [ 181 | " Address Lot AM or PM \\\n", 182 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 183 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 184 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 185 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 186 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 187 | "\n", 188 | " Browser Info \\\n", 189 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 190 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 191 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 192 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 193 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 194 | "\n", 195 | " Company Credit Card CC Exp Date \\\n", 196 | "0 Martinez-Herman 6011929061123406 02/20 \n", 197 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 198 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 199 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 200 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 201 | "\n", 202 | " CC Security Code CC Provider \\\n", 203 | "0 900 JCB 16 digit \n", 204 | "1 561 Mastercard \n", 205 | "2 699 JCB 16 digit \n", 206 | "3 384 Discover \n", 207 | "4 678 Diners Club / Carte Blanche \n", 208 | "\n", 209 | " Email Job \\\n", 210 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 211 | "1 anthony41@reed.com Drilling engineer \n", 212 | "2 amymiller@morales-harrison.com Customer service manager \n", 213 | "3 brent16@olson-robinson.info Drilling engineer \n", 214 | "4 christopherwright@gmail.com Fine artist \n", 215 | "\n", 216 | " IP Address Language Purchase Price \n", 217 | "0 149.146.147.205 el 98.14 \n", 218 | "1 15.160.41.51 fr 70.73 \n", 219 | "2 132.207.160.22 de 0.95 \n", 220 | "3 30.250.74.19 es 78.04 \n", 221 | "4 24.140.33.94 es 77.82 " 222 | ] 223 | }, 224 | "execution_count": 5, 225 | 
"metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "ecom.head()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "** How many rows and columns are there? **" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 12, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "\n", 250 | "RangeIndex: 10000 entries, 0 to 9999\n", 251 | "Data columns (total 14 columns):\n", 252 | "Address 10000 non-null object\n", 253 | "Lot 10000 non-null object\n", 254 | "AM or PM 10000 non-null object\n", 255 | "Browser Info 10000 non-null object\n", 256 | "Company 10000 non-null object\n", 257 | "Credit Card 10000 non-null int64\n", 258 | "CC Exp Date 10000 non-null object\n", 259 | "CC Security Code 10000 non-null int64\n", 260 | "CC Provider 10000 non-null object\n", 261 | "Email 10000 non-null object\n", 262 | "Job 10000 non-null object\n", 263 | "IP Address 10000 non-null object\n", 264 | "Language 10000 non-null object\n", 265 | "Purchase Price 10000 non-null float64\n", 266 | "dtypes: float64(1), int64(2), object(11)\n", 267 | "memory usage: 1.1+ MB\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "ecom.info()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "** What is the average Purchase Price? **" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 11, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "50.34730200000025" 291 | ] 292 | }, 293 | "execution_count": 11, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "ecom[\"Purchase Price\"].mean()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "** What were the highest and lowest purchase prices? 
**" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 14, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "99.99" 318 | ] 319 | }, 320 | "execution_count": 14, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "ecom[\"Purchase Price\"].max()" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 17, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "0.0" 338 | ] 339 | }, 340 | "execution_count": 17, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "ecom[\"Purchase Price\"].min()" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "** How many people have English 'en' as their Language of choice on the website? **" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 25, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "Address 1098\n", 365 | "Lot 1098\n", 366 | "AM or PM 1098\n", 367 | "Browser Info 1098\n", 368 | "Company 1098\n", 369 | "Credit Card 1098\n", 370 | "CC Exp Date 1098\n", 371 | "CC Security Code 1098\n", 372 | "CC Provider 1098\n", 373 | "Email 1098\n", 374 | "Job 1098\n", 375 | "IP Address 1098\n", 376 | "Language 1098\n", 377 | "Purchase Price 1098\n", 378 | "dtype: int64" 379 | ] 380 | }, 381 | "execution_count": 25, 382 | "metadata": {}, 383 | "output_type": "execute_result" 384 | } 385 | ], 386 | "source": [ 387 | "ecom[ecom[\"Language\"]==\"en\"].count()" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 29, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "name": "stdout", 404 | "output_type": "stream", 405 | "text": [ 406 | "\n", 407 | "Int64Index: 30 entries, 470 to 9979\n", 408 | "Data columns (total 14 columns):\n", 409 | "Address 30 non-null object\n", 410 | "Lot 30 non-null object\n", 411 | "AM or PM 30 non-null object\n", 412 | "Browser Info 30 non-null object\n", 413 | "Company 30 non-null object\n", 414 | "Credit Card 30 non-null int64\n", 415 | "CC Exp Date 30 non-null object\n", 416 | "CC Security Code 30 non-null int64\n", 417 | "CC Provider 30 non-null object\n", 418 | "Email 30 non-null object\n", 419 | "Job 30 non-null object\n", 420 | "IP Address 30 non-null object\n", 421 | "Language 30 non-null object\n", 422 | "Purchase Price 30 non-null float64\n", 423 | "dtypes: float64(1), int64(2), object(11)\n", 424 | "memory usage: 3.5+ KB\n" 425 | ] 426 | } 427 | ], 428 | "source": [ 429 | "ecom[ecom[\"Job\"]== \"Lawyer\"].info()" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 437 | "\n", 438 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 30, 444 | "metadata": {}, 445 | "outputs": [ 446 | { 447 | "data": { 448 | "text/plain": [ 449 | "PM 5068\n", 450 | "AM 4932\n", 451 | "Name: AM or PM, dtype: int64" 452 | ] 453 | }, 454 | "execution_count": 30, 455 | "metadata": {}, 456 | "output_type": "execute_result" 457 | } 458 | ], 459 | "source": [ 460 | "ecom[\"AM or PM\"].value_counts()" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "** What are the 5 most common Job Titles? 
**" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 36, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "Interior and spatial designer 31\n", 479 | "Lawyer 30\n", 480 | "Social researcher 28\n", 481 | "Designer, jewellery 27\n", 482 | "Research officer, political party 27\n", 483 | "Name: Job, dtype: int64" 484 | ] 485 | }, 486 | "execution_count": 36, 487 | "metadata": {}, 488 | "output_type": "execute_result" 489 | } 490 | ], 491 | "source": [ 492 | "ecom[\"Job\"].value_counts().head()" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 49, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "513 75.1\n", 511 | "Name: Purchase Price, dtype: float64" 512 | ] 513 | }, 514 | "execution_count": 49, 515 | "metadata": {}, 516 | "output_type": "execute_result" 517 | } 518 | ], 519 | "source": [ 520 | "ecom[ecom[\"Lot\"]==\"90 WT\"][\"Purchase Price\"]" 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 51, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/plain": [ 538 | "1234 bondellen@williams-garza.com\n", 539 | "Name: Email, dtype: object" 540 | ] 541 | }, 542 | "execution_count": 51, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "ecom[ecom[\"Credit Card\"]==4926535242672853][\"Email\"]" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "** How many people have 
American Express as their Credit Card Provider *and* made a purchase above $95 ?**" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 62, 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "Address 39\n", 567 | "Lot 39\n", 568 | "AM or PM 39\n", 569 | "Browser Info 39\n", 570 | "Company 39\n", 571 | "Credit Card 39\n", 572 | "CC Exp Date 39\n", 573 | "CC Security Code 39\n", 574 | "CC Provider 39\n", 575 | "Email 39\n", 576 | "Job 39\n", 577 | "IP Address 39\n", 578 | "Language 39\n", 579 | "Purchase Price 39\n", 580 | "dtype: int64" 581 | ] 582 | }, 583 | "execution_count": 62, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "ecom[(ecom['CC Provider']==\"American Express\") & (ecom['Purchase Price']>95)].count()" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "** Hard: How many people have a credit card that expires in 2025? **" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 98, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "data": { 606 | "text/plain": [ 607 | "1033" 608 | ] 609 | }, 610 | "execution_count": 98, 611 | "metadata": {}, 612 | "output_type": "execute_result" 613 | } 614 | ], 615 | "source": [ 616 | "sum(ecom['CC Exp Date'].apply(lambda exp:exp[3:] == \"25\"))" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 622 | "source": [ 623 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) 
**" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 105, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "data": { 633 | "text/plain": [ 634 | "hotmail.com 1638\n", 635 | "yahoo.com 1616\n", 636 | "gmail.com 1605\n", 637 | "smith.com 42\n", 638 | "williams.com 37\n", 639 | "Name: Email, dtype: int64" 640 | ] 641 | }, 642 | "execution_count": 105, 643 | "metadata": {}, 644 | "output_type": "execute_result" 645 | } 646 | ], 647 | "source": [ 648 | "ecom[\"Email\"].apply(lambda exp:exp.split('@')[1]).value_counts().head()" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "# Great Job!" 656 | ] 657 | } 658 | ], 659 | "metadata": { 660 | "kernelspec": { 661 | "display_name": "Python 3", 662 | "language": "python", 663 | "name": "python3" 664 | }, 665 | "language_info": { 666 | "codemirror_mode": { 667 | "name": "ipython", 668 | "version": 3 669 | }, 670 | "file_extension": ".py", 671 | "mimetype": "text/x-python", 672 | "name": "python", 673 | "nbconvert_exporter": "python", 674 | "pygments_lexer": "ipython3", 675 | "version": "3.6.5" 676 | } 677 | }, 678 | "nbformat": 4, 679 | "nbformat_minor": 1 680 | } 681 | -------------------------------------------------------------------------------- /Introduction to Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# Introduction to Pandas\n", 20 | "\n", 21 | "In this section of the course we will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. 
In this section of the course, you should go through the notebooks in this order:\n", 22 | "\n", 23 | "* Introduction to Pandas\n", 24 | "* Series\n", 25 | "* DataFrames\n", 26 | "* Missing Data\n", 27 | "* GroupBy\n", 28 | "* Merging,Joining,and Concatenating\n", 29 | "* Operations\n", 30 | "* Data Input and Output" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "___" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.6.5" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 1 62 | } 63 | -------------------------------------------------------------------------------- /Missing Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Missing Data\n", 18 | "\n", 19 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 43 | " 'B':[5,np.nan,np.nan],\n", 44 | " 'C':[1,2,3]})" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | 
"execution_count": 10, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " A B C\n", 92 | "0 1.0 5.0 1\n", 93 | "1 2.0 NaN 2\n", 94 | "2 NaN NaN 3" 95 | ] 96 | }, 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | "
ABC
01.05.01
\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " A B C\n", 139 | "0 1.0 5.0 1" 140 | ] 141 | }, 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.dropna()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 13, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
C
01
12
23
\n", 184 | "
" 185 | ], 186 | "text/plain": [ 187 | " C\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 3" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.dropna(axis=1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 14, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
\n", 213 | "\n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
ABC
01.05.01
12.0NaN2
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " A B C\n", 241 | "0 1.0 5.0 1\n", 242 | "1 2.0 NaN 2" 243 | ] 244 | }, 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.dropna(thresh=2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " A B C\n", 299 | "0 1 5 1\n", 300 | "1 2 FILL VALUE 2\n", 301 | "2 FILL VALUE FILL VALUE 3" 302 | ] 303 | }, 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "df.fillna(value='FILL VALUE')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 17, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 1.0\n", 324 | "1 2.0\n", 325 | "2 1.5\n", 326 | "Name: A, dtype: float64" 327 | ] 328 | }, 329 | "execution_count": 17, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df['A'].fillna(value=df['A'].mean())" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "# Great Job!" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.5.1" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /Pandas DataFrame Notes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aakankshaws/Pandas-exercises/372db10707b9fd95961245f9e440ed7fc2248b9b/Pandas DataFrame Notes.pdf -------------------------------------------------------------------------------- /Pandas_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aakankshaws/Pandas-exercises/372db10707b9fd95961245f9e440ed7fc2248b9b/Pandas_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pandas-exercises 2 | Collection of pandas exercises from various sources 3 | 4 | # PDF 5 | Cheat sheet and notes on everything you need to know about pandas 6 | -------------------------------------------------------------------------------- /Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The first main data type we will learn about for pandas is the Series data type. Let's import Pandas and explore the Series object.\n", 19 | "\n", 20 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. 
It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 21 | "\n", 22 | "Let's explore this concept through some examples:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Creating a Series\n", 42 | "\n", 43 | "You can convert a list, NumPy array, or dictionary to a Series:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "labels = ['a','b','c']\n", 55 | "my_list = [10,20,30]\n", 56 | "arr = np.array([10,20,30])\n", 57 | "d = {'a':10,'b':20,'c':30}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "**Using Lists**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "0 10\n", 76 | "1 20\n", 77 | "2 30\n", 78 | "dtype: int64" 79 | ] 80 | }, 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "pd.Series(data=my_list)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "a 10\n", 99 | "b 20\n", 100 | "c 30\n", 101 | "dtype: int64" 102 | ] 103 | }, 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "pd.Series(data=my_list,index=labels)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "a 10\n", 122 | "b 20\n", 123 | "c 30\n", 
124 | "dtype: int64" 125 | ] 126 | }, 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "pd.Series(my_list,labels)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "**NumPy Arrays**" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 7, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "0 10\n", 152 | "1 20\n", 153 | "2 30\n", 154 | "dtype: int64" 155 | ] 156 | }, 157 | "execution_count": 7, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "pd.Series(arr)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 8, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "a 10\n", 175 | "b 20\n", 176 | "c 30\n", 177 | "dtype: int64" 178 | ] 179 | }, 180 | "execution_count": 8, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "pd.Series(arr,labels)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "**Dictionary**" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 9, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "a 10\n", 205 | "b 20\n", 206 | "c 30\n", 207 | "dtype: int64" 208 | ] 209 | }, 210 | "execution_count": 9, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "pd.Series(d)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### Data in a Series\n", 224 | "\n", 225 | "A pandas Series can hold a variety of object types:" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 10, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | 
"text/plain": [ 236 | "0 a\n", 237 | "1 b\n", 238 | "2 c\n", 239 | "dtype: object" 240 | ] 241 | }, 242 | "execution_count": 10, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "pd.Series(data=labels)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 11, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "0 \n", 260 | "1 \n", 261 | "2 \n", 262 | "dtype: object" 263 | ] 264 | }, 265 | "execution_count": 11, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "# Even functions (although unlikely that you will use this)\n", 272 | "pd.Series([sum,print,len])" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "## Using an Index\n", 280 | "\n", 281 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 282 | "\n", 283 | "Let's see some examples of how to grab information from a Series. 
Let us create two Series, ser1 and ser2:" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 12, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 13, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "USA 1\n", 304 | "Germany 2\n", 305 | "USSR 3\n", 306 | "Japan 4\n", 307 | "dtype: int64" 308 | ] 309 | }, 310 | "execution_count": 13, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "ser1" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 14, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 15, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "USA 1\n", 339 | "Germany 2\n", 340 | "Italy 5\n", 341 | "Japan 4\n", 342 | "dtype: int64" 343 | ] 344 | }, 345 | "execution_count": 15, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "ser2" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 16, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "1" 363 | ] 364 | }, 365 | "execution_count": 16, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "ser1['USA']" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Operations are then also done based off of index:" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 17, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "data": { 388 | 
"text/plain": [ 389 | "Germany 4.0\n", 390 | "Italy NaN\n", 391 | "Japan 8.0\n", 392 | "USA 2.0\n", 393 | "USSR NaN\n", 394 | "dtype: float64" 395 | ] 396 | }, 397 | "execution_count": 17, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "ser1 + ser2" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 411 | "# Great Job!" 412 | ] 413 | } 414 | ], 415 | "metadata": { 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.5" 432 | } 433 | }, 434 | "nbformat": 4, 435 | "nbformat_minor": 1 436 | } 437 | -------------------------------------------------------------------------------- /apply/US_Crime_Rates_1960_2014.csv: -------------------------------------------------------------------------------- 1 | Year,Population,Total,Violent,Property,Murder,Forcible_Rape,Robbery,Aggravated_assault,Burglary,Larceny_Theft,Vehicle_Theft 1960,179323175,3384200,288460,3095700,9110,17190,107840,154320,912100,1855400,328200 1961,182992000,3488000,289390,3198600,8740,17220,106670,156760,949600,1913000,336000 1962,185771000,3752200,301510,3450700,8530,17550,110860,164570,994300,2089600,366800 1963,188483000,4109500,316970,3792500,8640,17650,116470,174210,1086400,2297800,408300 1964,191141000,4564600,364220,4200400,9360,21420,130390,203050,1213200,2514400,472800 1965,193526000,4739400,387390,4352000,9960,23410,138690,215330,1282500,2572600,496900 1966,195576000,5223500,430180,4793300,11040,25820,157990,235330,1410100,2822000,561200 
1967,197457000,5903400,499930,5403500,12240,27620,202910,257160,1632100,3111600,659800 1968,199399000,6720200,595010,6125200,13800,31670,262840,286700,1858900,3482700,783600 1969,201385000,7410900,661870,6749000,14760,37170,298850,311090,1981900,3888600,878500 1970,203235298,8098000,738820,7359200,16000,37990,349860,334970,2205000,4225800,928400 1971,206212000,8588200,816500,7771700,17780,42260,387700,368760,2399300,4424200,948200 1972,208230000,8248800,834900,7413900,18670,46850,376290,393090,2375500,4151200,887200 1973,209851000,8718100,875910,7842200,19640,51400,384220,420650,2565500,4347900,928800 1974,211392000,10253400,974720,9278700,20710,55400,442400,456210,3039200,5262500,977100 1975,213124000,11292400,1039710,10252700,20510,56090,470500,492620,3265300,5977700,1009600 1976,214659000,11349700,1004210,10345500,18780,57080,427810,500530,3108700,6270800,966000 1977,216332000,10984500,1029580,9955000,19120,63500,412610,534350,3071500,5905700,977700 1978,218059000,11209000,1085550,10123400,19560,67610,426930,571460,3128300,5991000,1004100 1979,220099000,12249500,1208030,11041500,21460,76390,480700,629480,3327700,6601000,1112800 1980,225349264,13408300,1344520,12063700,23040,82990,565840,672650,3795200,7136900,1131700 1981,229146000,13423800,1361820,12061900,22520,82500,592910,663900,3779700,7194400,1087800 1982,231534000,12974400,1322390,11652000,21010,78770,553130,669480,3447100,7142500,1062400 1983,233981000,12108600,1258090,10850500,19310,78920,506570,653290,3129900,6712800,1007900 1984,236158000,11881800,1273280,10608500,18690,84230,485010,685350,2984400,6591900,1032200 1985,238740000,12431400,1328800,11102600,18980,88670,497870,723250,3073300,6926400,1102900 1986,240132887,13211869,1489169,11722700,20613,91459,542775,834322,3241410,7257153,1224137 1987,242282918,13508700,1483999,12024700,20096,91110,517704,855088,3236184,7499900,1288674 1988,245807000,13923100,1566220,12356900,20680,92490,542970,910090,3218100,7705900,1432900 
1989,248239000,14251400,1646040,12605400,21500,94500,578330,951710,3168200,7872400,1564800 1990,248709873,14475600,1820130,12655500,23440,102560,639270,1054860,3073900,7945700,1635900 1991,252177000,14872900,1911770,12961100,24700,106590,687730,1092740,3157200,8142200,1661700 1992,255082000,14438200,1932270,12505900,23760,109060,672480,1126970,2979900,7915200,1610800 1993,257908000,14144800,1926020,12218800,24530,106010,659870,1135610,2834800,7820900,1563100 1994,260341000,13989500,1857670,12131900,23330,102220,618950,1113180,2712800,7879800,1539300 1995,262755000,13862700,1798790,12063900,21610,97470,580510,1099210,2593800,7997700,1472400 1996,265228572,13493863,1688540,11805300,19650,96250,535590,1037050,2506400,7904700,1394200 1997,267637000,13194571,1634770,11558175,18208,96153,498534,1023201,2460526,7743760,1354189 1998,270296000,12475634,1531044,10944590,16914,93103,446625,974402,2329950,7373886,1240754 1999,272690813,11634378,1426044,10208334,15522,89411,409371,911740,2100739,6955520,1152075 2000,281421906,11608072,1425486,10182586,15586,90178,408016,911706,2050992,6971590,1160002 2001,285317559,11876669,1439480,10437480,16037,90863,423557,909023,2116531,7092267,1228391 2002,287973924,11878954,1423677,10455277,16229,95235,420806,891407,2151252,7057370,1246646 2003,290690788,11826538,1383676,10442862,16528,93883,414235,859030,2154834,7026802,1261226 2004,293656842,11679474,1360088,10319386,16148,95089,401470,847381,2144446,6937089,1237851 2005,296507061,11565499,1390745,10174754,16740,94347,417438,862220,2155448,6783447,1235859 2006,299398484,11401511,1418043,9983568,17030,92757,447403,860853,2183746,6607013,1192809 2007,301621157,11251828,1408337,9843481,16929,90427,445125,855856,2176140,6568572,1095769 2008,304374846,11160543,1392628,9767915,16442,90479,443574,842134,2228474,6588046,958629 2009,307006550,10762956,1325896,9337060,15399,89241,408742,812514,2203313,6338095,795652 
2010,309330219,10363873,1251248,9112625,14772,85593,369089,781844,2168457,6204601,739565 2011,311587816,10258774,1206031,9052743,14661,84175,354772,752423,2185140,6151095,716508 2012,313873685,10219059,1217067,9001992,14866,85141,355051,762009,2109932,6168874,723186 2013,316497531,9850445,1199684,8650761,14319,82109,345095,726575,1931835,6018632,700294 2014,318857056,9475816,1197987,8277829,14249,84041,325802,741291,1729806,5858496,689527 -------------------------------------------------------------------------------- /apply/alcohol.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. 
Create the DataFrame with the following values:" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 44 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 45 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 46 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 47 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Step 3. Assign it to a variable called regiment.\n", 55 | "#### Don't forget to name each column" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 50, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
regimentcompanynamepreTestScorepostTestScore
0Nighthawks1stMiller425
1Nighthawks1stJacobson2494
2Nighthawks2ndAli3157
3Nighthawks2ndMilner262
4Dragoons1stCooze370
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " regiment company name preTestScore postTestScore\n", 138 | "0 Nighthawks 1st Miller 4 25\n", 139 | "1 Nighthawks 1st Jacobson 24 94\n", 140 | "2 Nighthawks 2nd Ali 31 57\n", 141 | "3 Nighthawks 2nd Milner 2 62\n", 142 | "4 Dragoons 1st Cooze 3 70" 143 | ] 144 | }, 145 | "execution_count": 50, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "regiment = pd.DataFrame(raw_data)\n", 152 | "regiment.head()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 43, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "regiment\n", 171 | "Dragoons 15.50\n", 172 | "Nighthawks 15.25\n", 173 | "Scouts 2.50\n", 174 | "Name: preTestScore, dtype: float64" 175 | ] 176 | }, 177 | "execution_count": 43, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "regiment.groupby(by=\"regiment\")['preTestScore'].mean()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Step 5. Present general statistics by company" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 44, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/html": [ 201 | "
\n", 202 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | "
postTestScorepreTestScore
countmeanstdmin25%50%75%maxcountmeanstdmin25%50%75%max
company
1st6.057.66666727.48575425.034.2566.070.094.06.06.6666678.5244752.03.003.54.0024.0
2nd6.067.00000014.05702757.058.2562.068.094.06.015.50000014.6526452.02.2513.529.2531.0
\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " postTestScore \\\n", 310 | " count mean std min 25% 50% 75% max \n", 311 | "company \n", 312 | "1st 6.0 57.666667 27.485754 25.0 34.25 66.0 70.0 94.0 \n", 313 | "2nd 6.0 67.000000 14.057027 57.0 58.25 62.0 68.0 94.0 \n", 314 | "\n", 315 | " preTestScore \n", 316 | " count mean std min 25% 50% 75% max \n", 317 | "company \n", 318 | "1st 6.0 6.666667 8.524475 2.0 3.00 3.5 4.00 24.0 \n", 319 | "2nd 6.0 15.500000 14.652645 2.0 2.25 13.5 29.25 31.0 " 320 | ] 321 | }, 322 | "execution_count": 44, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "regiment.groupby(by=\"company\").describe()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "### Step 6. What is the mean each company's preTestScore?" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 45, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "company\n", 347 | "1st 6.666667\n", 348 | "2nd 15.500000\n", 349 | "Name: preTestScore, dtype: float64" 350 | ] 351 | }, 352 | "execution_count": 45, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "regiment.groupby(by=\"company\").preTestScore.mean()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "### Step 7. 
Present the mean preTestScores grouped by regiment and company" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 46, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "regiment company\n", 377 | "Dragoons 1st 3.5\n", 378 | " 2nd 27.5\n", 379 | "Nighthawks 1st 14.0\n", 380 | " 2nd 16.5\n", 381 | "Scouts 1st 2.5\n", 382 | " 2nd 2.5\n", 383 | "Name: preTestScore, dtype: float64" 384 | ] 385 | }, 386 | "execution_count": 46, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "regiment.groupby(by=['regiment',\"company\"]).preTestScore.mean()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Step 8. Present the mean preTestScores grouped by regiment and company without hierarchical indexing" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 90, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/html": [ 410 | "
\n", 411 | "\n", 424 | "\n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | "
company1st2nd
regiment
Dragoons3.527.5
Nighthawks14.016.5
Scouts2.52.5
\n", 455 | "
" 456 | ], 457 | "text/plain": [ 458 | "company 1st 2nd\n", 459 | "regiment \n", 460 | "Dragoons 3.5 27.5\n", 461 | "Nighthawks 14.0 16.5\n", 462 | "Scouts 2.5 2.5" 463 | ] 464 | }, 465 | "execution_count": 90, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "regiment.groupby(by=['regiment',\"company\"]).preTestScore.mean().unstack()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 9. Group the entire dataframe by regiment and company" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 53, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/html": [ 489 | "
\n", 490 | "\n", 503 | "\n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | "
preTestScorepostTestScore
regimentcompany
Dragoons1st3.547.5
2nd27.575.5
Nighthawks1st14.059.5
2nd16.559.5
Scouts1st2.566.0
2nd2.566.0
\n", 554 | "
" 555 | ], 556 | "text/plain": [ 557 | " preTestScore postTestScore\n", 558 | "regiment company \n", 559 | "Dragoons 1st 3.5 47.5\n", 560 | " 2nd 27.5 75.5\n", 561 | "Nighthawks 1st 14.0 59.5\n", 562 | " 2nd 16.5 59.5\n", 563 | "Scouts 1st 2.5 66.0\n", 564 | " 2nd 2.5 66.0" 565 | ] 566 | }, 567 | "execution_count": 53, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "regiment.groupby(by=['regiment',\"company\"]).mean()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "### Step 10. What is the number of observations in each regiment and company" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 64, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "regiment company\n", 592 | "Dragoons 1st 2\n", 593 | " 2nd 2\n", 594 | "Nighthawks 1st 2\n", 595 | " 2nd 2\n", 596 | "Scouts 1st 2\n", 597 | " 2nd 2\n", 598 | "Name: regiment, dtype: int64" 599 | ] 600 | }, 601 | "execution_count": 64, 602 | "metadata": {}, 603 | "output_type": "execute_result" 604 | } 605 | ], 606 | "source": [ 607 | "regiment.groupby(by=['regiment',\"company\"]).regiment.size()" 608 | ] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": {}, 613 | "source": [ 614 | "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 88, 620 | "metadata": {}, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "Dragoons\n", 627 | " regiment company name preTestScore postTestScore\n", 628 | "4 Dragoons 1st Cooze 3 70\n", 629 | "5 Dragoons 1st Jacon 4 25\n", 630 | "6 Dragoons 2nd Ryaner 24 94\n", 631 | "7 Dragoons 2nd Sone 31 57\n", 632 | "Nighthawks\n", 633 | " regiment company name preTestScore postTestScore\n", 634 | "0 Nighthawks 1st Miller 4 25\n", 635 | "1 Nighthawks 1st Jacobson 24 94\n", 636 | "2 Nighthawks 2nd Ali 31 57\n", 637 | "3 Nighthawks 2nd Milner 2 62\n", 638 | "Scouts\n", 639 | " regiment company name preTestScore postTestScore\n", 640 | "8 Scouts 1st Sloan 2 62\n", 641 | "9 Scouts 1st Piger 3 70\n", 642 | "10 Scouts 2nd Riani 2 62\n", 643 | "11 Scouts 2nd Ali 3 70\n" 644 | ] 645 | } 646 | ], 647 | "source": [ 648 | "for name,group in regiment.groupby('regiment'):\n", 649 | " print(name)\n", 650 | " print(group)" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [] 659 | } 660 | ], 661 | "metadata": { 662 | "kernelspec": { 663 | "display_name": "Python 3", 664 | "language": "python", 665 | "name": "python3" 666 | }, 667 | "language_info": { 668 | "codemirror_mode": { 669 | "name": "ipython", 670 | "version": 3 671 | }, 672 | "file_extension": ".py", 673 | "mimetype": "text/x-python", 674 | "name": "python", 675 | "nbconvert_exporter": "python", 676 | "pygments_lexer": "ipython3", 677 | "version": "3.6.5" 678 | } 679 | }, 680 | "nbformat": 4, 681 | "nbformat_minor": 1 682 | } 683 | -------------------------------------------------------------------------------- /data/chiptole.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | 
{ 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex2 - Getting and Knowing your Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This time we are going to pull data directly from the internet.\n", 15 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 16 | "\n", 17 | "### Step 1. Import the necessary libraries" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "import pandas as pd \n", 28 | "import requests" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). " 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Step 3. Assign it to a variable called chipo." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "url = \"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv?_sm_au_=isHJFpf7VqD5L7k4\"" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Step 4. See the first 10 entries" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 9, 64 | "metadata": { 65 | "scrolled": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/html": [ 71 | "
\n", 72 | "\n", 85 | "\n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | "
order_idquantityitem_namechoice_descriptionitem_price
011Chips and Fresh Tomato SalsaNaN$2.39
111Izze[Clementine]$3.39
211Nantucket Nectar[Apple]$3.39
311Chips and Tomatillo-Green Chili SalsaNaN$2.39
422Chicken Bowl[Tomatillo-Red Chili Salsa (Hot), [Black Beans...$16.98
531Chicken Bowl[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...$10.98
631Side of ChipsNaN$1.69
741Steak Burrito[Tomatillo Red Chili Salsa, [Fajita Vegetables...$11.75
841Steak Soft Tacos[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...$9.25
951Steak Burrito[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...$9.25
\n", 179 | "
" 180 | ], 181 | "text/plain": [ 182 | " order_id quantity item_name \\\n", 183 | "0 1 1 Chips and Fresh Tomato Salsa \n", 184 | "1 1 1 Izze \n", 185 | "2 1 1 Nantucket Nectar \n", 186 | "3 1 1 Chips and Tomatillo-Green Chili Salsa \n", 187 | "4 2 2 Chicken Bowl \n", 188 | "5 3 1 Chicken Bowl \n", 189 | "6 3 1 Side of Chips \n", 190 | "7 4 1 Steak Burrito \n", 191 | "8 4 1 Steak Soft Tacos \n", 192 | "9 5 1 Steak Burrito \n", 193 | "\n", 194 | " choice_description item_price \n", 195 | "0 NaN $2.39 \n", 196 | "1 [Clementine] $3.39 \n", 197 | "2 [Apple] $3.39 \n", 198 | "3 NaN $2.39 \n", 199 | "4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98 \n", 200 | "5 [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou... $10.98 \n", 201 | "6 NaN $1.69 \n", 202 | "7 [Tomatillo Red Chili Salsa, [Fajita Vegetables... $11.75 \n", 203 | "8 [Tomatillo Green Chili Salsa, [Pinto Beans, Ch... $9.25 \n", 204 | "9 [Fresh Tomato Salsa, [Rice, Black Beans, Pinto... $9.25 " 205 | ] 206 | }, 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "chipo = pd.read_csv(url, sep=\" \", delimiter='\\t')\n", 214 | "\n", 215 | "chipo.head(10)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### Step 5. What is the number of observations in the dataset?" 
223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 10, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "\n", 235 | "RangeIndex: 4622 entries, 0 to 4621\n", 236 | "Data columns (total 5 columns):\n", 237 | "order_id 4622 non-null int64\n", 238 | "quantity 4622 non-null int64\n", 239 | "item_name 4622 non-null object\n", 240 | "choice_description 3376 non-null object\n", 241 | "item_price 4622 non-null object\n", 242 | "dtypes: int64(2), object(3)\n", 243 | "memory usage: 180.6+ KB\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "# Solution 1\n", 249 | "chipo.info()\n" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 12, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "4622" 261 | ] 262 | }, 263 | "execution_count": 12, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "# Solution 2\n", 270 | "chipo.count()[1]\n" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "### Step 6. What is the number of columns in the dataset?" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 13, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "5" 289 | ] 290 | }, 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "len(chipo.columns)" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "### Step 7. Print the name of all the columns." 
305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 14, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "Index(['order_id', 'quantity', 'item_name', 'choice_description',\n", 316 | " 'item_price'],\n", 317 | " dtype='object')" 318 | ] 319 | }, 320 | "execution_count": 14, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "chipo.columns" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "### Step 8. How is the dataset indexed?" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 15, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "RangeIndex(start=0, stop=4622, step=1)" 345 | ] 346 | }, 347 | "execution_count": 15, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "chipo.index" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "### Step 9. Which was the most-ordered item? " 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 32, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/plain": [ 371 | "Chicken Bowl 726\n", 372 | "Name: item_name, dtype: int64" 373 | ] 374 | }, 375 | "execution_count": 32, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "chipo['item_name'].value_counts().head(1)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "### Step 10. For the most-ordered item, how many items were ordered?" 
389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 29, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "Chicken Bowl 726\n", 400 | "Name: item_name, dtype: int64" 401 | ] 402 | }, 403 | "execution_count": 29, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "chipo['item_name'].value_counts().head(1)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Step 11. What was the most ordered item in the choice_description column?" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 38, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/plain": [ 427 | "[Diet Coke] 134\n", 428 | "Name: choice_description, dtype: int64" 429 | ] 430 | }, 431 | "execution_count": 38, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "chipo['choice_description'].value_counts().head(1)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "### Step 12. How many items were ordered in total?" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 39, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "4622" 456 | ] 457 | }, 458 | "execution_count": 39, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "chipo.count()[1]" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "### Step 13. Turn the item price into a float" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "#### Step 13.a. 
Check the item price type" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 52, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/plain": [ 489 | "dtype('O')" 490 | ] 491 | }, 492 | "execution_count": 52, 493 | "metadata": {}, 494 | "output_type": "execute_result" 495 | } 496 | ], 497 | "source": [ 498 | "chipo['item_price'].dtype" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "#### Step 13.b. Create a lambda function and change the type of item price" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 91, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "#chipo['item_price'] = chipo['item_price'].replace('$','',regex=True).astype('float')\n", 515 | "#chipo['item_price'].apply(lambda exp:float(exp[3:4]))\n", 516 | "dollarizer = lambda x: float(x[1:-1])\n", 517 | "chipo.item_price = chipo.item_price.apply(dollarizer)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "#### Step 13.c. Check the item price type" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 92, 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "data": { 534 | "text/plain": [ 535 | "dtype('float64')" 536 | ] 537 | }, 538 | "execution_count": 92, 539 | "metadata": {}, 540 | "output_type": "execute_result" 541 | } 542 | ], 543 | "source": [ 544 | "chipo['item_price'].dtype" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | "### Step 14. How much was the revenue for the period in the dataset?" 
552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 100, 557 | "metadata": {}, 558 | "outputs": [ 559 | { 560 | "name": "stdout", 561 | "output_type": "stream", 562 | "text": [ 563 | "Revenue was: $16855.08\n" 564 | ] 565 | } 566 | ], 567 | "source": [ 568 | "revenue = (chipo['quantity']* chipo['item_price']).sum()\n", 569 | "\n", 570 | "print('Revenue was: $' + str(np.round(revenue,2)))" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": {}, 576 | "source": [ 577 | "### Step 15. How many orders were made in the period?" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 103, 583 | "metadata": {}, 584 | "outputs": [ 585 | { 586 | "data": { 587 | "text/plain": [ 588 | "1834" 589 | ] 590 | }, 591 | "execution_count": 103, 592 | "metadata": {}, 593 | "output_type": "execute_result" 594 | } 595 | ], 596 | "source": [ 597 | "chipo.order_id.value_counts().count()" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "### Step 16. What is the average amount per order?" 
605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 111, 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "text/plain": [ 615 | "8.543391494002332" 616 | ] 617 | }, 618 | "execution_count": 111, 619 | "metadata": {}, 620 | "output_type": "execute_result" 621 | } 622 | ], 623 | "source": [ 624 | "# Solution 1\n", 625 | "chipo.groupby(by=['order_id']).sum().mean()['item_price']" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 112, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "data": { 635 | "text/plain": [ 636 | "8.543391494002332" 637 | ] 638 | }, 639 | "execution_count": 112, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": [ 645 | "# Solution 2\n", 646 | "chipo['revenue'] = chipo['quantity'] * chipo['item_price']\n", 647 | "order_grouped = chipo.groupby(by=['order_id']).sum()\n", 648 | "order_grouped.mean()['item_price']" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "### Step 17. How many different items are sold?" 
656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "execution_count": 118, 661 | "metadata": {}, 662 | "outputs": [ 663 | { 664 | "data": { 665 | "text/plain": [ 666 | "50" 667 | ] 668 | }, 669 | "execution_count": 118, 670 | "metadata": {}, 671 | "output_type": "execute_result" 672 | } 673 | ], 674 | "source": [ 675 | "chipo.item_name.value_counts().count()" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [] 684 | } 685 | ], 686 | "metadata": { 687 | "anaconda-cloud": {}, 688 | "kernelspec": { 689 | "display_name": "Python 3", 690 | "language": "python", 691 | "name": "python3" 692 | }, 693 | "language_info": { 694 | "codemirror_mode": { 695 | "name": "ipython", 696 | "version": 3 697 | }, 698 | "file_extension": ".py", 699 | "mimetype": "text/x-python", 700 | "name": "python", 701 | "nbconvert_exporter": "python", 702 | "pygments_lexer": "ipython3", 703 | "version": "3.6.5" 704 | } 705 | }, 706 | "nbformat": 4, 707 | "nbformat_minor": 1 708 | } 709 | -------------------------------------------------------------------------------- /grouping/alcohol.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ex - GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "GroupBy can be summarized as Split-Apply-Combine.\n", 17 | "\n", 18 | "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n", 19 | "\n", 20 | "Check out this [Diagram](http://i.imgur.com/yjNkiwL.png) \n", 21 | "### Step 1. 
Import the necessary libraries" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv). " 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Step 3. Assign it to a variable called drinks." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | "
countrybeer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcoholcontinent
0Afghanistan0000.0AS
1Albania89132544.9EU
2Algeria250140.7AF
3Andorra24513831212.4EU
4Angola21757455.9AF
\n", 129 | "
" 130 | ], 131 | "text/plain": [ 132 | " country beer_servings spirit_servings wine_servings \\\n", 133 | "0 Afghanistan 0 0 0 \n", 134 | "1 Albania 89 132 54 \n", 135 | "2 Algeria 25 0 14 \n", 136 | "3 Andorra 245 138 312 \n", 137 | "4 Angola 217 57 45 \n", 138 | "\n", 139 | " total_litres_of_pure_alcohol continent \n", 140 | "0 0.0 AS \n", 141 | "1 4.9 EU \n", 142 | "2 0.7 AF \n", 143 | "3 12.4 EU \n", 144 | "4 5.9 AF " 145 | ] 146 | }, 147 | "execution_count": 3, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "drinks = pd.read_csv(\"https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv\")\n", 154 | "drinks.head()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### Step 4. Which continent drinks more beer on average?" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 19, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "continent\n", 173 | "AF 61.471698\n", 174 | "AS 37.045455\n", 175 | "EU 193.777778\n", 176 | "OC 89.687500\n", 177 | "SA 175.083333\n", 178 | "Name: beer_servings, dtype: float64" 179 | ] 180 | }, 181 | "execution_count": 19, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "drinks.groupby(by='continent').beer_servings.mean()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "### Step 5. For each continent print the statistics for wine consumption." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 22, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | "
countmeanstdmin25%50%75%max
continent
AF53.016.26415138.8464190.01.02.013.00233.0
AS44.09.06818221.6670340.00.01.08.00123.0
EU45.0142.22222297.4217380.059.0128.0195.00370.0
OC16.035.62500064.5557900.01.08.523.25212.0
SA12.062.41666788.6201891.03.012.098.50221.0
\n", 302 | "
" 303 | ], 304 | "text/plain": [ 305 | " count mean std min 25% 50% 75% max\n", 306 | "continent \n", 307 | "AF 53.0 16.264151 38.846419 0.0 1.0 2.0 13.00 233.0\n", 308 | "AS 44.0 9.068182 21.667034 0.0 0.0 1.0 8.00 123.0\n", 309 | "EU 45.0 142.222222 97.421738 0.0 59.0 128.0 195.00 370.0\n", 310 | "OC 16.0 35.625000 64.555790 0.0 1.0 8.5 23.25 212.0\n", 311 | "SA 12.0 62.416667 88.620189 1.0 3.0 12.0 98.50 221.0" 312 | ] 313 | }, 314 | "execution_count": 22, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "drinks.groupby(by='continent').wine_servings.describe()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "### Step 6. Print the mean alcohol consumption per continent for every column" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 24, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/html": [ 338 | "
\n", 339 | "\n", 352 | "\n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF61.47169816.33962316.2641513.007547
AS37.04545560.8409099.0681822.170455
EU193.777778132.555556142.2222228.617778
OC89.68750058.43750035.6250003.381250
SA175.083333114.75000062.4166676.308333
\n", 407 | "
" 408 | ], 409 | "text/plain": [ 410 | " beer_servings spirit_servings wine_servings \\\n", 411 | "continent \n", 412 | "AF 61.471698 16.339623 16.264151 \n", 413 | "AS 37.045455 60.840909 9.068182 \n", 414 | "EU 193.777778 132.555556 142.222222 \n", 415 | "OC 89.687500 58.437500 35.625000 \n", 416 | "SA 175.083333 114.750000 62.416667 \n", 417 | "\n", 418 | " total_litres_of_pure_alcohol \n", 419 | "continent \n", 420 | "AF 3.007547 \n", 421 | "AS 2.170455 \n", 422 | "EU 8.617778 \n", 423 | "OC 3.381250 \n", 424 | "SA 6.308333 " 425 | ] 426 | }, 427 | "execution_count": 24, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "drinks.groupby(by='continent').mean()" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "### Step 7. Print the median alcohol consumption per continent for every column" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 26, 446 | "metadata": {}, 447 | "outputs": [ 448 | { 449 | "data": { 450 | "text/html": [ 451 | "
\n", 452 | "\n", 465 | "\n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | "
beer_servingsspirit_servingswine_servingstotal_litres_of_pure_alcohol
continent
AF32.03.02.02.30
AS17.516.01.01.20
EU219.0122.0128.010.00
OC52.537.08.51.75
SA162.5108.512.06.85
\n", 520 | "
" 521 | ], 522 | "text/plain": [ 523 | " beer_servings spirit_servings wine_servings \\\n", 524 | "continent \n", 525 | "AF 32.0 3.0 2.0 \n", 526 | "AS 17.5 16.0 1.0 \n", 527 | "EU 219.0 122.0 128.0 \n", 528 | "OC 52.5 37.0 8.5 \n", 529 | "SA 162.5 108.5 12.0 \n", 530 | "\n", 531 | " total_litres_of_pure_alcohol \n", 532 | "continent \n", 533 | "AF 2.30 \n", 534 | "AS 1.20 \n", 535 | "EU 10.00 \n", 536 | "OC 1.75 \n", 537 | "SA 6.85 " 538 | ] 539 | }, 540 | "execution_count": 26, 541 | "metadata": {}, 542 | "output_type": "execute_result" 543 | } 544 | ], 545 | "source": [ 546 | "drinks.groupby(by='continent').median()" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "### Step 8. Print the mean, min and max values for spirit consumption.\n", 554 | "#### This time output a DataFrame" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 32, 560 | "metadata": {}, 561 | "outputs": [ 562 | { 563 | "data": { 564 | "text/plain": [ 565 | "count 193.000000\n", 566 | "mean 80.994819\n", 567 | "std 88.284312\n", 568 | "min 0.000000\n", 569 | "25% 4.000000\n", 570 | "50% 56.000000\n", 571 | "75% 128.000000\n", 572 | "max 438.000000\n", 573 | "Name: spirit_servings, dtype: float64" 574 | ] 575 | }, 576 | "execution_count": 32, 577 | "metadata": {}, 578 | "output_type": "execute_result" 579 | } 580 | ], 581 | "source": [ 582 | "spirit = pd.DataFrame(drinks.spirit_servings)\n" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [] 591 | } 592 | ], 593 | "metadata": { 594 | "kernelspec": { 595 | "display_name": "Python 3", 596 | "language": "python", 597 | "name": "python3" 598 | }, 599 | "language_info": { 600 | "codemirror_mode": { 601 | "name": "ipython", 602 | "version": 3 603 | }, 604 | "file_extension": ".py", 605 | "mimetype": "text/x-python", 606 | "name": "python", 607 | "nbconvert_exporter": "python", 608 
| "pygments_lexer": "ipython3", 609 | "version": "3.6.5" 610 | } 611 | }, 612 | "nbformat": 4, 613 | "nbformat_minor": 1 614 | } 615 | -------------------------------------------------------------------------------- /grouping/regiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Regiment" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: http://chrisalbon.com/ for sharing the dataset and materials.\n", 17 | "\n", 18 | "### Step 1. Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Create the DataFrame with the following values:" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "raw_data = {'regiment': ['Nighthawks', 'Nighthawks', 'Nighthawks', 'Nighthawks', 'Dragoons', 'Dragoons', 'Dragoons', 'Dragoons', 'Scouts', 'Scouts', 'Scouts', 'Scouts'], \n", 44 | " 'company': ['1st', '1st', '2nd', '2nd', '1st', '1st', '2nd', '2nd','1st', '1st', '2nd', '2nd'], \n", 45 | " 'name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze', 'Jacon', 'Ryaner', 'Sone', 'Sloan', 'Piger', 'Riani', 'Ali'], \n", 46 | " 'preTestScore': [4, 24, 31, 2, 3, 4, 24, 31, 2, 3, 2, 3],\n", 47 | " 'postTestScore': [25, 94, 57, 62, 70, 25, 94, 57, 62, 70, 62, 70]}" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Step 3. 
Assign it to a variable called regiment.\n", 55 | "#### Don't forget to name each column" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 50, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | "
regimentcompanynamepreTestScorepostTestScore
0Nighthawks1stMiller425
1Nighthawks1stJacobson2494
2Nighthawks2ndAli3157
3Nighthawks2ndMilner262
4Dragoons1stCooze370
\n", 134 | "
" 135 | ], 136 | "text/plain": [ 137 | " regiment company name preTestScore postTestScore\n", 138 | "0 Nighthawks 1st Miller 4 25\n", 139 | "1 Nighthawks 1st Jacobson 24 94\n", 140 | "2 Nighthawks 2nd Ali 31 57\n", 141 | "3 Nighthawks 2nd Milner 2 62\n", 142 | "4 Dragoons 1st Cooze 3 70" 143 | ] 144 | }, 145 | "execution_count": 50, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "regiment = pd.DataFrame(raw_data)\n", 152 | "regiment.head()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Step 4. What is the mean preTestScore from the regiment Nighthawks? " 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 43, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "regiment\n", 171 | "Dragoons 15.50\n", 172 | "Nighthawks 15.25\n", 173 | "Scouts 2.50\n", 174 | "Name: preTestScore, dtype: float64" 175 | ] 176 | }, 177 | "execution_count": 43, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "regiment.groupby(by=\"regiment\")['preTestScore'].mean()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### Step 5. Present general statistics by company" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 44, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/html": [ 201 | "
\n", 202 | "\n", 219 | "\n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | "
postTestScorepreTestScore
countmeanstdmin25%50%75%maxcountmeanstdmin25%50%75%max
company
1st6.057.66666727.48575425.034.2566.070.094.06.06.6666678.5244752.03.003.54.0024.0
2nd6.067.00000014.05702757.058.2562.068.094.06.015.50000014.6526452.02.2513.529.2531.0
\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " postTestScore \\\n", 310 | " count mean std min 25% 50% 75% max \n", 311 | "company \n", 312 | "1st 6.0 57.666667 27.485754 25.0 34.25 66.0 70.0 94.0 \n", 313 | "2nd 6.0 67.000000 14.057027 57.0 58.25 62.0 68.0 94.0 \n", 314 | "\n", 315 | " preTestScore \n", 316 | " count mean std min 25% 50% 75% max \n", 317 | "company \n", 318 | "1st 6.0 6.666667 8.524475 2.0 3.00 3.5 4.00 24.0 \n", 319 | "2nd 6.0 15.500000 14.652645 2.0 2.25 13.5 29.25 31.0 " 320 | ] 321 | }, 322 | "execution_count": 44, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "regiment.groupby(by=\"company\").describe()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "### Step 6. What is the mean of each company's preTestScore?" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 45, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "company\n", 347 | "1st 6.666667\n", 348 | "2nd 15.500000\n", 349 | "Name: preTestScore, dtype: float64" 350 | ] 351 | }, 352 | "execution_count": 45, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "regiment.groupby(by=\"company\").preTestScore.mean()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "### Step 7. 
Present the mean preTestScores grouped by regiment and company" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 46, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "regiment company\n", 377 | "Dragoons 1st 3.5\n", 378 | " 2nd 27.5\n", 379 | "Nighthawks 1st 14.0\n", 380 | " 2nd 16.5\n", 381 | "Scouts 1st 2.5\n", 382 | " 2nd 2.5\n", 383 | "Name: preTestScore, dtype: float64" 384 | ] 385 | }, 386 | "execution_count": 46, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "regiment.groupby(by=['regiment',\"company\"]).preTestScore.mean()" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Step 8. Present the mean preTestScores grouped by regiment and company without hierarchical indexing" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 90, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/html": [ 410 | "
\n", 411 | "\n", 424 | "\n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | "
company1st2nd
regiment
Dragoons3.527.5
Nighthawks14.016.5
Scouts2.52.5
\n", 455 | "
" 456 | ], 457 | "text/plain": [ 458 | "company 1st 2nd\n", 459 | "regiment \n", 460 | "Dragoons 3.5 27.5\n", 461 | "Nighthawks 14.0 16.5\n", 462 | "Scouts 2.5 2.5" 463 | ] 464 | }, 465 | "execution_count": 90, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "regiment.groupby(by=['regiment',\"company\"]).preTestScore.mean().unstack()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "### Step 9. Group the entire dataframe by regiment and company" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 53, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/html": [ 489 | "
\n", 490 | "\n", 503 | "\n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | "
preTestScorepostTestScore
regimentcompany
Dragoons1st3.547.5
2nd27.575.5
Nighthawks1st14.059.5
2nd16.559.5
Scouts1st2.566.0
2nd2.566.0
\n", 554 | "
" 555 | ], 556 | "text/plain": [ 557 | " preTestScore postTestScore\n", 558 | "regiment company \n", 559 | "Dragoons 1st 3.5 47.5\n", 560 | " 2nd 27.5 75.5\n", 561 | "Nighthawks 1st 14.0 59.5\n", 562 | " 2nd 16.5 59.5\n", 563 | "Scouts 1st 2.5 66.0\n", 564 | " 2nd 2.5 66.0" 565 | ] 566 | }, 567 | "execution_count": 53, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "regiment.groupby(by=['regiment',\"company\"]).mean()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "### Step 10. What is the number of observations in each regiment and company" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 64, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "regiment company\n", 592 | "Dragoons 1st 2\n", 593 | " 2nd 2\n", 594 | "Nighthawks 1st 2\n", 595 | " 2nd 2\n", 596 | "Scouts 1st 2\n", 597 | " 2nd 2\n", 598 | "Name: regiment, dtype: int64" 599 | ] 600 | }, 601 | "execution_count": 64, 602 | "metadata": {}, 603 | "output_type": "execute_result" 604 | } 605 | ], 606 | "source": [ 607 | "regiment.groupby(by=['regiment',\"company\"]).regiment.size()" 608 | ] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": {}, 613 | "source": [ 614 | "### Step 11. 
Iterate over a group and print the name and the whole data from the regiment" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 88, 620 | "metadata": {}, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "Dragoons\n", 627 | " regiment company name preTestScore postTestScore\n", 628 | "4 Dragoons 1st Cooze 3 70\n", 629 | "5 Dragoons 1st Jacon 4 25\n", 630 | "6 Dragoons 2nd Ryaner 24 94\n", 631 | "7 Dragoons 2nd Sone 31 57\n", 632 | "Nighthawks\n", 633 | " regiment company name preTestScore postTestScore\n", 634 | "0 Nighthawks 1st Miller 4 25\n", 635 | "1 Nighthawks 1st Jacobson 24 94\n", 636 | "2 Nighthawks 2nd Ali 31 57\n", 637 | "3 Nighthawks 2nd Milner 2 62\n", 638 | "Scouts\n", 639 | " regiment company name preTestScore postTestScore\n", 640 | "8 Scouts 1st Sloan 2 62\n", 641 | "9 Scouts 1st Piger 3 70\n", 642 | "10 Scouts 2nd Riani 2 62\n", 643 | "11 Scouts 2nd Ali 3 70\n" 644 | ] 645 | } 646 | ], 647 | "source": [ 648 | "for name,group in regiment.groupby('regiment'):\n", 649 | " print(name)\n", 650 | " print(group)" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [] 659 | } 660 | ], 661 | "metadata": { 662 | "kernelspec": { 663 | "display_name": "Python 3", 664 | "language": "python", 665 | "name": "python3" 666 | }, 667 | "language_info": { 668 | "codemirror_mode": { 669 | "name": "ipython", 670 | "version": 3 671 | }, 672 | "file_extension": ".py", 673 | "mimetype": "text/x-python", 674 | "name": "python", 675 | "nbconvert_exporter": "python", 676 | "pygments_lexer": "ipython3", 677 | "version": "3.6.5" 678 | } 679 | }, 680 | "nbformat": 4, 681 | "nbformat_minor": 1 682 | } 683 | -------------------------------------------------------------------------------- /merge/cars1.csv: -------------------------------------------------------------------------------- 1 | 
mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,,,,, 18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,, 15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,, 18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,, 16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,, 17.0,8,302,140,3449,10.5,70,1,ford torino,,,,, 15.0,8,429,198,4341,10.0,70,1,ford galaxie 500,,,,, 14.0,8,454,220,4354,9.0,70,1,chevrolet impala,,,,, 14.0,8,440,215,4312,8.5,70,1,plymouth fury iii,,,,, 14.0,8,455,225,4425,10.0,70,1,pontiac catalina,,,,, 15.0,8,390,190,3850,8.5,70,1,amc ambassador dpl,,,,, 15.0,8,383,170,3563,10.0,70,1,dodge challenger se,,,,, 14.0,8,340,160,3609,8.0,70,1,plymouth 'cuda 340,,,,, 15.0,8,400,150,3761,9.5,70,1,chevrolet monte carlo,,,,, 14.0,8,455,225,3086,10.0,70,1,buick estate wagon (sw),,,,, 24.0,4,113,95,2372,15.0,70,3,toyota corona mark ii,,,,, 22.0,6,198,95,2833,15.5,70,1,plymouth duster,,,,, 18.0,6,199,97,2774,15.5,70,1,amc hornet,,,,, 21.0,6,200,85,2587,16.0,70,1,ford maverick,,,,, 27.0,4,97,88,2130,14.5,70,3,datsun pl510,,,,, 26.0,4,97,46,1835,20.5,70,2,volkswagen 1131 deluxe sedan,,,,, 25.0,4,110,87,2672,17.5,70,2,peugeot 504,,,,, 24.0,4,107,90,2430,14.5,70,2,audi 100 ls,,,,, 25.0,4,104,95,2375,17.5,70,2,saab 99e,,,,, 26.0,4,121,113,2234,12.5,70,2,bmw 2002,,,,, 21.0,6,199,90,2648,15.0,70,1,amc gremlin,,,,, 10.0,8,360,215,4615,14.0,70,1,ford f250,,,,, 10.0,8,307,200,4376,15.0,70,1,chevy c20,,,,, 11.0,8,318,210,4382,13.5,70,1,dodge d200,,,,, 9.0,8,304,193,4732,18.5,70,1,hi 1200d,,,,, 27.0,4,97,88,2130,14.5,71,3,datsun pl510,,,,, 28.0,4,140,90,2264,15.5,71,1,chevrolet vega 2300,,,,, 25.0,4,113,95,2228,14.0,71,3,toyota corona,,,,, 25.0,4,98,?,2046,19.0,71,1,ford pinto,,,,, 19.0,6,232,100,2634,13.0,71,1,amc gremlin,,,,, 16.0,6,225,105,3439,15.5,71,1,plymouth satellite custom,,,,, 17.0,6,250,100,3329,15.5,71,1,chevrolet chevelle malibu,,,,, 19.0,6,250,88,3302,15.5,71,1,ford torino 500,,,,, 18.0,6,232,100,3288,15.5,71,1,amc matador,,,,, 
14.0,8,350,165,4209,12.0,71,1,chevrolet impala,,,,, 14.0,8,400,175,4464,11.5,71,1,pontiac catalina brougham,,,,, 14.0,8,351,153,4154,13.5,71,1,ford galaxie 500,,,,, 14.0,8,318,150,4096,13.0,71,1,plymouth fury iii,,,,, 12.0,8,383,180,4955,11.5,71,1,dodge monaco (sw),,,,, 13.0,8,400,170,4746,12.0,71,1,ford country squire (sw),,,,, 13.0,8,400,175,5140,12.0,71,1,pontiac safari (sw),,,,, 18.0,6,258,110,2962,13.5,71,1,amc hornet sportabout (sw),,,,, 22.0,4,140,72,2408,19.0,71,1,chevrolet vega (sw),,,,, 19.0,6,250,100,3282,15.0,71,1,pontiac firebird,,,,, 18.0,6,250,88,3139,14.5,71,1,ford mustang,,,,, 23.0,4,122,86,2220,14.0,71,1,mercury capri 2000,,,,, 28.0,4,116,90,2123,14.0,71,2,opel 1900,,,,, 30.0,4,79,70,2074,19.5,71,2,peugeot 304,,,,, 30.0,4,88,76,2065,14.5,71,2,fiat 124b,,,,, 31.0,4,71,65,1773,19.0,71,3,toyota corolla 1200,,,,, 35.0,4,72,69,1613,18.0,71,3,datsun 1200,,,,, 27.0,4,97,60,1834,19.0,71,2,volkswagen model 111,,,,, 26.0,4,91,70,1955,20.5,71,1,plymouth cricket,,,,, 24.0,4,113,95,2278,15.5,72,3,toyota corona hardtop,,,,, 25.0,4,98,80,2126,17.0,72,1,dodge colt hardtop,,,,, 23.0,4,97,54,2254,23.5,72,2,volkswagen type 3,,,,, 20.0,4,140,90,2408,19.5,72,1,chevrolet vega,,,,, 21.0,4,122,86,2226,16.5,72,1,ford pinto runabout,,,,, 13.0,8,350,165,4274,12.0,72,1,chevrolet impala,,,,, 14.0,8,400,175,4385,12.0,72,1,pontiac catalina,,,,, 15.0,8,318,150,4135,13.5,72,1,plymouth fury iii,,,,, 14.0,8,351,153,4129,13.0,72,1,ford galaxie 500,,,,, 17.0,8,304,150,3672,11.5,72,1,amc ambassador sst,,,,, 11.0,8,429,208,4633,11.0,72,1,mercury marquis,,,,, 13.0,8,350,155,4502,13.5,72,1,buick lesabre custom,,,,, 12.0,8,350,160,4456,13.5,72,1,oldsmobile delta 88 royale,,,,, 13.0,8,400,190,4422,12.5,72,1,chrysler newport royal,,,,, 19.0,3,70,97,2330,13.5,72,3,mazda rx2 coupe,,,,, 15.0,8,304,150,3892,12.5,72,1,amc matador (sw),,,,, 13.0,8,307,130,4098,14.0,72,1,chevrolet chevelle concours (sw),,,,, 13.0,8,302,140,4294,16.0,72,1,ford gran torino (sw),,,,, 
14.0,8,318,150,4077,14.0,72,1,plymouth satellite custom (sw),,,,, 18.0,4,121,112,2933,14.5,72,2,volvo 145e (sw),,,,, 22.0,4,121,76,2511,18.0,72,2,volkswagen 411 (sw),,,,, 21.0,4,120,87,2979,19.5,72,2,peugeot 504 (sw),,,,, 26.0,4,96,69,2189,18.0,72,2,renault 12 (sw),,,,, 22.0,4,122,86,2395,16.0,72,1,ford pinto (sw),,,,, 28.0,4,97,92,2288,17.0,72,3,datsun 510 (sw),,,,, 23.0,4,120,97,2506,14.5,72,3,toyouta corona mark ii (sw),,,,, 28.0,4,98,80,2164,15.0,72,1,dodge colt (sw),,,,, 27.0,4,97,88,2100,16.5,72,3,toyota corolla 1600 (sw),,,,, 13.0,8,350,175,4100,13.0,73,1,buick century 350,,,,, 14.0,8,304,150,3672,11.5,73,1,amc matador,,,,, 13.0,8,350,145,3988,13.0,73,1,chevrolet malibu,,,,, 14.0,8,302,137,4042,14.5,73,1,ford gran torino,,,,, 15.0,8,318,150,3777,12.5,73,1,dodge coronet custom,,,,, 12.0,8,429,198,4952,11.5,73,1,mercury marquis brougham,,,,, 13.0,8,400,150,4464,12.0,73,1,chevrolet caprice classic,,,,, 13.0,8,351,158,4363,13.0,73,1,ford ltd,,,,, 14.0,8,318,150,4237,14.5,73,1,plymouth fury gran sedan,,,,, 13.0,8,440,215,4735,11.0,73,1,chrysler new yorker brougham,,,,, 12.0,8,455,225,4951,11.0,73,1,buick electra 225 custom,,,,, 13.0,8,360,175,3821,11.0,73,1,amc ambassador brougham,,,,, 18.0,6,225,105,3121,16.5,73,1,plymouth valiant,,,,, 16.0,6,250,100,3278,18.0,73,1,chevrolet nova custom,,,,, 18.0,6,232,100,2945,16.0,73,1,amc hornet,,,,, 18.0,6,250,88,3021,16.5,73,1,ford maverick,,,,, 23.0,6,198,95,2904,16.0,73,1,plymouth duster,,,,, 26.0,4,97,46,1950,21.0,73,2,volkswagen super beetle,,,,, 11.0,8,400,150,4997,14.0,73,1,chevrolet impala,,,,, 12.0,8,400,167,4906,12.5,73,1,ford country,,,,, 13.0,8,360,170,4654,13.0,73,1,plymouth custom suburb,,,,, 12.0,8,350,180,4499,12.5,73,1,oldsmobile vista cruiser,,,,, 18.0,6,232,100,2789,15.0,73,1,amc gremlin,,,,, 20.0,4,97,88,2279,19.0,73,3,toyota carina,,,,, 21.0,4,140,72,2401,19.5,73,1,chevrolet vega,,,,, 22.0,4,108,94,2379,16.5,73,3,datsun 610,,,,, 18.0,3,70,90,2124,13.5,73,3,maxda rx3,,,,, 19.0,4,122,85,2310,18.5,73,1,ford 
pinto,,,,, 21.0,6,155,107,2472,14.0,73,1,mercury capri v6,,,,, 26.0,4,98,90,2265,15.5,73,2,fiat 124 sport coupe,,,,, 15.0,8,350,145,4082,13.0,73,1,chevrolet monte carlo s,,,,, 16.0,8,400,230,4278,9.5,73,1,pontiac grand prix,,,,, 29.0,4,68,49,1867,19.5,73,2,fiat 128,,,,, 24.0,4,116,75,2158,15.5,73,2,opel manta,,,,, 20.0,4,114,91,2582,14.0,73,2,audi 100ls,,,,, 19.0,4,121,112,2868,15.5,73,2,volvo 144ea,,,,, 15.0,8,318,150,3399,11.0,73,1,dodge dart custom,,,,, 24.0,4,121,110,2660,14.0,73,2,saab 99le,,,,, 20.0,6,156,122,2807,13.5,73,3,toyota mark ii,,,,, 11.0,8,350,180,3664,11.0,73,1,oldsmobile omega,,,,, 20.0,6,198,95,3102,16.5,74,1,plymouth duster,,,,, 21.0,6,200,?,2875,17.0,74,1,ford maverick,,,,, 19.0,6,232,100,2901,16.0,74,1,amc hornet,,,,, 15.0,6,250,100,3336,17.0,74,1,chevrolet nova,,,,, 31.0,4,79,67,1950,19.0,74,3,datsun b210,,,,, 26.0,4,122,80,2451,16.5,74,1,ford pinto,,,,, 32.0,4,71,65,1836,21.0,74,3,toyota corolla 1200,,,,, 25.0,4,140,75,2542,17.0,74,1,chevrolet vega,,,,, 16.0,6,250,100,3781,17.0,74,1,chevrolet chevelle malibu classic,,,,, 16.0,6,258,110,3632,18.0,74,1,amc matador,,,,, 18.0,6,225,105,3613,16.5,74,1,plymouth satellite sebring,,,,, 16.0,8,302,140,4141,14.0,74,1,ford gran torino,,,,, 13.0,8,350,150,4699,14.5,74,1,buick century luxus (sw),,,,, 14.0,8,318,150,4457,13.5,74,1,dodge coronet custom (sw),,,,, 14.0,8,302,140,4638,16.0,74,1,ford gran torino (sw),,,,, 14.0,8,304,150,4257,15.5,74,1,amc matador (sw),,,,, 29.0,4,98,83,2219,16.5,74,2,audi fox,,,,, 26.0,4,79,67,1963,15.5,74,2,volkswagen dasher,,,,, 26.0,4,97,78,2300,14.5,74,2,opel manta,,,,, 31.0,4,76,52,1649,16.5,74,3,toyota corona,,,,, 32.0,4,83,61,2003,19.0,74,3,datsun 710,,,,, 28.0,4,90,75,2125,14.5,74,1,dodge colt,,,,, 24.0,4,90,75,2108,15.5,74,2,fiat 128,,,,, 26.0,4,116,75,2246,14.0,74,2,fiat 124 tc,,,,, 24.0,4,120,97,2489,15.0,74,3,honda civic,,,,, 26.0,4,108,93,2391,15.5,74,3,subaru,,,,, 31.0,4,79,67,2000,16.0,74,2,fiat x1.9,,,,, 19.0,6,225,95,3264,16.0,75,1,plymouth valiant 
custom,,,,, 18.0,6,250,105,3459,16.0,75,1,chevrolet nova,,,,, 15.0,6,250,72,3432,21.0,75,1,mercury monarch,,,,, 15.0,6,250,72,3158,19.5,75,1,ford maverick,,,,, 16.0,8,400,170,4668,11.5,75,1,pontiac catalina,,,,, 15.0,8,350,145,4440,14.0,75,1,chevrolet bel air,,,,, 16.0,8,318,150,4498,14.5,75,1,plymouth grand fury,,,,, 14.0,8,351,148,4657,13.5,75,1,ford ltd,,,,, 17.0,6,231,110,3907,21.0,75,1,buick century,,,,, 16.0,6,250,105,3897,18.5,75,1,chevroelt chevelle malibu,,,,, 15.0,6,258,110,3730,19.0,75,1,amc matador,,,,, 18.0,6,225,95,3785,19.0,75,1,plymouth fury,,,,, 21.0,6,231,110,3039,15.0,75,1,buick skyhawk,,,,, 20.0,8,262,110,3221,13.5,75,1,chevrolet monza 2+2,,,,, 13.0,8,302,129,3169,12.0,75,1,ford mustang ii,,,,, 29.0,4,97,75,2171,16.0,75,3,toyota corolla,,,,, 23.0,4,140,83,2639,17.0,75,1,ford pinto,,,,, 20.0,6,232,100,2914,16.0,75,1,amc gremlin,,,,, 23.0,4,140,78,2592,18.5,75,1,pontiac astro,,,,, 24.0,4,134,96,2702,13.5,75,3,toyota corona,,,,, 25.0,4,90,71,2223,16.5,75,2,volkswagen dasher,,,,, 24.0,4,119,97,2545,17.0,75,3,datsun 710,,,,, 18.0,6,171,97,2984,14.5,75,1,ford pinto,,,,, 29.0,4,90,70,1937,14.0,75,2,volkswagen rabbit,,,,, 19.0,6,232,90,3211,17.0,75,1,amc pacer,,,,, 23.0,4,115,95,2694,15.0,75,2,audi 100ls,,,,, 23.0,4,120,88,2957,17.0,75,2,peugeot 504,,,,, 22.0,4,121,98,2945,14.5,75,2,volvo 244dl,,,,, 25.0,4,121,115,2671,13.5,75,2,saab 99le,,,,, 33.0,4,91,53,1795,17.5,75,3,honda civic cvcc,,,,, 28.0,4,107,86,2464,15.5,76,2,fiat 131,,,,, 25.0,4,116,81,2220,16.9,76,2,opel 1900,,,,, 25.0,4,140,92,2572,14.9,76,1,capri ii,,,,, 26.0,4,98,79,2255,17.7,76,1,dodge colt,,,,, 27.0,4,101,83,2202,15.3,76,2,renault 12tl,,,,, 17.5,8,305,140,4215,13.0,76,1,chevrolet chevelle malibu classic,,,,, 16.0,8,318,150,4190,13.0,76,1,dodge coronet brougham,,,,, 15.5,8,304,120,3962,13.9,76,1,amc matador,,,,, 14.5,8,351,152,4215,12.8,76,1,ford gran torino,,,,, 22.0,6,225,100,3233,15.4,76,1,plymouth valiant,,,,, 22.0,6,250,105,3353,14.5,76,1,chevrolet nova,,,,, 
24.0,6,200,81,3012,17.6,76,1,ford maverick,,,,, 22.5,6,232,90,3085,17.6,76,1,amc hornet,,,,, 29.0,4,85,52,2035,22.2,76,1,chevrolet chevette,,,,, 24.5,4,98,60,2164,22.1,76,1,chevrolet woody,,,,, 29.0,4,90,70,1937,14.2,76,2,vw rabbit,,,,, -------------------------------------------------------------------------------- /merge/cars2.csv: -------------------------------------------------------------------------------- 1 | mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car 33.0,4,91,53,1795,17.4,76,3,honda civic 20.0,6,225,100,3651,17.7,76,1,dodge aspen se 18.0,6,250,78,3574,21.0,76,1,ford granada ghia 18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj 17.5,6,258,95,3193,17.8,76,1,amc pacer d/l 29.5,4,97,71,1825,12.2,76,2,volkswagen rabbit 32.0,4,85,70,1990,17.0,76,3,datsun b-210 28.0,4,97,75,2155,16.4,76,3,toyota corolla 26.5,4,140,72,2565,13.6,76,1,ford pinto 20.0,4,130,102,3150,15.7,76,2,volvo 245 13.0,8,318,150,3940,13.2,76,1,plymouth volare premier v8 19.0,4,120,88,3270,21.9,76,2,peugeot 504 19.0,6,156,108,2930,15.5,76,3,toyota mark ii 16.5,6,168,120,3820,16.7,76,2,mercedes-benz 280s 16.5,8,350,180,4380,12.1,76,1,cadillac seville 13.0,8,350,145,4055,12.0,76,1,chevy c10 13.0,8,302,130,3870,15.0,76,1,ford f108 13.0,8,318,150,3755,14.0,76,1,dodge d100 31.5,4,98,68,2045,18.5,77,3,honda accord cvcc 30.0,4,111,80,2155,14.8,77,1,buick opel isuzu deluxe 36.0,4,79,58,1825,18.6,77,2,renault 5 gtl 25.5,4,122,96,2300,15.5,77,1,plymouth arrow gs 33.5,4,85,70,1945,16.8,77,3,datsun f-10 hatchback 17.5,8,305,145,3880,12.5,77,1,chevrolet caprice classic 17.0,8,260,110,4060,19.0,77,1,oldsmobile cutlass supreme 15.5,8,318,145,4140,13.7,77,1,dodge monaco brougham 15.0,8,302,130,4295,14.9,77,1,mercury cougar brougham 17.5,6,250,110,3520,16.4,77,1,chevrolet concours 20.5,6,231,105,3425,16.9,77,1,buick skylark 19.0,6,225,100,3630,17.7,77,1,plymouth volare custom 18.5,6,250,98,3525,19.0,77,1,ford granada 16.0,8,400,180,4220,11.1,77,1,pontiac grand prix lj 
15.5,8,350,170,4165,11.4,77,1,chevrolet monte carlo landau 15.5,8,400,190,4325,12.2,77,1,chrysler cordoba 16.0,8,351,149,4335,14.5,77,1,ford thunderbird 29.0,4,97,78,1940,14.5,77,2,volkswagen rabbit custom 24.5,4,151,88,2740,16.0,77,1,pontiac sunbird coupe 26.0,4,97,75,2265,18.2,77,3,toyota corolla liftback 25.5,4,140,89,2755,15.8,77,1,ford mustang ii 2+2 30.5,4,98,63,2051,17.0,77,1,chevrolet chevette 33.5,4,98,83,2075,15.9,77,1,dodge colt m/m 30.0,4,97,67,1985,16.4,77,3,subaru dl 30.5,4,97,78,2190,14.1,77,2,volkswagen dasher 22.0,6,146,97,2815,14.5,77,3,datsun 810 21.5,4,121,110,2600,12.8,77,2,bmw 320i 21.5,3,80,110,2720,13.5,77,3,mazda rx-4 43.1,4,90,48,1985,21.5,78,2,volkswagen rabbit custom diesel 36.1,4,98,66,1800,14.4,78,1,ford fiesta 32.8,4,78,52,1985,19.4,78,3,mazda glc deluxe 39.4,4,85,70,2070,18.6,78,3,datsun b210 gx 36.1,4,91,60,1800,16.4,78,3,honda civic cvcc 19.9,8,260,110,3365,15.5,78,1,oldsmobile cutlass salon brougham 19.4,8,318,140,3735,13.2,78,1,dodge diplomat 20.2,8,302,139,3570,12.8,78,1,mercury monarch ghia 19.2,6,231,105,3535,19.2,78,1,pontiac phoenix lj 20.5,6,200,95,3155,18.2,78,1,chevrolet malibu 20.2,6,200,85,2965,15.8,78,1,ford fairmont (auto) 25.1,4,140,88,2720,15.4,78,1,ford fairmont (man) 20.5,6,225,100,3430,17.2,78,1,plymouth volare 19.4,6,232,90,3210,17.2,78,1,amc concord 20.6,6,231,105,3380,15.8,78,1,buick century special 20.8,6,200,85,3070,16.7,78,1,mercury zephyr 18.6,6,225,110,3620,18.7,78,1,dodge aspen 18.1,6,258,120,3410,15.1,78,1,amc concord d/l 19.2,8,305,145,3425,13.2,78,1,chevrolet monte carlo landau 17.7,6,231,165,3445,13.4,78,1,buick regal sport coupe (turbo) 18.1,8,302,139,3205,11.2,78,1,ford futura 17.5,8,318,140,4080,13.7,78,1,dodge magnum xe 30.0,4,98,68,2155,16.5,78,1,chevrolet chevette 27.5,4,134,95,2560,14.2,78,3,toyota corona 27.2,4,119,97,2300,14.7,78,3,datsun 510 30.9,4,105,75,2230,14.5,78,1,dodge omni 21.1,4,134,95,2515,14.8,78,3,toyota celica gt liftback 23.2,4,156,105,2745,16.7,78,1,plymouth sapporo 
23.8,4,151,85,2855,17.6,78,1,oldsmobile starfire sx 23.9,4,119,97,2405,14.9,78,3,datsun 200-sx 20.3,5,131,103,2830,15.9,78,2,audi 5000 17.0,6,163,125,3140,13.6,78,2,volvo 264gl 21.6,4,121,115,2795,15.7,78,2,saab 99gle 16.2,6,163,133,3410,15.8,78,2,peugeot 604sl 31.5,4,89,71,1990,14.9,78,2,volkswagen scirocco 29.5,4,98,68,2135,16.6,78,3,honda accord lx 21.5,6,231,115,3245,15.4,79,1,pontiac lemans v6 19.8,6,200,85,2990,18.2,79,1,mercury zephyr 6 22.3,4,140,88,2890,17.3,79,1,ford fairmont 4 20.2,6,232,90,3265,18.2,79,1,amc concord dl 6 20.6,6,225,110,3360,16.6,79,1,dodge aspen 6 17.0,8,305,130,3840,15.4,79,1,chevrolet caprice classic 17.6,8,302,129,3725,13.4,79,1,ford ltd landau 16.5,8,351,138,3955,13.2,79,1,mercury grand marquis 18.2,8,318,135,3830,15.2,79,1,dodge st. regis 16.9,8,350,155,4360,14.9,79,1,buick estate wagon (sw) 15.5,8,351,142,4054,14.3,79,1,ford country squire (sw) 19.2,8,267,125,3605,15.0,79,1,chevrolet malibu classic (sw) 18.5,8,360,150,3940,13.0,79,1,chrysler lebaron town @ country (sw) 31.9,4,89,71,1925,14.0,79,2,vw rabbit custom 34.1,4,86,65,1975,15.2,79,3,maxda glc deluxe 35.7,4,98,80,1915,14.4,79,1,dodge colt hatchback custom 27.4,4,121,80,2670,15.0,79,1,amc spirit dl 25.4,5,183,77,3530,20.1,79,2,mercedes benz 300d 23.0,8,350,125,3900,17.4,79,1,cadillac eldorado 27.2,4,141,71,3190,24.8,79,2,peugeot 504 23.9,8,260,90,3420,22.2,79,1,oldsmobile cutlass salon brougham 34.2,4,105,70,2200,13.2,79,1,plymouth horizon 34.5,4,105,70,2150,14.9,79,1,plymouth horizon tc3 31.8,4,85,65,2020,19.2,79,3,datsun 210 37.3,4,91,69,2130,14.7,79,2,fiat strada custom 28.4,4,151,90,2670,16.0,79,1,buick skylark limited 28.8,6,173,115,2595,11.3,79,1,chevrolet citation 26.8,6,173,115,2700,12.9,79,1,oldsmobile omega brougham 33.5,4,151,90,2556,13.2,79,1,pontiac phoenix 41.5,4,98,76,2144,14.7,80,2,vw rabbit 38.1,4,89,60,1968,18.8,80,3,toyota corolla tercel 32.1,4,98,70,2120,15.5,80,1,chevrolet chevette 37.2,4,86,65,2019,16.4,80,3,datsun 310 
28.0,4,151,90,2678,16.5,80,1,chevrolet citation 26.4,4,140,88,2870,18.1,80,1,ford fairmont 24.3,4,151,90,3003,20.1,80,1,amc concord 19.1,6,225,90,3381,18.7,80,1,dodge aspen 34.3,4,97,78,2188,15.8,80,2,audi 4000 29.8,4,134,90,2711,15.5,80,3,toyota corona liftback 31.3,4,120,75,2542,17.5,80,3,mazda 626 37.0,4,119,92,2434,15.0,80,3,datsun 510 hatchback 32.2,4,108,75,2265,15.2,80,3,toyota corolla 46.6,4,86,65,2110,17.9,80,3,mazda glc 27.9,4,156,105,2800,14.4,80,1,dodge colt 40.8,4,85,65,2110,19.2,80,3,datsun 210 44.3,4,90,48,2085,21.7,80,2,vw rabbit c (diesel) 43.4,4,90,48,2335,23.7,80,2,vw dasher (diesel) 36.4,5,121,67,2950,19.9,80,2,audi 5000s (diesel) 30.0,4,146,67,3250,21.8,80,2,mercedes-benz 240d 44.6,4,91,67,1850,13.8,80,3,honda civic 1500 gl 40.9,4,85,?,1835,17.3,80,2,renault lecar deluxe 33.8,4,97,67,2145,18.0,80,3,subaru dl 29.8,4,89,62,1845,15.3,80,2,vokswagen rabbit 32.7,6,168,132,2910,11.4,80,3,datsun 280-zx 23.7,3,70,100,2420,12.5,80,3,mazda rx-7 gs 35.0,4,122,88,2500,15.1,80,2,triumph tr7 coupe 23.6,4,140,?,2905,14.3,80,1,ford mustang cobra 32.4,4,107,72,2290,17.0,80,3,honda accord 27.2,4,135,84,2490,15.7,81,1,plymouth reliant 26.6,4,151,84,2635,16.4,81,1,buick skylark 25.8,4,156,92,2620,14.4,81,1,dodge aries wagon (sw) 23.5,6,173,110,2725,12.6,81,1,chevrolet citation 30.0,4,135,84,2385,12.9,81,1,plymouth reliant 39.1,4,79,58,1755,16.9,81,3,toyota starlet 39.0,4,86,64,1875,16.4,81,1,plymouth champ 35.1,4,81,60,1760,16.1,81,3,honda civic 1300 32.3,4,97,67,2065,17.8,81,3,subaru 37.0,4,85,65,1975,19.4,81,3,datsun 210 mpg 37.7,4,89,62,2050,17.3,81,3,toyota tercel 34.1,4,91,68,1985,16.0,81,3,mazda glc 4 34.7,4,105,63,2215,14.9,81,1,plymouth horizon 4 34.4,4,98,65,2045,16.2,81,1,ford escort 4w 29.9,4,98,65,2380,20.7,81,1,ford escort 2h 33.0,4,105,74,2190,14.2,81,2,volkswagen jetta 34.5,4,100,?,2320,15.8,81,2,renault 18i 33.7,4,107,75,2210,14.4,81,3,honda prelude 32.4,4,108,75,2350,16.8,81,3,toyota corolla 32.9,4,119,100,2615,14.8,81,3,datsun 200sx 
31.6,4,120,74,2635,18.3,81,3,mazda 626 28.1,4,141,80,3230,20.4,81,2,peugeot 505s turbo diesel 30.7,6,145,76,3160,19.6,81,2,volvo diesel 25.4,6,168,116,2900,12.6,81,3,toyota cressida 24.2,6,146,120,2930,13.8,81,3,datsun 810 maxima 22.4,6,231,110,3415,15.8,81,1,buick century 26.6,8,350,105,3725,19.0,81,1,oldsmobile cutlass ls 20.2,6,200,88,3060,17.1,81,1,ford granada gl 17.6,6,225,85,3465,16.6,81,1,chrysler lebaron salon 28.0,4,112,88,2605,19.6,82,1,chevrolet cavalier 27.0,4,112,88,2640,18.6,82,1,chevrolet cavalier wagon 34.0,4,112,88,2395,18.0,82,1,chevrolet cavalier 2-door 31.0,4,112,85,2575,16.2,82,1,pontiac j2000 se hatchback 29.0,4,135,84,2525,16.0,82,1,dodge aries se 27.0,4,151,90,2735,18.0,82,1,pontiac phoenix 24.0,4,140,92,2865,16.4,82,1,ford fairmont futura 23.0,4,151,?,3035,20.5,82,1,amc concord dl 36.0,4,105,74,1980,15.3,82,2,volkswagen rabbit l 37.0,4,91,68,2025,18.2,82,3,mazda glc custom l 31.0,4,91,68,1970,17.6,82,3,mazda glc custom 38.0,4,105,63,2125,14.7,82,1,plymouth horizon miser 36.0,4,98,70,2125,17.3,82,1,mercury lynx l 36.0,4,120,88,2160,14.5,82,3,nissan stanza xe 36.0,4,107,75,2205,14.5,82,3,honda accord 34.0,4,108,70,2245,16.9,82,3,toyota corolla 38.0,4,91,67,1965,15.0,82,3,honda civic 32.0,4,91,67,1965,15.7,82,3,honda civic (auto) 38.0,4,91,67,1995,16.2,82,3,datsun 310 gx 25.0,6,181,110,2945,16.4,82,1,buick century limited 38.0,6,262,85,3015,17.0,82,1,oldsmobile cutlass ciera (diesel) 26.0,4,156,92,2585,14.5,82,1,chrysler lebaron medallion 22.0,6,232,112,2835,14.7,82,1,ford granada l 32.0,4,144,96,2665,13.9,82,3,toyota celica gt 36.0,4,135,84,2370,13.0,82,1,dodge charger 2.2 27.0,4,151,90,2950,17.3,82,1,chevrolet camaro 27.0,4,140,86,2790,15.6,82,1,ford mustang gl 44.0,4,97,52,2130,24.6,82,2,vw pickup 32.0,4,135,84,2295,11.6,82,1,dodge rampage 28.0,4,120,79,2625,18.6,82,1,ford ranger 31.0,4,119,82,2720,19.4,82,1,chevy s-10 -------------------------------------------------------------------------------- /merge/fictious name.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fictitious Names" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will create the data again \n", 17 | "\n", 18 | "Special thanks to [Chris Albon](http://chrisalbon.com/) for sharing the dataset and materials.\n", 19 | "All the credit for this exercise belongs to him. \n", 20 | "\n", 21 | "To understand more about it, go [here](https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/).\n", 22 | "\n", 23 | "### Step 1. Import the necessary libraries" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np\n", 33 | "import pandas as pd" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Step 2. Create the 3 DataFrames based on the following raw data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "raw_data_1 = {\n", 50 | " 'subject_id': ['1', '2', '3', '4', '5'],\n", 51 | " 'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'], \n", 52 | " 'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches']}\n", 53 | "\n", 54 | "raw_data_2 = {\n", 55 | " 'subject_id': ['4', '5', '6', '7', '8'],\n", 56 | " 'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'], \n", 57 | " 'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan']}\n", 58 | "\n", 59 | "raw_data_3 = {\n", 60 | " 'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],\n", 61 | " 'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16]}" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Step 3. 
Assign each to a variable called data1, data2, data3" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | " subject_id first_name last_name\n", 81 | "0 1 Alex Anderson\n", 82 | "1 2 Amy Ackerman\n", 83 | "2 3 Allen Ali\n", 84 | "3 4 Alice Aoni\n", 85 | "4 5 Ayoung Atiches\n", 86 | " subject_id first_name last_name\n", 87 | "0 4 Billy Bonder\n", 88 | "1 5 Brian Black\n", 89 | "2 6 Bran Balwner\n", 90 | "3 7 Bryce Brice\n", 91 | "4 8 Betty Btisan\n", 92 | " subject_id test_id\n", 93 | "0 1 51\n", 94 | "1 2 15\n", 95 | "2 3 15\n", 96 | "3 4 61\n", 97 | "4 5 16\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "data1 = pd.DataFrame(raw_data_1)\n", 103 | "print(data1.head())\n", 104 | "data2 = pd.DataFrame(raw_data_2)\n", 105 | "print(data2.head())\n", 106 | "data3 = pd.DataFrame(raw_data_3)\n", 107 | "print(data3.head())" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Step 4. Join the two dataframes along rows and assign to all_data" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 10, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "all_data = data1.append(data2)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Step 5. Join the two dataframes along columns and assign to all_data_col" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 23, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# data1.set_index('subject_id').join(data2.set_index('subject_id'))\n", 140 | "all_data_col = pd.concat([data1,data2],axis=1)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Step 6. 
Print data3" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 24, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/html": [ 158 | "
\n", 159 | "\n", 172 | "\n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | "
subject_idtest_id
0151
1215
2315
3461
4516
5714
6815
791
81061
91116
\n", 233 | "
" 234 | ], 235 | "text/plain": [ 236 | " subject_id test_id\n", 237 | "0 1 51\n", 238 | "1 2 15\n", 239 | "2 3 15\n", 240 | "3 4 61\n", 241 | "4 5 16\n", 242 | "5 7 14\n", 243 | "6 8 15\n", 244 | "7 9 1\n", 245 | "8 10 61\n", 246 | "9 11 16" 247 | ] 248 | }, 249 | "execution_count": 24, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "data3" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Step 7. Merge all_data and data3 along the subject_id value" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 27, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/html": [ 273 | "
\n", 274 | "\n", 287 | "\n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | "
subject_idfirst_namelast_nametest_id
01AlexAnderson51
12AmyAckerman15
23AllenAli15
34AliceAoni61
44BillyBonder61
55AyoungAtiches16
65BrianBlack16
77BryceBrice14
88BettyBtisan15
\n", 363 | "
" 364 | ], 365 | "text/plain": [ 366 | " subject_id first_name last_name test_id\n", 367 | "0 1 Alex Anderson 51\n", 368 | "1 2 Amy Ackerman 15\n", 369 | "2 3 Allen Ali 15\n", 370 | "3 4 Alice Aoni 61\n", 371 | "4 4 Billy Bonder 61\n", 372 | "5 5 Ayoung Atiches 16\n", 373 | "6 5 Brian Black 16\n", 374 | "7 7 Bryce Brice 14\n", 375 | "8 8 Betty Btisan 15" 376 | ] 377 | }, 378 | "execution_count": 27, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | } 382 | ], 383 | "source": [ 384 | "all_data.merge(data3, on='subject_id')" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "### Step 8. Merge only the data that has the same 'subject_id' on both data1 and data2" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 28, 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/html": [ 402 | "
\n", 403 | "\n", 416 | "\n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | "
subject_idfirst_name_xlast_name_xfirst_name_ylast_name_y
04AliceAoniBillyBonder
15AyoungAtichesBrianBlack
\n", 446 | "
" 447 | ], 448 | "text/plain": [ 449 | " subject_id first_name_x last_name_x first_name_y last_name_y\n", 450 | "0 4 Alice Aoni Billy Bonder\n", 451 | "1 5 Ayoung Atiches Brian Black" 452 | ] 453 | }, 454 | "execution_count": 28, 455 | "metadata": {}, 456 | "output_type": "execute_result" 457 | } 458 | ], 459 | "source": [ 460 | "data1.merge(data2,on='subject_id',how='inner')" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "### Step 9. Merge all values in data1 and data2, with matching records from both sides where available." 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 29, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/html": [ 478 | "
\n", 479 | "\n", 492 | "\n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | "
subject_idfirst_name_xlast_name_xfirst_name_ylast_name_y
01AlexAndersonNaNNaN
12AmyAckermanNaNNaN
23AllenAliNaNNaN
34AliceAoniBillyBonder
45AyoungAtichesBrianBlack
56NaNNaNBranBalwner
67NaNNaNBryceBrice
78NaNNaNBettyBtisan
\n", 570 | "
" 571 | ], 572 | "text/plain": [ 573 | " subject_id first_name_x last_name_x first_name_y last_name_y\n", 574 | "0 1 Alex Anderson NaN NaN\n", 575 | "1 2 Amy Ackerman NaN NaN\n", 576 | "2 3 Allen Ali NaN NaN\n", 577 | "3 4 Alice Aoni Billy Bonder\n", 578 | "4 5 Ayoung Atiches Brian Black\n", 579 | "5 6 NaN NaN Bran Balwner\n", 580 | "6 7 NaN NaN Bryce Brice\n", 581 | "7 8 NaN NaN Betty Btisan" 582 | ] 583 | }, 584 | "execution_count": 29, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [ 590 | "data1.merge(data2,on='subject_id',how='outer')" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": null, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [] 599 | } 600 | ], 601 | "metadata": { 602 | "kernelspec": { 603 | "display_name": "Python 3", 604 | "language": "python", 605 | "name": "python3" 606 | }, 607 | "language_info": { 608 | "codemirror_mode": { 609 | "name": "ipython", 610 | "version": 3 611 | }, 612 | "file_extension": ".py", 613 | "mimetype": "text/x-python", 614 | "name": "python", 615 | "nbconvert_exporter": "python", 616 | "pygments_lexer": "ipython3", 617 | "version": "3.6.5" 618 | } 619 | }, 620 | "nbformat": 4, 621 | "nbformat_minor": 1 622 | } 623 | -------------------------------------------------------------------------------- /time series/Apple_Stock/Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Apple Stock" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "We are going to use Apple's stock price.\n", 17 | "\n", 18 | "\n", 19 | "### Step 1. 
Import the necessary libraries" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "import pandas as pd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "raw", 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "source": [] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "### Step 3. Assign it to a variable apple" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 49, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/html": [ 61 | "
\n", 62 | "\n", 75 | "\n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
DateOpenHighLowCloseVolumeAdj Close
02014-07-0896.2796.8093.9295.356513000095.35
12014-07-0794.1495.9994.1095.975630540095.97
22014-07-0393.6794.1093.2094.032289180094.03
32014-07-0293.8794.0693.0993.482842090093.48
42014-07-0193.5294.0793.1393.523817020093.52
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " Date Open High Low Close Volume Adj Close\n", 145 | "0 2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", 146 | "1 2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", 147 | "2 2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", 148 | "3 2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", 149 | "4 2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52" 150 | ] 151 | }, 152 | "execution_count": 49, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "apple = pd.read_csv(\"https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv\")\n", 159 | "apple.head()\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Step 4. Check out the type of the columns" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 50, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "Date object\n", 185 | "Open float64\n", 186 | "High float64\n", 187 | "Low float64\n", 188 | "Close float64\n", 189 | "Volume int64\n", 190 | "Adj Close float64\n", 191 | "dtype: object" 192 | ] 193 | }, 194 | "execution_count": 50, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "apple.dtypes" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "### Step 5. 
Convert the Date column to a datetime type" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 51, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "Date datetime64[ns]\n", 219 | "Open float64\n", 220 | "High float64\n", 221 | "Low float64\n", 222 | "Close float64\n", 223 | "Volume int64\n", 224 | "Adj Close float64\n", 225 | "dtype: object" 226 | ] 227 | }, 228 | "execution_count": 51, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "apple.Date = pd.to_datetime(apple['Date'])\n", 235 | "apple.dtypes" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "### Step 6. Set the date as the index" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 52, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "apple.set_index('Date',inplace=True)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "### Step 7. Are there any duplicate dates?" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 53, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "True" 270 | ] 271 | }, 272 | "execution_count": 53, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "apple.index.is_unique" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "### Step 8. Oops... it seems the index starts from the most recent date. Make the first entry the oldest date." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 59, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "data": { 295 | "text/html": [ 296 | "
\n", 297 | "\n", 310 | "\n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | "
OpenHighLowCloseVolumeAdj Close
Date
1980-12-1228.7528.8728.7528.751172584000.45
1980-12-1527.3827.3827.2527.25439712000.42
1980-12-1625.3725.3725.2525.25264320000.39
1980-12-1725.8726.0025.8725.87216104000.40
1980-12-1826.6326.7526.6326.63183624000.41
\n", 379 | "
" 380 | ], 381 | "text/plain": [ 382 | " Open High Low Close Volume Adj Close\n", 383 | "Date \n", 384 | "1980-12-12 28.75 28.87 28.75 28.75 117258400 0.45\n", 385 | "1980-12-15 27.38 27.38 27.25 27.25 43971200 0.42\n", 386 | "1980-12-16 25.37 25.37 25.25 25.25 26432000 0.39\n", 387 | "1980-12-17 25.87 26.00 25.87 25.87 21610400 0.40\n", 388 | "1980-12-18 26.63 26.75 26.63 26.63 18362400 0.41" 389 | ] 390 | }, 391 | "execution_count": 59, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "apple.sort_index(ascending=True,in).head()" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "### Step 9. Get the last business day of each month" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 56, 410 | "metadata": {}, 411 | "outputs": [ 412 | { 413 | "data": { 414 | "text/html": [ 415 | "
\n", 416 | "\n", 429 | "\n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | "
OpenHighLowCloseVolumeAdj Close
Date
2014-07-0896.2796.8093.9295.356513000095.35
2014-07-0794.1495.9994.1095.975630540095.97
2014-07-0393.6794.1093.2094.032289180094.03
2014-07-0293.8794.0693.0993.482842090093.48
2014-07-0193.5294.0793.1393.523817020093.52
\n", 498 | "
" 499 | ], 500 | "text/plain": [ 501 | " Open High Low Close Volume Adj Close\n", 502 | "Date \n", 503 | "2014-07-08 96.27 96.80 93.92 95.35 65130000 95.35\n", 504 | "2014-07-07 94.14 95.99 94.10 95.97 56305400 95.97\n", 505 | "2014-07-03 93.67 94.10 93.20 94.03 22891800 94.03\n", 506 | "2014-07-02 93.87 94.06 93.09 93.48 28420900 93.48\n", 507 | "2014-07-01 93.52 94.07 93.13 93.52 38170200 93.52" 508 | ] 509 | }, 510 | "execution_count": 56, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [ 516 | "apple.head()" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "### Step 10. What is the difference in days between the most recent day and the oldest day?" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": {}, 530 | "outputs": [], 531 | "source": [] 532 | }, 533 | { 534 | "cell_type": "markdown", 535 | "metadata": {}, 536 | "source": [ 537 | "### Step 11. How many months do we have in the data?" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": {}, 544 | "outputs": [], 545 | "source": [] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | "### Step 12. Plot the 'Adj Close' value. Set the size of the figure to 13.5 x 9 inches" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": null, 557 | "metadata": {}, 558 | "outputs": [], 559 | "source": [] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "### BONUS: Create your own question and answer it." 
566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": null, 571 | "metadata": { 572 | "collapsed": true 573 | }, 574 | "outputs": [], 575 | "source": [] 576 | } 577 | ], 578 | "metadata": { 579 | "anaconda-cloud": {}, 580 | "kernelspec": { 581 | "display_name": "Python 3", 582 | "language": "python", 583 | "name": "python3" 584 | }, 585 | "language_info": { 586 | "codemirror_mode": { 587 | "name": "ipython", 588 | "version": 3 589 | }, 590 | "file_extension": ".py", 591 | "mimetype": "text/x-python", 592 | "name": "python", 593 | "nbconvert_exporter": "python", 594 | "pygments_lexer": "ipython3", 595 | "version": "3.6.5" 596 | } 597 | }, 598 | "nbformat": 4, 599 | "nbformat_minor": 1 600 | } 601 | -------------------------------------------------------------------------------- /time series/Getting_Financial_Data/Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Financial Data - Google Finance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will get data from a website.\n", 17 | "\n", 18 | "\n", 19 | "### Step 1. Import the necessary libraries" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Step 2. Create your time range (start and end variables). 
The start date should be 01/01/2015 and the end should be today (whatever your today is)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Step 3. Select the Apple, Tesla, Twitter, IBM, LinkedIn stock symbols and assign them to a variable called stocks" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "### Step 4. Read the data from google, assign to df and print it" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### Step 5. What is the type of structure of df ?" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Step 6. Print all the Items axis values\n", 100 | "#### To learn more about the Panel structure go to [documentation](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#panel) " 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 | }, 109 | "outputs": [], 110 | "source": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Step 7. Good, now we know the data available. Create a dataFrame called vol, with the Volume values." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [], 126 | "source": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Step 8. Aggregate the data of Volume to weekly\n", 133 | "#### Hint: Be careful to not sum data from the same week of 2015 and other years." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false, 141 | "scrolled": true 142 | }, 143 | "outputs": [], 144 | "source": [] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### Step 9. Find all the volume traded in the year of 2015" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### BONUS: Create your own question and answer it." 
167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 2", 182 | "language": "python", 183 | "name": "python2" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 2 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython2", 195 | "version": "2.7.11" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 0 200 | } 201 | -------------------------------------------------------------------------------- /time series/Getting_Financial_Data/Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Getting Financial Data - Google Finance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "This time you will get data from a website.\n", 17 | "\n", 18 | "\n", 19 | "### Step 1. Import the necessary libraries" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 30, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "\n", 32 | "# package to extract data from various Internet sources into a DataFrame\n", 33 | "# make sure you have it installed\n", 34 | "from pandas_datareader import data, wb\n", 35 | "\n", 36 | "# package for dates\n", 37 | "import datetime as dt" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Step 2. Create your time range (start and end variables). 
The start date should be 01/01/2015 and the end should be today (whatever your today is)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 75, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "datetime.datetime(2015, 1, 1, 0, 0)" 58 | ] 59 | }, 60 | "execution_count": 75, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### Step 3. Select the Apple, Tesla, Twitter, IBM, LinkedIn stock symbols and assign them to a variable called stocks" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 1, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "['AAPL', 'TSLA', 'IBM', 'LNKD']" 85 | ] 86 | }, 87 | "execution_count": 1, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Step 4. Read the data from google, assign to df and print it" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 77, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "\n", 112 | "Dimensions: 5 (items) x 399 (major_axis) x 4 (minor_axis)\n", 113 | "Items axis: Open to Volume\n", 114 | "Major_axis axis: 2015-01-02 00:00:00 to 2016-08-02 00:00:00\n", 115 | "Minor_axis axis: AAPL to TSLA" 116 | ] 117 | }, 118 | "execution_count": 77, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Step 5. What is the type of structure of df ?" 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# 'pandas.core.panel.Panel'" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Step 6. Print all the Items axis values\n", 148 | "#### To learn more about the Panel structure go to [documentation](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#panel) " 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 120, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "Index([u'Open', u'High', u'Low', u'Close', u'Volume'], dtype='object')" 162 | ] 163 | }, 164 | "execution_count": 120, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### Step 7. Good, now we know the data available. Create a dataFrame called vol, with the Volume values." 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 122, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/html": [ 188 | "
\n", 189 | "\n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | "
AAPLIBMLNKDTSLA
Date
2015-01-0253204626.05525466.01203743.04764443.0
2015-01-0564285491.04880389.01400562.05368477.0
2015-01-0665797116.06146712.02006546.06261936.0
2015-01-0740105934.04701839.0985016.02968390.0
2015-01-0859364547.04241113.01293955.03442509.0
\n", 244 | "
" 245 | ], 246 | "text/plain": [ 247 | " AAPL IBM LNKD TSLA\n", 248 | "Date \n", 249 | "2015-01-02 53204626.0 5525466.0 1203743.0 4764443.0\n", 250 | "2015-01-05 64285491.0 4880389.0 1400562.0 5368477.0\n", 251 | "2015-01-06 65797116.0 6146712.0 2006546.0 6261936.0\n", 252 | "2015-01-07 40105934.0 4701839.0 985016.0 2968390.0\n", 253 | "2015-01-08 59364547.0 4241113.0 1293955.0 3442509.0" 254 | ] 255 | }, 256 | "execution_count": 122, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "### Step 8. Aggregate the data of Volume to weekly\n", 268 | "#### Hint: Be careful to not sum data from the same week of 2015 and other years." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 132, 274 | "metadata": { 275 | "collapsed": false, 276 | "scrolled": true 277 | }, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/html": [ 282 | "
\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
AAPLIBMLNKDTSLA
weekyear
1201553204626.05525466.01203743.04764443.0
2016343422014.025233098.06630485.020967926.0
22015283252615.024458400.07203125.022709607.0
2016302072797.029379214.09160521.022997290.0
32015304226647.023263206.07084168.030799137.0
\n", 343 | "
" 344 | ], 345 | "text/plain": [ 346 | " AAPL IBM LNKD TSLA\n", 347 | "week year \n", 348 | "1 2015 53204626.0 5525466.0 1203743.0 4764443.0\n", 349 | " 2016 343422014.0 25233098.0 6630485.0 20967926.0\n", 350 | "2 2015 283252615.0 24458400.0 7203125.0 22709607.0\n", 351 | " 2016 302072797.0 29379214.0 9160521.0 22997290.0\n", 352 | "3 2015 304226647.0 23263206.0 7084168.0 30799137.0" 353 | ] 354 | }, 355 | "execution_count": 132, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "### Step 9. Find all the volume traded in the year of 2015" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 131, 372 | "metadata": { 373 | "collapsed": false 374 | }, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/html": [ 379 | "
\n", 380 | "\n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | "
AAPLIBMLNKDTSLA
year
20151.301994e+101.100959e+09440376163.01.085839e+09
20166.081474e+096.585250e+08453233878.07.540962e+08
\n", 414 | "
" 415 | ], 416 | "text/plain": [ 417 | " AAPL IBM LNKD TSLA\n", 418 | "year \n", 419 | "2015 1.301994e+10 1.100959e+09 440376163.0 1.085839e+09\n", 420 | "2016 6.081474e+09 6.585250e+08 453233878.0 7.540962e+08" 421 | ] 422 | }, 423 | "execution_count": 131, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 | "source": [ 434 | "### BONUS: Create your own question and answer it." 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "collapsed": true 442 | }, 443 | "outputs": [], 444 | "source": [] 445 | } 446 | ], 447 | "metadata": { 448 | "kernelspec": { 449 | "display_name": "Python 2", 450 | "language": "python", 451 | "name": "python2" 452 | }, 453 | "language_info": { 454 | "codemirror_mode": { 455 | "name": "ipython", 456 | "version": 2 457 | }, 458 | "file_extension": ".py", 459 | "mimetype": "text/x-python", 460 | "name": "python", 461 | "nbconvert_exporter": "python", 462 | "pygments_lexer": "ipython2", 463 | "version": "2.7.11" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 0 468 | } 469 | -------------------------------------------------------------------------------- /time series/Investor_Flow_of_Funds_US/Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Investor - Flow of Funds - US" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Introduction:\n", 15 | "\n", 16 | "Special thanks to: https://github.com/rgrp for sharing the dataset.\n", 17 | "\n", 18 | "### Step 1. 
Import the necessary libraries" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/datasets/investor-flow-of-funds-us/master/data/weekly.csv). " 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Step 3. Assign it to a variable called " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Step 4. What is the frequency of the dataset?" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Step 5. Set the column Date as the index." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Step 6. What is the type of the index?" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Step 7. 
Set the index to a DatetimeIndex type" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "### Step 8. Change the frequency to monthly, sum the values and assign it to monthly." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### Step 9. You will notice that months without any data were filled with NaN in the DataFrame. Let's drop these rows." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Step 10. Good, now we have the monthly data. Now change the frequency to yearly." 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### BONUS: Create your own question and answer it." 
170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 2", 185 | "language": "python", 186 | "name": "python2" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.11" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 0 203 | } 204 | --------------------------------------------------------------------------------