├── .gitattributes ├── .gitignore ├── 1. Python Crash Course ├── Python Crash Course Exercises - Solutions.ipynb ├── Python Crash Course Exercises .ipynb └── Python Crash Course.ipynb ├── 10. Linear-Regression ├── .ipynb_checkpoints │ ├── Creating Fake Data-checkpoint.ipynb │ ├── Linear Regression - Project Exercise - Solutions-checkpoint.ipynb │ ├── Linear Regression - Project Exercise -checkpoint.ipynb │ └── Linear Regression with Python-checkpoint.ipynb ├── Ecommerce Customers ├── Linear Regression - Project Exercise - Solutions.ipynb ├── Linear Regression - Project Exercise .ipynb ├── Linear Regression with Python.ipynb └── USA_Housing.csv ├── 11. Logistic-Regression ├── .ipynb_checkpoints │ ├── Logistic Regression Project - Solutions-checkpoint.ipynb │ ├── Logistic Regression Project -checkpoint.ipynb │ └── Logistic Regression with Python-checkpoint.ipynb ├── Logistic Regression Project - Solutions.ipynb ├── Logistic Regression Project .ipynb ├── Logistic Regression with Python.ipynb ├── advertising.csv ├── titanic_test.csv └── titanic_train.csv ├── 12. K-Nearest-Neighbors ├── .ipynb_checkpoints │ ├── K Nearest Neighbors Project - Solutions-checkpoint.ipynb │ ├── K Nearest Neighbors Project-checkpoint.ipynb │ ├── K Nearest Neighbors with Python-checkpoint.ipynb │ └── kNN_classification-checkpoint.ipynb ├── Classified Data ├── K Nearest Neighbors Project - Solutions.ipynb ├── K Nearest Neighbors Project.ipynb ├── K Nearest Neighbors with Python.ipynb ├── KNN_Project_Data ├── iris.data.txt └── kNN_classification.ipynb ├── 13. 
Decision-Trees-and-Random-Forests ├── .ipynb_checkpoints │ ├── Decision Trees and Random Forest Project - Solutions-checkpoint.ipynb │ ├── Decision Trees and Random Forest Project -checkpoint.ipynb │ └── Decision Trees and Random Forests in Python-checkpoint.ipynb ├── Decision Trees and Random Forest Project - Solutions.ipynb ├── Decision Trees and Random Forest Project .ipynb ├── Decision Trees and Random Forests in Python.ipynb ├── kyphosis.csv └── loan_data.csv ├── 14. Support-Vector-Machines ├── .ipynb_checkpoints │ ├── Support Vector Machines Project - Solutions-checkpoint.ipynb │ ├── Support Vector Machines Project -checkpoint.ipynb │ └── Support Vector Machines with Python-checkpoint.ipynb ├── Support Vector Machines Project - Solutions.ipynb ├── Support Vector Machines Project .ipynb └── Support Vector Machines with Python.ipynb ├── 15. K-Means-Clustering ├── .ipynb_checkpoints │ ├── K Means Clustering Project - Solutions-checkpoint.ipynb │ ├── K Means Clustering Project -checkpoint.ipynb │ └── K Means Clustering with Python-checkpoint.ipynb ├── College_Data ├── K Means Clustering Project - Solutions.ipynb ├── K Means Clustering Project .ipynb └── K Means Clustering with Python.ipynb ├── 16. Principal-Component-Analysis ├── .ipynb_checkpoints │ └── Principal Component Analysis-checkpoint.ipynb ├── PCA.png └── Principal Component Analysis.ipynb ├── 2. NumPy ├── NumPy Arrays.ipynb ├── Numpy Exercise - Solutions.ipynb ├── Numpy Exercise .ipynb ├── Numpy Indexing and Selection.ipynb └── Numpy Operations.ipynb ├── 3. 
Pandas ├── .ipynb_checkpoints │ ├── Data Input and Output-checkpoint.ipynb │ ├── DataFrames-checkpoint.ipynb │ ├── Groupby-checkpoint.ipynb │ ├── Introduction to Pandas-checkpoint.ipynb │ ├── Merging, Joining, and Concatenating -checkpoint.ipynb │ ├── Missing Data-checkpoint.ipynb │ ├── Operations-checkpoint.ipynb │ └── Series-checkpoint.ipynb ├── Data Input and Output.ipynb ├── DataFrames.ipynb ├── Excel_Sample.xlsx ├── Groupby.ipynb ├── Introduction to Pandas.ipynb ├── Merging, Joining, and Concatenating .ipynb ├── Missing Data.ipynb ├── Operations.ipynb ├── Pandas Exercises │ ├── .ipynb_checkpoints │ │ ├── Ecommerce Purchases Exercise - Solutions-checkpoint.ipynb │ │ ├── Ecommerce Purchases Exercise -checkpoint.ipynb │ │ ├── SF Salaries Exercise- Solutions-checkpoint.ipynb │ │ └── SF Salaries Exercise-checkpoint.ipynb │ ├── Ecommerce Purchases │ ├── Ecommerce Purchases Exercise - Solutions.ipynb │ ├── Ecommerce Purchases Exercise .ipynb │ ├── SF Salaries Exercise- Solutions.ipynb │ ├── SF Salaries Exercise.ipynb │ └── Salaries.csv ├── Series.ipynb ├── example └── multi_index_example ├── 4. Matplotlib ├── .ipynb_checkpoints │ ├── Advanced Matplotlib Concepts-checkpoint.ipynb │ ├── Matplotlib Concepts Lecture-checkpoint.ipynb │ ├── Matplotlib Exercises - Solutions-checkpoint.ipynb │ └── Matplotlib Exercises -checkpoint.ipynb ├── Advanced Matplotlib Concepts.ipynb ├── Matplotlib Concepts Lecture.ipynb ├── Matplotlib Exercises - Solutions.ipynb └── Matplotlib Exercises .ipynb ├── 5. Seaborn ├── Categorical Plots.ipynb ├── Distribution Plots.ipynb ├── Grids.ipynb ├── Matrix Plots.ipynb ├── Regression Plots.ipynb ├── Seaborn Exercises - Solutions.ipynb ├── Seaborn Exercises .ipynb └── Style and Color.ipynb ├── 6. 
Pandas Built-in Data Viz ├── .ipynb_checkpoints │ ├── Pandas Built-in Data Visualization-checkpoint.ipynb │ ├── Pandas Data Visualization Exercise - Solutions-checkpoint.ipynb │ └── Pandas Data Visualization Exercise -checkpoint.ipynb ├── Pandas Built-in Data Visualization.ipynb ├── Pandas Data Visualization Exercise - Solutions.ipynb ├── Pandas Data Visualization Exercise .ipynb ├── df1 ├── df2 └── df3 ├── 7. Geographical Plotting ├── .ipynb_checkpoints │ ├── Choropleth Maps Exercise - Solutions-checkpoint.ipynb │ ├── Choropleth Maps Exercise -checkpoint.ipynb │ └── Choropleth Maps-checkpoint.ipynb ├── 2011_US_AGRI_Exports ├── 2012_Election_Data ├── 2014_World_GDP ├── 2014_World_Power_Consumption ├── Choropleth Maps Exercise - Solutions.ipynb ├── Choropleth Maps Exercise .ipynb ├── Choropleth Maps.ipynb └── plotly_cheat_sheet.pdf ├── 8. Plotly and Cufflinks ├── .ipynb_checkpoints │ └── Plotly and Cufflinks-checkpoint.ipynb └── Plotly and Cufflinks.ipynb ├── 9. Data-Capstone-Projects ├── .ipynb_checkpoints │ ├── 911 Calls Data Capstone Project - Solutions-checkpoint.ipynb │ ├── 911 Calls Data Capstone Project -checkpoint.ipynb │ ├── Finance Project - Solutions-checkpoint.ipynb │ ├── Finance Project -checkpoint.ipynb │ └── SF Salaries Exercise- Solutions-checkpoint.ipynb ├── 911 Calls Data Capstone Project - Solutions.ipynb ├── 911 Calls Data Capstone Project .ipynb ├── 911.csv ├── Finance Project - Solutions.ipynb ├── Finance Project .ipynb └── precipitation.html └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf 
diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /1. Python Crash Course/Python Crash Course Exercises - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Crash Course Exercises - Solutions\n", 8 | "\n", 9 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. 
I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp]()" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Exercises\n", 17 | "\n", 18 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "** What is 7 to the power of 4?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "2401" 39 | ] 40 | }, 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "7 **4" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "** Split this string:**\n", 55 | "\n", 56 | " s = \"Hi there Sam!\"\n", 57 | " \n", 58 | "**into a list. **" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "s = 'Hi there Sam!'" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['Hi', 'there', 'dad!']" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "s.split()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "** Given the variables:**\n", 99 | "\n", 100 | " planet = \"Earth\"\n", 101 | " diameter = 12742\n", 102 | "\n", 103 | "** Use .format() to print the following string: **\n", 104 | "\n", 105 | " The diameter of Earth is 12742 kilometers." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "planet = \"Earth\"\n", 117 | "diameter = 12742" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "The diameter of Earth is 12742 kilometers.\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "print(\"The diameter of {} is {} kilometers.\".format(planet,diameter))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "** Given this nested list, use indexing to grab the word \"hello\" **" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 14, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "'hello'" 168 | ] 169 | }, 170 | "execution_count": 14, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "lst[3][1][2][0]" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "** Given this nest dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 16, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 22, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "'hello'" 208 | ] 209 | }, 210 | "execution_count": 22, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "d['k1'][3]['tricky'][3]['target'][3]" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "** What is the main difference between a tuple and a list? **" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 23, 229 | "metadata": { 230 | "collapsed": true 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# Tuple is immutable" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "** Create a function that grabs the email website domain from a string in the form: **\n", 242 | "\n", 243 | " user@domain.com\n", 244 | " \n", 245 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 24, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "def domainGet(email):\n", 257 | " return email.split('@')[-1]" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 26, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "'domain.com'" 271 | ] 272 | }, 273 | "execution_count": 26, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 
| "source": [ 279 | "domainGet('user@domain.com')" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 27, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "def findDog(st):\n", 298 | " return 'dog' in st.lower().split()" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 28, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "True" 312 | ] 313 | }, 314 | "execution_count": 28, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "findDog('Is there a dog here?')" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. 
**" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 30, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "def countDog(st):\n", 339 | " count = 0\n", 340 | " for word in st.lower().split():\n", 341 | " if word == 'dog':\n", 342 | " count += 1\n", 343 | " return count" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 31, 349 | "metadata": { 350 | "collapsed": false 351 | }, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "2" 357 | ] 358 | }, 359 | "execution_count": 31, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "countDog('This dog runs faster than the other dog dude!')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. For example:**\n", 373 | "\n", 374 | " seq = ['soup','dog','salad','cat','great']\n", 375 | "\n", 376 | "**should be filtered down to:**\n", 377 | "\n", 378 | " ['soup','salad']" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 34, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "seq = ['soup','dog','salad','cat','great']" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 35, 395 | "metadata": { 396 | "collapsed": false 397 | }, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/plain": [ 402 | "['soup', 'salad']" 403 | ] 404 | }, 405 | "execution_count": 35, 406 | "metadata": {}, 407 | "output_type": "execute_result" 408 | } 409 | ], 410 | "source": [ 411 | "list(filter(lambda word: word[0]=='s',seq))" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "### Final Problem\n", 419 | "**You are driving a little too fast, and a police 
officer stops you. Write a function\n", 420 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". \n", 421 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 422 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 423 | " cases. **" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 4, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "def caught_speeding(speed, is_birthday):\n", 435 | " \n", 436 | " if is_birthday:\n", 437 | " speeding = speed - 5\n", 438 | " else:\n", 439 | " speeding = speed\n", 440 | " \n", 441 | " if speeding > 80:\n", 442 | " return 'Big Ticket'\n", 443 | " elif speeding > 60:\n", 444 | " return 'Small Ticket'\n", 445 | " else:\n", 446 | " return 'No Ticket'" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 5, 452 | "metadata": { 453 | "collapsed": false 454 | }, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/plain": [ 459 | "'Small Ticket'" 460 | ] 461 | }, 462 | "execution_count": 5, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "caught_speeding(81,True)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 6, 474 | "metadata": { 475 | "collapsed": false 476 | }, 477 | "outputs": [ 478 | { 479 | "data": { 480 | "text/plain": [ 481 | "'Big Ticket'" 482 | ] 483 | }, 484 | "execution_count": 6, 485 | "metadata": {}, 486 | "output_type": "execute_result" 487 | } 488 | ], 489 | "source": [ 490 | "caught_speeding(81,False)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "# Great job!" 
498 | ] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.5.1" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 0 522 | } 523 | -------------------------------------------------------------------------------- /1. Python Crash Course/Python Crash Course Exercises .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Crash Course Exercises \n", 8 | "\n", 9 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp](https://www.udemy.com/complete-python-bootcamp)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Exercises\n", 17 | "\n", 18 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "** What is 7 to the power of 4?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "2401" 39 | ] 40 | }, 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "** Split this string:**\n", 53 | "\n", 54 | " s = \"Hi there Sam!\"\n", 55 | " \n", 56 | "**into a list. **" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "['Hi', 'there', 'dad!']" 79 | ] 80 | }, 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "** Given the variables:**\n", 93 | "\n", 94 | " planet = \"Earth\"\n", 95 | " diameter = 12742\n", 96 | "\n", 97 | "** Use .format() to print the following string: **\n", 98 | "\n", 99 | " The diameter of Earth is 12742 kilometers." 
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "planet = \"Earth\"\n", 111 | "diameter = 12742" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "The diameter of Earth is 12742 kilometers.\n" 126 | ] 127 | } 128 | ], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "** Given this nested list, use indexing to grab the word \"hello\" **" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 14, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "'hello'" 160 | ] 161 | }, 162 | "execution_count": 14, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "** Given this nested dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 16, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 22, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "'hello'" 198 | ] 199 | }, 200 | "execution_count": 22, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "** What is the main difference between a tuple and a list? **" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 23, 217 | "metadata": { 218 | "collapsed": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "# Tuple is immutable" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "** Create a function that grabs the email website domain from a string in the form: **\n", 230 | "\n", 231 | " user@domain.com\n", 232 | " \n", 233 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 24, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 26, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "'domain.com'" 256 | ] 257 | }, 258 | "execution_count": 26, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "domainGet('user@domain.com')" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 
270 | "source": [ 271 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 27, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 28, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "True" 294 | ] 295 | }, 296 | "execution_count": 28, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "findDog('Is there a dog here?')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. **" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 30, 315 | "metadata": { 316 | "collapsed": false 317 | }, 318 | "outputs": [], 319 | "source": [] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 31, 324 | "metadata": { 325 | "collapsed": false 326 | }, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "2" 332 | ] 333 | }, 334 | "execution_count": 31, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "countDog('This dog runs faster than the other dog dude!')" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. 
For example:**\n", 348 | "\n", 349 | " seq = ['soup','dog','salad','cat','great']\n", 350 | "\n", 351 | "**should be filtered down to:**\n", 352 | "\n", 353 | " ['soup','salad']" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 34, 359 | "metadata": { 360 | "collapsed": true 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "seq = ['soup','dog','salad','cat','great']" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 35, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "['soup', 'salad']" 378 | ] 379 | }, 380 | "execution_count": 35, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "### Final Problem\n", 392 | "**You are driving a little too fast, and a police officer stops you. Write a function\n", 393 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". \n", 394 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 395 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 396 | " cases. 
**" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 36, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "def caught_speeding(speed, is_birthday):\n", 408 | " pass" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 42, 414 | "metadata": { 415 | "collapsed": false 416 | }, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "'Small Ticket'" 422 | ] 423 | }, 424 | "execution_count": 42, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "caught_speeding(81,True)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 43, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/plain": [ 443 | "'Big Ticket'" 444 | ] 445 | }, 446 | "execution_count": 43, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "caught_speeding(81,False)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "# Great job!" 460 | ] 461 | } 462 | ], 463 | "metadata": { 464 | "kernelspec": { 465 | "display_name": "Python 3", 466 | "language": "python", 467 | "name": "python3" 468 | }, 469 | "language_info": { 470 | "codemirror_mode": { 471 | "name": "ipython", 472 | "version": 3 473 | }, 474 | "file_extension": ".py", 475 | "mimetype": "text/x-python", 476 | "name": "python", 477 | "nbconvert_exporter": "python", 478 | "pygments_lexer": "ipython3", 479 | "version": "3.5.1" 480 | } 481 | }, 482 | "nbformat": 4, 483 | "nbformat_minor": 0 484 | } 485 | -------------------------------------------------------------------------------- /12. 
K-Nearest-Neighbors/.ipynb_checkpoints/kNN_classification-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## kNN implementation for a Classification Problem\n", 8 | "The model for kNN is the entire dataset. When a prediction is required for a unseen data instance, the kNN algorithm will search through the training dataset for the k-most similar instances. The prediction attribute of the most similar instances is summarized and returned as the prediction for the unseen data.\n", 9 | "\n", 10 | "The similarity measure is dependent on the type of data. For real-valued data, the Euclidean distance can be used.\n", 11 | "Other types of data such as categorical or binary , other distance measures could be used.\n", 12 | "\n", 13 | "In the case of regression problems, the average of the predicted attribute may be returned. In the case of classification problems, the most prevalent class may be returned." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Handle Data and make them to train and test set" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 81, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import csv\n", 32 | "import random\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 82, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def loadDataset(filename, split, trainingSet = [], testSet = []):\n", 45 | " with open(filename) as csvfile:\n", 46 | " lines = csv.reader(csvfile)\n", 47 | " dataset = list(lines)\n", 48 | " for x in range(len(dataset) - 1):\n", 49 | " for y in range(4):\n", 50 | " dataset[x][y] = float(dataset[x][y])\n", 51 | " if random.random() < split:\n", 52 | " trainingSet.append(dataset[x])\n", 53 | " else:\n", 54 | " testSet.append(dataset[x])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 83, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "[[5.0, 3.6, 1.4, 0.2, 'Iris-setosa'], [5.4, 3.9, 1.7, 0.4, 'Iris-setosa'], [4.6, 3.4, 1.4, 0.3, 'Iris-setosa']]\n", 69 | "[[5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [4.7, 3.2, 1.3, 0.2, 'Iris-setosa']]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "trainingSet = []\n", 75 | "testSet = []\n", 76 | "a = loadDataset('iris.data.txt', 0.66, trainingSet, testSet)\n", 77 | "print(trainingSet[0:3])\n", 78 | "print(testSet[0:3])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Similarity\n", 86 | "In order to make predictions we need to calculate the similarity between any two given data instances. 
In this case it is the Euclidean distance which is the square root of the sum of the squared differences between the two array of numbers. " 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 84, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def euclideanDistance(instance1, instance2, length): # length : length of the array you want the distance to be calculated\n", 98 | " distance = 0\n", 99 | " for x in range(length):\n", 100 | " distance += np.power((instance1[x] - instance2[x]), 2)\n", 101 | " return np.sqrt(distance)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 85, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "3.46410161514\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "data1 = [2, 2, 2, 'a']\n", 121 | "data2 = [4, 4, 4, 'b']\n", 122 | "euc_dist = euclideanDistance(data1, data2, 3)\n", 123 | "print(euc_dist)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Neighbors\n", 131 | "Now that we have a similarity measure, we can use it to collect the k-most similar instances for a given unseen instance.\n", 132 | "So, in principle we are calculating the distances of all instances of the train set with the one instance of test set (unseen set) and selecting a subset with the smallest distance values." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 86, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "import operator\n", 144 | "\n", 145 | "def getNeighbors(trainingSet, testInstance, k):\n", 146 | " distances = []\n", 147 | " length = len(testInstance) - 1 # removing the response column from the array\n", 148 | " for x in range(len(trainingSet)):\n", 149 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n", 150 | " distances.append((trainingSet[x], dist)) # a tuple of training set observation and the distance.\n", 151 | " distances.sort(key = operator.itemgetter(1)) # sort the tuple by the value (ascending) as the input arg is 1\n", 152 | " neighbors = []\n", 153 | " for x in range(k): # k-nearest neighbors\n", 154 | " neighbors.append(distances[x][0]) # just the train instance and not the distance\n", 155 | " return neighbors" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 87, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "[[4, 4, 4, 'b']]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n", 175 | "testInstance = [5, 5, 5]\n", 176 | "k = 1\n", 177 | "neighbors = getNeighbors(trainSet, testInstance, 1)\n", 178 | "print(neighbors)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Response\n", 186 | "Once we have located the most similar neighbors for a test instance, the next task is to devise a predicted response based on these neighbors. We can do it by allowing each neighbor to vote for their class attribute and take majority vote as the prediction." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 88, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def getResponse(neighbors):\n", 198 | " classVotes = {}\n", 199 | " for x in range(len(neighbors)):\n", 200 | " response = neighbors[x][-1] # extracting the class value of the neighbors\n", 201 | " if response not in classVotes:\n", 202 | " classVotes[response] = 1\n", 203 | " else:\n", 204 | " classVotes[response] += 1\n", 205 | " sortedVotes = sorted(classVotes.items(), key = operator.itemgetter(1), reverse=True) # descending by values\n", 206 | " return sortedVotes[0][0] # 1st tuple and 1st item which is the response variable (with highest vote)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 89, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "a\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "neighbors = [[2, 2, 2, 'a'], [1, 1, 1, 'a'], [3, 3, 3, 'c']]\n", 226 | "response = getResponse(neighbors)\n", 227 | "print(response)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Accuracy" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 95, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "def getAccuracy(testSet, predictions):\n", 246 | " correct = 0\n", 247 | " for x in range(len(testSet)):\n", 248 | " if testSet[x][-1] == predictions[x]:\n", 249 | " correct += 1\n", 250 | " return (correct / float(len(testSet))) * 100.00" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 96, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "66.66666666666666\n" 265 | ] 266 | } 267 | ], 268 | 
"source": [ 269 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 270 | "predictions = ['a', 'a', 'a']\n", 271 | "accuracy = getAccuracy(testSet, predictions)\n", 272 | "print(accuracy)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 97, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 287 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 288 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 289 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 290 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 291 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 292 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 293 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 294 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 295 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 296 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 297 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 298 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 299 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 300 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 301 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 302 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 303 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 304 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 305 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 306 | "Predicted = 'Iris-virginica' Actual = 'Iris-versicolor'\n", 307 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 308 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 309 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 310 | "Predicted = 
'Iris-versicolor' Actual = 'Iris-versicolor'\n", 311 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 312 | "Predicted = 'Iris-versicolor' Actual = 'Iris-virginica'\n", 313 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 314 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 315 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 316 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 317 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 318 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 319 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 320 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 321 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 322 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 323 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 324 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 325 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 326 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 327 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 328 | "Accuracy: 95.23809523809523%\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "def main():\n", 334 | " trainingSet = []\n", 335 | " testSet = []\n", 336 | " split = 0.67\n", 337 | " loadDataset('iris.data.txt', split, trainingSet, testSet)\n", 338 | " \n", 339 | " predictions = []\n", 340 | " k = 10\n", 341 | " for x in range(len(testSet)):\n", 342 | " neighbors = getNeighbors(trainingSet, testSet[x], k)\n", 343 | " result = getResponse(neighbors)\n", 344 | " predictions.append(result)\n", 345 | " print('Predicted = ' +repr(result)+ ' Actual = ' +repr(testSet[x][-1]))\n", 346 | " accuracy = getAccuracy(testSet, predictions)\n", 347 | " print('Accuracy: ' +repr(accuracy)+ '%')\n", 348 | "main()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": { 355 | 
"collapsed": true 356 | }, 357 | "outputs": [], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.5.1" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 0 391 | } 392 | -------------------------------------------------------------------------------- /12. K-Nearest-Neighbors/iris.data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 
5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 
6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 
6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /12. K-Nearest-Neighbors/kNN_classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## kNN implementation for a Classification Problem\n", 8 | "The model for kNN is the entire dataset. When a prediction is required for an unseen data instance, the kNN algorithm will search through the training dataset for the k-most similar instances. The prediction attribute of the most similar instances is summarized and returned as the prediction for the unseen data.\n", 9 | "\n", 10 | "The similarity measure is dependent on the type of data. For real-valued data, the Euclidean distance can be used.\n", 11 | "For other types of data, such as categorical or binary, other distance measures could be used.\n", 12 | "\n", 13 | "In the case of regression problems, the average of the predicted attribute may be returned. In the case of classification problems, the most prevalent class may be returned." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Handle Data and make them to train and test set" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 81, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import csv\n", 32 | "import random\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 82, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def loadDataset(filename, split, trainingSet = [], testSet = []):\n", 45 | " with open(filename) as csvfile:\n", 46 | " lines = csv.reader(csvfile)\n", 47 | " dataset = list(lines)\n", 48 | " for x in range(len(dataset) - 1):\n", 49 | " for y in range(4):\n", 50 | " dataset[x][y] = float(dataset[x][y])\n", 51 | " if random.random() < split:\n", 52 | " trainingSet.append(dataset[x])\n", 53 | " else:\n", 54 | " testSet.append(dataset[x])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 83, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "[[5.0, 3.6, 1.4, 0.2, 'Iris-setosa'], [5.4, 3.9, 1.7, 0.4, 'Iris-setosa'], [4.6, 3.4, 1.4, 0.3, 'Iris-setosa']]\n", 69 | "[[5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [4.7, 3.2, 1.3, 0.2, 'Iris-setosa']]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "trainingSet = []\n", 75 | "testSet = []\n", 76 | "a = loadDataset('iris.data.txt', 0.66, trainingSet, testSet)\n", 77 | "print(trainingSet[0:3])\n", 78 | "print(testSet[0:3])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Similarity\n", 86 | "In order to make predictions we need to calculate the similarity between any two given data instances. 
In this case it is the Euclidean distance which is the square root of the sum of the squared differences between the two array of numbers. " 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 84, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def euclideanDistance(instance1, instance2, length): # length : length of the array you want the distance to be calculated\n", 98 | " distance = 0\n", 99 | " for x in range(length):\n", 100 | " distance += np.power((instance1[x] - instance2[x]), 2)\n", 101 | " return np.sqrt(distance)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 85, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "3.46410161514\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "data1 = [2, 2, 2, 'a']\n", 121 | "data2 = [4, 4, 4, 'b']\n", 122 | "euc_dist = euclideanDistance(data1, data2, 3)\n", 123 | "print(euc_dist)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Neighbors\n", 131 | "Now that we have a similarity measure, we can use it to collect the k-most similar instances for a given unseen instance.\n", 132 | "So, in principle we are calculating the distances of all instances of the train set with the one instance of test set (unseen set) and selecting a subset with the smallest distance values." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 86, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "import operator\n", 144 | "\n", 145 | "def getNeighbors(trainingSet, testInstance, k):\n", 146 | " distances = []\n", 147 | " length = len(testInstance) - 1 # removing the response column from the array\n", 148 | " for x in range(len(trainingSet)):\n", 149 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n", 150 | " distances.append((trainingSet[x], dist)) # a tuple of training set observation and the distance.\n", 151 | " distances.sort(key = operator.itemgetter(1)) # sort the tuple by the value (ascending) as the input arg is 1\n", 152 | " neighbors = []\n", 153 | " for x in range(k): # k-nearest neighbors\n", 154 | " neighbors.append(distances[x][0]) # just the train instance and not the distance\n", 155 | " return neighbors" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 87, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "[[4, 4, 4, 'b']]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n", 175 | "testInstance = [5, 5, 5]\n", 176 | "k = 1\n", 177 | "neighbors = getNeighbors(trainSet, testInstance, 1)\n", 178 | "print(neighbors)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Response\n", 186 | "Once we have located the most similar neighbors for a test instance, the next task is to devise a predicted response based on these neighbors. We can do it by allowing each neighbor to vote for their class attribute and take majority vote as the prediction." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 88, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def getResponse(neighbors):\n", 198 | " classVotes = {}\n", 199 | " for x in range(len(neighbors)):\n", 200 | " response = neighbors[x][-1] # extracting the class value of the neighbors\n", 201 | " if response not in classVotes:\n", 202 | " classVotes[response] = 1\n", 203 | " else:\n", 204 | " classVotes[response] += 1\n", 205 | " sortedVotes = sorted(classVotes.items(), key = operator.itemgetter(1), reverse=True) # descending by values\n", 206 | " return sortedVotes[0][0] # 1st tuple and 1st item which is the response variable (with highest vote)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 89, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "a\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "neighbors = [[2, 2, 2, 'a'], [1, 1, 1, 'a'], [3, 3, 3, 'c']]\n", 226 | "response = getResponse(neighbors)\n", 227 | "print(response)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Accuracy" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 95, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "def getAccuracy(testSet, predictions):\n", 246 | " correct = 0\n", 247 | " for x in range(len(testSet)):\n", 248 | " if testSet[x][-1] == predictions[x]:\n", 249 | " correct += 1\n", 250 | " return (correct / float(len(testSet))) * 100.00" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 96, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "66.66666666666666\n" 265 | ] 266 | } 267 | ], 268 | 
"source": [ 269 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 270 | "predictions = ['a', 'a', 'a']\n", 271 | "accuracy = getAccuracy(testSet, predictions)\n", 272 | "print(accuracy)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 97, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 287 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 288 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 289 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 290 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 291 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 292 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 293 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 294 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 295 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 296 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 297 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 298 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 299 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 300 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 301 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 302 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 303 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 304 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 305 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 306 | "Predicted = 'Iris-virginica' Actual = 'Iris-versicolor'\n", 307 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 308 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 309 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 310 | "Predicted = 
'Iris-versicolor' Actual = 'Iris-versicolor'\n", 311 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 312 | "Predicted = 'Iris-versicolor' Actual = 'Iris-virginica'\n", 313 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 314 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 315 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 316 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 317 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 318 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 319 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 320 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 321 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 322 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 323 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 324 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 325 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 326 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 327 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 328 | "Accuracy: 95.23809523809523%\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "def main():\n", 334 | " trainingSet = []\n", 335 | " testSet = []\n", 336 | " split = 0.67\n", 337 | " loadDataset('iris.data.txt', split, trainingSet, testSet)\n", 338 | " \n", 339 | " predictions = []\n", 340 | " k = 10\n", 341 | " for x in range(len(testSet)):\n", 342 | " neighbors = getNeighbors(trainingSet, testSet[x], k)\n", 343 | " result = getResponse(neighbors)\n", 344 | " predictions.append(result)\n", 345 | " print('Predicted = ' +repr(result)+ ' Actual = ' +repr(testSet[x][-1]))\n", 346 | " accuracy = getAccuracy(testSet, predictions)\n", 347 | " print('Accuracy: ' +repr(accuracy)+ '%')\n", 348 | "main()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": { 355 | 
"collapsed": true 356 | }, 357 | "outputs": [], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.5.1" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 0 391 | } 392 | -------------------------------------------------------------------------------- /13. Decision-Trees-and-Random-Forests/kyphosis.csv: -------------------------------------------------------------------------------- 1 | "Kyphosis","Age","Number","Start" 2 | "absent",71,3,5 3 | "absent",158,3,14 4 | "present",128,4,5 5 | "absent",2,5,1 6 | "absent",1,4,15 7 | "absent",1,2,16 8 | "absent",61,2,17 9 | "absent",37,3,16 10 | "absent",113,2,16 11 | "present",59,6,12 12 | "present",82,5,14 13 | "absent",148,3,16 14 | "absent",18,5,2 15 | "absent",1,4,12 16 | "absent",168,3,18 17 | "absent",1,3,16 18 | "absent",78,6,15 19 | "absent",175,5,13 20 | "absent",80,5,16 21 | "absent",27,4,9 22 | "absent",22,2,16 23 | "present",105,6,5 24 | "present",96,3,12 25 | "absent",131,2,3 26 | "present",15,7,2 27 | "absent",9,5,13 28 | "absent",8,3,6 29 | "absent",100,3,14 30 | "absent",4,3,16 31 | "absent",151,2,16 32 | "absent",31,3,16 33 | "absent",125,2,11 34 | "absent",130,5,13 35 | "absent",112,3,16 36 | "absent",140,5,11 37 | "absent",93,3,16 38 | "absent",1,3,9 39 | "present",52,5,6 40 | "absent",20,6,9 41 | "present",91,5,12 42 | "present",73,5,1 43 | "absent",35,3,13 44 | "absent",143,9,3 45 | "absent",61,4,1 46 | "absent",97,3,16 47 
| "present",139,3,10 48 | "absent",136,4,15 49 | "absent",131,5,13 50 | "present",121,3,3 51 | "absent",177,2,14 52 | "absent",68,5,10 53 | "absent",9,2,17 54 | "present",139,10,6 55 | "absent",2,2,17 56 | "absent",140,4,15 57 | "absent",72,5,15 58 | "absent",2,3,13 59 | "present",120,5,8 60 | "absent",51,7,9 61 | "absent",102,3,13 62 | "present",130,4,1 63 | "present",114,7,8 64 | "absent",81,4,1 65 | "absent",118,3,16 66 | "absent",118,4,16 67 | "absent",17,4,10 68 | "absent",195,2,17 69 | "absent",159,4,13 70 | "absent",18,4,11 71 | "absent",15,5,16 72 | "absent",158,5,14 73 | "absent",127,4,12 74 | "absent",87,4,16 75 | "absent",206,4,10 76 | "absent",11,3,15 77 | "absent",178,4,15 78 | "present",157,3,13 79 | "absent",26,7,13 80 | "absent",120,2,13 81 | "present",42,7,6 82 | "absent",36,4,13 83 | -------------------------------------------------------------------------------- /16. Principal-Component-Analysis/PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/16. Principal-Component-Analysis/PCA.png -------------------------------------------------------------------------------- /2. NumPy/Numpy Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# NumPy Exercises \n", 18 | "\n", 19 | "Now that we've learned about NumPy let's test your knowledge. We'll start off with a few simple tasks, and then you'll be asked some more complicated questions." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "#### Import NumPy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "#### Create an array of 10 zeros " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 56 | ] 57 | }, 58 | "execution_count": 2, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "#### Create an array of 10 ones" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "#### Create an array of 10 fives" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])" 110 | ] 111 | }, 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "#### Create an array of the integers from 10 to 50" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 
128 | "execution_count": 5, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", 137 | " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", 138 | " 44, 45, 46, 47, 48, 49, 50])" 139 | ] 140 | }, 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Create an array of all the even integers from 10 to 50" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n", 166 | " 44, 46, 48, 50])" 167 | ] 168 | }, 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "#### Create a 3x3 matrix with values ranging from 0 to 8" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "array([[0, 1, 2],\n", 194 | " [3, 4, 5],\n", 195 | " [6, 7, 8]])" 196 | ] 197 | }, 198 | "execution_count": 7, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "#### Create a 3x3 identity matrix" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 8, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 
| "array([[ 1., 0., 0.],\n", 223 | " [ 0., 1., 0.],\n", 224 | " [ 0., 0., 1.]])" 225 | ] 226 | }, 227 | "execution_count": 8, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "#### Use NumPy to generate a random number between 0 and 1" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 15, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([ 0.42829726])" 252 | ] 253 | }, 254 | "execution_count": 15, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 33, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "array([ 1.32031013, 1.6798602 , -0.42985892, -1.53116655, 0.85753232,\n", 279 | " 0.87339938, 0.35668636, -1.47491157, 0.15349697, 0.99530727,\n", 280 | " -0.94865451, -1.69174783, 1.57525349, -0.70615234, 0.10991879,\n", 281 | " -0.49478947, 1.08279872, 0.76488333, -2.3039931 , 0.35401124,\n", 282 | " -0.45454399, -0.64754649, -0.29391671, 0.02339861, 0.38272124])" 283 | ] 284 | }, 285 | "execution_count": 33, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "#### Create the following matrix:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 35, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | 
"array([[ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],\n", 310 | " [ 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],\n", 311 | " [ 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],\n", 312 | " [ 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],\n", 313 | " [ 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],\n", 314 | " [ 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],\n", 315 | " [ 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],\n", 316 | " [ 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],\n", 317 | " [ 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],\n", 318 | " [ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]])" 319 | ] 320 | }, 321 | "execution_count": 35, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "#### Create an array of 20 linearly spaced points between 0 and 1:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 36, 338 | "metadata": { 339 | "collapsed": false 340 | }, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "array([ 0. , 0.05263158, 0.10526316, 0.15789474, 0.21052632,\n", 346 | " 0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,\n", 347 | " 0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,\n", 348 | " 0.78947368, 0.84210526, 0.89473684, 0.94736842, 1. 
])" 349 | ] 350 | }, 351 | "execution_count": 36, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Numpy Indexing and Selection\n", 363 | "\n", 364 | "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 38, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "array([[ 1, 2, 3, 4, 5],\n", 378 | " [ 6, 7, 8, 9, 10],\n", 379 | " [11, 12, 13, 14, 15],\n", 380 | " [16, 17, 18, 19, 20],\n", 381 | " [21, 22, 23, 24, 25]])" 382 | ] 383 | }, 384 | "execution_count": 38, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "mat = np.arange(1,26).reshape(5,5)\n", 391 | "mat" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 39, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [ 402 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 403 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 404 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 40, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "array([[12, 13, 14, 15],\n", 418 | " [17, 18, 19, 20],\n", 419 | " [22, 23, 24, 25]])" 420 | ] 421 | }, 422 | "execution_count": 40, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 29, 432 | "metadata": { 433 | "collapsed": true 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 438 
| "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 439 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 41, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "20" 453 | ] 454 | }, 455 | "execution_count": 41, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 30, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 471 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 472 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 42, 478 | "metadata": { 479 | "collapsed": false 480 | }, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "array([[ 2],\n", 486 | " [ 7],\n", 487 | " [12]])" 488 | ] 489 | }, 490 | "execution_count": 42, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 31, 500 | "metadata": { 501 | "collapsed": true 502 | }, 503 | "outputs": [], 504 | "source": [ 505 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 506 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 507 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 46, 513 | "metadata": { 514 | "collapsed": false 515 | }, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/plain": [ 520 | "array([21, 22, 23, 24, 25])" 521 | ] 522 | }, 523 | "execution_count": 46, 524 | "metadata": {}, 525 | "output_type": "execute_result" 526 | } 527 | ], 528 | "source": [] 529 | }, 530 | { 531 | "cell_type": "code", 
532 | "execution_count": 32, 533 | "metadata": { 534 | "collapsed": true 535 | }, 536 | "outputs": [], 537 | "source": [ 538 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 539 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 540 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 49, 546 | "metadata": { 547 | "collapsed": false 548 | }, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "array([[16, 17, 18, 19, 20],\n", 554 | " [21, 22, 23, 24, 25]])" 555 | ] 556 | }, 557 | "execution_count": 49, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "### Now do the following" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "#### Get the sum of all the values in mat" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 50, 581 | "metadata": { 582 | "collapsed": false 583 | }, 584 | "outputs": [ 585 | { 586 | "data": { 587 | "text/plain": [ 588 | "325" 589 | ] 590 | }, 591 | "execution_count": 50, 592 | "metadata": {}, 593 | "output_type": "execute_result" 594 | } 595 | ], 596 | "source": [] 597 | }, 598 | { 599 | "cell_type": "markdown", 600 | "metadata": {}, 601 | "source": [ 602 | "#### Get the standard deviation of the values in mat" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 51, 608 | "metadata": { 609 | "collapsed": false 610 | }, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "text/plain": [ 615 | "7.2111025509279782" 616 | ] 617 | }, 618 | "execution_count": 51, 619 | "metadata": {}, 620 | "output_type": "execute_result" 621 | } 622 | ], 623 | "source": [] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": {}, 628 | "source": [ 629 | "#### Get the sum of all the columns in 
mat" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 53, 635 | "metadata": { 636 | "collapsed": false 637 | }, 638 | "outputs": [ 639 | { 640 | "data": { 641 | "text/plain": [ 642 | "array([55, 60, 65, 70, 75])" 643 | ] 644 | }, 645 | "execution_count": 53, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | } 649 | ], 650 | "source": [] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "source": [ 658 | "# Great Job!" 659 | ] 660 | } 661 | ], 662 | "metadata": { 663 | "kernelspec": { 664 | "display_name": "Python 3", 665 | "language": "python", 666 | "name": "python3" 667 | }, 668 | "language_info": { 669 | "codemirror_mode": { 670 | "name": "ipython", 671 | "version": 3 672 | }, 673 | "file_extension": ".py", 674 | "mimetype": "text/x-python", 675 | "name": "python", 676 | "nbconvert_exporter": "python", 677 | "pygments_lexer": "ipython3", 678 | "version": "3.5.1" 679 | } 680 | }, 681 | "nbformat": 4, 682 | "nbformat_minor": 0 683 | } 684 | -------------------------------------------------------------------------------- /2. NumPy/Numpy Operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# NumPy Operations" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Arithmetic\n", 27 | "\n", 28 | "You can easily perform array with array arithmetic, or scalar with array arithmetic. 
Let's see some examples:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "arr = np.arange(0,10)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])" 54 | ] 55 | }, 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "arr + arr" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "arr * arr" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 4, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 98 | ] 99 | }, 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "arr - arr" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "name": "stderr", 118 | "output_type": "stream", 119 | "text": [ 120 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in true_divide\n", 121 | " if __name__ == '__main__':\n" 122 | ] 123 | }, 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "array([ nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 128 | ] 129 | }, 130 | "execution_count": 5, 131 | 
"metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "# Warning on division by zero, but not an error!\n", 137 | "# Just replaced with nan\n", 138 | "arr/arr" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stderr", 150 | "output_type": "stream", 151 | "text": [ 152 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide\n", 153 | " if __name__ == '__main__':\n" 154 | ] 155 | }, 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "array([ inf, 1. , 0.5 , 0.33333333, 0.25 ,\n", 160 | " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111])" 161 | ] 162 | }, 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "# Also warning, but not an error instead infinity\n", 170 | "1/arr" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 10, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729])" 184 | ] 185 | }, 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "arr**3" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Universal Array Functions\n", 200 | "\n", 201 | "Numpy comes with many [universal array functions](http://docs.scipy.org/doc/numpy/reference/ufuncs.html), which are essentially just mathematical operations you can use to perform the operation across the array. 
Let's show some common ones:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 12, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "array([ 0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 215 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])" 216 | ] 217 | }, 218 | "execution_count": 12, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "#Taking Square Roots\n", 225 | "np.sqrt(arr)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 13, 231 | "metadata": { 232 | "collapsed": false 233 | }, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([ 1.00000000e+00, 2.71828183e+00, 7.38905610e+00,\n", 239 | " 2.00855369e+01, 5.45981500e+01, 1.48413159e+02,\n", 240 | " 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,\n", 241 | " 8.10308393e+03])" 242 | ] 243 | }, 244 | "execution_count": 13, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "#Calcualting exponential (e^)\n", 251 | "np.exp(arr)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 14, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "9" 265 | ] 266 | }, 267 | "execution_count": 14, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "np.max(arr) #same as arr.max()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 15, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "array([ 0. 
, 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", 287 | " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849])" 288 | ] 289 | }, 290 | "execution_count": 15, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "np.sin(arr)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 16, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [ 306 | { 307 | "name": "stderr", 308 | "output_type": "stream", 309 | "text": [ 310 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log\n", 311 | " if __name__ == '__main__':\n" 312 | ] 313 | }, 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436,\n", 318 | " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])" 319 | ] 320 | }, 321 | "execution_count": 16, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "np.log(arr)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "# Great Job!\n", 335 | "\n", 336 | "That's all we need to know for now!" 337 | ] 338 | } 339 | ], 340 | "metadata": { 341 | "kernelspec": { 342 | "display_name": "Python 3", 343 | "language": "python", 344 | "name": "python3" 345 | }, 346 | "language_info": { 347 | "codemirror_mode": { 348 | "name": "ipython", 349 | "version": 3 350 | }, 351 | "file_extension": ".py", 352 | "mimetype": "text/x-python", 353 | "name": "python", 354 | "nbconvert_exporter": "python", 355 | "pygments_lexer": "ipython3", 356 | "version": "3.5.1" 357 | } 358 | }, 359 | "nbformat": 4, 360 | "nbformat_minor": 0 361 | } 362 | -------------------------------------------------------------------------------- /3. 
Pandas/.ipynb_checkpoints/Introduction to Pandas-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# Introduction to Pandas\n", 20 | "\n", 21 | "In this section of the course we will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n", 22 | "\n", 23 | "* Introduction to Pandas\n", 24 | "* Series\n", 25 | "* DataFrames\n", 26 | "* Missing Data\n", 27 | "* GroupBy\n", 28 | "* Merging,Joining,and Concatenating\n", 29 | "* Operations\n", 30 | "* Data Input and Output" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "___" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.1" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /3. 
Pandas/.ipynb_checkpoints/Missing Data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Missing Data\n", 18 | "\n", 19 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 43 | " 'B':[5,np.nan,np.nan],\n", 44 | " 'C':[1,2,3]})" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 10, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " A B C\n", 92 | "0 1.0 5.0 1\n", 93 | "1 2.0 NaN 2\n", 94 | "2 NaN NaN 3" 95 | ] 96 | }, 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | "
ABC
01.05.01
\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " A B C\n", 139 | "0 1.0 5.0 1" 140 | ] 141 | }, 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.dropna()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 13, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
C
01
12
23
\n", 184 | "
" 185 | ], 186 | "text/plain": [ 187 | " C\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 3" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.dropna(axis=1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 14, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
\n", 213 | "\n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
ABC
01.05.01
12.0NaN2
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " A B C\n", 241 | "0 1.0 5.0 1\n", 242 | "1 2.0 NaN 2" 243 | ] 244 | }, 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.dropna(thresh=2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " A B C\n", 299 | "0 1 5 1\n", 300 | "1 2 FILL VALUE 2\n", 301 | "2 FILL VALUE FILL VALUE 3" 302 | ] 303 | }, 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "df.fillna(value='FILL VALUE')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 17, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 1.0\n", 324 | "1 2.0\n", 325 | "2 1.5\n", 326 | "Name: A, dtype: float64" 327 | ] 328 | }, 329 | "execution_count": 17, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df['A'].fillna(value=df['A'].mean())" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "# Great Job!" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.5.1" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /3. Pandas/.ipynb_checkpoints/Series-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The first main data type we will learn about for pandas is the Series data type. 
Let's import Pandas and explore the Series object.\n", 19 | "\n", 20 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 21 | "\n", 22 | "Let's explore this concept through some examples:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Creating a Series\n", 42 | "\n", 43 | "You can convert a list,numpy array, or dictionary to a Series:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "labels = ['a','b','c']\n", 55 | "my_list = [10,20,30]\n", 56 | "arr = np.array([10,20,30])\n", 57 | "d = {'a':10,'b':20,'c':30}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "** Using Lists**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0 10\n", 78 | "1 20\n", 79 | "2 30\n", 80 | "dtype: int64" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "pd.Series(data=my_list)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "a 10\n", 103 | "b 20\n", 104 | "c 30\n", 105 | "dtype: int64" 106 | 
] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "pd.Series(data=my_list,index=labels)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "a 10\n", 128 | "b 20\n", 129 | "c 30\n", 130 | "dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "pd.Series(my_list,labels)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** NumPy Arrays **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "0 10\n", 160 | "1 20\n", 161 | "2 30\n", 162 | "dtype: int64" 163 | ] 164 | }, 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "pd.Series(arr)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "a 10\n", 185 | "b 20\n", 186 | "c 30\n", 187 | "dtype: int64" 188 | ] 189 | }, 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "pd.Series(arr,labels)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "** Dictionary**" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": { 210 | "collapsed": false 211 | }, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "a 10\n", 217 | "b 20\n", 218 | "c 30\n", 219 | "dtype: 
int64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "pd.Series(d)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Data in a Series\n", 236 | "\n", 237 | "A pandas Series can hold a variety of object types:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "0 a\n", 251 | "1 b\n", 252 | "2 c\n", 253 | "dtype: object" 254 | ] 255 | }, 256 | "execution_count": 10, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "pd.Series(data=labels)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "0 \n", 276 | "1 \n", 277 | "2 \n", 278 | "dtype: object" 279 | ] 280 | }, 281 | "execution_count": 11, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# Even functions (although unlikely that you will use this)\n", 288 | "pd.Series([sum,print,len])" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Using an Index\n", 296 | "\n", 297 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 298 | "\n", 299 | "Let's see some examples of how to grab information from a Series. 
Let us create two series, ser1 and ser2:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 12, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 13, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "USA        1\n", 324 | "Germany    2\n", 325 | "USSR       3\n", 326 | "Japan      4\n", 327 | "dtype: int64" 328 | ] 329 | }, 330 | "execution_count": 13, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ser1" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 14, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 15, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "USA        1\n", 361 | "Germany    2\n", 362 | "Italy      5\n", 363 | "Japan      4\n", 364 | "dtype: int64" 365 | ] 366 | }, 367 | "execution_count": 15, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "ser2" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 16, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "1" 387 | ] 388 | }, 389 | "execution_count": 16, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "ser1['USA']" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "source": [ 404 | "Operations are then also done based off of 
index:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 17, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "Germany 4.0\n", 418 | "Italy NaN\n", 419 | "Japan 8.0\n", 420 | "USA 2.0\n", 421 | "USSR NaN\n", 422 | "dtype: float64" 423 | ] 424 | }, 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "ser1 + ser2" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 439 | "# Great Job!" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.5.1" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 0 464 | } 465 | -------------------------------------------------------------------------------- /3. Pandas/Excel_Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/3. Pandas/Excel_Sample.xlsx -------------------------------------------------------------------------------- /3. 
Pandas/Introduction to Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# Introduction to Pandas\n", 20 | "\n", 21 | "In this section of the course we will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n", 22 | "\n", 23 | "* Introduction to Pandas\n", 24 | "* Series\n", 25 | "* DataFrames\n", 26 | "* Missing Data\n", 27 | "* GroupBy\n", 28 | "* Merging,Joining,and Concatenating\n", 29 | "* Operations\n", 30 | "* Data Input and Output" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "___" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.1" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /3. 
Pandas/Missing Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Missing Data\n", 18 | "\n", 19 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 43 | " 'B':[5,np.nan,np.nan],\n", 44 | " 'C':[1,2,3]})" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 10, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " A B C\n", 92 | "0 1.0 5.0 1\n", 93 | "1 2.0 NaN 2\n", 94 | "2 NaN NaN 3" 95 | ] 96 | }, 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | "
ABC
01.05.01
\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " A B C\n", 139 | "0 1.0 5.0 1" 140 | ] 141 | }, 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.dropna()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 13, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
C
01
12
23
\n", 184 | "
" 185 | ], 186 | "text/plain": [ 187 | " C\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 3" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.dropna(axis=1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 14, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
\n", 213 | "\n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
ABC
01.05.01
12.0NaN2
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " A B C\n", 241 | "0 1.0 5.0 1\n", 242 | "1 2.0 NaN 2" 243 | ] 244 | }, 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.dropna(thresh=2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " A B C\n", 299 | "0 1 5 1\n", 300 | "1 2 FILL VALUE 2\n", 301 | "2 FILL VALUE FILL VALUE 3" 302 | ] 303 | }, 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "df.fillna(value='FILL VALUE')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 17, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 1.0\n", 324 | "1 2.0\n", 325 | "2 1.5\n", 326 | "Name: A, dtype: float64" 327 | ] 328 | }, 329 | "execution_count": 17, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df['A'].fillna(value=df['A'].mean())" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "# Great Job!" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.5.1" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /3. Pandas/Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The first main data type we will learn about for pandas is the Series data type. 
Let's import Pandas and explore the Series object.\n", 19 | "\n", 20 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 21 | "\n", 22 | "Let's explore this concept through some examples:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Creating a Series\n", 42 | "\n", 43 | "You can convert a list,numpy array, or dictionary to a Series:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "labels = ['a','b','c']\n", 55 | "my_list = [10,20,30]\n", 56 | "arr = np.array([10,20,30])\n", 57 | "d = {'a':10,'b':20,'c':30}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "** Using Lists**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0 10\n", 78 | "1 20\n", 79 | "2 30\n", 80 | "dtype: int64" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "pd.Series(data=my_list)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "a 10\n", 103 | "b 20\n", 104 | "c 30\n", 105 | "dtype: int64" 106 | 
] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "pd.Series(data=my_list,index=labels)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "a 10\n", 128 | "b 20\n", 129 | "c 30\n", 130 | "dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "pd.Series(my_list,labels)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** NumPy Arrays **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "0 10\n", 160 | "1 20\n", 161 | "2 30\n", 162 | "dtype: int64" 163 | ] 164 | }, 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "pd.Series(arr)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "a 10\n", 185 | "b 20\n", 186 | "c 30\n", 187 | "dtype: int64" 188 | ] 189 | }, 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "pd.Series(arr,labels)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "** Dictionary**" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": { 210 | "collapsed": false 211 | }, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "a 10\n", 217 | "b 20\n", 218 | "c 30\n", 219 | "dtype: 
int64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "pd.Series(d)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Data in a Series\n", 236 | "\n", 237 | "A pandas Series can hold a variety of object types:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "0 a\n", 251 | "1 b\n", 252 | "2 c\n", 253 | "dtype: object" 254 | ] 255 | }, 256 | "execution_count": 10, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "pd.Series(data=labels)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "0 \n", 276 | "1 \n", 277 | "2 \n", 278 | "dtype: object" 279 | ] 280 | }, 281 | "execution_count": 11, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# Even functions (although unlikely that you will use this)\n", 288 | "pd.Series([sum,print,len])" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Using an Index\n", 296 | "\n", 297 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 298 | "\n", 299 | "Let's see some examples of how to grab information from a Series. 
Let us create two series, ser1 and ser2:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 12, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 13, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "USA        1\n", 324 | "Germany    2\n", 325 | "USSR       3\n", 326 | "Japan      4\n", 327 | "dtype: int64" 328 | ] 329 | }, 330 | "execution_count": 13, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ser1" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 14, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 15, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "USA        1\n", 361 | "Germany    2\n", 362 | "Italy      5\n", 363 | "Japan      4\n", 364 | "dtype: int64" 365 | ] 366 | }, 367 | "execution_count": 15, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "ser2" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 16, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "1" 387 | ] 388 | }, 389 | "execution_count": 16, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "ser1['USA']" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "source": [ 404 | "Operations are then also done based off of 
index:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 17, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "Germany 4.0\n", 418 | "Italy NaN\n", 419 | "Japan 8.0\n", 420 | "USA 2.0\n", 421 | "USSR NaN\n", 422 | "dtype: float64" 423 | ] 424 | }, 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "ser1 + ser2" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 439 | "# Great Job!" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.5.1" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 0 464 | } 465 | -------------------------------------------------------------------------------- /3. Pandas/example: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0,1,2,3 3 | 4,5,6,7 4 | 8,9,10,11 5 | 12,13,14,15 6 | -------------------------------------------------------------------------------- /3. 
Pandas/multi_index_example: -------------------------------------------------------------------------------- 1 | first,bar,bar,baz,baz,foo,foo,qux,qux 2 | second,one,two,one,two,one,two,one,two 3 | ,,,,,,,, 4 | A,1.025984152081572,-0.1565979042889875,-0.031579143908112575,0.6498258334908454,2.154846443259472,-0.6102588558227414,-0.755325340010558,-0.34641850351854453 5 | B,0.1470267713241236,-0.47944803904109595,0.558769406443067,1.0248102783372157,-0.925874258809907,1.8628641384939535,-1.1338171615837889,0.6104779075384634 6 | C,0.3860303121135517,2.084018530338962,-0.37651867524923904,0.23033634359240704,0.6812092925867574,1.0351250747739213,-0.031160481493099617,1.9399323109926203 7 | -------------------------------------------------------------------------------- /6. Pandas Built-in Data Viz/df2: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0.039761986133905136,0.2185172274750622,0.10342298051665423,0.9579042338107532 3 | 0.9372879037285884,0.04156728027953449,0.8991254222382951,0.9776795571253272 4 | 0.7805044779316328,0.008947537857148302,0.5578084027546968,0.7975104497549266 5 | 0.6727174963492204,0.24786984946279625,0.2640713103088026,0.44435791644122935 6 | 0.05382860859967886,0.5201244020579979,0.5522642392797277,0.19000759632053632 7 | 0.2860433671280178,0.5934650440000543,0.9073072637456548,0.6378977150631427 8 | 0.4304355863327313,0.16623013749421356,0.4693825447762464,0.4977008828313123 9 | 0.3122955538295512,0.5028232900921878,0.8066087010958843,0.8505190941429479 10 | 0.1877648514121828,0.9970746427719338,0.8959552961495315,0.530390137569463 11 | 0.9081621790575398,0.23272641071536715,0.4141382611943452,0.4320069001558664 12 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/.ipynb_checkpoints/Choropleth Maps Exercise -checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise \n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. Due to the Nature of Plotly we can't show you examples\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 38, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import plotly.graph_objs as go \n", 35 | "from plotly.offline import init_notebook_mode,iplot\n", 36 | "init_notebook_mode(connected=True) " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 152, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "** Check the head of the DataFrame. **" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 156, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " Country Power Consumption KWH Text\n", 128 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 129 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 130 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 131 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 132 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 133 | ] 134 | }, 135 | "execution_count": 156, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "choromap = go.Figure(data = [data],layout = layout)\n", 167 | "iplot(choromap,validate=False)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## USA Choropleth\n", 175 | "\n", 176 | "** Import the 2012_Election_Data csv file using pandas. **" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 109, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "** Check the head of the DataFrame. **" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 110, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 332 | "
" 333 | ], 334 | "text/plain": [ 335 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 336 | "0 2012 41 1 Alabama \n", 337 | "1 2012 81 2 Alaska \n", 338 | "2 2012 61 3 Arizona \n", 339 | "3 2012 42 4 Arkansas \n", 340 | "4 2012 71 5 California \n", 341 | "\n", 342 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 343 | "0 NaN 58.6% 56.0% \n", 344 | "1 58.9% 58.7% 55.3% \n", 345 | "2 53.0% 52.6% 46.5% \n", 346 | "3 51.1% 50.7% 47.7% \n", 347 | "4 55.7% 55.1% 45.1% \n", 348 | "\n", 349 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 350 | "0 NaN 2,074,338 3,539,217 \n", 351 | "1 301,694 300,495 511,792 \n", 352 | "2 2,323,579 2,306,559 4,387,900 \n", 353 | "3 1,078,548 1,069,468 2,109,847 \n", 354 | "4 13,202,158 13,038,547 23,681,837 \n", 355 | "\n", 356 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 357 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 358 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 359 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 360 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 361 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 362 | "\n", 363 | " Total Ineligible Felon State Abv \n", 364 | "0 71,584 AL \n", 365 | "1 11,317 AK \n", 366 | "2 81,048 AZ \n", 367 | "3 53,808 AR \n", 368 | "4 208,742 CA " 369 | ] 370 | }, 371 | "execution_count": 110, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 120, 388 | "metadata": { 389 | "collapsed": false 390 | }, 391 | "outputs": [], 392 | "source": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 121, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "choromap = go.Figure(data = [data],layout = layout)\n", 412 | "iplot(choromap,validate=False)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "# Great Job!" 420 | ] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "Python 3", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.5.1" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 0 444 | } 445 | -------------------------------------------------------------------------------- /7. Geographical Plotting/2011_US_AGRI_Exports: -------------------------------------------------------------------------------- 1 | code,state,category,total exports,beef,pork,poultry,dairy,fruits fresh,fruits proc,total fruits,veggies fresh,veggies proc,total veggies,corn,wheat,cotton,text 2 | AL,Alabama,state,1390.63,34.4,10.6,481.0,4.06,8.0,17.1,25.11,5.5,8.9,14.33,34.9,70.0,317.61,Alabama
Beef 34.4 Dairy 4.06
Fruits 25.11 Veggies 14.33
Wheat 70.0 Corn 34.9 3 | AK,Alaska,state,13.31,0.2,0.1,0.0,0.19,0.0,0.0,0.0,0.6,1.0,1.56,0.0,0.0,0.0,Alaska
Beef 0.2 Dairy 0.19
Fruits 0.0 Veggies 1.56
Wheat 0.0 Corn 0.0 4 | AZ,Arizona,state,1463.17,71.3,17.9,0.0,105.48,19.3,41.0,60.27,147.5,239.4,386.91,7.3,48.7,423.95,Arizona
Beef 71.3 Dairy 105.48
Fruits 60.27 Veggies 386.91
Wheat 48.7 Corn 7.3 5 | AR,Arkansas,state,3586.02,53.2,29.4,562.9,3.53,2.2,4.7,6.88,4.4,7.1,11.45,69.5,114.5,665.44,Arkansas
Beef 53.2 Dairy 3.53
Fruits 6.88 Veggies 11.45
Wheat 114.5 Corn 69.5 6 | CA, California, state,16472.88,228.7,11.1,225.4,929.95,2791.8,5944.6,8736.4,803.2,1303.5,2106.79,34.6,249.3,1064.95, California
Beef 228.7 Dairy 929.95
Fruits 8736.4 Veggies 2106.79
Wheat 249.3 Corn 34.6 7 | CO,Colorado,state,1851.33,261.4,66.0,14.0,71.94,5.7,12.2,17.99,45.1,73.2,118.27,183.2,400.5,0.0,Colorado
Beef 261.4 Dairy 71.94
Fruits 17.99 Veggies 118.27
Wheat 400.5 Corn 183.2 8 | CT,Connecticut,state,259.62,1.1,0.1,6.9,9.49,4.2,8.9,13.1,4.3,6.9,11.16,0.0,0.0,0.0,Connecticut
Beef 1.1 Dairy 9.49
Fruits 13.1 Veggies 11.16
Wheat 0.0 Corn 0.0 9 | DE,Delaware,state,282.19,0.4,0.6,114.7,2.3,0.5,1.0,1.53,7.6,12.4,20.03,26.9,22.9,0.0,Delaware
Beef 0.4 Dairy 2.3
Fruits 1.53 Veggies 20.03
Wheat 22.9 Corn 26.9 10 | FL,Florida,state,3764.09,42.6,0.9,56.9,66.31,438.2,933.1,1371.36,171.9,279.0,450.86,3.5,1.8,78.24,Florida
Beef 42.6 Dairy 66.31
Fruits 1371.36 Veggies 450.86
Wheat 1.8 Corn 3.5 11 | GA,Georgia,state,2860.84,31.0,18.9,630.4,38.38,74.6,158.9,233.51,59.0,95.8,154.77,57.8,65.4,1154.07,Georgia
Beef 31.0 Dairy 38.38
Fruits 233.51 Veggies 154.77
Wheat 65.4 Corn 57.8 12 | HI,Hawaii,state,401.84,4.0,0.7,1.3,1.16,17.7,37.8,55.51,9.5,15.4,24.83,0.0,0.0,0.0,Hawaii
Beef 4.0 Dairy 1.16
Fruits 55.51 Veggies 24.83
Wheat 0.0 Corn 0.0 13 | ID,Idaho,state,2078.89,119.8,0.0,2.4,294.6,6.9,14.7,21.64,121.7,197.5,319.19,24.0,568.2,0.0,Idaho
Beef 119.8 Dairy 294.6
Fruits 21.64 Veggies 319.19
Wheat 568.2 Corn 24.0 14 | IL,Illinois,state,8709.48,53.7,394.0,14.0,45.82,4.0,8.5,12.53,15.2,24.7,39.95,2228.5,223.8,0.0,Illinois
Beef 53.7 Dairy 45.82
Fruits 12.53 Veggies 39.95
Wheat 223.8 Corn 2228.5 15 | IN,Indiana,state,5050.23,21.9,341.9,165.6,89.7,4.1,8.8,12.98,14.4,23.4,37.89,1123.2,114.0,0.0,Indiana
Beef 21.9 Dairy 89.7
Fruits 12.98 Veggies 37.89
Wheat 114.0 Corn 1123.2 16 | IA,Iowa,state,11273.76,289.8,1895.6,155.6,107.0,1.0,2.2,3.24,2.7,4.4,7.1,2529.8,3.1,0.0,Iowa
Beef 289.8 Dairy 107.0
Fruits 3.24 Veggies 7.1
Wheat 3.1 Corn 2529.8 17 | KS,Kansas,state,4589.01,659.3,179.4,6.4,65.45,1.0,2.1,3.11,3.6,5.8,9.32,457.3,1426.5,43.98,Kansas
Beef 659.3 Dairy 65.45
Fruits 3.11 Veggies 9.32
Wheat 1426.5 Corn 457.3 18 | KY,Kentucky,state,1889.15,54.8,34.2,151.3,28.27,2.1,4.5,6.6,0.0,0.0,0.0,179.1,149.3,0.0,Kentucky
Beef 54.8 Dairy 28.27
Fruits 6.6 Veggies 0.0
Wheat 149.3 Corn 179.1 19 | LA,Louisiana,state,1914.23,19.8,0.8,77.2,6.02,5.7,12.1,17.83,6.6,10.7,17.25,91.4,78.7,280.42,Louisiana
Beef 19.8 Dairy 6.02
Fruits 17.83 Veggies 17.25
Wheat 78.7 Corn 91.4 20 | ME,Maine,state,278.37,1.4,0.5,10.4,16.18,16.6,35.4,52.01,24.0,38.9,62.9,0.0,0.0,0.0,Maine
Beef 1.4 Dairy 16.18
Fruits 52.01 Veggies 62.9
Wheat 0.0 Corn 0.0 21 | MD,Maryland,state,692.75,5.6,3.1,127.0,24.81,4.1,8.8,12.9,7.8,12.6,20.43,54.1,55.8,0.0,Maryland
Beef 5.6 Dairy 24.81
Fruits 12.9 Veggies 20.43
Wheat 55.8 Corn 54.1 22 | MA,Massachusetts,state,248.65,0.6,0.5,0.6,5.81,25.8,55.0,80.83,8.1,13.1,21.13,0.0,0.0,0.0,Massachusetts
Beef 0.6 Dairy 5.81
Fruits 80.83 Veggies 21.13
Wheat 0.0 Corn 0.0 23 | MI,Michigan,state,3164.16,37.7,118.1,32.6,214.82,82.3,175.3,257.69,72.4,117.5,189.96,381.5,247.0,0.0,Michigan
Beef 37.7 Dairy 214.82
Fruits 257.69 Veggies 189.96
Wheat 247.0 Corn 381.5 24 | MN,Minnesota,state,7192.33,112.3,740.4,189.2,218.05,2.5,5.4,7.91,45.9,74.5,120.37,1264.3,538.1,0.0,Minnesota
Beef 112.3 Dairy 218.05
Fruits 7.91 Veggies 120.37
Wheat 538.1 Corn 1264.3 25 | MS,Mississippi,state,2170.8,12.8,30.4,370.8,5.45,5.4,11.6,17.04,10.6,17.2,27.87,110.0,102.2,494.75,Mississippi
Beef 12.8 Dairy 5.45
Fruits 17.04 Veggies 27.87
Wheat 102.2 Corn 110.0 26 | MO,Missouri,state,3933.42,137.2,277.3,196.1,34.26,4.2,9.0,13.18,6.8,11.1,17.9,428.8,161.7,345.29,Missouri
Beef 137.2 Dairy 34.26
Fruits 13.18 Veggies 17.9
Wheat 161.7 Corn 428.8 27 | MT,Montana,state,1718.0,105.0,16.7,1.7,6.82,1.1,2.2,3.3,17.3,28.0,45.27,5.4,1198.1,0.0,Montana
Beef 105.0 Dairy 6.82
Fruits 3.3 Veggies 45.27
Wheat 1198.1 Corn 5.4 28 | NE,Nebraska,state,7114.13,762.2,262.5,31.4,30.07,0.7,1.5,2.16,20.4,33.1,53.5,1735.9,292.3,0.0,Nebraska
Beef 762.2 Dairy 30.07
Fruits 2.16 Veggies 53.5
Wheat 292.3 Corn 1735.9 29 | NV,Nevada,state,139.89,21.8,0.2,0.0,16.57,0.4,0.8,1.19,10.6,17.3,27.93,0.0,5.4,0.0,Nevada
Beef 21.8 Dairy 16.57
Fruits 1.19 Veggies 27.93
Wheat 5.4 Corn 0.0 30 | NH,New Hampshire,state,73.06,0.6,0.2,0.8,7.46,2.6,5.4,7.98,1.7,2.8,4.5,0.0,0.0,0.0,New Hampshire
Beef 0.6 Dairy 7.46
Fruits 7.98 Veggies 4.5
Wheat 0.0 Corn 0.0 31 | NJ,New Jersey,state,500.4,0.8,0.4,4.6,3.37,35.0,74.5,109.45,21.6,35.0,56.54,10.1,6.7,0.0,New Jersey
Beef 0.8 Dairy 3.37
Fruits 109.45 Veggies 56.54
Wheat 6.7 Corn 10.1 32 | NM,New Mexico,state,751.58,117.2,0.1,0.3,191.01,32.6,69.3,101.9,16.7,27.1,43.88,11.2,13.9,72.62,New Mexico
Beef 117.2 Dairy 191.01
Fruits 101.9 Veggies 43.88
Wheat 13.9 Corn 11.2 33 | NY,New York,state,1488.9,22.2,5.8,17.7,331.8,64.7,137.8,202.56,54.7,88.7,143.37,106.1,29.9,0.0,New York
Beef 22.2 Dairy 331.8
Fruits 202.56 Veggies 143.37
Wheat 29.9 Corn 106.1 34 | NC,North Carolina,state,3806.05,24.8,702.8,598.4,24.9,23.8,50.7,74.47,57.4,93.1,150.45,92.2,200.3,470.86,North Carolina
Beef 24.8 Dairy 24.9
Fruits 74.47 Veggies 150.45
Wheat 200.3 Corn 92.2 35 | ND,North Dakota,state,3761.96,78.5,16.1,0.5,8.14,0.1,0.2,0.25,49.9,80.9,130.79,236.1,1664.5,0.0,North Dakota
Beef 78.5 Dairy 8.14
Fruits 0.25 Veggies 130.79
Wheat 1664.5 Corn 236.1 36 | OH,Ohio,state,3979.79,36.2,199.1,129.9,134.57,8.7,18.5,27.21,20.4,33.1,53.53,535.1,207.4,0.0,Ohio
Beef 36.2 Dairy 134.57
Fruits 27.21 Veggies 53.53
Wheat 207.4 Corn 535.1 37 | OK,Oklahoma,state,1646.41,337.6,265.3,131.1,24.35,3.0,6.3,9.24,3.4,5.5,8.9,27.5,324.8,110.54,Oklahoma
Beef 337.6 Dairy 24.35
Fruits 9.24 Veggies 8.9
Wheat 324.8 Corn 27.5 38 | OR,Oregon,state,1794.57,58.8,1.4,14.2,63.66,100.7,214.4,315.04,48.2,78.3,126.5,11.7,320.3,0.0,Oregon
Beef 58.8 Dairy 63.66
Fruits 315.04 Veggies 126.5
Wheat 320.3 Corn 11.7 39 | PA,Pennsylvania,state,1969.87,50.9,91.3,169.8,280.87,28.6,60.9,89.48,14.6,23.7,38.26,112.1,41.0,0.0,Pennsylvania
Beef 50.9 Dairy 280.87
Fruits 89.48 Veggies 38.26
Wheat 41.0 Corn 112.1 40 | RI,Rhode Island,state,31.59,0.1,0.1,0.2,0.52,0.9,1.9,2.83,1.2,1.9,3.02,0.0,0.0,0.0,Rhode Island
Beef 0.1 Dairy 0.52
Fruits 2.83 Veggies 3.02
Wheat 0.0 Corn 0.0 41 | SC,South Carolina,state,929.93,15.2,10.9,186.5,7.62,17.1,36.4,53.45,16.3,26.4,42.66,32.1,55.3,206.1,South Carolina
Beef 15.2 Dairy 7.62
Fruits 53.45 Veggies 42.66
Wheat 55.3 Corn 32.1 42 | SD,South Dakota,state,3770.19,193.5,160.2,29.3,46.77,0.3,0.5,0.8,1.5,2.5,4.06,643.6,704.5,0.0,South Dakota
Beef 193.5 Dairy 46.77
Fruits 0.8 Veggies 4.06
Wheat 704.5 Corn 643.6 43 | TN,Tennessee,state,1535.13,51.1,17.6,82.4,21.18,2.0,4.2,6.23,9.4,15.3,24.67,88.8,100.0,363.83,Tennessee
Beef 51.1 Dairy 21.18
Fruits 6.23 Veggies 24.67
Wheat 100.0 Corn 88.8 44 | TX,Texas,state,6648.22,961.0,42.7,339.2,240.55,31.9,68.0,99.9,43.9,71.3,115.23,167.2,309.7,2308.76,Texas
Beef 961.0 Dairy 240.55
Fruits 99.9 Veggies 115.23
Wheat 309.7 Corn 167.2 45 | UT,Utah,state,453.39,27.9,59.0,23.1,48.6,3.9,8.4,12.34,2.5,4.1,6.6,5.3,42.8,0.0,Utah
Beef 27.9 Dairy 48.6
Fruits 12.34 Veggies 6.6
Wheat 42.8 Corn 5.3 46 | VT,Vermont,state,180.14,6.2,0.2,0.9,65.98,2.6,5.4,8.01,1.5,2.5,4.05,0.0,0.0,0.0,Vermont
Beef 6.2 Dairy 65.98
Fruits 8.01 Veggies 4.05
Wheat 0.0 Corn 0.0 47 | VA,Virginia,state,1146.48,39.5,16.9,164.7,47.85,11.7,24.8,36.48,10.4,16.9,27.25,39.5,77.5,64.84,Virginia
Beef 39.5 Dairy 47.85
Fruits 36.48 Veggies 27.25
Wheat 77.5 Corn 39.5 48 | WA,Washington,state,3894.81,59.2,0.0,35.6,154.18,555.6,1183.0,1738.57,138.7,225.1,363.79,29.5,786.3,0.0,Washington
Beef 59.2 Dairy 154.18
Fruits 1738.57 Veggies 363.79
Wheat 786.3 Corn 29.5 49 | WV,West Virginia,state,138.89,12.0,0.3,45.4,3.9,3.7,7.9,11.54,0.0,0.0,0.0,3.5,1.6,0.0,West Virginia
Beef 12.0 Dairy 3.9
Fruits 11.54 Veggies 0.0
Wheat 1.6 Corn 3.5 50 | WI,Wisconsin,state,3090.23,107.3,38.6,34.5,633.6,42.8,91.0,133.8,56.8,92.2,148.99,460.5,96.7,0.0,Wisconsin
Beef 107.3 Dairy 633.6
Fruits 133.8 Veggies 148.99
Wheat 96.7 Corn 460.5 51 | WY,Wyoming,state,349.69,75.1,33.2,0.1,2.89,0.1,0.1,0.17,3.9,6.3,10.23,9.0,20.7,0.0,Wyoming
Beef 75.1 Dairy 2.89
Fruits 0.17 Veggies 10.23
Wheat 20.7 Corn 9.0 52 | -------------------------------------------------------------------------------- /7. Geographical Plotting/2012_Election_Data: -------------------------------------------------------------------------------- 1 | Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv 2 | 2012,41,1,Alabama,,58.6%,56.0%,,"2,074,338","3,539,217",3707440.0,2.6%,"32,232","57,993","8,616","71,584",AL 3 | 2012,81,2,Alaska,58.9%,58.7%,55.3%,"301,694","300,495","511,792",543763.0,3.8%,"5,633","7,173","1,882","11,317",AK 4 | 2012,61,3,Arizona,53.0%,52.6%,46.5%,"2,323,579","2,306,559","4,387,900",4959270.0,9.9%,"35,188","72,452","7,460","81,048",AZ 5 | 2012,42,4,Arkansas,51.1%,50.7%,47.7%,"1,078,548","1,069,468","2,109,847",2242740.0,3.5%,"14,471","30,122","23,372","53,808",AR 6 | 2012,71,5,California,55.7%,55.1%,45.1%,"13,202,158","13,038,547","23,681,837",28913129.0,17.4%,"119,455",0,"89,287","208,742",CA 7 | 2012,62,6,Colorado,70.6%,69.9%,64.5%,"2,596,173","2,569,522","3,675,871",3981208.0,6.9%,"18,807",0,"11,458","30,265",CO 8 | 2012,1,7,Connecticut,61.4%,61.3%,55.6%,"1,560,640","1,558,960","2,543,202",2801375.0,8.5%,"16,935",0,"2,793","19,728",CT 9 | 2012,11,8,Delaware,,62.3%,57.8%,,"413,921","663,967",715708.0,5.1%,"6,610","15,641",601,"15,501",DE 10 | 2012,55,9,District of Columbia,61.6%,61.5%,55.5%,"294,254","293,764","477,582",528848.0,9.7%,0,0,0,0,District of Columbia 11 | 2012,43,10,Florida,63.3%,62.8%,55.1%,"8,538,264","8,474,179","13,495,057",15380947.0,10.8%,"91,954","240,869","4,538","224,153",FL 12 | 2012,44,11,Georgia,59.3%,59.0%,52.3%,"3,919,355","3,900,050","6,606,607",7452696.0,7.2%,"52,737","442,061","24,761","311,790",GA 13 | 
2012,82,12,Hawaii,44.5%,44.2%,39.9%,"437,159","434,697","982,902",1088335.0,9.2%,"5,544",0,0,"5,544",HI 14 | 2012,63,13,Idaho,61.0%,59.8%,55.6%,"666,290","652,274","1,091,410",1173727.0,4.6%,"7,985","31,606","3,848","28,584",ID 15 | 2012,21,14,Illinois,59.3%,58.9%,53.3%,"5,279,752","5,242,014","8,899,143",9827043.0,8.9%,"49,348",0,0,"49,348",IL 16 | 2012,22,15,Indiana,56.0%,55.2%,52.9%,"2,663,368","2,624,534","4,755,291",4960376.0,3.6%,"28,266",0,0,"28,266",IN 17 | 2012,31,16,Iowa,70.6%,70.3%,67.1%,"1,589,951","1,582,180","2,251,748",2356209.0,3.2%,"8,470","29,333","5,151","29,167",IA 18 | 2012,32,17,Kansas,58.2%,56.9%,53.5%,"1,182,771","1,156,254","2,030,686",2162442.0,5.0%,"9,346","17,021","5,126","23,493",KS 19 | 2012,51,18,Kentucky,56.2%,55.7%,53.4%,"1,815,843","1,797,212","3,229,185",3368684.0,2.2%,"21,863","54,511","14,419","65,173",KY 20 | 2012,45,19,Louisiana,60.8%,60.2%,57.0%,"2,014,548","1,994,065","3,311,626",3495847.0,2.7%,"40,047","41,298","28,946","90,881",LA 21 | 2012,2,20,Maine,69.3%,68.2%,67.0%,"724,758","713,180","1,046,008",1064779.0,1.8%,0,0,0,0,ME 22 | 2012,52,21,Maryland,67.3%,66.6%,59.5%,"2,734,062","2,707,327","4,063,582",4553853.0,8.9%,"20,871","96,640","13,195","85,285",MD 23 | 2012,3,22,Massachusetts,66.2%,65.9%,60.2%,"3,184,196","3,167,767","4,809,675",5263550.0,8.4%,"10,283",0,0,"10,283",MA 24 | 2012,23,23,Michigan,65.4%,64.7%,62.0%,"4,780,701","4,730,961","7,312,725",7625576.0,3.5%,"43,019",0,0,"43,019",MI 25 | 2012,33,24,Minnesota,76.4%,76.0%,71.4%,"2,950,780","2,936,561","3,861,598",4114820.0,4.4%,"9,383","108,157","6,006","72,712",MN 26 | 2012,46,25,Mississippi,,59.3%,57.2%,,"1,285,584","2,166,825",2246931.0,1.5%,"22,305","30,768","6,804","45,416",MS 27 | 2012,34,26,Missouri,,62.2%,59.6%,,"2,757,323","4,432,957",4628500.0,2.5%,"30,714","55,470","20,672","80,785",MO 28 | 2012,64,27,Montana,63.5%,62.5%,61.6%,"491,966","484,048","774,476",785454.0,0.9%,"3,592",0,0,"3,592",MT 29 | 
2012,35,28,Nebraska,61.1%,60.3%,56.9%,"804,245","794,379","1,316,915",1396507.0,4.7%,"4,466","14,260","1,383","13,407",NE 30 | 2012,65,29,Nevada,56.5%,56.4%,48.2%,"1,016,664","1,014,918","1,800,969",2105976.0,13.3%,"12,883","11,321","5,379","24,262",NV 31 | 2012,4,30,New Hampshire,70.9%,70.2%,67.8%,"718,700","710,972","1,013,420",1047978.0,3.0%,"2,672",0,0,"2,672",NH 32 | 2012,12,31,New Jersey,62.2%,61.5%,53.2%,"3,683,638","3,640,292","5,918,182",6847503.0,12.1%,"21,759","114,886","14,987","97,636",NJ 33 | 2012,66,32,New Mexico,54.8%,54.6%,49.8%,"786,522","783,757","1,436,363",1573400.0,7.3%,"6,553","21,381","5,078","22,963",NM 34 | 2012,13,33,New York,53.5%,53.1%,46.1%,"7,128,852","7,074,723","13,324,107",15344671.0,12.5%,"49,889",0,"46,222","96,111",NY 35 | 2012,47,34,North Carolina,65.4%,64.8%,60.1%,"4,542,488","4,505,372","6,947,954",7496980.0,6.1%,"35,567","96,070","4,359","90,843",NC 36 | 2012,36,35,North Dakota,60.4%,59.8%,58.7%,"325,564","322,627","539,164",549955.0,1.7%,"1,500",0,0,"1,500",ND 37 | 2012,24,36,Ohio,65.1%,64.5%,62.7%,"5,632,423","5,580,822","8,649,495",8896930.0,2.2%,"50,313",0,0,"50,313",OH 38 | 2012,53,37,Oklahoma,,49.2%,46.3%,,"1,334,872","2,713,268",2885093.0,4.5%,"25,225","25,506","2,310","41,053",OK 39 | 2012,72,38,Oregon,64.2%,63.1%,58.7%,"1,820,507","1,789,270","2,836,101",3050747.0,6.6%,"13,607",0,0,"13,607",OR 40 | 2012,14,39,Pennsylvania,,59.5%,57.2%,,"5,742,040","9,651,432",10037099.0,3.3%,"50,054",0,0,"50,054",PA 41 | 2012,5,40,Rhode Island,,58.0%,53.4%,,"446,049","768,918",834983.0,7.5%,"3,249",0,0,"3,249",RI 42 | 2012,48,41,South Carolina,56.8%,56.3%,53.6%,"1,981,516","1,964,118","3,486,838",3662322.0,3.5%,"21,895","34,945","6,116","46,532",SC 43 | 2012,37,42,South Dakota,60.1%,59.3%,57.6%,"368,270","363,815","613,190",631472.0,1.9%,"3,574",0,"2,761","6,335",SD 44 | 2012,54,43,Tennessee,52.3%,51.9%,49.4%,"2,478,870","2,458,577","4,736,084",4976284.0,3.3%,"28,135","64,430","13,138","75,421",TN 45 | 
2012,49,44,Texas,,49.6%,41.7%,,"7,993,851","16,119,973",19185395.0,13.5%,"157,564","405,473","112,288","484,753",TX 46 | 2012,67,45,Utah,56.1%,55.5%,51.4%,"1,028,786","1,017,440","1,833,339",1978956.0,7.0%,"6,611",0,0,"6,611",UT 47 | 2012,6,46,Vermont,61.2%,60.7%,59.6%,"301,793","299,290","493,355",502242.0,1.8%,0,0,0,0,VT 48 | 2012,40,47,Virginia,66.6%,66.1%,60.7%,"3,888,186","3,854,489","5,834,676",6348827.0,7.1%,"36,425","52,956","1,983","66,475",VA 49 | 2012,73,48,Washington,65.8%,64.8%,58.6%,"3,172,939","3,125,516","4,822,060",5329782.0,8.2%,"16,355","88,339","8,895","72,070",WA 50 | 2012,56,49,West Virginia,,46.3%,45.5%,,"670,438","1,447,066",1472642.0,0.8%,"7,052","8,573","2,052","13,648",WV 51 | 2012,25,50,Wisconsin,,72.9%,69.5%,,"3,068,434","4,209,370",4417273.0,3.2%,"21,987","46,328","20,023","66,564",WI 52 | 2012,68,51,Wyoming,59.0%,58.6%,56.4%,"250,701","249,061","425,142",441726.0,2.5%,"2,163","5,162",762,"5,661",WY 53 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/2014_World_GDP: -------------------------------------------------------------------------------- 1 | COUNTRY,GDP (BILLIONS),CODE 2 | Afghanistan,21.71,AFG 3 | Albania,13.4,ALB 4 | Algeria,227.8,DZA 5 | American Samoa,0.75,ASM 6 | Andorra,4.8,AND 7 | Angola,131.4,AGO 8 | Anguilla,0.18,AIA 9 | Antigua and Barbuda,1.24,ATG 10 | Argentina,536.2,ARG 11 | Armenia,10.88,ARM 12 | Aruba,2.52,ABW 13 | Australia,1483.0,AUS 14 | Austria,436.1,AUT 15 | Azerbaijan,77.91,AZE 16 | "Bahamas, The",8.65,BHM 17 | Bahrain,34.05,BHR 18 | Bangladesh,186.6,BGD 19 | Barbados,4.28,BRB 20 | Belarus,75.25,BLR 21 | Belgium,527.8,BEL 22 | Belize,1.67,BLZ 23 | Benin,9.24,BEN 24 | Bermuda,5.2,BMU 25 | Bhutan,2.09,BTN 26 | Bolivia,34.08,BOL 27 | Bosnia and Herzegovina,19.55,BIH 28 | Botswana,16.3,BWA 29 | Brazil,2244.0,BRA 30 | British Virgin Islands,1.1,VGB 31 | Brunei,17.43,BRN 32 | Bulgaria,55.08,BGR 33 | Burkina Faso,13.38,BFA 34 | Burma,65.29,MMR 35 | Burundi,3.04,BDI 36 | Cabo Verde,1.98,CPV 37 | Cambodia,16.9,KHM 38 | Cameroon,32.16,CMR 39 | Canada,1794.0,CAN 40 | Cayman Islands,2.25,CYM 41 | Central African Republic,1.73,CAF 42 | Chad,15.84,TCD 43 | Chile,264.1,CHL 44 | China,10360.0,CHN 45 | Colombia,400.1,COL 46 | Comoros,0.72,COM 47 | "Congo, Democratic Republic of the",32.67,COD 48 | "Congo, Republic of the",14.11,COG 49 | Cook Islands,0.18,COK 50 | Costa Rica,50.46,CRI 51 | Cote d'Ivoire,33.96,CIV 52 | Croatia,57.18,HRV 53 | Cuba,77.15,CUB 54 | Curacao,5.6,CUW 55 | Cyprus,21.34,CYP 56 | Czech Republic,205.6,CZE 57 | Denmark,347.2,DNK 58 | Djibouti,1.58,DJI 59 | Dominica,0.51,DMA 60 | Dominican Republic,64.05,DOM 61 | Ecuador,100.5,ECU 62 | Egypt,284.9,EGY 63 | El Salvador,25.14,SLV 64 | Equatorial Guinea,15.4,GNQ 65 | Eritrea,3.87,ERI 66 | Estonia,26.36,EST 67 | Ethiopia,49.86,ETH 68 | Falkland Islands (Islas Malvinas),0.16,FLK 69 | Faroe Islands,2.32,FRO 70 | Fiji,4.17,FJI 71 | Finland,276.3,FIN 72 | France,2902.0,FRA 73 | French Polynesia,7.15,PYF 74 | 
Gabon,20.68,GAB 75 | "Gambia, The",0.92,GMB 76 | Georgia,16.13,GEO 77 | Germany,3820.0,DEU 78 | Ghana,35.48,GHA 79 | Gibraltar,1.85,GIB 80 | Greece,246.4,GRC 81 | Greenland,2.16,GRL 82 | Grenada,0.84,GRD 83 | Guam,4.6,GUM 84 | Guatemala,58.3,GTM 85 | Guernsey,2.74,GGY 86 | Guinea-Bissau,1.04,GNB 87 | Guinea,6.77,GIN 88 | Guyana,3.14,GUY 89 | Haiti,8.92,HTI 90 | Honduras,19.37,HND 91 | Hong Kong,292.7,HKG 92 | Hungary,129.7,HUN 93 | Iceland,16.2,ISL 94 | India,2048.0,IND 95 | Indonesia,856.1,IDN 96 | Iran,402.7,IRN 97 | Iraq,232.2,IRQ 98 | Ireland,245.8,IRL 99 | Isle of Man,4.08,IMN 100 | Israel,305.0,ISR 101 | Italy,2129.0,ITA 102 | Jamaica,13.92,JAM 103 | Japan,4770.0,JPN 104 | Jersey,5.77,JEY 105 | Jordan,36.55,JOR 106 | Kazakhstan,225.6,KAZ 107 | Kenya,62.72,KEN 108 | Kiribati,0.16,KIR 109 | "Korea, North",28.0,KOR 110 | "Korea, South",1410.0,PRK 111 | Kosovo,5.99,KSV 112 | Kuwait,179.3,KWT 113 | Kyrgyzstan,7.65,KGZ 114 | Laos,11.71,LAO 115 | Latvia,32.82,LVA 116 | Lebanon,47.5,LBN 117 | Lesotho,2.46,LSO 118 | Liberia,2.07,LBR 119 | Libya,49.34,LBY 120 | Liechtenstein,5.11,LIE 121 | Lithuania,48.72,LTU 122 | Luxembourg,63.93,LUX 123 | Macau,51.68,MAC 124 | Macedonia,10.92,MKD 125 | Madagascar,11.19,MDG 126 | Malawi,4.41,MWI 127 | Malaysia,336.9,MYS 128 | Maldives,2.41,MDV 129 | Mali,12.04,MLI 130 | Malta,10.57,MLT 131 | Marshall Islands,0.18,MHL 132 | Mauritania,4.29,MRT 133 | Mauritius,12.72,MUS 134 | Mexico,1296.0,MEX 135 | "Micronesia, Federated States of",0.34,FSM 136 | Moldova,7.74,MDA 137 | Monaco,6.06,MCO 138 | Mongolia,11.73,MNG 139 | Montenegro,4.66,MNE 140 | Morocco,112.6,MAR 141 | Mozambique,16.59,MOZ 142 | Namibia,13.11,NAM 143 | Nepal,19.64,NPL 144 | Netherlands,880.4,NLD 145 | New Caledonia,11.1,NCL 146 | New Zealand,201.0,NZL 147 | Nicaragua,11.85,NIC 148 | Nigeria,594.3,NGA 149 | Niger,8.29,NER 150 | Niue,0.01,NIU 151 | Northern Mariana Islands,1.23,MNP 152 | Norway,511.6,NOR 153 | Oman,80.54,OMN 154 | Pakistan,237.5,PAK 155 | Palau,0.65,PLW 156 
| Panama,44.69,PAN 157 | Papua New Guinea,16.1,PNG 158 | Paraguay,31.3,PRY 159 | Peru,208.2,PER 160 | Philippines,284.6,PHL 161 | Poland,552.2,POL 162 | Portugal,228.2,PRT 163 | Puerto Rico,93.52,PRI 164 | Qatar,212.0,QAT 165 | Romania,199.0,ROU 166 | Russia,2057.0,RUS 167 | Rwanda,8.0,RWA 168 | Saint Kitts and Nevis,0.81,KNA 169 | Saint Lucia,1.35,LCA 170 | Saint Martin,0.56,MAF 171 | Saint Pierre and Miquelon,0.22,SPM 172 | Saint Vincent and the Grenadines,0.75,VCT 173 | Samoa,0.83,WSM 174 | San Marino,1.86,SMR 175 | Sao Tome and Principe,0.36,STP 176 | Saudi Arabia,777.9,SAU 177 | Senegal,15.88,SEN 178 | Serbia,42.65,SRB 179 | Seychelles,1.47,SYC 180 | Sierra Leone,5.41,SLE 181 | Singapore,307.9,SGP 182 | Sint Maarten,304.1,SXM 183 | Slovakia,99.75,SVK 184 | Slovenia,49.93,SVN 185 | Solomon Islands,1.16,SLB 186 | Somalia,2.37,SOM 187 | South Africa,341.2,ZAF 188 | South Sudan,11.89,SSD 189 | Spain,1400.0,ESP 190 | Sri Lanka,71.57,LKA 191 | Sudan,70.03,SDN 192 | Suriname,5.27,SUR 193 | Swaziland,3.84,SWZ 194 | Sweden,559.1,SWE 195 | Switzerland,679.0,CHE 196 | Syria,64.7,SYR 197 | Taiwan,529.5,TWN 198 | Tajikistan,9.16,TJK 199 | Tanzania,36.62,TZA 200 | Thailand,373.8,THA 201 | Timor-Leste,4.51,TLS 202 | Togo,4.84,TGO 203 | Tonga,0.49,TON 204 | Trinidad and Tobago,29.63,TTO 205 | Tunisia,49.12,TUN 206 | Turkey,813.3,TUR 207 | Turkmenistan,43.5,TKM 208 | Tuvalu,0.04,TUV 209 | Uganda,26.09,UGA 210 | Ukraine,134.9,UKR 211 | United Arab Emirates,416.4,ARE 212 | United Kingdom,2848.0,GBR 213 | United States,17420.0,USA 214 | Uruguay,55.6,URY 215 | Uzbekistan,63.08,UZB 216 | Vanuatu,0.82,VUT 217 | Venezuela,209.2,VEN 218 | Vietnam,187.8,VNM 219 | Virgin Islands,5.08,VGB 220 | West Bank,6.64,WBG 221 | Yemen,45.45,YEM 222 | Zambia,25.61,ZMB 223 | Zimbabwe,13.74,ZWE 224 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/2014_World_Power_Consumption: -------------------------------------------------------------------------------- 1 | Country,Power Consumption KWH,Text 2 | China,5523000000000.0,"China 5,523,000,000,000" 3 | United States,3832000000000.0,"United 3,832,000,000,000" 4 | European,2771000000000.0,"European 2,771,000,000,000" 5 | Russia,1065000000000.0,"Russia 1,065,000,000,000" 6 | Japan,921000000000.0,"Japan 921,000,000,000" 7 | India,864700000000.0,"India 864,700,000,000" 8 | Germany,540100000000.0,"Germany 540,100,000,000" 9 | Canada,511000000000.0,"Canada 511,000,000,000" 10 | Brazil,483500000000.0,"Brazil 483,500,000,000" 11 | "Korea,",482400000000.0,"Korea, 482,400,000,000" 12 | France,451100000000.0,"France 451,100,000,000" 13 | United Kingdom,319100000000.0,"United 319,100,000,000" 14 | Italy,303100000000.0,"Italy 303,100,000,000" 15 | Taiwan,249500000000.0,"Taiwan 249,500,000,000" 16 | Spain,243100000000.0,"Spain 243,100,000,000" 17 | Mexico,234000000000.0,"Mexico 234,000,000,000" 18 | Saudi,231600000000.0,"Saudi 231,600,000,000" 19 | Australia,222600000000.0,"Australia 222,600,000,000" 20 | South,211600000000.0,"South 211,600,000,000" 21 | Turkey,197000000000.0,"Turkey 197,000,000,000" 22 | Iran,195300000000.0,"Iran 195,300,000,000" 23 | Indonesia,167500000000.0,"Indonesia 167,500,000,000" 24 | Ukraine,159800000000.0,"Ukraine 159,800,000,000" 25 | Thailand,155900000000.0,"Thailand 155,900,000,000" 26 | Poland,139000000000.0,"Poland 139,000,000,000" 27 | Egypt,135600000000.0,"Egypt 135,600,000,000" 28 | Sweden,130500000000.0,"Sweden 130,500,000,000" 29 | Norway,126400000000.0,"Norway 126,400,000,000" 30 | Malaysia,118500000000.0,"Malaysia 118,500,000,000" 31 | Argentina,117100000000.0,"Argentina 117,100,000,000" 32 | Netherlands,116800000000.0,"Netherlands 116,800,000,000" 33 | Vietnam,108300000000.0,"Vietnam 108,300,000,000" 34 | Venezuela,97690000000.0,"Venezuela 97,690,000,000" 35 | United Arab Emirates,93280000000.0,"United 
93,280,000,000" 36 | Finland,82040000000.0,"Finland 82,040,000,000" 37 | Belgium,81890000000.0,"Belgium 81,890,000,000" 38 | Kazakhstan,80290000000.0,"Kazakhstan 80,290,000,000" 39 | Pakistan,78890000000.0,"Pakistan 78,890,000,000" 40 | Philippines,75270000000.0,"Philippines 75,270,000,000" 41 | Austria,69750000000.0,"Austria 69,750,000,000" 42 | Chile,63390000000.0,"Chile 63,390,000,000" 43 | Czechia,60550000000.0,"Czechia 60,550,000,000" 44 | Israel,59830000000.0,"Israel 59,830,000,000" 45 | Switzerland,58010000000.0,"Switzerland 58,010,000,000" 46 | Greece,57730000000.0,"Greece 57,730,000,000" 47 | Iraq,53410000000.0,"Iraq 53,410,000,000" 48 | Romania,50730000000.0,"Romania 50,730,000,000" 49 | Kuwait,50000000000.0,"Kuwait 50,000,000,000" 50 | Colombia,49380000000.0,"Colombia 49,380,000,000" 51 | Singapore,47180000000.0,"Singapore 47,180,000,000" 52 | Portugal,46250000000.0,"Portugal 46,250,000,000" 53 | Uzbekistan,45210000000.0,"Uzbekistan 45,210,000,000" 54 | Hong,44210000000.0,"Hong 44,210,000,000" 55 | Algeria,42870000000.0,"Algeria 42,870,000,000" 56 | Bangladesh,41520000000.0,"Bangladesh 41,520,000,000" 57 | New,40300000000.0,"New 40,300,000,000" 58 | Bulgaria,37990000000.0,"Bulgaria 37,990,000,000" 59 | Belarus,37880000000.0,"Belarus 37,880,000,000" 60 | Peru,35690000000.0,"Peru 35,690,000,000" 61 | Denmark,31960000000.0,"Denmark 31,960,000,000" 62 | Qatar,30530000000.0,"Qatar 30,530,000,000" 63 | Slovakia,28360000000.0,"Slovakia 28,360,000,000" 64 | Libya,27540000000.0,"Libya 27,540,000,000" 65 | Serbia,26910000000.0,"Serbia 26,910,000,000" 66 | Morocco,26700000000.0,"Morocco 26,700,000,000" 67 | Syria,25700000000.0,"Syria 25,700,000,000" 68 | Nigeria,24780000000.0,"Nigeria 24,780,000,000" 69 | Ireland,24240000000.0,"Ireland 24,240,000,000" 70 | Hungary,21550000000.0,"Hungary 21,550,000,000" 71 | Oman,20360000000.0,"Oman 20,360,000,000" 72 | Ecuador,19020000000.0,"Ecuador 19,020,000,000" 73 | Puerto,18620000000.0,"Puerto 18,620,000,000" 74 | 
Azerbaijan,17790000000.0,"Azerbaijan 17,790,000,000" 75 | Croatia,16970000000.0,"Croatia 16,970,000,000" 76 | Iceland,16940000000.0,"Iceland 16,940,000,000" 77 | Cuba,16200000000.0,"Cuba 16,200,000,000" 78 | "Korea,",16000000000.0,"Korea, 16,000,000,000" 79 | Dominican,15140000000.0,"Dominican 15,140,000,000" 80 | Jordan,14560000000.0,"Jordan 14,560,000,000" 81 | Tajikistan,14420000000.0,"Tajikistan 14,420,000,000" 82 | Tunisia,13310000000.0,"Tunisia 13,310,000,000" 83 | Slovenia,13020000000.0,"Slovenia 13,020,000,000" 84 | Lebanon,12940000000.0,"Lebanon 12,940,000,000" 85 | Bosnia,12560000000.0,"Bosnia 12,560,000,000" 86 | Turkmenistan,11750000000.0,"Turkmenistan 11,750,000,000" 87 | Bahrain,11690000000.0,"Bahrain 11,690,000,000" 88 | Mozambique,11280000000.0,"Mozambique 11,280,000,000" 89 | Ghana,10580000000.0,"Ghana 10,580,000,000" 90 | Sri,10170000000.0,"Sri 10,170,000,000" 91 | Kyrgyzstan,9943000000.0,"Kyrgyzstan 9,943,000,000" 92 | Lithuania,9664000000.0,"Lithuania 9,664,000,000" 93 | Uruguay,9559000000.0,"Uruguay 9,559,000,000" 94 | Costa,8987000000.0,"Costa 8,987,000,000" 95 | Guatemala,8915000000.0,"Guatemala 8,915,000,000" 96 | Georgia,8468000000.0,"Georgia 8,468,000,000" 97 | Trinidad,8365000000.0,"Trinidad 8,365,000,000" 98 | Zambia,8327000000.0,"Zambia 8,327,000,000" 99 | Paraguay,8125000000.0,"Paraguay 8,125,000,000" 100 | Albania,7793000000.0,"Albania 7,793,000,000" 101 | Burma,7765000000.0,"Burma 7,765,000,000" 102 | Estonia,7417000000.0,"Estonia 7,417,000,000" 103 | "Congo,",7292000000.0,"Congo, 7,292,000,000" 104 | Panama,7144000000.0,"Panama 7,144,000,000" 105 | Latvia,7141000000.0,"Latvia 7,141,000,000" 106 | Macedonia,6960000000.0,"Macedonia 6,960,000,000" 107 | Zimbabwe,6831000000.0,"Zimbabwe 6,831,000,000" 108 | Kenya,6627000000.0,"Kenya 6,627,000,000" 109 | Bolivia,6456000000.0,"Bolivia 6,456,000,000" 110 | Luxembourg,6108000000.0,"Luxembourg 6,108,000,000" 111 | Sudan,5665000000.0,"Sudan 5,665,000,000" 112 | El,5665000000.0,"El 
5,665,000,000" 113 | Cameroon,5535000000.0,"Cameroon 5,535,000,000" 114 | West,5312000000.0,"West 5,312,000,000" 115 | Ethiopia,5227000000.0,"Ethiopia 5,227,000,000" 116 | Armenia,5043000000.0,"Armenia 5,043,000,000" 117 | Honduras,5036000000.0,"Honduras 5,036,000,000" 118 | Angola,4842000000.0,"Angola 4,842,000,000" 119 | Cote,4731000000.0,"Cote 4,731,000,000" 120 | Tanzania,4545000000.0,"Tanzania 4,545,000,000" 121 | Nicaragua,4412000000.0,"Nicaragua 4,412,000,000" 122 | Moldova,4305000000.0,"Moldova 4,305,000,000" 123 | Cyprus,4296000000.0,"Cyprus 4,296,000,000" 124 | Macau,4291000000.0,"Macau 4,291,000,000" 125 | Namibia,4238000000.0,"Namibia 4,238,000,000" 126 | Mongolia,4204000000.0,"Mongolia 4,204,000,000" 127 | Afghanistan,3893000000.0,"Afghanistan 3,893,000,000" 128 | Yemen,3838000000.0,"Yemen 3,838,000,000" 129 | Brunei,3766000000.0,"Brunei 3,766,000,000" 130 | Cambodia,3553000000.0,"Cambodia 3,553,000,000" 131 | Montenegro,3465000000.0,"Montenegro 3,465,000,000" 132 | Nepal,3239000000.0,"Nepal 3,239,000,000" 133 | Botswana,3213000000.0,"Botswana 3,213,000,000" 134 | Papua,3116000000.0,"Papua 3,116,000,000" 135 | Jamaica,3008000000.0,"Jamaica 3,008,000,000" 136 | Kosovo,2887000000.0,"Kosovo 2,887,000,000" 137 | Laos,2874000000.0,"Laos 2,874,000,000" 138 | Uganda,2821000000.0,"Uganda 2,821,000,000" 139 | New,2716000000.0,"New 2,716,000,000" 140 | Mauritius,2658000000.0,"Mauritius 2,658,000,000" 141 | Senegal,2586000000.0,"Senegal 2,586,000,000" 142 | Bhutan,2085000000.0,"Bhutan 2,085,000,000" 143 | Malawi,2027000000.0,"Malawi 2,027,000,000" 144 | Madagascar,1883000000.0,"Madagascar 1,883,000,000" 145 | "Bahamas,",1716000000.0,"Bahamas, 1,716,000,000" 146 | Gabon,1680000000.0,"Gabon 1,680,000,000" 147 | Suriname,1572000000.0,"Suriname 1,572,000,000" 148 | Guam,1566000000.0,"Guam 1,566,000,000" 149 | Liechtenstein,1360000000.0,"Liechtenstein 1,360,000,000" 150 | Swaziland,1295000000.0,"Swaziland 1,295,000,000" 151 | Burkina,985500000.0,"Burkina 985,500,000" 
152 | Togo,976000000.0,"Togo 976,000,000" 153 | Curacao,968000000.0,"Curacao 968,000,000" 154 | Mauritania,962600000.0,"Mauritania 962,600,000" 155 | Barbados,938000000.0,"Barbados 938,000,000" 156 | Niger,930200000.0,"Niger 930,200,000" 157 | Aruba,920700000.0,"Aruba 920,700,000" 158 | Benin,911000000.0,"Benin 911,000,000" 159 | Guinea,903000000.0,"Guinea 903,000,000" 160 | Mali,882600000.0,"Mali 882,600,000" 161 | Fiji,777600000.0,"Fiji 777,600,000" 162 | "Congo,",740000000.0,"Congo, 740,000,000" 163 | Virgin,723500000.0,"Virgin 723,500,000" 164 | Lesotho,707000000.0,"Lesotho 707,000,000" 165 | South,694100000.0,"South 694,100,000" 166 | Bermuda,664200000.0,"Bermuda 664,200,000" 167 | French,652900000.0,"French 652,900,000" 168 | Jersey,630100000.0,"Jersey 630,100,000" 169 | Belize,605000000.0,"Belize 605,000,000" 170 | Andorra,562400000.0,"Andorra 562,400,000" 171 | Guyana,558000000.0,"Guyana 558,000,000" 172 | Cayman,545900000.0,"Cayman 545,900,000" 173 | Haiti,452000000.0,"Haiti 452,000,000" 174 | Rwanda,365500000.0,"Rwanda 365,500,000" 175 | Saint,336400000.0,"Saint 336,400,000" 176 | Djibouti,311600000.0,"Djibouti 311,600,000" 177 | Seychelles,293900000.0,"Seychelles 293,900,000" 178 | Somalia,293000000.0,"Somalia 293,000,000" 179 | Antigua,293000000.0,"Antigua 293,000,000" 180 | Greenland,292000000.0,"Greenland 292,000,000" 181 | Cabo,285500000.0,"Cabo 285,500,000" 182 | Eritrea,284000000.0,"Eritrea 284,000,000" 183 | Burundi,282900000.0,"Burundi 282,900,000" 184 | Liberia,276900000.0,"Liberia 276,900,000" 185 | Maldives,267100000.0,"Maldives 267,100,000" 186 | Faroe,261300000.0,"Faroe 261,300,000" 187 | "Gambia,",218600000.0,"Gambia, 218,600,000" 188 | Chad,190700000.0,"Chad 190,700,000" 189 | "Micronesia,",178600000.0,"Micronesia, 178,600,000" 190 | Grenada,178000000.0,"Grenada 178,000,000" 191 | Central,168300000.0,"Central 168,300,000" 192 | Turks,167400000.0,"Turks 167,400,000" 193 | Gibraltar,160000000.0,"Gibraltar 160,000,000" 194 | 
American,146000000.0,"American 146,000,000" 195 | Sierra,134900000.0,"Sierra 134,900,000" 196 | Saint,130200000.0,"Saint 130,200,000" 197 | Saint,127400000.0,"Saint 127,400,000" 198 | Timor-Leste,125300000.0,"Timor-Leste 125,300,000" 199 | Equatorial,93000000.0,"Equatorial 93,000,000" 200 | Samoa,90400000.0,"Samoa 90,400,000" 201 | Dominica,89750000.0,"Dominica 89,750,000" 202 | Western,83700000.0,"Western 83,700,000" 203 | Solomon,79050000.0,"Solomon 79,050,000" 204 | Sao,60450000.0,"Sao 60,450,000" 205 | British,51150000.0,"British 51,150,000" 206 | Vanuatu,49290000.0,"Vanuatu 49,290,000" 207 | Guinea-Bissau,46500000.0,"Guinea-Bissau 46,500,000" 208 | Tonga,44640000.0,"Tonga 44,640,000" 209 | Saint,39990000.0,"Saint 39,990,000" 210 | Comoros,39990000.0,"Comoros 39,990,000" 211 | Cook,28950000.0,"Cook 28,950,000" 212 | Kiribati,24180000.0,"Kiribati 24,180,000" 213 | Montserrat,23250000.0,"Montserrat 23,250,000" 214 | Nauru,23250000.0,"Nauru 23,250,000" 215 | Falkland,11160000.0,"Falkland 11,160,000" 216 | Saint,7440000.0,"Saint 7,440,000" 217 | Niue,2790000.0,"Niue 2,790,000" 218 | Gaza,202000.0,"Gaza 202,000" 219 | Malta,174700.0,"Malta 174,700" 220 | Northern,48300.0,"Northern 48,300" 221 | -------------------------------------------------------------------------------- /7. Geographical Plotting/Choropleth Maps Exercise - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise - Solutions\n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. 
Due to the Nature of Plotly we can't show you examples embedded inside the notebook.\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 13, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "" 37 | ], 38 | "text/plain": [ 39 | "" 40 | ] 41 | }, 42 | "metadata": {}, 43 | "output_type": "display_data" 44 | } 45 | ], 46 | "source": [ 47 | "import plotly.graph_objs as go \n", 48 | "from plotly.offline import init_notebook_mode,iplot,plot\n", 49 | "init_notebook_mode(connected=True) " 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import pandas as pd" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "df = pd.read_csv('2014_World_Power_Consumption')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "** Check the head of the DataFrame. **" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": [ 98 | "
\n", 99 | "\n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " Country Power Consumption KWH Text\n", 145 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 146 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 147 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 148 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 149 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 150 | ] 151 | }, 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "df.head()" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 19, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "data = dict(\n", 177 | " type = 'choropleth',\n", 178 | " colorscale = 'Viridis',\n", 179 | " reversescale = True,\n", 180 | " locations = df['Country'],\n", 181 | " locationmode = \"country names\",\n", 182 | " z = df['Power Consumption KWH'],\n", 183 | " text = df['Country'],\n", 184 | " colorbar = {'title' : 'Power Consumption KWH'},\n", 185 | " ) \n", 186 | "\n", 187 | "layout = dict(title = '2014 Power Consumption KWH',\n", 188 | " geo = dict(showframe = False,projection = {'type':'Mercator'})\n", 189 | " )" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 20, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "'file:///Users/marci/Pierian-Data-Courses/Udemy-Python-Data-Science-Machine-Learning/Python-Data-Science-and-Machine-Learning-Bootcamp/Python-for-Data-Visualization/Geographical Plotting/temp-plot.html'" 203 | ] 204 | }, 205 | "execution_count": 20, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 
209 | ], 210 | "source": [ 211 | "choromap = go.Figure(data = [data],layout = layout)\n", 212 | "plot(choromap,validate=False)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## USA Choropleth\n", 220 | "\n", 221 | "** Import the 2012_Election_Data csv file using pandas. **" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "metadata": { 228 | "collapsed": true 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "usdf = pd.read_csv('2012_Election_Data')" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "** Check the head of the DataFrame. **" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 8, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/html": [ 252 | "
\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 379 | "
" 380 | ], 381 | "text/plain": [ 382 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 383 | "0 2012 41 1 Alabama \n", 384 | "1 2012 81 2 Alaska \n", 385 | "2 2012 61 3 Arizona \n", 386 | "3 2012 42 4 Arkansas \n", 387 | "4 2012 71 5 California \n", 388 | "\n", 389 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 390 | "0 NaN 58.6% 56.0% \n", 391 | "1 58.9% 58.7% 55.3% \n", 392 | "2 53.0% 52.6% 46.5% \n", 393 | "3 51.1% 50.7% 47.7% \n", 394 | "4 55.7% 55.1% 45.1% \n", 395 | "\n", 396 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 397 | "0 NaN 2,074,338 3,539,217 \n", 398 | "1 301,694 300,495 511,792 \n", 399 | "2 2,323,579 2,306,559 4,387,900 \n", 400 | "3 1,078,548 1,069,468 2,109,847 \n", 401 | "4 13,202,158 13,038,547 23,681,837 \n", 402 | "\n", 403 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 404 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 405 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 406 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 407 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 408 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 409 | "\n", 410 | " Total Ineligible Felon State Abv \n", 411 | "0 71,584 AL \n", 412 | "1 11,317 AK \n", 413 | "2 81,048 AZ \n", 414 | "3 53,808 AR \n", 415 | "4 208,742 CA " 416 | ] 417 | }, 418 | "execution_count": 8, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "usdf.head()" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 16, 437 | "metadata": { 438 | "collapsed": false 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "data = dict(type='choropleth',\n", 443 | " colorscale = 'Viridis',\n", 444 | " reversescale = True,\n", 445 | " locations = usdf['State Abv'],\n", 446 | " z = usdf['Voting-Age Population (VAP)'],\n", 447 | " locationmode = 'USA-states',\n", 448 | " text = usdf['State'],\n", 449 | " marker = dict(line = dict(color = 'rgb(255,255,255)',width = 1)),\n", 450 | " colorbar = {'title':\"Voting-Age Population (VAP)\"}\n", 451 | " ) " 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 17, 457 | "metadata": { 458 | "collapsed": true 459 | }, 460 | "outputs": [], 461 | "source": [ 462 | "layout = dict(title = '2012 General Election Voting Data',\n", 463 | " geo = dict(scope='usa',\n", 464 | " showlakes = True,\n", 465 | " lakecolor = 'rgb(85,173,240)')\n", 466 | " )" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 18, 472 | "metadata": { 473 | "collapsed": false 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "'file:///Users/marci/Pierian-Data-Courses/Udemy-Python-Data-Science-Machine-Learning/Python-Data-Science-and-Machine-Learning-Bootcamp/Python-for-Data-Visualization/Geographical Plotting/temp-plot.html'" 480 | ] 481 | }, 482 | "execution_count": 18, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | "choromap = go.Figure(data = [data],layout = layout)\n", 489 | "plot(choromap,validate=False)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "# Great Job!" 
497 | ] 498 | } 499 | ], 500 | "metadata": { 501 | "kernelspec": { 502 | "display_name": "Python 3", 503 | "language": "python", 504 | "name": "python3" 505 | }, 506 | "language_info": { 507 | "codemirror_mode": { 508 | "name": "ipython", 509 | "version": 3 510 | }, 511 | "file_extension": ".py", 512 | "mimetype": "text/x-python", 513 | "name": "python", 514 | "nbconvert_exporter": "python", 515 | "pygments_lexer": "ipython3", 516 | "version": "3.5.1" 517 | } 518 | }, 519 | "nbformat": 4, 520 | "nbformat_minor": 0 521 | } 522 | -------------------------------------------------------------------------------- /7. Geographical Plotting/Choropleth Maps Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise \n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. 
Due to the Nature of Plotly we can't show you examples\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 38, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import plotly.graph_objs as go \n", 35 | "from plotly.offline import init_notebook_mode,iplot\n", 36 | "init_notebook_mode(connected=True) " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 152, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "** Check the head of the DataFrame. **" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 156, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " Country Power Consumption KWH Text\n", 128 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 129 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 130 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 131 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 132 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 133 | ] 134 | }, 135 | "execution_count": 156, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "choromap = go.Figure(data = [data],layout = layout)\n", 167 | "iplot(choromap,validate=False)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## USA Choropleth\n", 175 | "\n", 176 | "** Import the 2012_Election_Data csv file using pandas. **" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 109, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "** Check the head of the DataFrame. **" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 110, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 332 | "
" 333 | ], 334 | "text/plain": [ 335 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 336 | "0 2012 41 1 Alabama \n", 337 | "1 2012 81 2 Alaska \n", 338 | "2 2012 61 3 Arizona \n", 339 | "3 2012 42 4 Arkansas \n", 340 | "4 2012 71 5 California \n", 341 | "\n", 342 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 343 | "0 NaN 58.6% 56.0% \n", 344 | "1 58.9% 58.7% 55.3% \n", 345 | "2 53.0% 52.6% 46.5% \n", 346 | "3 51.1% 50.7% 47.7% \n", 347 | "4 55.7% 55.1% 45.1% \n", 348 | "\n", 349 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 350 | "0 NaN 2,074,338 3,539,217 \n", 351 | "1 301,694 300,495 511,792 \n", 352 | "2 2,323,579 2,306,559 4,387,900 \n", 353 | "3 1,078,548 1,069,468 2,109,847 \n", 354 | "4 13,202,158 13,038,547 23,681,837 \n", 355 | "\n", 356 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 357 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 358 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 359 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 360 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 361 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 362 | "\n", 363 | " Total Ineligible Felon State Abv \n", 364 | "0 71,584 AL \n", 365 | "1 11,317 AK \n", 366 | "2 81,048 AZ \n", 367 | "3 53,808 AR \n", 368 | "4 208,742 CA " 369 | ] 370 | }, 371 | "execution_count": 110, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 120, 388 | "metadata": { 389 | "collapsed": false 390 | }, 391 | "outputs": [], 392 | "source": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 121, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "choromap = go.Figure(data = [data],layout = layout)\n", 412 | "iplot(choromap,validate=False)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "# Great Job!" 420 | ] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "Python 3", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.5.1" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 0 444 | } 445 | -------------------------------------------------------------------------------- /7. Geographical Plotting/plotly_cheat_sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/7. 
Geographical Plotting/plotly_cheat_sheet.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-for-Data-Science-and-Machine-Learning-Bootcamp 2 | This repository is based on the online classes I did with Udemy to leverage Python to solve Data Science problems. 3 | The instructor Jose Portilla, a Data Scientist himself, lectured the course, which is worth taking. 4 | I will only post the iPython notebooks and they will be posted as I finish them myself. This helps me to keep track of my studies 5 | and also refer to it on the fly while I am at work. Reference: 6 | https://www.udemy.com/python-for-data-science-and-machine-learning-bootcamp/learn/v4/overview 7 | 8 | Following are the topics posted- 9 | 10 | 1) Python Crash Course 11 | 2) NumPy (Numeric Python) 12 | 3) Pandas 13 | 4) Matplotlib 14 | 5) Seaborn 15 | 6) Pandas Built-in Data Visualization 16 | 7) Plotly and Cufflinks 17 | 8) Geographical Plotting 18 | 9) Data - Capstone Project 19 | 10) Linear Regression 20 | 11) Logistic Regression 21 | 12) K-Nearest Neighbors 22 | 13) Decision Trees and Random Forests 23 | 14) Support Vector Machines 24 | 15) K-Means Clustering 25 | 16) Principal Component Analysis 26 | --------------------------------------------------------------------------------