├── .gitattributes ├── .gitignore ├── 1. Python Crash Course ├── Python Crash Course Exercises - Solutions.ipynb ├── Python Crash Course Exercises .ipynb └── Python Crash Course.ipynb ├── 10. Linear-Regression ├── .ipynb_checkpoints │ ├── Creating Fake Data-checkpoint.ipynb │ ├── Linear Regression - Project Exercise - Solutions-checkpoint.ipynb │ ├── Linear Regression - Project Exercise -checkpoint.ipynb │ └── Linear Regression with Python-checkpoint.ipynb ├── Ecommerce Customers ├── Linear Regression - Project Exercise - Solutions.ipynb ├── Linear Regression - Project Exercise .ipynb ├── Linear Regression with Python.ipynb └── USA_Housing.csv ├── 11. Logistic-Regression ├── .ipynb_checkpoints │ ├── Logistic Regression Project - Solutions-checkpoint.ipynb │ ├── Logistic Regression Project -checkpoint.ipynb │ └── Logistic Regression with Python-checkpoint.ipynb ├── Logistic Regression Project - Solutions.ipynb ├── Logistic Regression Project .ipynb ├── Logistic Regression with Python.ipynb ├── advertising.csv ├── titanic_test.csv └── titanic_train.csv ├── 12. K-Nearest-Neighbors ├── .ipynb_checkpoints │ ├── K Nearest Neighbors Project - Solutions-checkpoint.ipynb │ ├── K Nearest Neighbors Project-checkpoint.ipynb │ ├── K Nearest Neighbors with Python-checkpoint.ipynb │ └── kNN_classification-checkpoint.ipynb ├── Classified Data ├── K Nearest Neighbors Project - Solutions.ipynb ├── K Nearest Neighbors Project.ipynb ├── K Nearest Neighbors with Python.ipynb ├── KNN_Project_Data ├── iris.data.txt └── kNN_classification.ipynb ├── 13. 
Decision-Trees-and-Random-Forests ├── .ipynb_checkpoints │ ├── Decision Trees and Random Forest Project - Solutions-checkpoint.ipynb │ ├── Decision Trees and Random Forest Project -checkpoint.ipynb │ └── Decision Trees and Random Forests in Python-checkpoint.ipynb ├── Decision Trees and Random Forest Project - Solutions.ipynb ├── Decision Trees and Random Forest Project .ipynb ├── Decision Trees and Random Forests in Python.ipynb ├── kyphosis.csv └── loan_data.csv ├── 14. Support-Vector-Machines ├── .ipynb_checkpoints │ ├── Support Vector Machines Project - Solutions-checkpoint.ipynb │ ├── Support Vector Machines Project -checkpoint.ipynb │ └── Support Vector Machines with Python-checkpoint.ipynb ├── Support Vector Machines Project - Solutions.ipynb ├── Support Vector Machines Project .ipynb └── Support Vector Machines with Python.ipynb ├── 15. K-Means-Clustering ├── .ipynb_checkpoints │ ├── K Means Clustering Project - Solutions-checkpoint.ipynb │ ├── K Means Clustering Project -checkpoint.ipynb │ └── K Means Clustering with Python-checkpoint.ipynb ├── College_Data ├── K Means Clustering Project - Solutions.ipynb ├── K Means Clustering Project .ipynb └── K Means Clustering with Python.ipynb ├── 16. Principal-Component-Analysis ├── .ipynb_checkpoints │ └── Principal Component Analysis-checkpoint.ipynb ├── PCA.png └── Principal Component Analysis.ipynb ├── 2. NumPy ├── NumPy Arrays.ipynb ├── Numpy Exercise - Solutions.ipynb ├── Numpy Exercise .ipynb ├── Numpy Indexing and Selection.ipynb └── Numpy Operations.ipynb ├── 3. 
Pandas ├── .ipynb_checkpoints │ ├── Data Input and Output-checkpoint.ipynb │ ├── DataFrames-checkpoint.ipynb │ ├── Groupby-checkpoint.ipynb │ ├── Introduction to Pandas-checkpoint.ipynb │ ├── Merging, Joining, and Concatenating -checkpoint.ipynb │ ├── Missing Data-checkpoint.ipynb │ ├── Operations-checkpoint.ipynb │ └── Series-checkpoint.ipynb ├── Data Input and Output.ipynb ├── DataFrames.ipynb ├── Excel_Sample.xlsx ├── Groupby.ipynb ├── Introduction to Pandas.ipynb ├── Merging, Joining, and Concatenating .ipynb ├── Missing Data.ipynb ├── Operations.ipynb ├── Pandas Exercises │ ├── .ipynb_checkpoints │ │ ├── Ecommerce Purchases Exercise - Solutions-checkpoint.ipynb │ │ ├── Ecommerce Purchases Exercise -checkpoint.ipynb │ │ ├── SF Salaries Exercise- Solutions-checkpoint.ipynb │ │ └── SF Salaries Exercise-checkpoint.ipynb │ ├── Ecommerce Purchases │ ├── Ecommerce Purchases Exercise - Solutions.ipynb │ ├── Ecommerce Purchases Exercise .ipynb │ ├── SF Salaries Exercise- Solutions.ipynb │ ├── SF Salaries Exercise.ipynb │ └── Salaries.csv ├── Series.ipynb ├── example └── multi_index_example ├── 4. Matplotlib ├── .ipynb_checkpoints │ ├── Advanced Matplotlib Concepts-checkpoint.ipynb │ ├── Matplotlib Concepts Lecture-checkpoint.ipynb │ ├── Matplotlib Exercises - Solutions-checkpoint.ipynb │ └── Matplotlib Exercises -checkpoint.ipynb ├── Advanced Matplotlib Concepts.ipynb ├── Matplotlib Concepts Lecture.ipynb ├── Matplotlib Exercises - Solutions.ipynb └── Matplotlib Exercises .ipynb ├── 5. Seaborn ├── Categorical Plots.ipynb ├── Distribution Plots.ipynb ├── Grids.ipynb ├── Matrix Plots.ipynb ├── Regression Plots.ipynb ├── Seaborn Exercises - Solutions.ipynb ├── Seaborn Exercises .ipynb └── Style and Color.ipynb ├── 6. 
Pandas Built-in Data Viz ├── .ipynb_checkpoints │ ├── Pandas Built-in Data Visualization-checkpoint.ipynb │ ├── Pandas Data Visualization Exercise - Solutions-checkpoint.ipynb │ └── Pandas Data Visualization Exercise -checkpoint.ipynb ├── Pandas Built-in Data Visualization.ipynb ├── Pandas Data Visualization Exercise - Solutions.ipynb ├── Pandas Data Visualization Exercise .ipynb ├── df1 ├── df2 └── df3 ├── 7. Geographical Plotting ├── .ipynb_checkpoints │ ├── Choropleth Maps Exercise - Solutions-checkpoint.ipynb │ ├── Choropleth Maps Exercise -checkpoint.ipynb │ └── Choropleth Maps-checkpoint.ipynb ├── 2011_US_AGRI_Exports ├── 2012_Election_Data ├── 2014_World_GDP ├── 2014_World_Power_Consumption ├── Choropleth Maps Exercise - Solutions.ipynb ├── Choropleth Maps Exercise .ipynb ├── Choropleth Maps.ipynb └── plotly_cheat_sheet.pdf ├── 8. Plotly and Cufflinks ├── .ipynb_checkpoints │ └── Plotly and Cufflinks-checkpoint.ipynb └── Plotly and Cufflinks.ipynb ├── 9. Data-Capstone-Projects ├── .ipynb_checkpoints │ ├── 911 Calls Data Capstone Project - Solutions-checkpoint.ipynb │ ├── 911 Calls Data Capstone Project -checkpoint.ipynb │ ├── Finance Project - Solutions-checkpoint.ipynb │ ├── Finance Project -checkpoint.ipynb │ └── SF Salaries Exercise- Solutions-checkpoint.ipynb ├── 911 Calls Data Capstone Project - Solutions.ipynb ├── 911 Calls Data Capstone Project .ipynb ├── 911.csv ├── Finance Project - Solutions.ipynb ├── Finance Project .ipynb └── precipitation.html └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf 
diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /1. Python Crash Course/Python Crash Course Exercises - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Crash Course Exercises - Solutions\n", 8 | "\n", 9 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. 
I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp]()" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Exercises\n", 17 | "\n", 18 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "** What is 7 to the power of 4?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "2401" 39 | ] 40 | }, 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "7 **4" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "** Split this string:**\n", 55 | "\n", 56 | " s = \"Hi there Sam!\"\n", 57 | " \n", 58 | "**into a list. **" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "s = 'Hi there Sam!'" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['Hi', 'there', 'dad!']" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "s.split()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "** Given the variables:**\n", 99 | "\n", 100 | " planet = \"Earth\"\n", 101 | " diameter = 12742\n", 102 | "\n", 103 | "** Use .format() to print the following string: **\n", 104 | "\n", 105 | " The diameter of Earth is 12742 kilometers." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "planet = \"Earth\"\n", 117 | "diameter = 12742" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "The diameter of Earth is 12742 kilometers.\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "print(\"The diameter of {} is {} kilometers.\".format(planet,diameter))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "** Given this nested list, use indexing to grab the word \"hello\" **" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 14, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "'hello'" 168 | ] 169 | }, 170 | "execution_count": 14, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "lst[3][1][2][0]" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "** Given this nest dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 16, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 22, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "'hello'" 208 | ] 209 | }, 210 | "execution_count": 22, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "d['k1'][3]['tricky'][3]['target'][3]" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "** What is the main difference between a tuple and a list? **" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 23, 229 | "metadata": { 230 | "collapsed": true 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# Tuple is immutable" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "** Create a function that grabs the email website domain from a string in the form: **\n", 242 | "\n", 243 | " user@domain.com\n", 244 | " \n", 245 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 24, 251 | "metadata": { 252 | "collapsed": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "def domainGet(email):\n", 257 | " return email.split('@')[-1]" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 26, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "'domain.com'" 271 | ] 272 | }, 273 | "execution_count": 26, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 
| "source": [ 279 | "domainGet('user@domain.com')" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 27, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "def findDog(st):\n", 298 | " return 'dog' in st.lower().split()" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 28, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "True" 312 | ] 313 | }, 314 | "execution_count": 28, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "findDog('Is there a dog here?')" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. 
**" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 30, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "def countDog(st):\n", 339 | " count = 0\n", 340 | " for word in st.lower().split():\n", 341 | " if word == 'dog':\n", 342 | " count += 1\n", 343 | " return count" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 31, 349 | "metadata": { 350 | "collapsed": false 351 | }, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "2" 357 | ] 358 | }, 359 | "execution_count": 31, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "countDog('This dog runs faster than the other dog dude!')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. For example:**\n", 373 | "\n", 374 | " seq = ['soup','dog','salad','cat','great']\n", 375 | "\n", 376 | "**should be filtered down to:**\n", 377 | "\n", 378 | " ['soup','salad']" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 34, 384 | "metadata": { 385 | "collapsed": true 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "seq = ['soup','dog','salad','cat','great']" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 35, 395 | "metadata": { 396 | "collapsed": false 397 | }, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/plain": [ 402 | "['soup', 'salad']" 403 | ] 404 | }, 405 | "execution_count": 35, 406 | "metadata": {}, 407 | "output_type": "execute_result" 408 | } 409 | ], 410 | "source": [ 411 | "list(filter(lambda word: word[0]=='s',seq))" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "### Final Problem\n", 419 | "**You are driving a little too fast, and a police 
officer stops you. Write a function\n", 420 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". \n", 421 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 422 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 423 | " cases. **" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 4, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "def caught_speeding(speed, is_birthday):\n", 435 | " \n", 436 | " if is_birthday:\n", 437 | " speeding = speed - 5\n", 438 | " else:\n", 439 | " speeding = speed\n", 440 | " \n", 441 | " if speeding > 80:\n", 442 | " return 'Big Ticket'\n", 443 | " elif speeding > 60:\n", 444 | " return 'Small Ticket'\n", 445 | " else:\n", 446 | " return 'No Ticket'" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 5, 452 | "metadata": { 453 | "collapsed": false 454 | }, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/plain": [ 459 | "'Small Ticket'" 460 | ] 461 | }, 462 | "execution_count": 5, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "caught_speeding(81,True)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 6, 474 | "metadata": { 475 | "collapsed": false 476 | }, 477 | "outputs": [ 478 | { 479 | "data": { 480 | "text/plain": [ 481 | "'Big Ticket'" 482 | ] 483 | }, 484 | "execution_count": 6, 485 | "metadata": {}, 486 | "output_type": "execute_result" 487 | } 488 | ], 489 | "source": [ 490 | "caught_speeding(81,False)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "# Great job!" 
498 | ] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.5.1" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 0 522 | } 523 | -------------------------------------------------------------------------------- /1. Python Crash Course/Python Crash Course Exercises .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Crash Course Exercises \n", 8 | "\n", 9 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp](https://www.udemy.com/complete-python-bootcamp)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Exercises\n", 17 | "\n", 18 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "** What is 7 to the power of 4?**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "2401" 39 | ] 40 | }, 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "** Split this string:**\n", 53 | "\n", 54 | " s = \"Hi there Sam!\"\n", 55 | " \n", 56 | "**into a list. **" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "['Hi', 'there', 'dad!']" 79 | ] 80 | }, 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "** Given the variables:**\n", 93 | "\n", 94 | " planet = \"Earth\"\n", 95 | " diameter = 12742\n", 96 | "\n", 97 | "** Use .format() to print the following string: **\n", 98 | "\n", 99 | " The diameter of Earth is 12742 kilometers." 
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "planet = \"Earth\"\n", 111 | "diameter = 12742" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "The diameter of Earth is 12742 kilometers.\n" 126 | ] 127 | } 128 | ], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "** Given this nested list, use indexing to grab the word \"hello\" **" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": { 142 | "collapsed": true 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 14, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "'hello'" 160 | ] 161 | }, 162 | "execution_count": 14, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "** Given this nested dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 16, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 22, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "'hello'" 198 | ] 199 | }, 200 | "execution_count": 22, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "** What is the main difference between a tuple and a list? **" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 23, 217 | "metadata": { 218 | "collapsed": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "# Tuple is immutable" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "** Create a function that grabs the email website domain from a string in the form: **\n", 230 | "\n", 231 | " user@domain.com\n", 232 | " \n", 233 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 24, 239 | "metadata": { 240 | "collapsed": true 241 | }, 242 | "outputs": [], 243 | "source": [] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 26, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "'domain.com'" 256 | ] 257 | }, 258 | "execution_count": 26, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "domainGet('user@domain.com')" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 
270 | "source": [ 271 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 27, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 28, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "True" 294 | ] 295 | }, 296 | "execution_count": 28, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "findDog('Is there a dog here?')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. **" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 30, 315 | "metadata": { 316 | "collapsed": false 317 | }, 318 | "outputs": [], 319 | "source": [] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 31, 324 | "metadata": { 325 | "collapsed": false 326 | }, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "2" 332 | ] 333 | }, 334 | "execution_count": 31, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "countDog('This dog runs faster than the other dog dude!')" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. 
For example:**\n", 348 | "\n", 349 | " seq = ['soup','dog','salad','cat','great']\n", 350 | "\n", 351 | "**should be filtered down to:**\n", 352 | "\n", 353 | " ['soup','salad']" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 34, 359 | "metadata": { 360 | "collapsed": true 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "seq = ['soup','dog','salad','cat','great']" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 35, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "['soup', 'salad']" 378 | ] 379 | }, 380 | "execution_count": 35, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": {}, 390 | "source": [ 391 | "### Final Problem\n", 392 | "**You are driving a little too fast, and a police officer stops you. Write a function\n", 393 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". \n", 394 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 395 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 396 | " cases. 
**" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 36, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "def caught_speeding(speed, is_birthday):\n", 408 | " pass" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 42, 414 | "metadata": { 415 | "collapsed": false 416 | }, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "'Small Ticket'" 422 | ] 423 | }, 424 | "execution_count": 42, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "caught_speeding(81,True)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 43, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/plain": [ 443 | "'Big Ticket'" 444 | ] 445 | }, 446 | "execution_count": 43, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "caught_speeding(81,False)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "# Great job!" 460 | ] 461 | } 462 | ], 463 | "metadata": { 464 | "kernelspec": { 465 | "display_name": "Python 3", 466 | "language": "python", 467 | "name": "python3" 468 | }, 469 | "language_info": { 470 | "codemirror_mode": { 471 | "name": "ipython", 472 | "version": 3 473 | }, 474 | "file_extension": ".py", 475 | "mimetype": "text/x-python", 476 | "name": "python", 477 | "nbconvert_exporter": "python", 478 | "pygments_lexer": "ipython3", 479 | "version": "3.5.1" 480 | } 481 | }, 482 | "nbformat": 4, 483 | "nbformat_minor": 0 484 | } 485 | -------------------------------------------------------------------------------- /12. 
K-Nearest-Neighbors/.ipynb_checkpoints/kNN_classification-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## kNN implementation for a Classification Problem\n", 8 | "The model for kNN is the entire dataset. When a prediction is required for a unseen data instance, the kNN algorithm will search through the training dataset for the k-most similar instances. The prediction attribute of the most similar instances is summarized and returned as the prediction for the unseen data.\n", 9 | "\n", 10 | "The similarity measure is dependent on the type of data. For real-valued data, the Euclidean distance can be used.\n", 11 | "Other types of data such as categorical or binary , other distance measures could be used.\n", 12 | "\n", 13 | "In the case of regression problems, the average of the predicted attribute may be returned. In the case of classification problems, the most prevalent class may be returned." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Handle Data and make them to train and test set" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 81, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import csv\n", 32 | "import random\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 82, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def loadDataset(filename, split, trainingSet = [], testSet = []):\n", 45 | " with open(filename) as csvfile:\n", 46 | " lines = csv.reader(csvfile)\n", 47 | " dataset = list(lines)\n", 48 | " for x in range(len(dataset) - 1):\n", 49 | " for y in range(4):\n", 50 | " dataset[x][y] = float(dataset[x][y])\n", 51 | " if random.random() < split:\n", 52 | " trainingSet.append(dataset[x])\n", 53 | " else:\n", 54 | " testSet.append(dataset[x])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 83, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "[[5.0, 3.6, 1.4, 0.2, 'Iris-setosa'], [5.4, 3.9, 1.7, 0.4, 'Iris-setosa'], [4.6, 3.4, 1.4, 0.3, 'Iris-setosa']]\n", 69 | "[[5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [4.7, 3.2, 1.3, 0.2, 'Iris-setosa']]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "trainingSet = []\n", 75 | "testSet = []\n", 76 | "a = loadDataset('iris.data.txt', 0.66, trainingSet, testSet)\n", 77 | "print(trainingSet[0:3])\n", 78 | "print(testSet[0:3])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Similarity\n", 86 | "In order to make predictions we need to calculate the similarity between any two given data instances. 
In this case it is the Euclidean distance which is the square root of the sum of the squared differences between the two array of numbers. " 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 84, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def euclideanDistance(instance1, instance2, length): # length : length of the array you want the distance to be calculated\n", 98 | " distance = 0\n", 99 | " for x in range(length):\n", 100 | " distance += np.power((instance1[x] - instance2[x]), 2)\n", 101 | " return np.sqrt(distance)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 85, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "3.46410161514\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "data1 = [2, 2, 2, 'a']\n", 121 | "data2 = [4, 4, 4, 'b']\n", 122 | "euc_dist = euclideanDistance(data1, data2, 3)\n", 123 | "print(euc_dist)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Neighbors\n", 131 | "Now that we have a similarity measure, we can use it to collect the k-most similar instances for a given unseen instance.\n", 132 | "So, in principle we are calculating the distances of all instances of the train set with the one instance of test set (unseen set) and selecting a subset with the smallest distance values." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 86, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "import operator\n", 144 | "\n", 145 | "def getNeighbors(trainingSet, testInstance, k):\n", 146 | " distances = []\n", 147 | " length = len(testInstance) - 1 # removing the response column from the array\n", 148 | " for x in range(len(trainingSet)):\n", 149 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n", 150 | " distances.append((trainingSet[x], dist)) # a tuple of training set observation and the distance.\n", 151 | " distances.sort(key = operator.itemgetter(1)) # sort the tuple by the value (ascending) as the input arg is 1\n", 152 | " neighbors = []\n", 153 | " for x in range(k): # k-nearest neighbors\n", 154 | " neighbors.append(distances[x][0]) # just the train instance and not the distance\n", 155 | " return neighbors" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 87, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "[[4, 4, 4, 'b']]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n", 175 | "testInstance = [5, 5, 5]\n", 176 | "k = 1\n", 177 | "neighbors = getNeighbors(trainSet, testInstance, 1)\n", 178 | "print(neighbors)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Response\n", 186 | "Once we have located the most similar neighbors for a test instance, the next task is to devise a predicted response based on these neighbors. We can do it by allowing each neighbor to vote for their class attribute and take majority vote as the prediction." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 88, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def getResponse(neighbors):\n", 198 | " classVotes = {}\n", 199 | " for x in range(len(neighbors)):\n", 200 | " response = neighbors[x][-1] # extracting the class value of the neighbors\n", 201 | " if response not in classVotes:\n", 202 | " classVotes[response] = 1\n", 203 | " else:\n", 204 | " classVotes[response] += 1\n", 205 | " sortedVotes = sorted(classVotes.items(), key = operator.itemgetter(1), reverse=True) # descending by values\n", 206 | " return sortedVotes[0][0] # 1st tuple and 1st item which is the response variable (with highest vote)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 89, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "a\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "neighbors = [[2, 2, 2, 'a'], [1, 1, 1, 'a'], [3, 3, 3, 'c']]\n", 226 | "response = getResponse(neighbors)\n", 227 | "print(response)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Accuracy" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 95, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "def getAccuracy(testSet, predictions):\n", 246 | " correct = 0\n", 247 | " for x in range(len(testSet)):\n", 248 | " if testSet[x][-1] == predictions[x]:\n", 249 | " correct += 1\n", 250 | " return (correct / float(len(testSet))) * 100.00" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 96, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "66.66666666666666\n" 265 | ] 266 | } 267 | ], 268 | 
"source": [ 269 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 270 | "predictions = ['a', 'a', 'a']\n", 271 | "accuracy = getAccuracy(testSet, predictions)\n", 272 | "print(accuracy)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 97, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 287 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 288 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 289 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 290 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 291 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 292 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 293 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 294 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 295 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 296 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 297 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 298 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 299 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 300 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 301 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 302 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 303 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 304 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 305 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 306 | "Predicted = 'Iris-virginica' Actual = 'Iris-versicolor'\n", 307 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 308 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 309 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 310 | "Predicted = 
'Iris-versicolor' Actual = 'Iris-versicolor'\n", 311 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 312 | "Predicted = 'Iris-versicolor' Actual = 'Iris-virginica'\n", 313 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 314 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 315 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 316 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 317 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 318 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 319 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 320 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 321 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 322 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 323 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 324 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 325 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 326 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 327 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 328 | "Accuracy: 95.23809523809523%\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "def main():\n", 334 | " trainingSet = []\n", 335 | " testSet = []\n", 336 | " split = 0.67\n", 337 | " loadDataset('iris.data.txt', split, trainingSet, testSet)\n", 338 | " \n", 339 | " predictions = []\n", 340 | " k = 10\n", 341 | " for x in range(len(testSet)):\n", 342 | " neighbors = getNeighbors(trainingSet, testSet[x], k)\n", 343 | " result = getResponse(neighbors)\n", 344 | " predictions.append(result)\n", 345 | " print('Predicted = ' +repr(result)+ ' Actual = ' +repr(testSet[x][-1]))\n", 346 | " accuracy = getAccuracy(testSet, predictions)\n", 347 | " print('Accuracy: ' +repr(accuracy)+ '%')\n", 348 | "main()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": { 355 | 
"collapsed": true 356 | }, 357 | "outputs": [], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.5.1" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 0 391 | } 392 | -------------------------------------------------------------------------------- /12. K-Nearest-Neighbors/iris.data.txt: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 
5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 
6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 
6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /12. K-Nearest-Neighbors/kNN_classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## kNN implementation for a Classification Problem\n", 8 | "The model for kNN is the entire dataset. When a prediction is required for an unseen data instance, the kNN algorithm will search through the training dataset for the k-most similar instances. The prediction attribute of the most similar instances is summarized and returned as the prediction for the unseen data.\n", 9 | "\n", 10 | "The similarity measure is dependent on the type of data. For real-valued data, the Euclidean distance can be used.\n", 11 | "For other types of data, such as categorical or binary, other distance measures could be used.\n", 12 | "\n", 13 | "In the case of regression problems, the average of the predicted attribute may be returned. In the case of classification problems, the most prevalent class may be returned." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "### Handle Data and make them to train and test set" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 81, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "import csv\n", 32 | "import random\n", 33 | "import numpy as np" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 82, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "def loadDataset(filename, split, trainingSet = [], testSet = []):\n", 45 | " with open(filename) as csvfile:\n", 46 | " lines = csv.reader(csvfile)\n", 47 | " dataset = list(lines)\n", 48 | " for x in range(len(dataset) - 1):\n", 49 | " for y in range(4):\n", 50 | " dataset[x][y] = float(dataset[x][y])\n", 51 | " if random.random() < split:\n", 52 | " trainingSet.append(dataset[x])\n", 53 | " else:\n", 54 | " testSet.append(dataset[x])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 83, 60 | "metadata": { 61 | "collapsed": false 62 | }, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "[[5.0, 3.6, 1.4, 0.2, 'Iris-setosa'], [5.4, 3.9, 1.7, 0.4, 'Iris-setosa'], [4.6, 3.4, 1.4, 0.3, 'Iris-setosa']]\n", 69 | "[[5.1, 3.5, 1.4, 0.2, 'Iris-setosa'], [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'], [4.7, 3.2, 1.3, 0.2, 'Iris-setosa']]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "trainingSet = []\n", 75 | "testSet = []\n", 76 | "a = loadDataset('iris.data.txt', 0.66, trainingSet, testSet)\n", 77 | "print(trainingSet[0:3])\n", 78 | "print(testSet[0:3])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Similarity\n", 86 | "In order to make predictions we need to calculate the similarity between any two given data instances. 
In this case it is the Euclidean distance which is the square root of the sum of the squared differences between the two array of numbers. " 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 84, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "def euclideanDistance(instance1, instance2, length): # length : length of the array you want the distance to be calculated\n", 98 | " distance = 0\n", 99 | " for x in range(length):\n", 100 | " distance += np.power((instance1[x] - instance2[x]), 2)\n", 101 | " return np.sqrt(distance)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 85, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "3.46410161514\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "data1 = [2, 2, 2, 'a']\n", 121 | "data2 = [4, 4, 4, 'b']\n", 122 | "euc_dist = euclideanDistance(data1, data2, 3)\n", 123 | "print(euc_dist)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Neighbors\n", 131 | "Now that we have a similarity measure, we can use it to collect the k-most similar instances for a given unseen instance.\n", 132 | "So, in principle we are calculating the distances of all instances of the train set with the one instance of test set (unseen set) and selecting a subset with the smallest distance values." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 86, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "import operator\n", 144 | "\n", 145 | "def getNeighbors(trainingSet, testInstance, k):\n", 146 | " distances = []\n", 147 | " length = len(testInstance) - 1 # removing the response column from the array\n", 148 | " for x in range(len(trainingSet)):\n", 149 | " dist = euclideanDistance(testInstance, trainingSet[x], length)\n", 150 | " distances.append((trainingSet[x], dist)) # a tuple of training set observation and the distance.\n", 151 | " distances.sort(key = operator.itemgetter(1)) # sort the tuple by the value (ascending) as the input arg is 1\n", 152 | " neighbors = []\n", 153 | " for x in range(k): # k-nearest neighbors\n", 154 | " neighbors.append(distances[x][0]) # just the train instance and not the distance\n", 155 | " return neighbors" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 87, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "[[4, 4, 4, 'b']]\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "trainSet = [[2, 2, 2, 'a'], [4, 4, 4, 'b']]\n", 175 | "testInstance = [5, 5, 5]\n", 176 | "k = 1\n", 177 | "neighbors = getNeighbors(trainSet, testInstance, 1)\n", 178 | "print(neighbors)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Response\n", 186 | "Once we have located the most similar neighbors for a test instance, the next task is to devise a predicted response based on these neighbors. We can do it by allowing each neighbor to vote for their class attribute and take majority vote as the prediction." 
187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 88, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "def getResponse(neighbors):\n", 198 | " classVotes = {}\n", 199 | " for x in range(len(neighbors)):\n", 200 | " response = neighbors[x][-1] # extracting the class value of the neighbors\n", 201 | " if response not in classVotes:\n", 202 | " classVotes[response] = 1\n", 203 | " else:\n", 204 | " classVotes[response] += 1\n", 205 | " sortedVotes = sorted(classVotes.items(), key = operator.itemgetter(1), reverse=True) # descending by values\n", 206 | " return sortedVotes[0][0] # 1st tuple and 1st item which is the response variable (with highest vote)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 89, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "a\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "neighbors = [[2, 2, 2, 'a'], [1, 1, 1, 'a'], [3, 3, 3, 'c']]\n", 226 | "response = getResponse(neighbors)\n", 227 | "print(response)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Accuracy" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 95, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "def getAccuracy(testSet, predictions):\n", 246 | " correct = 0\n", 247 | " for x in range(len(testSet)):\n", 248 | " if testSet[x][-1] == predictions[x]:\n", 249 | " correct += 1\n", 250 | " return (correct / float(len(testSet))) * 100.00" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 96, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "66.66666666666666\n" 265 | ] 266 | } 267 | ], 268 | 
"source": [ 269 | "testSet = [[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']]\n", 270 | "predictions = ['a', 'a', 'a']\n", 271 | "accuracy = getAccuracy(testSet, predictions)\n", 272 | "print(accuracy)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 97, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 287 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 288 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 289 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 290 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 291 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 292 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 293 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 294 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 295 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 296 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 297 | "Predicted = 'Iris-setosa' Actual = 'Iris-setosa'\n", 298 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 299 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 300 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 301 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 302 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 303 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 304 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 305 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 306 | "Predicted = 'Iris-virginica' Actual = 'Iris-versicolor'\n", 307 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 308 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 309 | "Predicted = 'Iris-versicolor' Actual = 'Iris-versicolor'\n", 310 | "Predicted = 
'Iris-versicolor' Actual = 'Iris-versicolor'\n", 311 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 312 | "Predicted = 'Iris-versicolor' Actual = 'Iris-virginica'\n", 313 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 314 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 315 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 316 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 317 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 318 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 319 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 320 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 321 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 322 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 323 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 324 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 325 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 326 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 327 | "Predicted = 'Iris-virginica' Actual = 'Iris-virginica'\n", 328 | "Accuracy: 95.23809523809523%\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "def main():\n", 334 | " trainingSet = []\n", 335 | " testSet = []\n", 336 | " split = 0.67\n", 337 | " loadDataset('iris.data.txt', split, trainingSet, testSet)\n", 338 | " \n", 339 | " predictions = []\n", 340 | " k = 10\n", 341 | " for x in range(len(testSet)):\n", 342 | " neighbors = getNeighbors(trainingSet, testSet[x], k)\n", 343 | " result = getResponse(neighbors)\n", 344 | " predictions.append(result)\n", 345 | " print('Predicted = ' +repr(result)+ ' Actual = ' +repr(testSet[x][-1]))\n", 346 | " accuracy = getAccuracy(testSet, predictions)\n", 347 | " print('Accuracy: ' +repr(accuracy)+ '%')\n", 348 | "main()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": { 355 | 
"collapsed": true 356 | }, 357 | "outputs": [], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.5.1" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 0 391 | } 392 | -------------------------------------------------------------------------------- /13. Decision-Trees-and-Random-Forests/kyphosis.csv: -------------------------------------------------------------------------------- 1 | "Kyphosis","Age","Number","Start" 2 | "absent",71,3,5 3 | "absent",158,3,14 4 | "present",128,4,5 5 | "absent",2,5,1 6 | "absent",1,4,15 7 | "absent",1,2,16 8 | "absent",61,2,17 9 | "absent",37,3,16 10 | "absent",113,2,16 11 | "present",59,6,12 12 | "present",82,5,14 13 | "absent",148,3,16 14 | "absent",18,5,2 15 | "absent",1,4,12 16 | "absent",168,3,18 17 | "absent",1,3,16 18 | "absent",78,6,15 19 | "absent",175,5,13 20 | "absent",80,5,16 21 | "absent",27,4,9 22 | "absent",22,2,16 23 | "present",105,6,5 24 | "present",96,3,12 25 | "absent",131,2,3 26 | "present",15,7,2 27 | "absent",9,5,13 28 | "absent",8,3,6 29 | "absent",100,3,14 30 | "absent",4,3,16 31 | "absent",151,2,16 32 | "absent",31,3,16 33 | "absent",125,2,11 34 | "absent",130,5,13 35 | "absent",112,3,16 36 | "absent",140,5,11 37 | "absent",93,3,16 38 | "absent",1,3,9 39 | "present",52,5,6 40 | "absent",20,6,9 41 | "present",91,5,12 42 | "present",73,5,1 43 | "absent",35,3,13 44 | "absent",143,9,3 45 | "absent",61,4,1 46 | "absent",97,3,16 47 
| "present",139,3,10 48 | "absent",136,4,15 49 | "absent",131,5,13 50 | "present",121,3,3 51 | "absent",177,2,14 52 | "absent",68,5,10 53 | "absent",9,2,17 54 | "present",139,10,6 55 | "absent",2,2,17 56 | "absent",140,4,15 57 | "absent",72,5,15 58 | "absent",2,3,13 59 | "present",120,5,8 60 | "absent",51,7,9 61 | "absent",102,3,13 62 | "present",130,4,1 63 | "present",114,7,8 64 | "absent",81,4,1 65 | "absent",118,3,16 66 | "absent",118,4,16 67 | "absent",17,4,10 68 | "absent",195,2,17 69 | "absent",159,4,13 70 | "absent",18,4,11 71 | "absent",15,5,16 72 | "absent",158,5,14 73 | "absent",127,4,12 74 | "absent",87,4,16 75 | "absent",206,4,10 76 | "absent",11,3,15 77 | "absent",178,4,15 78 | "present",157,3,13 79 | "absent",26,7,13 80 | "absent",120,2,13 81 | "present",42,7,6 82 | "absent",36,4,13 83 | -------------------------------------------------------------------------------- /16. Principal-Component-Analysis/PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/16. Principal-Component-Analysis/PCA.png -------------------------------------------------------------------------------- /2. NumPy/Numpy Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# NumPy Exercises \n", 18 | "\n", 19 | "Now that we've learned about NumPy let's test your knowledge. We'll start off with a few simple tasks, and then you'll be asked some more complicated questions." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "#### Import NumPy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "#### Create an array of 10 zeros " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 56 | ] 57 | }, 58 | "execution_count": 2, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "#### Create an array of 10 ones" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "#### Create an array of 10 fives" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])" 110 | ] 111 | }, 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "#### Create an array of the integers from 10 to 50" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 
128 | "execution_count": 5, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", 137 | " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", 138 | " 44, 45, 46, 47, 48, 49, 50])" 139 | ] 140 | }, 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Create an array of all the even integers from 10 to 50" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n", 166 | " 44, 46, 48, 50])" 167 | ] 168 | }, 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "#### Create a 3x3 matrix with values ranging from 0 to 8" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "array([[0, 1, 2],\n", 194 | " [3, 4, 5],\n", 195 | " [6, 7, 8]])" 196 | ] 197 | }, 198 | "execution_count": 7, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "#### Create a 3x3 identity matrix" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 8, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 
| "array([[ 1., 0., 0.],\n", 223 | " [ 0., 1., 0.],\n", 224 | " [ 0., 0., 1.]])" 225 | ] 226 | }, 227 | "execution_count": 8, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "#### Use NumPy to generate a random number between 0 and 1" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 15, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "array([ 0.42829726])" 252 | ] 253 | }, 254 | "execution_count": 15, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 33, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "array([ 1.32031013, 1.6798602 , -0.42985892, -1.53116655, 0.85753232,\n", 279 | " 0.87339938, 0.35668636, -1.47491157, 0.15349697, 0.99530727,\n", 280 | " -0.94865451, -1.69174783, 1.57525349, -0.70615234, 0.10991879,\n", 281 | " -0.49478947, 1.08279872, 0.76488333, -2.3039931 , 0.35401124,\n", 282 | " -0.45454399, -0.64754649, -0.29391671, 0.02339861, 0.38272124])" 283 | ] 284 | }, 285 | "execution_count": 33, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "#### Create the following matrix:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 35, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | 
"array([[ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],\n", 310 | " [ 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],\n", 311 | " [ 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],\n", 312 | " [ 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],\n", 313 | " [ 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],\n", 314 | " [ 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],\n", 315 | " [ 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],\n", 316 | " [ 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],\n", 317 | " [ 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],\n", 318 | " [ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]])" 319 | ] 320 | }, 321 | "execution_count": 35, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "#### Create an array of 20 linearly spaced points between 0 and 1:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 36, 338 | "metadata": { 339 | "collapsed": false 340 | }, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "array([ 0. , 0.05263158, 0.10526316, 0.15789474, 0.21052632,\n", 346 | " 0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,\n", 347 | " 0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,\n", 348 | " 0.78947368, 0.84210526, 0.89473684, 0.94736842, 1. 
])" 349 | ] 350 | }, 351 | "execution_count": 36, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Numpy Indexing and Selection\n", 363 | "\n", 364 | "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 38, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "array([[ 1, 2, 3, 4, 5],\n", 378 | " [ 6, 7, 8, 9, 10],\n", 379 | " [11, 12, 13, 14, 15],\n", 380 | " [16, 17, 18, 19, 20],\n", 381 | " [21, 22, 23, 24, 25]])" 382 | ] 383 | }, 384 | "execution_count": 38, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "mat = np.arange(1,26).reshape(5,5)\n", 391 | "mat" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 39, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [ 402 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 403 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 404 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 40, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "array([[12, 13, 14, 15],\n", 418 | " [17, 18, 19, 20],\n", 419 | " [22, 23, 24, 25]])" 420 | ] 421 | }, 422 | "execution_count": 40, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 29, 432 | "metadata": { 433 | "collapsed": true 434 | }, 435 | "outputs": [], 436 | "source": [ 437 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 438 
| "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 439 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 41, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "20" 453 | ] 454 | }, 455 | "execution_count": 41, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 30, 465 | "metadata": { 466 | "collapsed": true 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 471 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 472 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 42, 478 | "metadata": { 479 | "collapsed": false 480 | }, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "array([[ 2],\n", 486 | " [ 7],\n", 487 | " [12]])" 488 | ] 489 | }, 490 | "execution_count": 42, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 31, 500 | "metadata": { 501 | "collapsed": true 502 | }, 503 | "outputs": [], 504 | "source": [ 505 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 506 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 507 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 46, 513 | "metadata": { 514 | "collapsed": false 515 | }, 516 | "outputs": [ 517 | { 518 | "data": { 519 | "text/plain": [ 520 | "array([21, 22, 23, 24, 25])" 521 | ] 522 | }, 523 | "execution_count": 46, 524 | "metadata": {}, 525 | "output_type": "execute_result" 526 | } 527 | ], 528 | "source": [] 529 | }, 530 | { 531 | "cell_type": "code", 
532 | "execution_count": 32, 533 | "metadata": { 534 | "collapsed": true 535 | }, 536 | "outputs": [], 537 | "source": [ 538 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 539 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 540 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 49, 546 | "metadata": { 547 | "collapsed": false 548 | }, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "array([[16, 17, 18, 19, 20],\n", 554 | " [21, 22, 23, 24, 25]])" 555 | ] 556 | }, 557 | "execution_count": 49, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "### Now do the following" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "#### Get the sum of all the values in mat" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 50, 581 | "metadata": { 582 | "collapsed": false 583 | }, 584 | "outputs": [ 585 | { 586 | "data": { 587 | "text/plain": [ 588 | "325" 589 | ] 590 | }, 591 | "execution_count": 50, 592 | "metadata": {}, 593 | "output_type": "execute_result" 594 | } 595 | ], 596 | "source": [] 597 | }, 598 | { 599 | "cell_type": "markdown", 600 | "metadata": {}, 601 | "source": [ 602 | "#### Get the standard deviation of the values in mat" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 51, 608 | "metadata": { 609 | "collapsed": false 610 | }, 611 | "outputs": [ 612 | { 613 | "data": { 614 | "text/plain": [ 615 | "7.2111025509279782" 616 | ] 617 | }, 618 | "execution_count": 51, 619 | "metadata": {}, 620 | "output_type": "execute_result" 621 | } 622 | ], 623 | "source": [] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": {}, 628 | "source": [ 629 | "#### Get the sum of all the columns in 
mat" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 53, 635 | "metadata": { 636 | "collapsed": false 637 | }, 638 | "outputs": [ 639 | { 640 | "data": { 641 | "text/plain": [ 642 | "array([55, 60, 65, 70, 75])" 643 | ] 644 | }, 645 | "execution_count": 53, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | } 649 | ], 650 | "source": [] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "collapsed": true 656 | }, 657 | "source": [ 658 | "# Great Job!" 659 | ] 660 | } 661 | ], 662 | "metadata": { 663 | "kernelspec": { 664 | "display_name": "Python 3", 665 | "language": "python", 666 | "name": "python3" 667 | }, 668 | "language_info": { 669 | "codemirror_mode": { 670 | "name": "ipython", 671 | "version": 3 672 | }, 673 | "file_extension": ".py", 674 | "mimetype": "text/x-python", 675 | "name": "python", 676 | "nbconvert_exporter": "python", 677 | "pygments_lexer": "ipython3", 678 | "version": "3.5.1" 679 | } 680 | }, 681 | "nbformat": 4, 682 | "nbformat_minor": 0 683 | } 684 | -------------------------------------------------------------------------------- /2. NumPy/Numpy Operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# NumPy Operations" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Arithmetic\n", 27 | "\n", 28 | "You can easily perform array with array arithmetic, or scalar with array arithmetic. 
Let's see some examples:" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "arr = np.arange(0,10)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])" 54 | ] 55 | }, 56 | "execution_count": 2, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "arr + arr" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "arr * arr" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 4, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 98 | ] 99 | }, 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "arr - arr" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [ 116 | { 117 | "name": "stderr", 118 | "output_type": "stream", 119 | "text": [ 120 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in true_divide\n", 121 | " if __name__ == '__main__':\n" 122 | ] 123 | }, 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "array([ nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 128 | ] 129 | }, 130 | "execution_count": 5, 131 | 
"metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "# Warning on division by zero, but not an error!\n", 137 | "# Just replaced with nan\n", 138 | "arr/arr" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stderr", 150 | "output_type": "stream", 151 | "text": [ 152 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide\n", 153 | " if __name__ == '__main__':\n" 154 | ] 155 | }, 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "array([ inf, 1. , 0.5 , 0.33333333, 0.25 ,\n", 160 | " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111])" 161 | ] 162 | }, 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "# Also warning, but not an error instead infinity\n", 170 | "1/arr" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 10, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729])" 184 | ] 185 | }, 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "arr**3" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Universal Array Functions\n", 200 | "\n", 201 | "Numpy comes with many [universal array functions](http://docs.scipy.org/doc/numpy/reference/ufuncs.html), which are essentially just mathematical operations you can use to perform the operation across the array. 
Let's show some common ones:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 12, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/plain": [ 214 | "array([ 0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 215 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])" 216 | ] 217 | }, 218 | "execution_count": 12, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "#Taking Square Roots\n", 225 | "np.sqrt(arr)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 13, 231 | "metadata": { 232 | "collapsed": false 233 | }, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([ 1.00000000e+00, 2.71828183e+00, 7.38905610e+00,\n", 239 | " 2.00855369e+01, 5.45981500e+01, 1.48413159e+02,\n", 240 | " 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,\n", 241 | " 8.10308393e+03])" 242 | ] 243 | }, 244 | "execution_count": 13, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "#Calcualting exponential (e^)\n", 251 | "np.exp(arr)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 14, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "9" 265 | ] 266 | }, 267 | "execution_count": 14, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "np.max(arr) #same as arr.max()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 15, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "array([ 0. 
, 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", 287 | " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849])" 288 | ] 289 | }, 290 | "execution_count": 15, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "np.sin(arr)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 16, 302 | "metadata": { 303 | "collapsed": false 304 | }, 305 | "outputs": [ 306 | { 307 | "name": "stderr", 308 | "output_type": "stream", 309 | "text": [ 310 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log\n", 311 | " if __name__ == '__main__':\n" 312 | ] 313 | }, 314 | { 315 | "data": { 316 | "text/plain": [ 317 | "array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436,\n", 318 | " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])" 319 | ] 320 | }, 321 | "execution_count": 16, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "np.log(arr)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "# Great Job!\n", 335 | "\n", 336 | "That's all we need to know for now!" 337 | ] 338 | } 339 | ], 340 | "metadata": { 341 | "kernelspec": { 342 | "display_name": "Python 3", 343 | "language": "python", 344 | "name": "python3" 345 | }, 346 | "language_info": { 347 | "codemirror_mode": { 348 | "name": "ipython", 349 | "version": 3 350 | }, 351 | "file_extension": ".py", 352 | "mimetype": "text/x-python", 353 | "name": "python", 354 | "nbconvert_exporter": "python", 355 | "pygments_lexer": "ipython3", 356 | "version": "3.5.1" 357 | } 358 | }, 359 | "nbformat": 4, 360 | "nbformat_minor": 0 361 | } 362 | -------------------------------------------------------------------------------- /3. 
Pandas/.ipynb_checkpoints/Introduction to Pandas-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# Introduction to Pandas\n", 20 | "\n", 21 | "In this section of the course we will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n", 22 | "\n", 23 | "* Introduction to Pandas\n", 24 | "* Series\n", 25 | "* DataFrames\n", 26 | "* Missing Data\n", 27 | "* GroupBy\n", 28 | "* Merging,Joining,and Concatenating\n", 29 | "* Operations\n", 30 | "* Data Input and Output" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "___" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.1" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /3. 
Pandas/.ipynb_checkpoints/Missing Data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Missing Data\n", 18 | "\n", 19 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 43 | " 'B':[5,np.nan,np.nan],\n", 44 | " 'C':[1,2,3]})" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 10, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " A B C\n", 92 | "0 1.0 5.0 1\n", 93 | "1 2.0 NaN 2\n", 94 | "2 NaN NaN 3" 95 | ] 96 | }, 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | "
ABC
01.05.01
\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " A B C\n", 139 | "0 1.0 5.0 1" 140 | ] 141 | }, 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.dropna()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 13, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
C
01
12
23
\n", 184 | "
" 185 | ], 186 | "text/plain": [ 187 | " C\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 3" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.dropna(axis=1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 14, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
\n", 213 | "\n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
ABC
01.05.01
12.0NaN2
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " A B C\n", 241 | "0 1.0 5.0 1\n", 242 | "1 2.0 NaN 2" 243 | ] 244 | }, 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.dropna(thresh=2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " A B C\n", 299 | "0 1 5 1\n", 300 | "1 2 FILL VALUE 2\n", 301 | "2 FILL VALUE FILL VALUE 3" 302 | ] 303 | }, 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "df.fillna(value='FILL VALUE')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 17, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 1.0\n", 324 | "1 2.0\n", 325 | "2 1.5\n", 326 | "Name: A, dtype: float64" 327 | ] 328 | }, 329 | "execution_count": 17, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df['A'].fillna(value=df['A'].mean())" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "# Great Job!" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.5.1" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /3. Pandas/.ipynb_checkpoints/Series-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The first main data type we will learn about for pandas is the Series data type. 
Let's import Pandas and explore the Series object.\n", 19 | "\n", 20 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 21 | "\n", 22 | "Let's explore this concept through some examples:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Creating a Series\n", 42 | "\n", 43 | "You can convert a list,numpy array, or dictionary to a Series:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "labels = ['a','b','c']\n", 55 | "my_list = [10,20,30]\n", 56 | "arr = np.array([10,20,30])\n", 57 | "d = {'a':10,'b':20,'c':30}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "** Using Lists**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0 10\n", 78 | "1 20\n", 79 | "2 30\n", 80 | "dtype: int64" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "pd.Series(data=my_list)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "a 10\n", 103 | "b 20\n", 104 | "c 30\n", 105 | "dtype: int64" 106 | 
] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "pd.Series(data=my_list,index=labels)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "a 10\n", 128 | "b 20\n", 129 | "c 30\n", 130 | "dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "pd.Series(my_list,labels)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** NumPy Arrays **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "0 10\n", 160 | "1 20\n", 161 | "2 30\n", 162 | "dtype: int64" 163 | ] 164 | }, 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "pd.Series(arr)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "a 10\n", 185 | "b 20\n", 186 | "c 30\n", 187 | "dtype: int64" 188 | ] 189 | }, 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "pd.Series(arr,labels)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "** Dictionary**" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": { 210 | "collapsed": false 211 | }, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "a 10\n", 217 | "b 20\n", 218 | "c 30\n", 219 | "dtype: 
int64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "pd.Series(d)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Data in a Series\n", 236 | "\n", 237 | "A pandas Series can hold a variety of object types:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "0 a\n", 251 | "1 b\n", 252 | "2 c\n", 253 | "dtype: object" 254 | ] 255 | }, 256 | "execution_count": 10, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "pd.Series(data=labels)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "0 \n", 276 | "1 \n", 277 | "2 \n", 278 | "dtype: object" 279 | ] 280 | }, 281 | "execution_count": 11, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# Even functions (although unlikely that you will use this)\n", 288 | "pd.Series([sum,print,len])" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Using an Index\n", 296 | "\n", 297 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 298 | "\n", 299 | "Let's see some examples of how to grab information from a Series. 
Let us create two series, ser1 and ser2:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 12, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 13, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "USA        1\n", 324 | "Germany    2\n", 325 | "USSR       3\n", 326 | "Japan      4\n", 327 | "dtype: int64" 328 | ] 329 | }, 330 | "execution_count": 13, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ser1" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 14, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 15, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "USA        1\n", 361 | "Germany    2\n", 362 | "Italy      5\n", 363 | "Japan      4\n", 364 | "dtype: int64" 365 | ] 366 | }, 367 | "execution_count": 15, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "ser2" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 16, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "1" 387 | ] 388 | }, 389 | "execution_count": 16, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "ser1['USA']" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "source": [ 404 | "Operations are then also done based off of 
index:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 17, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "Germany 4.0\n", 418 | "Italy NaN\n", 419 | "Japan 8.0\n", 420 | "USA 2.0\n", 421 | "USSR NaN\n", 422 | "dtype: float64" 423 | ] 424 | }, 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "ser1 + ser2" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 439 | "# Great Job!" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.5.1" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 0 464 | } 465 | -------------------------------------------------------------------------------- /3. Pandas/Excel_Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/3. Pandas/Excel_Sample.xlsx -------------------------------------------------------------------------------- /3. 
Pandas/Introduction to Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "source": [ 19 | "# Introduction to Pandas\n", 20 | "\n", 21 | "In this section of the course we will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n", 22 | "\n", 23 | "* Introduction to Pandas\n", 24 | "* Series\n", 25 | "* DataFrames\n", 26 | "* Missing Data\n", 27 | "* GroupBy\n", 28 | "* Merging,Joining,and Concatenating\n", 29 | "* Operations\n", 30 | "* Data Input and Output" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "___" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.5.1" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 0 62 | } 63 | -------------------------------------------------------------------------------- /3. 
Pandas/Missing Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Missing Data\n", 18 | "\n", 19 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 43 | " 'B':[5,np.nan,np.nan],\n", 44 | " 'C':[1,2,3]})" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 10, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " A B C\n", 92 | "0 1.0 5.0 1\n", 93 | "1 2.0 NaN 2\n", 94 | "2 NaN NaN 3" 95 | ] 96 | }, 97 | "execution_count": 10, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 12, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/html": [ 116 | "
\n", 117 | "\n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | "
ABC
01.05.01
\n", 135 | "
" 136 | ], 137 | "text/plain": [ 138 | " A B C\n", 139 | "0 1.0 5.0 1" 140 | ] 141 | }, 142 | "execution_count": 12, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.dropna()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 13, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | "
C
01
12
23
\n", 184 | "
" 185 | ], 186 | "text/plain": [ 187 | " C\n", 188 | "0 1\n", 189 | "1 2\n", 190 | "2 3" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df.dropna(axis=1)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 14, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/html": [ 212 | "
\n", 213 | "\n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | "
ABC
01.05.01
12.0NaN2
\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " A B C\n", 241 | "0 1.0 5.0 1\n", 242 | "1 2.0 NaN 2" 243 | ] 244 | }, 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "output_type": "execute_result" 248 | } 249 | ], 250 | "source": [ 251 | "df.dropna(thresh=2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 15, 257 | "metadata": { 258 | "collapsed": false 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 295 | "
" 296 | ], 297 | "text/plain": [ 298 | " A B C\n", 299 | "0 1 5 1\n", 300 | "1 2 FILL VALUE 2\n", 301 | "2 FILL VALUE FILL VALUE 3" 302 | ] 303 | }, 304 | "execution_count": 15, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "df.fillna(value='FILL VALUE')" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 17, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "0 1.0\n", 324 | "1 2.0\n", 325 | "2 1.5\n", 326 | "Name: A, dtype: float64" 327 | ] 328 | }, 329 | "execution_count": 17, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df['A'].fillna(value=df['A'].mean())" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "# Great Job!" 343 | ] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 3", 349 | "language": "python", 350 | "name": "python3" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 3 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython3", 362 | "version": "3.5.1" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /3. Pandas/Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Series" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "The first main data type we will learn about for pandas is the Series data type. 
Let's import Pandas and explore the Series object.\n", 19 | "\n", 20 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 21 | "\n", 22 | "Let's explore this concept through some examples:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Creating a Series\n", 42 | "\n", 43 | "You can convert a list,numpy array, or dictionary to a Series:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "labels = ['a','b','c']\n", 55 | "my_list = [10,20,30]\n", 56 | "arr = np.array([10,20,30])\n", 57 | "d = {'a':10,'b':20,'c':30}" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "** Using Lists**" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0 10\n", 78 | "1 20\n", 79 | "2 30\n", 80 | "dtype: int64" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "pd.Series(data=my_list)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "a 10\n", 103 | "b 20\n", 104 | "c 30\n", 105 | "dtype: int64" 106 | 
] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "pd.Series(data=my_list,index=labels)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "a 10\n", 128 | "b 20\n", 129 | "c 30\n", 130 | "dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "pd.Series(my_list,labels)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** NumPy Arrays **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "0 10\n", 160 | "1 20\n", 161 | "2 30\n", 162 | "dtype: int64" 163 | ] 164 | }, 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "pd.Series(arr)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "a 10\n", 185 | "b 20\n", 186 | "c 30\n", 187 | "dtype: int64" 188 | ] 189 | }, 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "pd.Series(arr,labels)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "** Dictionary**" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": { 210 | "collapsed": false 211 | }, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "a 10\n", 217 | "b 20\n", 218 | "c 30\n", 219 | "dtype: 
int64" 220 | ] 221 | }, 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "pd.Series(d)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "### Data in a Series\n", 236 | "\n", 237 | "A pandas Series can hold a variety of object types:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "0 a\n", 251 | "1 b\n", 252 | "2 c\n", 253 | "dtype: object" 254 | ] 255 | }, 256 | "execution_count": 10, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "pd.Series(data=labels)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "0 \n", 276 | "1 \n", 277 | "2 \n", 278 | "dtype: object" 279 | ] 280 | }, 281 | "execution_count": 11, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# Even functions (although unlikely that you will use this)\n", 288 | "pd.Series([sum,print,len])" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "## Using an Index\n", 296 | "\n", 297 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 298 | "\n", 299 | "Let's see some examples of how to grab information from a Series. 
Let us create two series, ser1 and ser2:" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 12, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 13, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/plain": [ 323 | "USA        1\n", 324 | "Germany    2\n", 325 | "USSR       3\n", 326 | "Japan      4\n", 327 | "dtype: int64" 328 | ] 329 | }, 330 | "execution_count": 13, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ser1" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 14, 342 | "metadata": { 343 | "collapsed": true 344 | }, 345 | "outputs": [], 346 | "source": [ 347 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 15, 353 | "metadata": { 354 | "collapsed": false 355 | }, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "USA        1\n", 361 | "Germany    2\n", 362 | "Italy      5\n", 363 | "Japan      4\n", 364 | "dtype: int64" 365 | ] 366 | }, 367 | "execution_count": 15, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "ser2" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 16, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "1" 387 | ] 388 | }, 389 | "execution_count": 16, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "ser1['USA']" 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "source": [ 404 | "Operations are then also done based off of 
index:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 17, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "Germany 4.0\n", 418 | "Italy NaN\n", 419 | "Japan 8.0\n", 420 | "USA 2.0\n", 421 | "USSR NaN\n", 422 | "dtype: float64" 423 | ] 424 | }, 425 | "execution_count": 17, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "ser1 + ser2" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 439 | "# Great Job!" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.5.1" 460 | } 461 | }, 462 | "nbformat": 4, 463 | "nbformat_minor": 0 464 | } 465 | -------------------------------------------------------------------------------- /3. Pandas/example: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0,1,2,3 3 | 4,5,6,7 4 | 8,9,10,11 5 | 12,13,14,15 6 | -------------------------------------------------------------------------------- /3. 
Pandas/multi_index_example: -------------------------------------------------------------------------------- 1 | first,bar,bar,baz,baz,foo,foo,qux,qux 2 | second,one,two,one,two,one,two,one,two 3 | ,,,,,,,, 4 | A,1.025984152081572,-0.1565979042889875,-0.031579143908112575,0.6498258334908454,2.154846443259472,-0.6102588558227414,-0.755325340010558,-0.34641850351854453 5 | B,0.1470267713241236,-0.47944803904109595,0.558769406443067,1.0248102783372157,-0.925874258809907,1.8628641384939535,-1.1338171615837889,0.6104779075384634 6 | C,0.3860303121135517,2.084018530338962,-0.37651867524923904,0.23033634359240704,0.6812092925867574,1.0351250747739213,-0.031160481493099617,1.9399323109926203 7 | -------------------------------------------------------------------------------- /6. Pandas Built-in Data Viz/df2: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0.039761986133905136,0.2185172274750622,0.10342298051665423,0.9579042338107532 3 | 0.9372879037285884,0.04156728027953449,0.8991254222382951,0.9776795571253272 4 | 0.7805044779316328,0.008947537857148302,0.5578084027546968,0.7975104497549266 5 | 0.6727174963492204,0.24786984946279625,0.2640713103088026,0.44435791644122935 6 | 0.05382860859967886,0.5201244020579979,0.5522642392797277,0.19000759632053632 7 | 0.2860433671280178,0.5934650440000543,0.9073072637456548,0.6378977150631427 8 | 0.4304355863327313,0.16623013749421356,0.4693825447762464,0.4977008828313123 9 | 0.3122955538295512,0.5028232900921878,0.8066087010958843,0.8505190941429479 10 | 0.1877648514121828,0.9970746427719338,0.8959552961495315,0.530390137569463 11 | 0.9081621790575398,0.23272641071536715,0.4141382611943452,0.4320069001558664 12 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/.ipynb_checkpoints/Choropleth Maps Exercise -checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise \n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. Due to the Nature of Plotly we can't show you examples\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 38, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import plotly.graph_objs as go \n", 35 | "from plotly.offline import init_notebook_mode,iplot\n", 36 | "init_notebook_mode(connected=True) " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 152, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "** Check the head of the DataFrame. **" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 156, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " Country Power Consumption KWH Text\n", 128 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 129 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 130 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 131 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 132 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 133 | ] 134 | }, 135 | "execution_count": 156, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "choromap = go.Figure(data = [data],layout = layout)\n", 167 | "iplot(choromap,validate=False)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## USA Choropleth\n", 175 | "\n", 176 | "** Import the 2012_Election_Data csv file using pandas. **" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 109, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "** Check the head of the DataFrame. **" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 110, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 332 | "
" 333 | ], 334 | "text/plain": [ 335 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 336 | "0 2012 41 1 Alabama \n", 337 | "1 2012 81 2 Alaska \n", 338 | "2 2012 61 3 Arizona \n", 339 | "3 2012 42 4 Arkansas \n", 340 | "4 2012 71 5 California \n", 341 | "\n", 342 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 343 | "0 NaN 58.6% 56.0% \n", 344 | "1 58.9% 58.7% 55.3% \n", 345 | "2 53.0% 52.6% 46.5% \n", 346 | "3 51.1% 50.7% 47.7% \n", 347 | "4 55.7% 55.1% 45.1% \n", 348 | "\n", 349 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 350 | "0 NaN 2,074,338 3,539,217 \n", 351 | "1 301,694 300,495 511,792 \n", 352 | "2 2,323,579 2,306,559 4,387,900 \n", 353 | "3 1,078,548 1,069,468 2,109,847 \n", 354 | "4 13,202,158 13,038,547 23,681,837 \n", 355 | "\n", 356 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 357 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 358 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 359 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 360 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 361 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 362 | "\n", 363 | " Total Ineligible Felon State Abv \n", 364 | "0 71,584 AL \n", 365 | "1 11,317 AK \n", 366 | "2 81,048 AZ \n", 367 | "3 53,808 AR \n", 368 | "4 208,742 CA " 369 | ] 370 | }, 371 | "execution_count": 110, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 120, 388 | "metadata": { 389 | "collapsed": false 390 | }, 391 | "outputs": [], 392 | "source": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 121, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "choromap = go.Figure(data = [data],layout = layout)\n", 412 | "iplot(choromap,validate=False)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "# Great Job!" 420 | ] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "Python 3", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.5.1" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 0 444 | } 445 | -------------------------------------------------------------------------------- /7. Geographical Plotting/2011_US_AGRI_Exports: -------------------------------------------------------------------------------- 1 | code,state,category,total exports,beef,pork,poultry,dairy,fruits fresh,fruits proc,total fruits,veggies fresh,veggies proc,total veggies,corn,wheat,cotton,text 2 | AL,Alabama,state,1390.63,34.4,10.6,481.0,4.06,8.0,17.1,25.11,5.5,8.9,14.33,34.9,70.0,317.61,Alabama
Beef 34.4 Dairy 4.06
Fruits 25.11 Veggies 14.33
Wheat 70.0 Corn 34.9 3 | AK,Alaska,state,13.31,0.2,0.1,0.0,0.19,0.0,0.0,0.0,0.6,1.0,1.56,0.0,0.0,0.0,Alaska
Beef 0.2 Dairy 0.19
Fruits 0.0 Veggies 1.56
Wheat 0.0 Corn 0.0 4 | AZ,Arizona,state,1463.17,71.3,17.9,0.0,105.48,19.3,41.0,60.27,147.5,239.4,386.91,7.3,48.7,423.95,Arizona
Beef 71.3 Dairy 105.48
Fruits 60.27 Veggies 386.91
Wheat 48.7 Corn 7.3 5 | AR,Arkansas,state,3586.02,53.2,29.4,562.9,3.53,2.2,4.7,6.88,4.4,7.1,11.45,69.5,114.5,665.44,Arkansas
Beef 53.2 Dairy 3.53
Fruits 6.88 Veggies 11.45
Wheat 114.5 Corn 69.5 6 | CA, California, state,16472.88,228.7,11.1,225.4,929.95,2791.8,5944.6,8736.4,803.2,1303.5,2106.79,34.6,249.3,1064.95, California
Beef 228.7 Dairy 929.95
Fruits 8736.4 Veggies 2106.79
Wheat 249.3 Corn 34.6 7 | CO,Colorado,state,1851.33,261.4,66.0,14.0,71.94,5.7,12.2,17.99,45.1,73.2,118.27,183.2,400.5,0.0,Colorado
Beef 261.4 Dairy 71.94
Fruits 17.99 Veggies 118.27
Wheat 400.5 Corn 183.2 8 | CT,Connecticut,state,259.62,1.1,0.1,6.9,9.49,4.2,8.9,13.1,4.3,6.9,11.16,0.0,0.0,0.0,Connecticut
Beef 1.1 Dairy 9.49
Fruits 13.1 Veggies 11.16
Wheat 0.0 Corn 0.0 9 | DE,Delaware,state,282.19,0.4,0.6,114.7,2.3,0.5,1.0,1.53,7.6,12.4,20.03,26.9,22.9,0.0,Delaware
Beef 0.4 Dairy 2.3
Fruits 1.53 Veggies 20.03
Wheat 22.9 Corn 26.9 10 | FL,Florida,state,3764.09,42.6,0.9,56.9,66.31,438.2,933.1,1371.36,171.9,279.0,450.86,3.5,1.8,78.24,Florida
Beef 42.6 Dairy 66.31
Fruits 1371.36 Veggies 450.86
Wheat 1.8 Corn 3.5 11 | GA,Georgia,state,2860.84,31.0,18.9,630.4,38.38,74.6,158.9,233.51,59.0,95.8,154.77,57.8,65.4,1154.07,Georgia
Beef 31.0 Dairy 38.38
Fruits 233.51 Veggies 154.77
Wheat 65.4 Corn 57.8 12 | HI,Hawaii,state,401.84,4.0,0.7,1.3,1.16,17.7,37.8,55.51,9.5,15.4,24.83,0.0,0.0,0.0,Hawaii
Beef 4.0 Dairy 1.16
Fruits 55.51 Veggies 24.83
Wheat 0.0 Corn 0.0 13 | ID,Idaho,state,2078.89,119.8,0.0,2.4,294.6,6.9,14.7,21.64,121.7,197.5,319.19,24.0,568.2,0.0,Idaho
Beef 119.8 Dairy 294.6
Fruits 21.64 Veggies 319.19
Wheat 568.2 Corn 24.0 14 | IL,Illinois,state,8709.48,53.7,394.0,14.0,45.82,4.0,8.5,12.53,15.2,24.7,39.95,2228.5,223.8,0.0,Illinois
Beef 53.7 Dairy 45.82
Fruits 12.53 Veggies 39.95
Wheat 223.8 Corn 2228.5 15 | IN,Indiana,state,5050.23,21.9,341.9,165.6,89.7,4.1,8.8,12.98,14.4,23.4,37.89,1123.2,114.0,0.0,Indiana
Beef 21.9 Dairy 89.7
Fruits 12.98 Veggies 37.89
Wheat 114.0 Corn 1123.2 16 | IA,Iowa,state,11273.76,289.8,1895.6,155.6,107.0,1.0,2.2,3.24,2.7,4.4,7.1,2529.8,3.1,0.0,Iowa
Beef 289.8 Dairy 107.0
Fruits 3.24 Veggies 7.1
Wheat 3.1 Corn 2529.8 17 | KS,Kansas,state,4589.01,659.3,179.4,6.4,65.45,1.0,2.1,3.11,3.6,5.8,9.32,457.3,1426.5,43.98,Kansas
Beef 659.3 Dairy 65.45
Fruits 3.11 Veggies 9.32
Wheat 1426.5 Corn 457.3 18 | KY,Kentucky,state,1889.15,54.8,34.2,151.3,28.27,2.1,4.5,6.6,0.0,0.0,0.0,179.1,149.3,0.0,Kentucky
Beef 54.8 Dairy 28.27
Fruits 6.6 Veggies 0.0
Wheat 149.3 Corn 179.1 19 | LA,Louisiana,state,1914.23,19.8,0.8,77.2,6.02,5.7,12.1,17.83,6.6,10.7,17.25,91.4,78.7,280.42,Louisiana
Beef 19.8 Dairy 6.02
Fruits 17.83 Veggies 17.25
Wheat 78.7 Corn 91.4 20 | ME,Maine,state,278.37,1.4,0.5,10.4,16.18,16.6,35.4,52.01,24.0,38.9,62.9,0.0,0.0,0.0,Maine
Beef 1.4 Dairy 16.18
Fruits 52.01 Veggies 62.9
Wheat 0.0 Corn 0.0 21 | MD,Maryland,state,692.75,5.6,3.1,127.0,24.81,4.1,8.8,12.9,7.8,12.6,20.43,54.1,55.8,0.0,Maryland
Beef 5.6 Dairy 24.81
Fruits 12.9 Veggies 20.43
Wheat 55.8 Corn 54.1 22 | MA,Massachusetts,state,248.65,0.6,0.5,0.6,5.81,25.8,55.0,80.83,8.1,13.1,21.13,0.0,0.0,0.0,Massachusetts
Beef 0.6 Dairy 5.81
Fruits 80.83 Veggies 21.13
Wheat 0.0 Corn 0.0 23 | MI,Michigan,state,3164.16,37.7,118.1,32.6,214.82,82.3,175.3,257.69,72.4,117.5,189.96,381.5,247.0,0.0,Michigan
Beef 37.7 Dairy 214.82
Fruits 257.69 Veggies 189.96
Wheat 247.0 Corn 381.5 24 | MN,Minnesota,state,7192.33,112.3,740.4,189.2,218.05,2.5,5.4,7.91,45.9,74.5,120.37,1264.3,538.1,0.0,Minnesota
Beef 112.3 Dairy 218.05
Fruits 7.91 Veggies 120.37
Wheat 538.1 Corn 1264.3 25 | MS,Mississippi,state,2170.8,12.8,30.4,370.8,5.45,5.4,11.6,17.04,10.6,17.2,27.87,110.0,102.2,494.75,Mississippi
Beef 12.8 Dairy 5.45
Fruits 17.04 Veggies 27.87
Wheat 102.2 Corn 110.0 26 | MO,Missouri,state,3933.42,137.2,277.3,196.1,34.26,4.2,9.0,13.18,6.8,11.1,17.9,428.8,161.7,345.29,Missouri
Beef 137.2 Dairy 34.26
Fruits 13.18 Veggies 17.9
Wheat 161.7 Corn 428.8 27 | MT,Montana,state,1718.0,105.0,16.7,1.7,6.82,1.1,2.2,3.3,17.3,28.0,45.27,5.4,1198.1,0.0,Montana
Beef 105.0 Dairy 6.82
Fruits 3.3 Veggies 45.27
Wheat 1198.1 Corn 5.4 28 | NE,Nebraska,state,7114.13,762.2,262.5,31.4,30.07,0.7,1.5,2.16,20.4,33.1,53.5,1735.9,292.3,0.0,Nebraska
Beef 762.2 Dairy 30.07
Fruits 2.16 Veggies 53.5
Wheat 292.3 Corn 1735.9 29 | NV,Nevada,state,139.89,21.8,0.2,0.0,16.57,0.4,0.8,1.19,10.6,17.3,27.93,0.0,5.4,0.0,Nevada
Beef 21.8 Dairy 16.57
Fruits 1.19 Veggies 27.93
Wheat 5.4 Corn 0.0 30 | NH,New Hampshire,state,73.06,0.6,0.2,0.8,7.46,2.6,5.4,7.98,1.7,2.8,4.5,0.0,0.0,0.0,New Hampshire
Beef 0.6 Dairy 7.46
Fruits 7.98 Veggies 4.5
Wheat 0.0 Corn 0.0 31 | NJ,New Jersey,state,500.4,0.8,0.4,4.6,3.37,35.0,74.5,109.45,21.6,35.0,56.54,10.1,6.7,0.0,New Jersey
Beef 0.8 Dairy 3.37
Fruits 109.45 Veggies 56.54
Wheat 6.7 Corn 10.1 32 | NM,New Mexico,state,751.58,117.2,0.1,0.3,191.01,32.6,69.3,101.9,16.7,27.1,43.88,11.2,13.9,72.62,New Mexico
Beef 117.2 Dairy 191.01
Fruits 101.9 Veggies 43.88
Wheat 13.9 Corn 11.2 33 | NY,New York,state,1488.9,22.2,5.8,17.7,331.8,64.7,137.8,202.56,54.7,88.7,143.37,106.1,29.9,0.0,New York
Beef 22.2 Dairy 331.8
Fruits 202.56 Veggies 143.37
Wheat 29.9 Corn 106.1 34 | NC,North Carolina,state,3806.05,24.8,702.8,598.4,24.9,23.8,50.7,74.47,57.4,93.1,150.45,92.2,200.3,470.86,North Carolina
Beef 24.8 Dairy 24.9
Fruits 74.47 Veggies 150.45
Wheat 200.3 Corn 92.2 35 | ND,North Dakota,state,3761.96,78.5,16.1,0.5,8.14,0.1,0.2,0.25,49.9,80.9,130.79,236.1,1664.5,0.0,North Dakota
Beef 78.5 Dairy 8.14
Fruits 0.25 Veggies 130.79
Wheat 1664.5 Corn 236.1 36 | OH,Ohio,state,3979.79,36.2,199.1,129.9,134.57,8.7,18.5,27.21,20.4,33.1,53.53,535.1,207.4,0.0,Ohio
Beef 36.2 Dairy 134.57
Fruits 27.21 Veggies 53.53
Wheat 207.4 Corn 535.1 37 | OK,Oklahoma,state,1646.41,337.6,265.3,131.1,24.35,3.0,6.3,9.24,3.4,5.5,8.9,27.5,324.8,110.54,Oklahoma
Beef 337.6 Dairy 24.35
Fruits 9.24 Veggies 8.9
Wheat 324.8 Corn 27.5 38 | OR,Oregon,state,1794.57,58.8,1.4,14.2,63.66,100.7,214.4,315.04,48.2,78.3,126.5,11.7,320.3,0.0,Oregon
Beef 58.8 Dairy 63.66
Fruits 315.04 Veggies 126.5
Wheat 320.3 Corn 11.7 39 | PA,Pennsylvania,state,1969.87,50.9,91.3,169.8,280.87,28.6,60.9,89.48,14.6,23.7,38.26,112.1,41.0,0.0,Pennsylvania
Beef 50.9 Dairy 280.87
Fruits 89.48 Veggies 38.26
Wheat 41.0 Corn 112.1 40 | RI,Rhode Island,state,31.59,0.1,0.1,0.2,0.52,0.9,1.9,2.83,1.2,1.9,3.02,0.0,0.0,0.0,Rhode Island
Beef 0.1 Dairy 0.52
Fruits 2.83 Veggies 3.02
Wheat 0.0 Corn 0.0 41 | SC,South Carolina,state,929.93,15.2,10.9,186.5,7.62,17.1,36.4,53.45,16.3,26.4,42.66,32.1,55.3,206.1,South Carolina
Beef 15.2 Dairy 7.62
Fruits 53.45 Veggies 42.66
Wheat 55.3 Corn 32.1 42 | SD,South Dakota,state,3770.19,193.5,160.2,29.3,46.77,0.3,0.5,0.8,1.5,2.5,4.06,643.6,704.5,0.0,South Dakota
Beef 193.5 Dairy 46.77
Fruits 0.8 Veggies 4.06
Wheat 704.5 Corn 643.6 43 | TN,Tennessee,state,1535.13,51.1,17.6,82.4,21.18,2.0,4.2,6.23,9.4,15.3,24.67,88.8,100.0,363.83,Tennessee
Beef 51.1 Dairy 21.18
Fruits 6.23 Veggies 24.67
Wheat 100.0 Corn 88.8 44 | TX,Texas,state,6648.22,961.0,42.7,339.2,240.55,31.9,68.0,99.9,43.9,71.3,115.23,167.2,309.7,2308.76,Texas
Beef 961.0 Dairy 240.55
Fruits 99.9 Veggies 115.23
Wheat 309.7 Corn 167.2 45 | UT,Utah,state,453.39,27.9,59.0,23.1,48.6,3.9,8.4,12.34,2.5,4.1,6.6,5.3,42.8,0.0,Utah
Beef 27.9 Dairy 48.6
Fruits 12.34 Veggies 6.6
Wheat 42.8 Corn 5.3 46 | VT,Vermont,state,180.14,6.2,0.2,0.9,65.98,2.6,5.4,8.01,1.5,2.5,4.05,0.0,0.0,0.0,Vermont
Beef 6.2 Dairy 65.98
Fruits 8.01 Veggies 4.05
Wheat 0.0 Corn 0.0 47 | VA,Virginia,state,1146.48,39.5,16.9,164.7,47.85,11.7,24.8,36.48,10.4,16.9,27.25,39.5,77.5,64.84,Virginia
Beef 39.5 Dairy 47.85
Fruits 36.48 Veggies 27.25
Wheat 77.5 Corn 39.5 48 | WA,Washington,state,3894.81,59.2,0.0,35.6,154.18,555.6,1183.0,1738.57,138.7,225.1,363.79,29.5,786.3,0.0,Washington
Beef 59.2 Dairy 154.18
Fruits 1738.57 Veggies 363.79
Wheat 786.3 Corn 29.5 49 | WV,West Virginia,state,138.89,12.0,0.3,45.4,3.9,3.7,7.9,11.54,0.0,0.0,0.0,3.5,1.6,0.0,West Virginia
Beef 12.0 Dairy 3.9
Fruits 11.54 Veggies 0.0
Wheat 1.6 Corn 3.5 50 | WI,Wisconsin,state,3090.23,107.3,38.6,34.5,633.6,42.8,91.0,133.8,56.8,92.2,148.99,460.5,96.7,0.0,Wisconsin
Beef 107.3 Dairy 633.6
Fruits 133.8 Veggies 148.99
Wheat 96.7 Corn 460.5 51 | WY,Wyoming,state,349.69,75.1,33.2,0.1,2.89,0.1,0.1,0.17,3.9,6.3,10.23,9.0,20.7,0.0,Wyoming
Beef 75.1 Dairy 2.89
Fruits 0.17 Veggies 10.23
Wheat 20.7 Corn 9.0 52 | -------------------------------------------------------------------------------- /7. Geographical Plotting/2012_Election_Data: -------------------------------------------------------------------------------- 1 | Year,ICPSR State Code,Alphanumeric State Code,State,VEP Total Ballots Counted,VEP Highest Office,VAP Highest Office,Total Ballots Counted,Highest Office,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv 2 | 2012,41,1,Alabama,,58.6%,56.0%,,"2,074,338","3,539,217",3707440.0,2.6%,"32,232","57,993","8,616","71,584",AL 3 | 2012,81,2,Alaska,58.9%,58.7%,55.3%,"301,694","300,495","511,792",543763.0,3.8%,"5,633","7,173","1,882","11,317",AK 4 | 2012,61,3,Arizona,53.0%,52.6%,46.5%,"2,323,579","2,306,559","4,387,900",4959270.0,9.9%,"35,188","72,452","7,460","81,048",AZ 5 | 2012,42,4,Arkansas,51.1%,50.7%,47.7%,"1,078,548","1,069,468","2,109,847",2242740.0,3.5%,"14,471","30,122","23,372","53,808",AR 6 | 2012,71,5,California,55.7%,55.1%,45.1%,"13,202,158","13,038,547","23,681,837",28913129.0,17.4%,"119,455",0,"89,287","208,742",CA 7 | 2012,62,6,Colorado,70.6%,69.9%,64.5%,"2,596,173","2,569,522","3,675,871",3981208.0,6.9%,"18,807",0,"11,458","30,265",CO 8 | 2012,1,7,Connecticut,61.4%,61.3%,55.6%,"1,560,640","1,558,960","2,543,202",2801375.0,8.5%,"16,935",0,"2,793","19,728",CT 9 | 2012,11,8,Delaware,,62.3%,57.8%,,"413,921","663,967",715708.0,5.1%,"6,610","15,641",601,"15,501",DE 10 | 2012,55,9,District of Columbia,61.6%,61.5%,55.5%,"294,254","293,764","477,582",528848.0,9.7%,0,0,0,0,District of Columbia 11 | 2012,43,10,Florida,63.3%,62.8%,55.1%,"8,538,264","8,474,179","13,495,057",15380947.0,10.8%,"91,954","240,869","4,538","224,153",FL 12 | 2012,44,11,Georgia,59.3%,59.0%,52.3%,"3,919,355","3,900,050","6,606,607",7452696.0,7.2%,"52,737","442,061","24,761","311,790",GA 13 | 
2012,82,12,Hawaii,44.5%,44.2%,39.9%,"437,159","434,697","982,902",1088335.0,9.2%,"5,544",0,0,"5,544",HI 14 | 2012,63,13,Idaho,61.0%,59.8%,55.6%,"666,290","652,274","1,091,410",1173727.0,4.6%,"7,985","31,606","3,848","28,584",ID 15 | 2012,21,14,Illinois,59.3%,58.9%,53.3%,"5,279,752","5,242,014","8,899,143",9827043.0,8.9%,"49,348",0,0,"49,348",IL 16 | 2012,22,15,Indiana,56.0%,55.2%,52.9%,"2,663,368","2,624,534","4,755,291",4960376.0,3.6%,"28,266",0,0,"28,266",IN 17 | 2012,31,16,Iowa,70.6%,70.3%,67.1%,"1,589,951","1,582,180","2,251,748",2356209.0,3.2%,"8,470","29,333","5,151","29,167",IA 18 | 2012,32,17,Kansas,58.2%,56.9%,53.5%,"1,182,771","1,156,254","2,030,686",2162442.0,5.0%,"9,346","17,021","5,126","23,493",KS 19 | 2012,51,18,Kentucky,56.2%,55.7%,53.4%,"1,815,843","1,797,212","3,229,185",3368684.0,2.2%,"21,863","54,511","14,419","65,173",KY 20 | 2012,45,19,Louisiana,60.8%,60.2%,57.0%,"2,014,548","1,994,065","3,311,626",3495847.0,2.7%,"40,047","41,298","28,946","90,881",LA 21 | 2012,2,20,Maine,69.3%,68.2%,67.0%,"724,758","713,180","1,046,008",1064779.0,1.8%,0,0,0,0,ME 22 | 2012,52,21,Maryland,67.3%,66.6%,59.5%,"2,734,062","2,707,327","4,063,582",4553853.0,8.9%,"20,871","96,640","13,195","85,285",MD 23 | 2012,3,22,Massachusetts,66.2%,65.9%,60.2%,"3,184,196","3,167,767","4,809,675",5263550.0,8.4%,"10,283",0,0,"10,283",MA 24 | 2012,23,23,Michigan,65.4%,64.7%,62.0%,"4,780,701","4,730,961","7,312,725",7625576.0,3.5%,"43,019",0,0,"43,019",MI 25 | 2012,33,24,Minnesota,76.4%,76.0%,71.4%,"2,950,780","2,936,561","3,861,598",4114820.0,4.4%,"9,383","108,157","6,006","72,712",MN 26 | 2012,46,25,Mississippi,,59.3%,57.2%,,"1,285,584","2,166,825",2246931.0,1.5%,"22,305","30,768","6,804","45,416",MS 27 | 2012,34,26,Missouri,,62.2%,59.6%,,"2,757,323","4,432,957",4628500.0,2.5%,"30,714","55,470","20,672","80,785",MO 28 | 2012,64,27,Montana,63.5%,62.5%,61.6%,"491,966","484,048","774,476",785454.0,0.9%,"3,592",0,0,"3,592",MT 29 | 
2012,35,28,Nebraska,61.1%,60.3%,56.9%,"804,245","794,379","1,316,915",1396507.0,4.7%,"4,466","14,260","1,383","13,407",NE 30 | 2012,65,29,Nevada,56.5%,56.4%,48.2%,"1,016,664","1,014,918","1,800,969",2105976.0,13.3%,"12,883","11,321","5,379","24,262",NV 31 | 2012,4,30,New Hampshire,70.9%,70.2%,67.8%,"718,700","710,972","1,013,420",1047978.0,3.0%,"2,672",0,0,"2,672",NH 32 | 2012,12,31,New Jersey,62.2%,61.5%,53.2%,"3,683,638","3,640,292","5,918,182",6847503.0,12.1%,"21,759","114,886","14,987","97,636",NJ 33 | 2012,66,32,New Mexico,54.8%,54.6%,49.8%,"786,522","783,757","1,436,363",1573400.0,7.3%,"6,553","21,381","5,078","22,963",NM 34 | 2012,13,33,New York,53.5%,53.1%,46.1%,"7,128,852","7,074,723","13,324,107",15344671.0,12.5%,"49,889",0,"46,222","96,111",NY 35 | 2012,47,34,North Carolina,65.4%,64.8%,60.1%,"4,542,488","4,505,372","6,947,954",7496980.0,6.1%,"35,567","96,070","4,359","90,843",NC 36 | 2012,36,35,North Dakota,60.4%,59.8%,58.7%,"325,564","322,627","539,164",549955.0,1.7%,"1,500",0,0,"1,500",ND 37 | 2012,24,36,Ohio,65.1%,64.5%,62.7%,"5,632,423","5,580,822","8,649,495",8896930.0,2.2%,"50,313",0,0,"50,313",OH 38 | 2012,53,37,Oklahoma,,49.2%,46.3%,,"1,334,872","2,713,268",2885093.0,4.5%,"25,225","25,506","2,310","41,053",OK 39 | 2012,72,38,Oregon,64.2%,63.1%,58.7%,"1,820,507","1,789,270","2,836,101",3050747.0,6.6%,"13,607",0,0,"13,607",OR 40 | 2012,14,39,Pennsylvania,,59.5%,57.2%,,"5,742,040","9,651,432",10037099.0,3.3%,"50,054",0,0,"50,054",PA 41 | 2012,5,40,Rhode Island,,58.0%,53.4%,,"446,049","768,918",834983.0,7.5%,"3,249",0,0,"3,249",RI 42 | 2012,48,41,South Carolina,56.8%,56.3%,53.6%,"1,981,516","1,964,118","3,486,838",3662322.0,3.5%,"21,895","34,945","6,116","46,532",SC 43 | 2012,37,42,South Dakota,60.1%,59.3%,57.6%,"368,270","363,815","613,190",631472.0,1.9%,"3,574",0,"2,761","6,335",SD 44 | 2012,54,43,Tennessee,52.3%,51.9%,49.4%,"2,478,870","2,458,577","4,736,084",4976284.0,3.3%,"28,135","64,430","13,138","75,421",TN 45 | 
2012,49,44,Texas,,49.6%,41.7%,,"7,993,851","16,119,973",19185395.0,13.5%,"157,564","405,473","112,288","484,753",TX 46 | 2012,67,45,Utah,56.1%,55.5%,51.4%,"1,028,786","1,017,440","1,833,339",1978956.0,7.0%,"6,611",0,0,"6,611",UT 47 | 2012,6,46,Vermont,61.2%,60.7%,59.6%,"301,793","299,290","493,355",502242.0,1.8%,0,0,0,0,VT 48 | 2012,40,47,Virginia,66.6%,66.1%,60.7%,"3,888,186","3,854,489","5,834,676",6348827.0,7.1%,"36,425","52,956","1,983","66,475",VA 49 | 2012,73,48,Washington,65.8%,64.8%,58.6%,"3,172,939","3,125,516","4,822,060",5329782.0,8.2%,"16,355","88,339","8,895","72,070",WA 50 | 2012,56,49,West Virginia,,46.3%,45.5%,,"670,438","1,447,066",1472642.0,0.8%,"7,052","8,573","2,052","13,648",WV 51 | 2012,25,50,Wisconsin,,72.9%,69.5%,,"3,068,434","4,209,370",4417273.0,3.2%,"21,987","46,328","20,023","66,564",WI 52 | 2012,68,51,Wyoming,59.0%,58.6%,56.4%,"250,701","249,061","425,142",441726.0,2.5%,"2,163","5,162",762,"5,661",WY 53 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/2014_World_GDP: -------------------------------------------------------------------------------- 1 | COUNTRY,GDP (BILLIONS),CODE 2 | Afghanistan,21.71,AFG 3 | Albania,13.4,ALB 4 | Algeria,227.8,DZA 5 | American Samoa,0.75,ASM 6 | Andorra,4.8,AND 7 | Angola,131.4,AGO 8 | Anguilla,0.18,AIA 9 | Antigua and Barbuda,1.24,ATG 10 | Argentina,536.2,ARG 11 | Armenia,10.88,ARM 12 | Aruba,2.52,ABW 13 | Australia,1483.0,AUS 14 | Austria,436.1,AUT 15 | Azerbaijan,77.91,AZE 16 | "Bahamas, The",8.65,BHM 17 | Bahrain,34.05,BHR 18 | Bangladesh,186.6,BGD 19 | Barbados,4.28,BRB 20 | Belarus,75.25,BLR 21 | Belgium,527.8,BEL 22 | Belize,1.67,BLZ 23 | Benin,9.24,BEN 24 | Bermuda,5.2,BMU 25 | Bhutan,2.09,BTN 26 | Bolivia,34.08,BOL 27 | Bosnia and Herzegovina,19.55,BIH 28 | Botswana,16.3,BWA 29 | Brazil,2244.0,BRA 30 | British Virgin Islands,1.1,VGB 31 | Brunei,17.43,BRN 32 | Bulgaria,55.08,BGR 33 | Burkina Faso,13.38,BFA 34 | Burma,65.29,MMR 35 | Burundi,3.04,BDI 36 | Cabo Verde,1.98,CPV 37 | Cambodia,16.9,KHM 38 | Cameroon,32.16,CMR 39 | Canada,1794.0,CAN 40 | Cayman Islands,2.25,CYM 41 | Central African Republic,1.73,CAF 42 | Chad,15.84,TCD 43 | Chile,264.1,CHL 44 | China,10360.0,CHN 45 | Colombia,400.1,COL 46 | Comoros,0.72,COM 47 | "Congo, Democratic Republic of the",32.67,COD 48 | "Congo, Republic of the",14.11,COG 49 | Cook Islands,0.18,COK 50 | Costa Rica,50.46,CRI 51 | Cote d'Ivoire,33.96,CIV 52 | Croatia,57.18,HRV 53 | Cuba,77.15,CUB 54 | Curacao,5.6,CUW 55 | Cyprus,21.34,CYP 56 | Czech Republic,205.6,CZE 57 | Denmark,347.2,DNK 58 | Djibouti,1.58,DJI 59 | Dominica,0.51,DMA 60 | Dominican Republic,64.05,DOM 61 | Ecuador,100.5,ECU 62 | Egypt,284.9,EGY 63 | El Salvador,25.14,SLV 64 | Equatorial Guinea,15.4,GNQ 65 | Eritrea,3.87,ERI 66 | Estonia,26.36,EST 67 | Ethiopia,49.86,ETH 68 | Falkland Islands (Islas Malvinas),0.16,FLK 69 | Faroe Islands,2.32,FRO 70 | Fiji,4.17,FJI 71 | Finland,276.3,FIN 72 | France,2902.0,FRA 73 | French Polynesia,7.15,PYF 74 | 
Gabon,20.68,GAB 75 | "Gambia, The",0.92,GMB 76 | Georgia,16.13,GEO 77 | Germany,3820.0,DEU 78 | Ghana,35.48,GHA 79 | Gibraltar,1.85,GIB 80 | Greece,246.4,GRC 81 | Greenland,2.16,GRL 82 | Grenada,0.84,GRD 83 | Guam,4.6,GUM 84 | Guatemala,58.3,GTM 85 | Guernsey,2.74,GGY 86 | Guinea-Bissau,1.04,GNB 87 | Guinea,6.77,GIN 88 | Guyana,3.14,GUY 89 | Haiti,8.92,HTI 90 | Honduras,19.37,HND 91 | Hong Kong,292.7,HKG 92 | Hungary,129.7,HUN 93 | Iceland,16.2,ISL 94 | India,2048.0,IND 95 | Indonesia,856.1,IDN 96 | Iran,402.7,IRN 97 | Iraq,232.2,IRQ 98 | Ireland,245.8,IRL 99 | Isle of Man,4.08,IMN 100 | Israel,305.0,ISR 101 | Italy,2129.0,ITA 102 | Jamaica,13.92,JAM 103 | Japan,4770.0,JPN 104 | Jersey,5.77,JEY 105 | Jordan,36.55,JOR 106 | Kazakhstan,225.6,KAZ 107 | Kenya,62.72,KEN 108 | Kiribati,0.16,KIR 109 | "Korea, North",28.0,KOR 110 | "Korea, South",1410.0,PRK 111 | Kosovo,5.99,KSV 112 | Kuwait,179.3,KWT 113 | Kyrgyzstan,7.65,KGZ 114 | Laos,11.71,LAO 115 | Latvia,32.82,LVA 116 | Lebanon,47.5,LBN 117 | Lesotho,2.46,LSO 118 | Liberia,2.07,LBR 119 | Libya,49.34,LBY 120 | Liechtenstein,5.11,LIE 121 | Lithuania,48.72,LTU 122 | Luxembourg,63.93,LUX 123 | Macau,51.68,MAC 124 | Macedonia,10.92,MKD 125 | Madagascar,11.19,MDG 126 | Malawi,4.41,MWI 127 | Malaysia,336.9,MYS 128 | Maldives,2.41,MDV 129 | Mali,12.04,MLI 130 | Malta,10.57,MLT 131 | Marshall Islands,0.18,MHL 132 | Mauritania,4.29,MRT 133 | Mauritius,12.72,MUS 134 | Mexico,1296.0,MEX 135 | "Micronesia, Federated States of",0.34,FSM 136 | Moldova,7.74,MDA 137 | Monaco,6.06,MCO 138 | Mongolia,11.73,MNG 139 | Montenegro,4.66,MNE 140 | Morocco,112.6,MAR 141 | Mozambique,16.59,MOZ 142 | Namibia,13.11,NAM 143 | Nepal,19.64,NPL 144 | Netherlands,880.4,NLD 145 | New Caledonia,11.1,NCL 146 | New Zealand,201.0,NZL 147 | Nicaragua,11.85,NIC 148 | Nigeria,594.3,NGA 149 | Niger,8.29,NER 150 | Niue,0.01,NIU 151 | Northern Mariana Islands,1.23,MNP 152 | Norway,511.6,NOR 153 | Oman,80.54,OMN 154 | Pakistan,237.5,PAK 155 | Palau,0.65,PLW 156 
| Panama,44.69,PAN 157 | Papua New Guinea,16.1,PNG 158 | Paraguay,31.3,PRY 159 | Peru,208.2,PER 160 | Philippines,284.6,PHL 161 | Poland,552.2,POL 162 | Portugal,228.2,PRT 163 | Puerto Rico,93.52,PRI 164 | Qatar,212.0,QAT 165 | Romania,199.0,ROU 166 | Russia,2057.0,RUS 167 | Rwanda,8.0,RWA 168 | Saint Kitts and Nevis,0.81,KNA 169 | Saint Lucia,1.35,LCA 170 | Saint Martin,0.56,MAF 171 | Saint Pierre and Miquelon,0.22,SPM 172 | Saint Vincent and the Grenadines,0.75,VCT 173 | Samoa,0.83,WSM 174 | San Marino,1.86,SMR 175 | Sao Tome and Principe,0.36,STP 176 | Saudi Arabia,777.9,SAU 177 | Senegal,15.88,SEN 178 | Serbia,42.65,SRB 179 | Seychelles,1.47,SYC 180 | Sierra Leone,5.41,SLE 181 | Singapore,307.9,SGP 182 | Sint Maarten,304.1,SXM 183 | Slovakia,99.75,SVK 184 | Slovenia,49.93,SVN 185 | Solomon Islands,1.16,SLB 186 | Somalia,2.37,SOM 187 | South Africa,341.2,ZAF 188 | South Sudan,11.89,SSD 189 | Spain,1400.0,ESP 190 | Sri Lanka,71.57,LKA 191 | Sudan,70.03,SDN 192 | Suriname,5.27,SUR 193 | Swaziland,3.84,SWZ 194 | Sweden,559.1,SWE 195 | Switzerland,679.0,CHE 196 | Syria,64.7,SYR 197 | Taiwan,529.5,TWN 198 | Tajikistan,9.16,TJK 199 | Tanzania,36.62,TZA 200 | Thailand,373.8,THA 201 | Timor-Leste,4.51,TLS 202 | Togo,4.84,TGO 203 | Tonga,0.49,TON 204 | Trinidad and Tobago,29.63,TTO 205 | Tunisia,49.12,TUN 206 | Turkey,813.3,TUR 207 | Turkmenistan,43.5,TKM 208 | Tuvalu,0.04,TUV 209 | Uganda,26.09,UGA 210 | Ukraine,134.9,UKR 211 | United Arab Emirates,416.4,ARE 212 | United Kingdom,2848.0,GBR 213 | United States,17420.0,USA 214 | Uruguay,55.6,URY 215 | Uzbekistan,63.08,UZB 216 | Vanuatu,0.82,VUT 217 | Venezuela,209.2,VEN 218 | Vietnam,187.8,VNM 219 | Virgin Islands,5.08,VGB 220 | West Bank,6.64,WBG 221 | Yemen,45.45,YEM 222 | Zambia,25.61,ZMB 223 | Zimbabwe,13.74,ZWE 224 | -------------------------------------------------------------------------------- /7. 
Geographical Plotting/2014_World_Power_Consumption: -------------------------------------------------------------------------------- 1 | Country,Power Consumption KWH,Text 2 | China,5523000000000.0,"China 5,523,000,000,000" 3 | United States,3832000000000.0,"United 3,832,000,000,000" 4 | European,2771000000000.0,"European 2,771,000,000,000" 5 | Russia,1065000000000.0,"Russia 1,065,000,000,000" 6 | Japan,921000000000.0,"Japan 921,000,000,000" 7 | India,864700000000.0,"India 864,700,000,000" 8 | Germany,540100000000.0,"Germany 540,100,000,000" 9 | Canada,511000000000.0,"Canada 511,000,000,000" 10 | Brazil,483500000000.0,"Brazil 483,500,000,000" 11 | "Korea,",482400000000.0,"Korea, 482,400,000,000" 12 | France,451100000000.0,"France 451,100,000,000" 13 | United Kingdom,319100000000.0,"United 319,100,000,000" 14 | Italy,303100000000.0,"Italy 303,100,000,000" 15 | Taiwan,249500000000.0,"Taiwan 249,500,000,000" 16 | Spain,243100000000.0,"Spain 243,100,000,000" 17 | Mexico,234000000000.0,"Mexico 234,000,000,000" 18 | Saudi,231600000000.0,"Saudi 231,600,000,000" 19 | Australia,222600000000.0,"Australia 222,600,000,000" 20 | South,211600000000.0,"South 211,600,000,000" 21 | Turkey,197000000000.0,"Turkey 197,000,000,000" 22 | Iran,195300000000.0,"Iran 195,300,000,000" 23 | Indonesia,167500000000.0,"Indonesia 167,500,000,000" 24 | Ukraine,159800000000.0,"Ukraine 159,800,000,000" 25 | Thailand,155900000000.0,"Thailand 155,900,000,000" 26 | Poland,139000000000.0,"Poland 139,000,000,000" 27 | Egypt,135600000000.0,"Egypt 135,600,000,000" 28 | Sweden,130500000000.0,"Sweden 130,500,000,000" 29 | Norway,126400000000.0,"Norway 126,400,000,000" 30 | Malaysia,118500000000.0,"Malaysia 118,500,000,000" 31 | Argentina,117100000000.0,"Argentina 117,100,000,000" 32 | Netherlands,116800000000.0,"Netherlands 116,800,000,000" 33 | Vietnam,108300000000.0,"Vietnam 108,300,000,000" 34 | Venezuela,97690000000.0,"Venezuela 97,690,000,000" 35 | United Arab Emirates,93280000000.0,"United 
93,280,000,000" 36 | Finland,82040000000.0,"Finland 82,040,000,000" 37 | Belgium,81890000000.0,"Belgium 81,890,000,000" 38 | Kazakhstan,80290000000.0,"Kazakhstan 80,290,000,000" 39 | Pakistan,78890000000.0,"Pakistan 78,890,000,000" 40 | Philippines,75270000000.0,"Philippines 75,270,000,000" 41 | Austria,69750000000.0,"Austria 69,750,000,000" 42 | Chile,63390000000.0,"Chile 63,390,000,000" 43 | Czechia,60550000000.0,"Czechia 60,550,000,000" 44 | Israel,59830000000.0,"Israel 59,830,000,000" 45 | Switzerland,58010000000.0,"Switzerland 58,010,000,000" 46 | Greece,57730000000.0,"Greece 57,730,000,000" 47 | Iraq,53410000000.0,"Iraq 53,410,000,000" 48 | Romania,50730000000.0,"Romania 50,730,000,000" 49 | Kuwait,50000000000.0,"Kuwait 50,000,000,000" 50 | Colombia,49380000000.0,"Colombia 49,380,000,000" 51 | Singapore,47180000000.0,"Singapore 47,180,000,000" 52 | Portugal,46250000000.0,"Portugal 46,250,000,000" 53 | Uzbekistan,45210000000.0,"Uzbekistan 45,210,000,000" 54 | Hong,44210000000.0,"Hong 44,210,000,000" 55 | Algeria,42870000000.0,"Algeria 42,870,000,000" 56 | Bangladesh,41520000000.0,"Bangladesh 41,520,000,000" 57 | New,40300000000.0,"New 40,300,000,000" 58 | Bulgaria,37990000000.0,"Bulgaria 37,990,000,000" 59 | Belarus,37880000000.0,"Belarus 37,880,000,000" 60 | Peru,35690000000.0,"Peru 35,690,000,000" 61 | Denmark,31960000000.0,"Denmark 31,960,000,000" 62 | Qatar,30530000000.0,"Qatar 30,530,000,000" 63 | Slovakia,28360000000.0,"Slovakia 28,360,000,000" 64 | Libya,27540000000.0,"Libya 27,540,000,000" 65 | Serbia,26910000000.0,"Serbia 26,910,000,000" 66 | Morocco,26700000000.0,"Morocco 26,700,000,000" 67 | Syria,25700000000.0,"Syria 25,700,000,000" 68 | Nigeria,24780000000.0,"Nigeria 24,780,000,000" 69 | Ireland,24240000000.0,"Ireland 24,240,000,000" 70 | Hungary,21550000000.0,"Hungary 21,550,000,000" 71 | Oman,20360000000.0,"Oman 20,360,000,000" 72 | Ecuador,19020000000.0,"Ecuador 19,020,000,000" 73 | Puerto,18620000000.0,"Puerto 18,620,000,000" 74 | 
Azerbaijan,17790000000.0,"Azerbaijan 17,790,000,000" 75 | Croatia,16970000000.0,"Croatia 16,970,000,000" 76 | Iceland,16940000000.0,"Iceland 16,940,000,000" 77 | Cuba,16200000000.0,"Cuba 16,200,000,000" 78 | "Korea,",16000000000.0,"Korea, 16,000,000,000" 79 | Dominican,15140000000.0,"Dominican 15,140,000,000" 80 | Jordan,14560000000.0,"Jordan 14,560,000,000" 81 | Tajikistan,14420000000.0,"Tajikistan 14,420,000,000" 82 | Tunisia,13310000000.0,"Tunisia 13,310,000,000" 83 | Slovenia,13020000000.0,"Slovenia 13,020,000,000" 84 | Lebanon,12940000000.0,"Lebanon 12,940,000,000" 85 | Bosnia,12560000000.0,"Bosnia 12,560,000,000" 86 | Turkmenistan,11750000000.0,"Turkmenistan 11,750,000,000" 87 | Bahrain,11690000000.0,"Bahrain 11,690,000,000" 88 | Mozambique,11280000000.0,"Mozambique 11,280,000,000" 89 | Ghana,10580000000.0,"Ghana 10,580,000,000" 90 | Sri,10170000000.0,"Sri 10,170,000,000" 91 | Kyrgyzstan,9943000000.0,"Kyrgyzstan 9,943,000,000" 92 | Lithuania,9664000000.0,"Lithuania 9,664,000,000" 93 | Uruguay,9559000000.0,"Uruguay 9,559,000,000" 94 | Costa,8987000000.0,"Costa 8,987,000,000" 95 | Guatemala,8915000000.0,"Guatemala 8,915,000,000" 96 | Georgia,8468000000.0,"Georgia 8,468,000,000" 97 | Trinidad,8365000000.0,"Trinidad 8,365,000,000" 98 | Zambia,8327000000.0,"Zambia 8,327,000,000" 99 | Paraguay,8125000000.0,"Paraguay 8,125,000,000" 100 | Albania,7793000000.0,"Albania 7,793,000,000" 101 | Burma,7765000000.0,"Burma 7,765,000,000" 102 | Estonia,7417000000.0,"Estonia 7,417,000,000" 103 | "Congo,",7292000000.0,"Congo, 7,292,000,000" 104 | Panama,7144000000.0,"Panama 7,144,000,000" 105 | Latvia,7141000000.0,"Latvia 7,141,000,000" 106 | Macedonia,6960000000.0,"Macedonia 6,960,000,000" 107 | Zimbabwe,6831000000.0,"Zimbabwe 6,831,000,000" 108 | Kenya,6627000000.0,"Kenya 6,627,000,000" 109 | Bolivia,6456000000.0,"Bolivia 6,456,000,000" 110 | Luxembourg,6108000000.0,"Luxembourg 6,108,000,000" 111 | Sudan,5665000000.0,"Sudan 5,665,000,000" 112 | El,5665000000.0,"El 
5,665,000,000" 113 | Cameroon,5535000000.0,"Cameroon 5,535,000,000" 114 | West,5312000000.0,"West 5,312,000,000" 115 | Ethiopia,5227000000.0,"Ethiopia 5,227,000,000" 116 | Armenia,5043000000.0,"Armenia 5,043,000,000" 117 | Honduras,5036000000.0,"Honduras 5,036,000,000" 118 | Angola,4842000000.0,"Angola 4,842,000,000" 119 | Cote,4731000000.0,"Cote 4,731,000,000" 120 | Tanzania,4545000000.0,"Tanzania 4,545,000,000" 121 | Nicaragua,4412000000.0,"Nicaragua 4,412,000,000" 122 | Moldova,4305000000.0,"Moldova 4,305,000,000" 123 | Cyprus,4296000000.0,"Cyprus 4,296,000,000" 124 | Macau,4291000000.0,"Macau 4,291,000,000" 125 | Namibia,4238000000.0,"Namibia 4,238,000,000" 126 | Mongolia,4204000000.0,"Mongolia 4,204,000,000" 127 | Afghanistan,3893000000.0,"Afghanistan 3,893,000,000" 128 | Yemen,3838000000.0,"Yemen 3,838,000,000" 129 | Brunei,3766000000.0,"Brunei 3,766,000,000" 130 | Cambodia,3553000000.0,"Cambodia 3,553,000,000" 131 | Montenegro,3465000000.0,"Montenegro 3,465,000,000" 132 | Nepal,3239000000.0,"Nepal 3,239,000,000" 133 | Botswana,3213000000.0,"Botswana 3,213,000,000" 134 | Papua,3116000000.0,"Papua 3,116,000,000" 135 | Jamaica,3008000000.0,"Jamaica 3,008,000,000" 136 | Kosovo,2887000000.0,"Kosovo 2,887,000,000" 137 | Laos,2874000000.0,"Laos 2,874,000,000" 138 | Uganda,2821000000.0,"Uganda 2,821,000,000" 139 | New,2716000000.0,"New 2,716,000,000" 140 | Mauritius,2658000000.0,"Mauritius 2,658,000,000" 141 | Senegal,2586000000.0,"Senegal 2,586,000,000" 142 | Bhutan,2085000000.0,"Bhutan 2,085,000,000" 143 | Malawi,2027000000.0,"Malawi 2,027,000,000" 144 | Madagascar,1883000000.0,"Madagascar 1,883,000,000" 145 | "Bahamas,",1716000000.0,"Bahamas, 1,716,000,000" 146 | Gabon,1680000000.0,"Gabon 1,680,000,000" 147 | Suriname,1572000000.0,"Suriname 1,572,000,000" 148 | Guam,1566000000.0,"Guam 1,566,000,000" 149 | Liechtenstein,1360000000.0,"Liechtenstein 1,360,000,000" 150 | Swaziland,1295000000.0,"Swaziland 1,295,000,000" 151 | Burkina,985500000.0,"Burkina 985,500,000" 
152 | Togo,976000000.0,"Togo 976,000,000" 153 | Curacao,968000000.0,"Curacao 968,000,000" 154 | Mauritania,962600000.0,"Mauritania 962,600,000" 155 | Barbados,938000000.0,"Barbados 938,000,000" 156 | Niger,930200000.0,"Niger 930,200,000" 157 | Aruba,920700000.0,"Aruba 920,700,000" 158 | Benin,911000000.0,"Benin 911,000,000" 159 | Guinea,903000000.0,"Guinea 903,000,000" 160 | Mali,882600000.0,"Mali 882,600,000" 161 | Fiji,777600000.0,"Fiji 777,600,000" 162 | "Congo,",740000000.0,"Congo, 740,000,000" 163 | Virgin,723500000.0,"Virgin 723,500,000" 164 | Lesotho,707000000.0,"Lesotho 707,000,000" 165 | South,694100000.0,"South 694,100,000" 166 | Bermuda,664200000.0,"Bermuda 664,200,000" 167 | French,652900000.0,"French 652,900,000" 168 | Jersey,630100000.0,"Jersey 630,100,000" 169 | Belize,605000000.0,"Belize 605,000,000" 170 | Andorra,562400000.0,"Andorra 562,400,000" 171 | Guyana,558000000.0,"Guyana 558,000,000" 172 | Cayman,545900000.0,"Cayman 545,900,000" 173 | Haiti,452000000.0,"Haiti 452,000,000" 174 | Rwanda,365500000.0,"Rwanda 365,500,000" 175 | Saint,336400000.0,"Saint 336,400,000" 176 | Djibouti,311600000.0,"Djibouti 311,600,000" 177 | Seychelles,293900000.0,"Seychelles 293,900,000" 178 | Somalia,293000000.0,"Somalia 293,000,000" 179 | Antigua,293000000.0,"Antigua 293,000,000" 180 | Greenland,292000000.0,"Greenland 292,000,000" 181 | Cabo,285500000.0,"Cabo 285,500,000" 182 | Eritrea,284000000.0,"Eritrea 284,000,000" 183 | Burundi,282900000.0,"Burundi 282,900,000" 184 | Liberia,276900000.0,"Liberia 276,900,000" 185 | Maldives,267100000.0,"Maldives 267,100,000" 186 | Faroe,261300000.0,"Faroe 261,300,000" 187 | "Gambia,",218600000.0,"Gambia, 218,600,000" 188 | Chad,190700000.0,"Chad 190,700,000" 189 | "Micronesia,",178600000.0,"Micronesia, 178,600,000" 190 | Grenada,178000000.0,"Grenada 178,000,000" 191 | Central,168300000.0,"Central 168,300,000" 192 | Turks,167400000.0,"Turks 167,400,000" 193 | Gibraltar,160000000.0,"Gibraltar 160,000,000" 194 | 
American,146000000.0,"American 146,000,000" 195 | Sierra,134900000.0,"Sierra 134,900,000" 196 | Saint,130200000.0,"Saint 130,200,000" 197 | Saint,127400000.0,"Saint 127,400,000" 198 | Timor-Leste,125300000.0,"Timor-Leste 125,300,000" 199 | Equatorial,93000000.0,"Equatorial 93,000,000" 200 | Samoa,90400000.0,"Samoa 90,400,000" 201 | Dominica,89750000.0,"Dominica 89,750,000" 202 | Western,83700000.0,"Western 83,700,000" 203 | Solomon,79050000.0,"Solomon 79,050,000" 204 | Sao,60450000.0,"Sao 60,450,000" 205 | British,51150000.0,"British 51,150,000" 206 | Vanuatu,49290000.0,"Vanuatu 49,290,000" 207 | Guinea-Bissau,46500000.0,"Guinea-Bissau 46,500,000" 208 | Tonga,44640000.0,"Tonga 44,640,000" 209 | Saint,39990000.0,"Saint 39,990,000" 210 | Comoros,39990000.0,"Comoros 39,990,000" 211 | Cook,28950000.0,"Cook 28,950,000" 212 | Kiribati,24180000.0,"Kiribati 24,180,000" 213 | Montserrat,23250000.0,"Montserrat 23,250,000" 214 | Nauru,23250000.0,"Nauru 23,250,000" 215 | Falkland,11160000.0,"Falkland 11,160,000" 216 | Saint,7440000.0,"Saint 7,440,000" 217 | Niue,2790000.0,"Niue 2,790,000" 218 | Gaza,202000.0,"Gaza 202,000" 219 | Malta,174700.0,"Malta 174,700" 220 | Northern,48300.0,"Northern 48,300" 221 | -------------------------------------------------------------------------------- /7. Geographical Plotting/Choropleth Maps Exercise - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise - Solutions\n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. 
Due to the Nature of Plotly we can't show you examples embedded inside the notebook.\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 13, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/html": [ 36 | "" 37 | ], 38 | "text/plain": [ 39 | "" 40 | ] 41 | }, 42 | "metadata": {}, 43 | "output_type": "display_data" 44 | } 45 | ], 46 | "source": [ 47 | "import plotly.graph_objs as go \n", 48 | "from plotly.offline import init_notebook_mode,iplot,plot\n", 49 | "init_notebook_mode(connected=True) " 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "import pandas as pd" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "df = pd.read_csv('2014_World_Power_Consumption')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "** Check the head of the DataFrame. **" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": [ 98 | "
\n", 99 | "\n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " Country Power Consumption KWH Text\n", 145 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 146 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 147 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 148 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 149 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 150 | ] 151 | }, 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "df.head()" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 19, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "data = dict(\n", 177 | " type = 'choropleth',\n", 178 | " colorscale = 'Viridis',\n", 179 | " reversescale = True,\n", 180 | " locations = df['Country'],\n", 181 | " locationmode = \"country names\",\n", 182 | " z = df['Power Consumption KWH'],\n", 183 | " text = df['Country'],\n", 184 | " colorbar = {'title' : 'Power Consumption KWH'},\n", 185 | " ) \n", 186 | "\n", 187 | "layout = dict(title = '2014 Power Consumption KWH',\n", 188 | " geo = dict(showframe = False,projection = {'type':'Mercator'})\n", 189 | " )" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 20, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "'file:///Users/marci/Pierian-Data-Courses/Udemy-Python-Data-Science-Machine-Learning/Python-Data-Science-and-Machine-Learning-Bootcamp/Python-for-Data-Visualization/Geographical Plotting/temp-plot.html'" 203 | ] 204 | }, 205 | "execution_count": 20, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 
209 | ], 210 | "source": [ 211 | "choromap = go.Figure(data = [data],layout = layout)\n", 212 | "plot(choromap,validate=False)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## USA Choropleth\n", 220 | "\n", 221 | "** Import the 2012_Election_Data csv file using pandas. **" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "metadata": { 228 | "collapsed": true 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "usdf = pd.read_csv('2012_Election_Data')" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "** Check the head of the DataFrame. **" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 8, 245 | "metadata": { 246 | "collapsed": false 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/html": [ 252 | "
\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 379 | "
" 380 | ], 381 | "text/plain": [ 382 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 383 | "0 2012 41 1 Alabama \n", 384 | "1 2012 81 2 Alaska \n", 385 | "2 2012 61 3 Arizona \n", 386 | "3 2012 42 4 Arkansas \n", 387 | "4 2012 71 5 California \n", 388 | "\n", 389 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 390 | "0 NaN 58.6% 56.0% \n", 391 | "1 58.9% 58.7% 55.3% \n", 392 | "2 53.0% 52.6% 46.5% \n", 393 | "3 51.1% 50.7% 47.7% \n", 394 | "4 55.7% 55.1% 45.1% \n", 395 | "\n", 396 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 397 | "0 NaN 2,074,338 3,539,217 \n", 398 | "1 301,694 300,495 511,792 \n", 399 | "2 2,323,579 2,306,559 4,387,900 \n", 400 | "3 1,078,548 1,069,468 2,109,847 \n", 401 | "4 13,202,158 13,038,547 23,681,837 \n", 402 | "\n", 403 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 404 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 405 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 406 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 407 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 408 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 409 | "\n", 410 | " Total Ineligible Felon State Abv \n", 411 | "0 71,584 AL \n", 412 | "1 11,317 AK \n", 413 | "2 81,048 AZ \n", 414 | "3 53,808 AR \n", 415 | "4 208,742 CA " 416 | ] 417 | }, 418 | "execution_count": 8, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "usdf.head()" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 16, 437 | "metadata": { 438 | "collapsed": false 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "data = dict(type='choropleth',\n", 443 | " colorscale = 'Viridis',\n", 444 | " reversescale = True,\n", 445 | " locations = usdf['State Abv'],\n", 446 | " z = usdf['Voting-Age Population (VAP)'],\n", 447 | " locationmode = 'USA-states',\n", 448 | " text = usdf['State'],\n", 449 | " marker = dict(line = dict(color = 'rgb(255,255,255)',width = 1)),\n", 450 | " colorbar = {'title':\"Voting-Age Population (VAP)\"}\n", 451 | " ) " 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": 17, 457 | "metadata": { 458 | "collapsed": true 459 | }, 460 | "outputs": [], 461 | "source": [ 462 | "layout = dict(title = '2012 General Election Voting Data',\n", 463 | " geo = dict(scope='usa',\n", 464 | " showlakes = True,\n", 465 | " lakecolor = 'rgb(85,173,240)')\n", 466 | " )" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 18, 472 | "metadata": { 473 | "collapsed": false 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "'file:///Users/marci/Pierian-Data-Courses/Udemy-Python-Data-Science-Machine-Learning/Python-Data-Science-and-Machine-Learning-Bootcamp/Python-for-Data-Visualization/Geographical Plotting/temp-plot.html'" 480 | ] 481 | }, 482 | "execution_count": 18, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | "choromap = go.Figure(data = [data],layout = layout)\n", 489 | "plot(choromap,validate=False)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": {}, 495 | "source": [ 496 | "# Great Job!" 
497 | ] 498 | } 499 | ], 500 | "metadata": { 501 | "kernelspec": { 502 | "display_name": "Python 3", 503 | "language": "python", 504 | "name": "python3" 505 | }, 506 | "language_info": { 507 | "codemirror_mode": { 508 | "name": "ipython", 509 | "version": 3 510 | }, 511 | "file_extension": ".py", 512 | "mimetype": "text/x-python", 513 | "name": "python", 514 | "nbconvert_exporter": "python", 515 | "pygments_lexer": "ipython3", 516 | "version": "3.5.1" 517 | } 518 | }, 519 | "nbformat": 4, 520 | "nbformat_minor": 0 521 | } 522 | -------------------------------------------------------------------------------- /7. Geographical Plotting/Choropleth Maps Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Choropleth Maps Exercise \n", 18 | "\n", 19 | "Welcome to the Choropleth Maps Exercise! In this exercise we will give you some simple datasets and ask you to create Choropleth Maps from them. 
Due to the Nature of Plotly we can't show you examples\n", 20 | "\n", 21 | "[Full Documentation Reference](https://plot.ly/python/reference/#choropleth)\n", 22 | "\n", 23 | "## Plotly Imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 38, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import plotly.graph_objs as go \n", 35 | "from plotly.offline import init_notebook_mode,iplot\n", 36 | "init_notebook_mode(connected=True) " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "** Import pandas and read the csv file: 2014_World_Power_Consumption**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 152, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "** Check the head of the DataFrame. **" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 156, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
CountryPower Consumption KWHText
0China5.523000e+12China 5,523,000,000,000
1United States3.832000e+12United 3,832,000,000,000
2European2.771000e+12European 2,771,000,000,000
3Russia1.065000e+12Russia 1,065,000,000,000
4Japan9.210000e+11Japan 921,000,000,000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " Country Power Consumption KWH Text\n", 128 | "0 China 5.523000e+12 China 5,523,000,000,000\n", 129 | "1 United States 3.832000e+12 United 3,832,000,000,000\n", 130 | "2 European 2.771000e+12 European 2,771,000,000,000\n", 131 | "3 Russia 1.065000e+12 Russia 1,065,000,000,000\n", 132 | "4 Japan 9.210000e+11 Japan 921,000,000,000" 133 | ] 134 | }, 135 | "execution_count": 156, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "** Referencing the lecture notes, create a Choropleth Plot of the Power Consumption for Countries using the data and layout dictionary. **" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "choromap = go.Figure(data = [data],layout = layout)\n", 167 | "iplot(choromap,validate=False)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## USA Choropleth\n", 175 | "\n", 176 | "** Import the 2012_Election_Data csv file using pandas. **" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 109, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "** Check the head of the DataFrame. **" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 110, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/html": [ 205 | "
\n", 206 | "\n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
YearICPSR State CodeAlphanumeric State CodeStateVEP Total Ballots CountedVEP Highest OfficeVAP Highest OfficeTotal Ballots CountedHighest OfficeVoting-Eligible Population (VEP)Voting-Age Population (VAP)% Non-citizenPrisonProbationParoleTotal Ineligible FelonState Abv
02012411AlabamaNaN58.6%56.0%NaN2,074,3383,539,2173707440.02.6%32,23257,9938,61671,584AL
12012812Alaska58.9%58.7%55.3%301,694300,495511,792543763.03.8%5,6337,1731,88211,317AK
22012613Arizona53.0%52.6%46.5%2,323,5792,306,5594,387,9004959270.09.9%35,18872,4527,46081,048AZ
32012424Arkansas51.1%50.7%47.7%1,078,5481,069,4682,109,8472242740.03.5%14,47130,12223,37253,808AR
42012715California55.7%55.1%45.1%13,202,15813,038,54723,681,83728913129.017.4%119,455089,287208,742CA
\n", 332 | "
" 333 | ], 334 | "text/plain": [ 335 | " Year ICPSR State Code Alphanumeric State Code State \\\n", 336 | "0 2012 41 1 Alabama \n", 337 | "1 2012 81 2 Alaska \n", 338 | "2 2012 61 3 Arizona \n", 339 | "3 2012 42 4 Arkansas \n", 340 | "4 2012 71 5 California \n", 341 | "\n", 342 | " VEP Total Ballots Counted VEP Highest Office VAP Highest Office \\\n", 343 | "0 NaN 58.6% 56.0% \n", 344 | "1 58.9% 58.7% 55.3% \n", 345 | "2 53.0% 52.6% 46.5% \n", 346 | "3 51.1% 50.7% 47.7% \n", 347 | "4 55.7% 55.1% 45.1% \n", 348 | "\n", 349 | " Total Ballots Counted Highest Office Voting-Eligible Population (VEP) \\\n", 350 | "0 NaN 2,074,338 3,539,217 \n", 351 | "1 301,694 300,495 511,792 \n", 352 | "2 2,323,579 2,306,559 4,387,900 \n", 353 | "3 1,078,548 1,069,468 2,109,847 \n", 354 | "4 13,202,158 13,038,547 23,681,837 \n", 355 | "\n", 356 | " Voting-Age Population (VAP) % Non-citizen Prison Probation Parole \\\n", 357 | "0 3707440.0 2.6% 32,232 57,993 8,616 \n", 358 | "1 543763.0 3.8% 5,633 7,173 1,882 \n", 359 | "2 4959270.0 9.9% 35,188 72,452 7,460 \n", 360 | "3 2242740.0 3.5% 14,471 30,122 23,372 \n", 361 | "4 28913129.0 17.4% 119,455 0 89,287 \n", 362 | "\n", 363 | " Total Ineligible Felon State Abv \n", 364 | "0 71,584 AL \n", 365 | "1 11,317 AK \n", 366 | "2 81,048 AZ \n", 367 | "3 53,808 AR \n", 368 | "4 208,742 CA " 369 | ] 370 | }, 371 | "execution_count": 110, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "** Now create a plot that displays the Voting-Age Population (VAP) per state. If you later want to play around with other columns, make sure you consider their data type. VAP has already been transformed to a float for you. 
**" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 120, 388 | "metadata": { 389 | "collapsed": false 390 | }, 391 | "outputs": [], 392 | "source": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 121, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "choromap = go.Figure(data = [data],layout = layout)\n", 412 | "iplot(choromap,validate=False)" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "# Great Job!" 420 | ] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "Python 3", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.5.1" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 0 444 | } 445 | -------------------------------------------------------------------------------- /7. Geographical Plotting/plotly_cheat_sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SuvroBaner/Python-for-Data-Science-and-Machine-Learning-Bootcamp/c0bafbafc2c37a189c70a3758f6e81888b1542ae/7. 
Geographical Plotting/plotly_cheat_sheet.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-for-Data-Science-and-Machine-Learning-Bootcamp 2 | This repository is based on the online classes I did with Udemy to leverage Python to solve Data Science problems. 3 | The instructor Jose Portilla, a Data Scientist himself, lectured the course, which is worth taking. 4 | I will only post the iPython notebooks and they will be posted as I finish them myself. This helps me to keep track of my studies 5 | and also refer to it on the fly while I am at work. Reference: 6 | https://www.udemy.com/python-for-data-science-and-machine-learning-bootcamp/learn/v4/overview 7 | 8 | Following are the topics posted- 9 | 10 | 1) Python Crash Course 11 | 2) NumPy (Numeric Python) 12 | 3) Pandas 13 | 4) Matplotlib 14 | 5) Seaborn 15 | 6) Pandas Built-in Data Visualization 16 | 7) Plotly and Cufflinks 17 | 8) Geographical Plotting 18 | 9) Data - Capstone Project 19 | 10) Linear Regression 20 | 11) Logistic Regression 21 | 12) K-Nearest Neighbors 22 | 13) Decision Trees and Random Forests 23 | 14) Support Vector Machines 24 | 15) K-Means Clustering 25 | 16) Principal Component Analysis 26 | --------------------------------------------------------------------------------