├── 03-Python-for-Data-Analysis-Pandas
    ├── example
    ├── Excel_Sample.xlsx
    ├── multi_index_example
    ├── 01-Introduction to Pandas.ipynb
    ├── 04-Missing Data.ipynb
    ├── 02-Series.ipynb
    └── 05-Groupby.ipynb
├── 05-Data-Visualization-with-Matplotlib
    └── LWM.png
├── 07-Pandas-Built-in-Data-Viz
    └── df2
├── 02-Python-for-Data-Analysis-NumPy
    ├── 03-Numpy Operations.ipynb
    ├── 04-Numpy Exercises.ipynb
    ├── 05-Numpy Exercises - Solutions.ipynb
    ├── 02-Numpy Indexing and Selection.ipynb
    └── 01-NumPy Arrays.ipynb
└── 04-Pandas-Exercises
    ├── practice.ipynb
    ├── 03-Ecommerce Purchases Exercise .ipynb
    ├── 01-SF Salaries Exercise.ipynb
    ├── 04-Ecommerce Purchases Exercise - Solutions.ipynb
    └── 02-SF Salaries Exercise - Solutions.ipynb


/03-Python-for-Data-Analysis-Pandas/example:
--------------------------------------------------------------------------------
1 | a,b,c,d
2 | 0,1,2,3
3 | 4,5,6,7
4 | 8,9,10,11
5 | 12,13,14,15
6 | 


--------------------------------------------------------------------------------
/05-Data-Visualization-with-Matplotlib/LWM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnwithme4998/Data-analysis-and-Data-Analytics/HEAD/05-Data-Visualization-with-Matplotlib/LWM.png


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/Excel_Sample.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnwithme4998/Data-analysis-and-Data-Analytics/HEAD/03-Python-for-Data-Analysis-Pandas/Excel_Sample.xlsx


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/multi_index_example:
--------------------------------------------------------------------------------
1 | first,bar,bar,baz,baz,foo,foo,qux,qux
2 | second,one,two,one,two,one,two,one,two
3 | ,,,,,,,,
4 | A,1.025984152081572,-0.1565979042889875,-0.031579143908112575,0.6498258334908454,2.154846443259472,-0.6102588558227414,-0.755325340010558,-0.34641850351854453
5 | B,0.1470267713241236,-0.47944803904109595,0.558769406443067,1.0248102783372157,-0.925874258809907,1.8628641384939535,-1.1338171615837889,0.6104779075384634
6 | C,0.3860303121135517,2.084018530338962,-0.37651867524923904,0.23033634359240704,0.6812092925867574,1.0351250747739213,-0.031160481493099617,1.9399323109926203
7 | 


--------------------------------------------------------------------------------
/07-Pandas-Built-in-Data-Viz/df2:
--------------------------------------------------------------------------------
 1 | a,b,c,d
 2 | 0.039761986133905136,0.2185172274750622,0.10342298051665423,0.9579042338107532
 3 | 0.9372879037285884,0.04156728027953449,0.8991254222382951,0.9776795571253272
 4 | 0.7805044779316328,0.008947537857148302,0.5578084027546968,0.7975104497549266
 5 | 0.6727174963492204,0.24786984946279625,0.2640713103088026,0.44435791644122935
 6 | 0.05382860859967886,0.5201244020579979,0.5522642392797277,0.19000759632053632
 7 | 0.2860433671280178,0.5934650440000543,0.9073072637456548,0.6378977150631427
 8 | 0.4304355863327313,0.16623013749421356,0.4693825447762464,0.4977008828313123
 9 | 0.3122955538295512,0.5028232900921878,0.8066087010958843,0.8505190941429479
10 | 0.1877648514121828,0.9970746427719338,0.8959552961495315,0.530390137569463
11 | 0.9081621790575398,0.23272641071536715,0.4141382611943452,0.4320069001558664
12 | 


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/01-Introduction to Pandas.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {
 6 |     "collapsed": true
 7 |    },
 8 |    "source": [
 9 |     "# Introduction to Pandas\n",
10 |     "\n",
11 |     "We will learn how to use pandas for data analysis. You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n",
12 |     "\n",
13 |     "* Introduction to Pandas\n",
14 |     "* Series\n",
15 |     "* DataFrames\n",
16 |     "* Missing Data\n",
17 |     "* GroupBy\n",
18 |     "* Merging, Joining, and Concatenating\n",
19 |     "* Operations\n",
20 |     "* Data Input and Output"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "markdown",
25 |    "metadata": {},
26 |    "source": [
27 |     "___"
28 |    ]
29 |   }
30 |  ],
31 |  "metadata": {
32 |   "kernelspec": {
33 |    "display_name": "Python 3",
34 |    "language": "python",
35 |    "name": "python3"
36 |   },
37 |   "language_info": {
38 |    "codemirror_mode": {
39 |     "name": "ipython",
40 |     "version": 3
41 |    },
42 |    "file_extension": ".py",
43 |    "mimetype": "text/x-python",
44 |    "name": "python",
45 |    "nbconvert_exporter": "python",
46 |    "pygments_lexer": "ipython3",
47 |    "version": "3.7.6"
48 |   }
49 |  },
50 |  "nbformat": 4,
51 |  "nbformat_minor": 1
52 | }
53 | 


--------------------------------------------------------------------------------
/02-Python-for-Data-Analysis-NumPy/03-Numpy Operations.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true
  7 |    },
  8 |    "source": [
  9 |     "# NumPy Operations"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Arithmetic\n",
 17 |     "\n",
 18 |     "You can easily perform array with array arithmetic, or scalar with array arithmetic. Let's see some examples:"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 1,
 24 |    "metadata": {
 25 |     "collapsed": true
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import numpy as np\n",
 30 |     "arr = np.arange(0,10)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 2,
 36 |    "metadata": {},
 37 |    "outputs": [
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])"
 42 |       ]
 43 |      },
 44 |      "execution_count": 2,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "arr + arr"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 3,
 56 |    "metadata": {},
 57 |    "outputs": [
 58 |     {
 59 |      "data": {
 60 |       "text/plain": [
 61 |        "array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])"
 62 |       ]
 63 |      },
 64 |      "execution_count": 3,
 65 |      "metadata": {},
 66 |      "output_type": "execute_result"
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "arr * arr"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 4,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "data": {
 80 |       "text/plain": [
 81 |        "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
 82 |       ]
 83 |      },
 84 |      "execution_count": 4,
 85 |      "metadata": {},
 86 |      "output_type": "execute_result"
 87 |     }
 88 |    ],
 89 |    "source": [
 90 |     "arr - arr"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "metadata": {},
 97 |    "outputs": [
 98 |     {
 99 |      "name": "stderr",
100 |      "output_type": "stream",
101 |      "text": [
102 |       "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in true_divide\n",
103 |       "  if __name__ == '__main__':\n"
104 |      ]
105 |     },
106 |     {
107 |      "data": {
108 |       "text/plain": [
109 |        "array([ nan,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.])"
110 |       ]
111 |      },
112 |      "execution_count": 5,
113 |      "metadata": {},
114 |      "output_type": "execute_result"
115 |     }
116 |    ],
117 |    "source": [
118 |     "# Warning on division by zero, but not an error!\n",
119 |     "# Just replaced with nan\n",
120 |     "arr/arr"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 6,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "name": "stderr",
130 |      "output_type": "stream",
131 |      "text": [
132 |       "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide\n",
133 |       "  if __name__ == '__main__':\n"
134 |      ]
135 |     },
136 |     {
137 |      "data": {
138 |       "text/plain": [
139 |        "array([        inf,  1.        ,  0.5       ,  0.33333333,  0.25      ,\n",
140 |        "        0.2       ,  0.16666667,  0.14285714,  0.125     ,  0.11111111])"
141 |       ]
142 |      },
143 |      "execution_count": 6,
144 |      "metadata": {},
145 |      "output_type": "execute_result"
146 |     }
147 |    ],
148 |    "source": [
149 |     "# Also a warning, but not an error; the result is infinity instead\n",
150 |     "1/arr"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 10,
156 |    "metadata": {},
157 |    "outputs": [
158 |     {
159 |      "data": {
160 |       "text/plain": [
161 |        "array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])"
162 |       ]
163 |      },
164 |      "execution_count": 10,
165 |      "metadata": {},
166 |      "output_type": "execute_result"
167 |     }
168 |    ],
169 |    "source": [
170 |     "arr**3"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "## Universal Array Functions\n",
178 |     "\n",
179 |     "NumPy comes with many [universal array functions](https://numpy.org/doc/stable/reference/ufuncs.html), which are essentially just mathematical operations that you can apply across the whole array. Let's show some common ones:"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 12,
185 |    "metadata": {},
186 |    "outputs": [
187 |     {
188 |      "data": {
189 |       "text/plain": [
190 |        "array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,\n",
191 |        "        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])"
192 |       ]
193 |      },
194 |      "execution_count": 12,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "#Taking Square Roots\n",
201 |     "np.sqrt(arr)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 13,
207 |    "metadata": {},
208 |    "outputs": [
209 |     {
210 |      "data": {
211 |       "text/plain": [
212 |        "array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,\n",
213 |        "         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,\n",
214 |        "         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,\n",
215 |        "         8.10308393e+03])"
216 |       ]
217 |      },
218 |      "execution_count": 13,
219 |      "metadata": {},
220 |      "output_type": "execute_result"
221 |     }
222 |    ],
223 |    "source": [
224 |     "#Calculating exponential (e^)\n",
225 |     "np.exp(arr)"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 14,
231 |    "metadata": {},
232 |    "outputs": [
233 |     {
234 |      "data": {
235 |       "text/plain": [
236 |        "9"
237 |       ]
238 |      },
239 |      "execution_count": 14,
240 |      "metadata": {},
241 |      "output_type": "execute_result"
242 |     }
243 |    ],
244 |    "source": [
245 |     "np.max(arr) #same as arr.max()"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 15,
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "data": {
255 |       "text/plain": [
256 |        "array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,\n",
257 |        "       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])"
258 |       ]
259 |      },
260 |      "execution_count": 15,
261 |      "metadata": {},
262 |      "output_type": "execute_result"
263 |     }
264 |    ],
265 |    "source": [
266 |     "np.sin(arr)"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": 16,
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stderr",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log\n",
279 |       "  if __name__ == '__main__':\n"
280 |      ]
281 |     },
282 |     {
283 |      "data": {
284 |       "text/plain": [
285 |        "array([       -inf,  0.        ,  0.69314718,  1.09861229,  1.38629436,\n",
286 |        "        1.60943791,  1.79175947,  1.94591015,  2.07944154,  2.19722458])"
287 |       ]
288 |      },
289 |      "execution_count": 16,
290 |      "metadata": {},
291 |      "output_type": "execute_result"
292 |     }
293 |    ],
294 |    "source": [
295 |     "np.log(arr)"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "# Great Job!\n",
303 |     "\n",
304 |     "That's all we need to know for now!"
305 |    ]
306 |   }
307 |  ],
308 |  "metadata": {
309 |   "kernelspec": {
310 |    "display_name": "Python 3",
311 |    "language": "python",
312 |    "name": "python3"
313 |   },
314 |   "language_info": {
315 |    "codemirror_mode": {
316 |     "name": "ipython",
317 |     "version": 3
318 |    },
319 |    "file_extension": ".py",
320 |    "mimetype": "text/x-python",
321 |    "name": "python",
322 |    "nbconvert_exporter": "python",
323 |    "pygments_lexer": "ipython3",
324 |    "version": "3.7.6"
325 |   }
326 |  },
327 |  "nbformat": 4,
328 |  "nbformat_minor": 1
329 | }
330 | 


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/04-Missing Data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Missing Data\n",
  8 |     "\n",
  9 |     "Let's show a few convenient methods to deal with Missing Data in pandas:"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import numpy as np\n",
 21 |     "import pandas as pd"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 9,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "df = pd.DataFrame({'A':[1,2,np.nan],\n",
 33 |     "                  'B':[5,np.nan,np.nan],\n",
 34 |     "                  'C':[1,2,3]})"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 10,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "data": {
 44 |       "text/html": [
 45 |        "<div>\n",
 46 |        "<table border=\"1\" class=\"dataframe\">\n",
 47 |        "  <thead>\n",
 48 |        "    <tr style=\"text-align: right;\">\n",
 49 |        "      <th></th>\n",
 50 |        "      <th>A</th>\n",
 51 |        "      <th>B</th>\n",
 52 |        "      <th>C</th>\n",
 53 |        "    </tr>\n",
 54 |        "  </thead>\n",
 55 |        "  <tbody>\n",
 56 |        "    <tr>\n",
 57 |        "      <th>0</th>\n",
 58 |        "      <td>1.0</td>\n",
 59 |        "      <td>5.0</td>\n",
 60 |        "      <td>1</td>\n",
 61 |        "    </tr>\n",
 62 |        "    <tr>\n",
 63 |        "      <th>1</th>\n",
 64 |        "      <td>2.0</td>\n",
 65 |        "      <td>NaN</td>\n",
 66 |        "      <td>2</td>\n",
 67 |        "    </tr>\n",
 68 |        "    <tr>\n",
 69 |        "      <th>2</th>\n",
 70 |        "      <td>NaN</td>\n",
 71 |        "      <td>NaN</td>\n",
 72 |        "      <td>3</td>\n",
 73 |        "    </tr>\n",
 74 |        "  </tbody>\n",
 75 |        "</table>\n",
 76 |        "</div>"
 77 |       ],
 78 |       "text/plain": [
 79 |        "     A    B  C\n",
 80 |        "0  1.0  5.0  1\n",
 81 |        "1  2.0  NaN  2\n",
 82 |        "2  NaN  NaN  3"
 83 |       ]
 84 |      },
 85 |      "execution_count": 10,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "df"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 12,
 97 |    "metadata": {},
 98 |    "outputs": [
 99 |     {
100 |      "data": {
101 |       "text/html": [
102 |        "<div>\n",
103 |        "<table border=\"1\" class=\"dataframe\">\n",
104 |        "  <thead>\n",
105 |        "    <tr style=\"text-align: right;\">\n",
106 |        "      <th></th>\n",
107 |        "      <th>A</th>\n",
108 |        "      <th>B</th>\n",
109 |        "      <th>C</th>\n",
110 |        "    </tr>\n",
111 |        "  </thead>\n",
112 |        "  <tbody>\n",
113 |        "    <tr>\n",
114 |        "      <th>0</th>\n",
115 |        "      <td>1.0</td>\n",
116 |        "      <td>5.0</td>\n",
117 |        "      <td>1</td>\n",
118 |        "    </tr>\n",
119 |        "  </tbody>\n",
120 |        "</table>\n",
121 |        "</div>"
122 |       ],
123 |       "text/plain": [
124 |        "     A    B  C\n",
125 |        "0  1.0  5.0  1"
126 |       ]
127 |      },
128 |      "execution_count": 12,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "df.dropna()"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 13,
140 |    "metadata": {},
141 |    "outputs": [
142 |     {
143 |      "data": {
144 |       "text/html": [
145 |        "<div>\n",
146 |        "<table border=\"1\" class=\"dataframe\">\n",
147 |        "  <thead>\n",
148 |        "    <tr style=\"text-align: right;\">\n",
149 |        "      <th></th>\n",
150 |        "      <th>C</th>\n",
151 |        "    </tr>\n",
152 |        "  </thead>\n",
153 |        "  <tbody>\n",
154 |        "    <tr>\n",
155 |        "      <th>0</th>\n",
156 |        "      <td>1</td>\n",
157 |        "    </tr>\n",
158 |        "    <tr>\n",
159 |        "      <th>1</th>\n",
160 |        "      <td>2</td>\n",
161 |        "    </tr>\n",
162 |        "    <tr>\n",
163 |        "      <th>2</th>\n",
164 |        "      <td>3</td>\n",
165 |        "    </tr>\n",
166 |        "  </tbody>\n",
167 |        "</table>\n",
168 |        "</div>"
169 |       ],
170 |       "text/plain": [
171 |        "   C\n",
172 |        "0  1\n",
173 |        "1  2\n",
174 |        "2  3"
175 |       ]
176 |      },
177 |      "execution_count": 13,
178 |      "metadata": {},
179 |      "output_type": "execute_result"
180 |     }
181 |    ],
182 |    "source": [
183 |     "df.dropna(axis=1)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 14,
189 |    "metadata": {},
190 |    "outputs": [
191 |     {
192 |      "data": {
193 |       "text/html": [
194 |        "<div>\n",
195 |        "<table border=\"1\" class=\"dataframe\">\n",
196 |        "  <thead>\n",
197 |        "    <tr style=\"text-align: right;\">\n",
198 |        "      <th></th>\n",
199 |        "      <th>A</th>\n",
200 |        "      <th>B</th>\n",
201 |        "      <th>C</th>\n",
202 |        "    </tr>\n",
203 |        "  </thead>\n",
204 |        "  <tbody>\n",
205 |        "    <tr>\n",
206 |        "      <th>0</th>\n",
207 |        "      <td>1.0</td>\n",
208 |        "      <td>5.0</td>\n",
209 |        "      <td>1</td>\n",
210 |        "    </tr>\n",
211 |        "    <tr>\n",
212 |        "      <th>1</th>\n",
213 |        "      <td>2.0</td>\n",
214 |        "      <td>NaN</td>\n",
215 |        "      <td>2</td>\n",
216 |        "    </tr>\n",
217 |        "  </tbody>\n",
218 |        "</table>\n",
219 |        "</div>"
220 |       ],
221 |       "text/plain": [
222 |        "     A    B  C\n",
223 |        "0  1.0  5.0  1\n",
224 |        "1  2.0  NaN  2"
225 |       ]
226 |      },
227 |      "execution_count": 14,
228 |      "metadata": {},
229 |      "output_type": "execute_result"
230 |     }
231 |    ],
232 |    "source": [
233 |     "df.dropna(thresh=2)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 15,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "data": {
243 |       "text/html": [
244 |        "<div>\n",
245 |        "<table border=\"1\" class=\"dataframe\">\n",
246 |        "  <thead>\n",
247 |        "    <tr style=\"text-align: right;\">\n",
248 |        "      <th></th>\n",
249 |        "      <th>A</th>\n",
250 |        "      <th>B</th>\n",
251 |        "      <th>C</th>\n",
252 |        "    </tr>\n",
253 |        "  </thead>\n",
254 |        "  <tbody>\n",
255 |        "    <tr>\n",
256 |        "      <th>0</th>\n",
257 |        "      <td>1</td>\n",
258 |        "      <td>5</td>\n",
259 |        "      <td>1</td>\n",
260 |        "    </tr>\n",
261 |        "    <tr>\n",
262 |        "      <th>1</th>\n",
263 |        "      <td>2</td>\n",
264 |        "      <td>FILL VALUE</td>\n",
265 |        "      <td>2</td>\n",
266 |        "    </tr>\n",
267 |        "    <tr>\n",
268 |        "      <th>2</th>\n",
269 |        "      <td>FILL VALUE</td>\n",
270 |        "      <td>FILL VALUE</td>\n",
271 |        "      <td>3</td>\n",
272 |        "    </tr>\n",
273 |        "  </tbody>\n",
274 |        "</table>\n",
275 |        "</div>"
276 |       ],
277 |       "text/plain": [
278 |        "            A           B  C\n",
279 |        "0           1           5  1\n",
280 |        "1           2  FILL VALUE  2\n",
281 |        "2  FILL VALUE  FILL VALUE  3"
282 |       ]
283 |      },
284 |      "execution_count": 15,
285 |      "metadata": {},
286 |      "output_type": "execute_result"
287 |     }
288 |    ],
289 |    "source": [
290 |     "df.fillna(value='FILL VALUE')"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 17,
296 |    "metadata": {},
297 |    "outputs": [
298 |     {
299 |      "data": {
300 |       "text/plain": [
301 |        "0    1.0\n",
302 |        "1    2.0\n",
303 |        "2    1.5\n",
304 |        "Name: A, dtype: float64"
305 |       ]
306 |      },
307 |      "execution_count": 17,
308 |      "metadata": {},
309 |      "output_type": "execute_result"
310 |     }
311 |    ],
312 |    "source": [
313 |     "df['A'].fillna(value=df['A'].mean())"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "markdown",
318 |    "metadata": {},
319 |    "source": [
320 |     "# Great Job!"
321 |    ]
322 |   }
323 |  ],
324 |  "metadata": {
325 |   "kernelspec": {
326 |    "display_name": "Python 3",
327 |    "language": "python",
328 |    "name": "python3"
329 |   },
330 |   "language_info": {
331 |    "codemirror_mode": {
332 |     "name": "ipython",
333 |     "version": 3
334 |    },
335 |    "file_extension": ".py",
336 |    "mimetype": "text/x-python",
337 |    "name": "python",
338 |    "nbconvert_exporter": "python",
339 |    "pygments_lexer": "ipython3",
340 |    "version": "3.7.6"
341 |   }
342 |  },
343 |  "nbformat": 4,
344 |  "nbformat_minor": 1
345 | }
346 | 


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/02-Series.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "\n",
  8 |     "# Series"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "The first main data type we will learn about for pandas is the Series data type. Let's import Pandas and explore the Series object.\n",
 16 |     "\n",
 17 |     "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates a Series from a NumPy array is that a Series can have axis labels, meaning it can be indexed by a label instead of just a number location. It also doesn't need to hold numeric data; it can hold any arbitrary Python object.\n",
 18 |     "\n",
 19 |     "Let's explore this concept through some examples:"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 1,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import numpy as np\n",
 29 |     "import pandas as pd"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "### Creating a Series\n",
 37 |     "\n",
 38 |     "You can convert a list, NumPy array, or dictionary to a Series:"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {
 45 |     "collapsed": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "labels = ['a','b','c']\n",
 50 |     "my_list = [10,20,30]\n",
 51 |     "arr = np.array([10,20,30])\n",
 52 |     "d = {'a':10,'b':20,'c':30}"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "**Using Lists**"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {},
 66 |    "outputs": [
 67 |     {
 68 |      "data": {
 69 |       "text/plain": [
 70 |        "0    10\n",
 71 |        "1    20\n",
 72 |        "2    30\n",
 73 |        "dtype: int64"
 74 |       ]
 75 |      },
 76 |      "execution_count": 4,
 77 |      "metadata": {},
 78 |      "output_type": "execute_result"
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "pd.Series(data=my_list)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 5,
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "data": {
 92 |       "text/plain": [
 93 |        "a    10\n",
 94 |        "b    20\n",
 95 |        "c    30\n",
 96 |        "dtype: int64"
 97 |       ]
 98 |      },
 99 |      "execution_count": 5,
100 |      "metadata": {},
101 |      "output_type": "execute_result"
102 |     }
103 |    ],
104 |    "source": [
105 |     "pd.Series(data=my_list,index=labels)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 6,
111 |    "metadata": {},
112 |    "outputs": [
113 |     {
114 |      "data": {
115 |       "text/plain": [
116 |        "a    10\n",
117 |        "b    20\n",
118 |        "c    30\n",
119 |        "dtype: int64"
120 |       ]
121 |      },
122 |      "execution_count": 6,
123 |      "metadata": {},
124 |      "output_type": "execute_result"
125 |     }
126 |    ],
127 |    "source": [
128 |     "pd.Series(my_list,labels)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "**NumPy Arrays**"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 7,
141 |    "metadata": {},
142 |    "outputs": [
143 |     {
144 |      "data": {
145 |       "text/plain": [
146 |        "0    10\n",
147 |        "1    20\n",
148 |        "2    30\n",
149 |        "dtype: int64"
150 |       ]
151 |      },
152 |      "execution_count": 7,
153 |      "metadata": {},
154 |      "output_type": "execute_result"
155 |     }
156 |    ],
157 |    "source": [
158 |     "pd.Series(arr)"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 8,
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/plain": [
169 |        "a    10\n",
170 |        "b    20\n",
171 |        "c    30\n",
172 |        "dtype: int64"
173 |       ]
174 |      },
175 |      "execution_count": 8,
176 |      "metadata": {},
177 |      "output_type": "execute_result"
178 |     }
179 |    ],
180 |    "source": [
181 |     "pd.Series(arr,labels)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {},
187 |    "source": [
188 |     "**Dictionary**"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 9,
194 |    "metadata": {},
195 |    "outputs": [
196 |     {
197 |      "data": {
198 |       "text/plain": [
199 |        "a    10\n",
200 |        "b    20\n",
201 |        "c    30\n",
202 |        "dtype: int64"
203 |       ]
204 |      },
205 |      "execution_count": 9,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "pd.Series(d)"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "### Data in a Series\n",
219 |     "\n",
220 |     "A pandas Series can hold a variety of object types:"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 10,
226 |    "metadata": {},
227 |    "outputs": [
228 |     {
229 |      "data": {
230 |       "text/plain": [
231 |        "0    a\n",
232 |        "1    b\n",
233 |        "2    c\n",
234 |        "dtype: object"
235 |       ]
236 |      },
237 |      "execution_count": 10,
238 |      "metadata": {},
239 |      "output_type": "execute_result"
240 |     }
241 |    ],
242 |    "source": [
243 |     "pd.Series(data=labels)"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 11,
249 |    "metadata": {},
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "0      <built-in function sum>\n",
255 |        "1    <built-in function print>\n",
256 |        "2      <built-in function len>\n",
257 |        "dtype: object"
258 |       ]
259 |      },
260 |      "execution_count": 11,
261 |      "metadata": {},
262 |      "output_type": "execute_result"
263 |     }
264 |    ],
265 |    "source": [
266 |     "# Even functions (although unlikely that you will use this)\n",
267 |     "pd.Series([sum,print,len])"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "## Using an Index\n",
275 |     "\n",
276 |     "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast lookups of information (works like a hash table or dictionary).\n",
277 |     "\n",
278 |     "Let's see some examples of how to grab information from a Series. Let us create two series, ser1 and ser2:"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 12,
284 |    "metadata": {},
285 |    "outputs": [],
286 |    "source": [
287 |     "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan'])                                   "
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 13,
293 |    "metadata": {},
294 |    "outputs": [
295 |     {
296 |      "data": {
297 |       "text/plain": [
298 |        "USA        1\n",
299 |        "Germany    2\n",
300 |        "USSR       3\n",
301 |        "Japan      4\n",
302 |        "dtype: int64"
303 |       ]
304 |      },
305 |      "execution_count": 13,
306 |      "metadata": {},
307 |      "output_type": "execute_result"
308 |     }
309 |    ],
310 |    "source": [
311 |     "ser1"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 2,
317 |    "metadata": {},
318 |    "outputs": [],
319 |    "source": [
320 |     "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan'])                                   "
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": 15,
326 |    "metadata": {},
327 |    "outputs": [
328 |     {
329 |      "data": {
330 |       "text/plain": [
331 |        "USA        1\n",
332 |        "Germany    2\n",
333 |        "Italy      5\n",
334 |        "Japan      4\n",
335 |        "dtype: int64"
336 |       ]
337 |      },
338 |      "execution_count": 15,
339 |      "metadata": {},
340 |      "output_type": "execute_result"
341 |     }
342 |    ],
343 |    "source": [
344 |     "ser2"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": 16,
350 |    "metadata": {},
351 |    "outputs": [
352 |     {
353 |      "data": {
354 |       "text/plain": [
355 |        "1"
356 |       ]
357 |      },
358 |      "execution_count": 16,
359 |      "metadata": {},
360 |      "output_type": "execute_result"
361 |     }
362 |    ],
363 |    "source": [
364 |     "ser1['USA']"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "markdown",
369 |    "metadata": {},
370 |    "source": [
371 |     "Operations are then also performed based on the index; labels that appear in only one of the two Series produce NaN in the result:"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": 17,
377 |    "metadata": {},
378 |    "outputs": [
379 |     {
380 |      "data": {
381 |       "text/plain": [
382 |        "Germany    4.0\n",
383 |        "Italy      NaN\n",
384 |        "Japan      8.0\n",
385 |        "USA        2.0\n",
386 |        "USSR       NaN\n",
387 |        "dtype: float64"
388 |       ]
389 |      },
390 |      "execution_count": 17,
391 |      "metadata": {},
392 |      "output_type": "execute_result"
393 |     }
394 |    ],
395 |    "source": [
396 |     "ser1 + ser2"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "markdown",
401 |    "metadata": {},
402 |    "source": [
403 |     "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n",
404 |     "# Great Job!"
405 |    ]
406 |   }
407 |  ],
408 |  "metadata": {
409 |   "kernelspec": {
410 |    "display_name": "Python 3",
411 |    "language": "python",
412 |    "name": "python3"
413 |   },
414 |   "language_info": {
415 |    "codemirror_mode": {
416 |     "name": "ipython",
417 |     "version": 3
418 |    },
419 |    "file_extension": ".py",
420 |    "mimetype": "text/x-python",
421 |    "name": "python",
422 |    "nbconvert_exporter": "python",
423 |    "pygments_lexer": "ipython3",
424 |    "version": "3.7.6"
425 |   }
426 |  },
427 |  "nbformat": 4,
428 |  "nbformat_minor": 1
429 | }
430 | 


--------------------------------------------------------------------------------
/04-Pandas-Exercises/practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 7,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 55,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "df = pd.read_csv('Salaries.csv')"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 59,
 24 |    "metadata": {},
 25 |    "outputs": [
 26 |     {
 27 |      "data": {
 28 |       "text/html": [
 29 |        "<div>\n",
 30 |        "<style scoped>\n",
 31 |        "    .dataframe tbody tr th:only-of-type {\n",
 32 |        "        vertical-align: middle;\n",
 33 |        "    }\n",
 34 |        "\n",
 35 |        "    .dataframe tbody tr th {\n",
 36 |        "        vertical-align: top;\n",
 37 |        "    }\n",
 38 |        "\n",
 39 |        "    .dataframe thead th {\n",
 40 |        "        text-align: right;\n",
 41 |        "    }\n",
 42 |        "</style>\n",
 43 |        "<table border=\"1\" class=\"dataframe\">\n",
 44 |        "  <thead>\n",
 45 |        "    <tr style=\"text-align: right;\">\n",
 46 |        "      <th></th>\n",
 47 |        "      <th>Id</th>\n",
 48 |        "      <th>EmployeeName</th>\n",
 49 |        "      <th>JobTitle</th>\n",
 50 |        "      <th>BasePay</th>\n",
 51 |        "      <th>OvertimePay</th>\n",
 52 |        "      <th>OtherPay</th>\n",
 53 |        "      <th>Benefits</th>\n",
 54 |        "      <th>TotalPay</th>\n",
 55 |        "      <th>TotalPayBenefits</th>\n",
 56 |        "      <th>Year</th>\n",
 57 |        "      <th>Notes</th>\n",
 58 |        "      <th>Agency</th>\n",
 59 |        "      <th>Status</th>\n",
 60 |        "    </tr>\n",
 61 |        "  </thead>\n",
 62 |        "  <tbody>\n",
 63 |        "    <tr>\n",
 64 |        "      <th>0</th>\n",
 65 |        "      <td>1</td>\n",
 66 |        "      <td>NATHANIEL FORD</td>\n",
 67 |        "      <td>GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY</td>\n",
 68 |        "      <td>167411.18</td>\n",
 69 |        "      <td>0.00</td>\n",
 70 |        "      <td>400184.25</td>\n",
 71 |        "      <td>NaN</td>\n",
 72 |        "      <td>567595.43</td>\n",
 73 |        "      <td>567595.43</td>\n",
 74 |        "      <td>2011</td>\n",
 75 |        "      <td>NaN</td>\n",
 76 |        "      <td>San Francisco</td>\n",
 77 |        "      <td>NaN</td>\n",
 78 |        "    </tr>\n",
 79 |        "    <tr>\n",
 80 |        "      <th>1</th>\n",
 81 |        "      <td>2</td>\n",
 82 |        "      <td>GARY JIMENEZ</td>\n",
 83 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
 84 |        "      <td>155966.02</td>\n",
 85 |        "      <td>245131.88</td>\n",
 86 |        "      <td>137811.38</td>\n",
 87 |        "      <td>NaN</td>\n",
 88 |        "      <td>538909.28</td>\n",
 89 |        "      <td>538909.28</td>\n",
 90 |        "      <td>2011</td>\n",
 91 |        "      <td>NaN</td>\n",
 92 |        "      <td>San Francisco</td>\n",
 93 |        "      <td>NaN</td>\n",
 94 |        "    </tr>\n",
 95 |        "    <tr>\n",
 96 |        "      <th>2</th>\n",
 97 |        "      <td>3</td>\n",
 98 |        "      <td>ALBERT PARDINI</td>\n",
 99 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
100 |        "      <td>212739.13</td>\n",
101 |        "      <td>106088.18</td>\n",
102 |        "      <td>16452.60</td>\n",
103 |        "      <td>NaN</td>\n",
104 |        "      <td>335279.91</td>\n",
105 |        "      <td>335279.91</td>\n",
106 |        "      <td>2011</td>\n",
107 |        "      <td>NaN</td>\n",
108 |        "      <td>San Francisco</td>\n",
109 |        "      <td>NaN</td>\n",
110 |        "    </tr>\n",
111 |        "  </tbody>\n",
112 |        "</table>\n",
113 |        "</div>"
114 |       ],
115 |       "text/plain": [
116 |        "   Id    EmployeeName                                        JobTitle  \\\n",
117 |        "0   1  NATHANIEL FORD  GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY   \n",
118 |        "1   2    GARY JIMENEZ                 CAPTAIN III (POLICE DEPARTMENT)   \n",
119 |        "2   3  ALBERT PARDINI                 CAPTAIN III (POLICE DEPARTMENT)   \n",
120 |        "\n",
121 |        "     BasePay  OvertimePay   OtherPay  Benefits   TotalPay  TotalPayBenefits  \\\n",
122 |        "0  167411.18         0.00  400184.25       NaN  567595.43         567595.43   \n",
123 |        "1  155966.02    245131.88  137811.38       NaN  538909.28         538909.28   \n",
124 |        "2  212739.13    106088.18   16452.60       NaN  335279.91         335279.91   \n",
125 |        "\n",
126 |        "   Year  Notes         Agency  Status  \n",
127 |        "0  2011    NaN  San Francisco     NaN  \n",
128 |        "1  2011    NaN  San Francisco     NaN  \n",
129 |        "2  2011    NaN  San Francisco     NaN  "
130 |       ]
131 |      },
132 |      "execution_count": 59,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "df.head(3)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 60,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "name": "stdout",
148 |      "output_type": "stream",
149 |      "text": [
150 |       "<class 'pandas.core.frame.DataFrame'>\n",
151 |       "RangeIndex: 148654 entries, 0 to 148653\n",
152 |       "Data columns (total 13 columns):\n",
153 |       " #   Column            Non-Null Count   Dtype  \n",
154 |       "---  ------            --------------   -----  \n",
155 |       " 0   Id                148654 non-null  int64  \n",
156 |       " 1   EmployeeName      148654 non-null  object \n",
157 |       " 2   JobTitle          148654 non-null  object \n",
158 |       " 3   BasePay           148045 non-null  float64\n",
159 |       " 4   OvertimePay       148650 non-null  float64\n",
160 |       " 5   OtherPay          148650 non-null  float64\n",
161 |       " 6   Benefits          112491 non-null  float64\n",
162 |       " 7   TotalPay          148654 non-null  float64\n",
163 |       " 8   TotalPayBenefits  148654 non-null  float64\n",
164 |       " 9   Year              148654 non-null  int64  \n",
165 |       " 10  Notes             0 non-null       float64\n",
166 |       " 11  Agency            148654 non-null  object \n",
167 |       " 12  Status            0 non-null       float64\n",
168 |       "dtypes: float64(8), int64(2), object(3)\n",
169 |       "memory usage: 14.7+ MB\n"
170 |      ]
171 |     }
172 |    ],
173 |    "source": [
174 |     "df.info()"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 61,
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "data": {
184 |       "text/plain": [
185 |        "66325.44884050643"
186 |       ]
187 |      },
188 |      "execution_count": 61,
189 |      "metadata": {},
190 |      "output_type": "execute_result"
191 |     }
192 |    ],
193 |    "source": [
194 |     "df['BasePay'].mean()"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 62,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "245131.88"
206 |       ]
207 |      },
208 |      "execution_count": 62,
209 |      "metadata": {},
210 |      "output_type": "execute_result"
211 |     }
212 |    ],
213 |    "source": [
214 |     "df['OvertimePay'].max()"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 65,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "data": {
224 |       "text/plain": [
225 |        "24    270324.91\n",
226 |        "Name: TotalPayBenefits, dtype: float64"
227 |       ]
228 |      },
229 |      "execution_count": 65,
230 |      "metadata": {},
231 |      "output_type": "execute_result"
232 |     }
233 |    ],
234 |    "source": [
235 |     "df[df['EmployeeName'] == 'JOSEPH DRISCOLL']['TotalPayBenefits']"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 67,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/html": [
246 |        "<div>\n",
247 |        "<style scoped>\n",
248 |        "    .dataframe tbody tr th:only-of-type {\n",
249 |        "        vertical-align: middle;\n",
250 |        "    }\n",
251 |        "\n",
252 |        "    .dataframe tbody tr th {\n",
253 |        "        vertical-align: top;\n",
254 |        "    }\n",
255 |        "\n",
256 |        "    .dataframe thead th {\n",
257 |        "        text-align: right;\n",
258 |        "    }\n",
259 |        "</style>\n",
260 |        "<table border=\"1\" class=\"dataframe\">\n",
261 |        "  <thead>\n",
262 |        "    <tr style=\"text-align: right;\">\n",
263 |        "      <th></th>\n",
264 |        "      <th>Id</th>\n",
265 |        "      <th>EmployeeName</th>\n",
266 |        "      <th>JobTitle</th>\n",
267 |        "      <th>BasePay</th>\n",
268 |        "      <th>OvertimePay</th>\n",
269 |        "      <th>OtherPay</th>\n",
270 |        "      <th>Benefits</th>\n",
271 |        "      <th>TotalPay</th>\n",
272 |        "      <th>TotalPayBenefits</th>\n",
273 |        "      <th>Year</th>\n",
274 |        "      <th>Notes</th>\n",
275 |        "      <th>Agency</th>\n",
276 |        "      <th>Status</th>\n",
277 |        "    </tr>\n",
278 |        "  </thead>\n",
279 |        "  <tbody>\n",
280 |        "    <tr>\n",
281 |        "      <th>148653</th>\n",
282 |        "      <td>148654</td>\n",
283 |        "      <td>Joe Lopez</td>\n",
284 |        "      <td>Counselor, Log Cabin Ranch</td>\n",
285 |        "      <td>0.0</td>\n",
286 |        "      <td>0.0</td>\n",
287 |        "      <td>-618.13</td>\n",
288 |        "      <td>0.0</td>\n",
289 |        "      <td>-618.13</td>\n",
290 |        "      <td>-618.13</td>\n",
291 |        "      <td>2014</td>\n",
292 |        "      <td>NaN</td>\n",
293 |        "      <td>San Francisco</td>\n",
294 |        "      <td>NaN</td>\n",
295 |        "    </tr>\n",
296 |        "  </tbody>\n",
297 |        "</table>\n",
298 |        "</div>"
299 |       ],
300 |       "text/plain": [
301 |        "            Id EmployeeName                    JobTitle  BasePay  OvertimePay  \\\n",
302 |        "148653  148654    Joe Lopez  Counselor, Log Cabin Ranch      0.0          0.0   \n",
303 |        "\n",
304 |        "        OtherPay  Benefits  TotalPay  TotalPayBenefits  Year  Notes  \\\n",
305 |        "148653   -618.13       0.0   -618.13           -618.13  2014    NaN   \n",
306 |        "\n",
307 |        "               Agency  Status  \n",
308 |        "148653  San Francisco     NaN  "
309 |       ]
310 |      },
311 |      "execution_count": 67,
312 |      "metadata": {},
313 |      "output_type": "execute_result"
314 |     }
315 |    ],
316 |    "source": [
317 |     "df[df['TotalPayBenefits'] == df['TotalPayBenefits'].min()]"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": 70,
323 |    "metadata": {},
324 |    "outputs": [
325 |     {
326 |      "data": {
327 |       "text/plain": [
328 |        "Year\n",
329 |        "2011    63595.956517\n",
330 |        "2012    65436.406857\n",
331 |        "2013    69630.030216\n",
332 |        "2014    66564.421924\n",
333 |        "Name: BasePay, dtype: float64"
334 |       ]
335 |      },
336 |      "execution_count": 70,
337 |      "metadata": {},
338 |      "output_type": "execute_result"
339 |     }
340 |    ],
341 |    "source": [
342 |     "df.groupby('Year').mean()['BasePay']"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": 71,
348 |    "metadata": {},
349 |    "outputs": [
350 |     {
351 |      "data": {
352 |       "text/plain": [
353 |        "2159"
354 |       ]
355 |      },
356 |      "execution_count": 71,
357 |      "metadata": {},
358 |      "output_type": "execute_result"
359 |     }
360 |    ],
361 |    "source": [
362 |     "df['JobTitle'].nunique()"
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": 73,
368 |    "metadata": {},
369 |    "outputs": [
370 |     {
371 |      "data": {
372 |       "text/plain": [
373 |        "Transit Operator                7036\n",
374 |        "Special Nurse                   4389\n",
375 |        "Registered Nurse                3736\n",
376 |        "Public Svc Aide-Public Works    2518\n",
377 |        "Police Officer 3                2421\n",
378 |        "Name: JobTitle, dtype: int64"
379 |       ]
380 |      },
381 |      "execution_count": 73,
382 |      "metadata": {},
383 |      "output_type": "execute_result"
384 |     }
385 |    ],
386 |    "source": [
387 |     "df['JobTitle'].value_counts().head()"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": 77,
393 |    "metadata": {},
394 |    "outputs": [
395 |     {
396 |      "data": {
397 |       "text/plain": [
398 |        "202"
399 |       ]
400 |      },
401 |      "execution_count": 77,
402 |      "metadata": {},
403 |      "output_type": "execute_result"
404 |     }
405 |    ],
406 |    "source": [
407 |     "sum(df[df['Year'] == 2013]['JobTitle'].value_counts() == 1)"
408 |    ]
409 |   },
410 |   {
411 |    "cell_type": "code",
412 |    "execution_count": 80,
413 |    "metadata": {},
414 |    "outputs": [],
415 |    "source": [
416 |     "df['len']  = df['JobTitle'].apply(len)"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": 83,
422 |    "metadata": {},
423 |    "outputs": [
424 |     {
425 |      "data": {
426 |       "text/html": [
427 |        "<div>\n",
428 |        "<style scoped>\n",
429 |        "    .dataframe tbody tr th:only-of-type {\n",
430 |        "        vertical-align: middle;\n",
431 |        "    }\n",
432 |        "\n",
433 |        "    .dataframe tbody tr th {\n",
434 |        "        vertical-align: top;\n",
435 |        "    }\n",
436 |        "\n",
437 |        "    .dataframe thead th {\n",
438 |        "        text-align: right;\n",
439 |        "    }\n",
440 |        "</style>\n",
441 |        "<table border=\"1\" class=\"dataframe\">\n",
442 |        "  <thead>\n",
443 |        "    <tr style=\"text-align: right;\">\n",
444 |        "      <th></th>\n",
445 |        "      <th>len</th>\n",
446 |        "      <th>TotalPayBenefits</th>\n",
447 |        "    </tr>\n",
448 |        "  </thead>\n",
449 |        "  <tbody>\n",
450 |        "    <tr>\n",
451 |        "      <th>len</th>\n",
452 |        "      <td>1.000000</td>\n",
453 |        "      <td>-0.036878</td>\n",
454 |        "    </tr>\n",
455 |        "    <tr>\n",
456 |        "      <th>TotalPayBenefits</th>\n",
457 |        "      <td>-0.036878</td>\n",
458 |        "      <td>1.000000</td>\n",
459 |        "    </tr>\n",
460 |        "  </tbody>\n",
461 |        "</table>\n",
462 |        "</div>"
463 |       ],
464 |       "text/plain": [
465 |        "                       len  TotalPayBenefits\n",
466 |        "len               1.000000         -0.036878\n",
467 |        "TotalPayBenefits -0.036878          1.000000"
468 |       ]
469 |      },
470 |      "execution_count": 83,
471 |      "metadata": {},
472 |      "output_type": "execute_result"
473 |     }
474 |    ],
475 |    "source": [
476 |     "df[['len','TotalPayBenefits']].corr()"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": null,
482 |    "metadata": {},
483 |    "outputs": [],
484 |    "source": []
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": null,
489 |    "metadata": {},
490 |    "outputs": [],
491 |    "source": []
492 |   },
493 |   {
494 |    "cell_type": "code",
495 |    "execution_count": null,
496 |    "metadata": {},
497 |    "outputs": [],
498 |    "source": []
499 |   }
500 |  ],
501 |  "metadata": {
502 |   "kernelspec": {
503 |    "display_name": "Python 3",
504 |    "language": "python",
505 |    "name": "python3"
506 |   },
507 |   "language_info": {
508 |    "codemirror_mode": {
509 |     "name": "ipython",
510 |     "version": 3
511 |    },
512 |    "file_extension": ".py",
513 |    "mimetype": "text/x-python",
514 |    "name": "python",
515 |    "nbconvert_exporter": "python",
516 |    "pygments_lexer": "ipython3",
517 |    "version": "3.7.6"
518 |   }
519 |  },
520 |  "nbformat": 4,
521 |  "nbformat_minor": 4
522 | }
523 | 


--------------------------------------------------------------------------------
/02-Python-for-Data-Analysis-NumPy/04-Numpy Exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# NumPy Exercises \n",
  8 |     "\n",
  9 |     "Now that we've learned about NumPy, let's test your knowledge. We'll start off with a few simple tasks, and then you'll be asked some more complicated questions."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "#### Import NumPy as np"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {
 23 |     "collapsed": true
 24 |    },
 25 |    "outputs": [],
 26 |    "source": []
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "#### Create an array of 10 zeros "
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 2,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "data": {
 42 |       "text/plain": [
 43 |        "array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])"
 44 |       ]
 45 |      },
 46 |      "execution_count": 2,
 47 |      "metadata": {},
 48 |      "output_type": "execute_result"
 49 |     }
 50 |    ],
 51 |    "source": []
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "#### Create an array of 10 ones"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 3,
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "data": {
 67 |       "text/plain": [
 68 |        "array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])"
 69 |       ]
 70 |      },
 71 |      "execution_count": 3,
 72 |      "metadata": {},
 73 |      "output_type": "execute_result"
 74 |     }
 75 |    ],
 76 |    "source": []
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "#### Create an array of 10 fives"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 4,
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "data": {
 92 |       "text/plain": [
 93 |        "array([ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.])"
 94 |       ]
 95 |      },
 96 |      "execution_count": 4,
 97 |      "metadata": {},
 98 |      "output_type": "execute_result"
 99 |     }
100 |    ],
101 |    "source": []
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "#### Create an array of the integers from 10 to 50"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 5,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n",
119 |        "       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n",
120 |        "       44, 45, 46, 47, 48, 49, 50])"
121 |       ]
122 |      },
123 |      "execution_count": 5,
124 |      "metadata": {},
125 |      "output_type": "execute_result"
126 |     }
127 |    ],
128 |    "source": []
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "#### Create an array of all the even integers from 10 to 50"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 6,
140 |    "metadata": {},
141 |    "outputs": [
142 |     {
143 |      "data": {
144 |       "text/plain": [
145 |        "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n",
146 |        "       44, 46, 48, 50])"
147 |       ]
148 |      },
149 |      "execution_count": 6,
150 |      "metadata": {},
151 |      "output_type": "execute_result"
152 |     }
153 |    ],
154 |    "source": []
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "#### Create a 3x3 matrix with values ranging from 0 to 8"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 7,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "data": {
170 |       "text/plain": [
171 |        "array([[0, 1, 2],\n",
172 |        "       [3, 4, 5],\n",
173 |        "       [6, 7, 8]])"
174 |       ]
175 |      },
176 |      "execution_count": 7,
177 |      "metadata": {},
178 |      "output_type": "execute_result"
179 |     }
180 |    ],
181 |    "source": []
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "#### Create a 3x3 identity matrix"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 8,
193 |    "metadata": {},
194 |    "outputs": [
195 |     {
196 |      "data": {
197 |       "text/plain": [
198 |        "array([[ 1.,  0.,  0.],\n",
199 |        "       [ 0.,  1.,  0.],\n",
200 |        "       [ 0.,  0.,  1.]])"
201 |       ]
202 |      },
203 |      "execution_count": 8,
204 |      "metadata": {},
205 |      "output_type": "execute_result"
206 |     }
207 |    ],
208 |    "source": []
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "#### Use NumPy to generate a random number between 0 and 1"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 15,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "data": {
224 |       "text/plain": [
225 |        "array([ 0.42829726])"
226 |       ]
227 |      },
228 |      "execution_count": 15,
229 |      "metadata": {},
230 |      "output_type": "execute_result"
231 |     }
232 |    ],
233 |    "source": []
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "metadata": {},
238 |    "source": [
239 |     "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": 33,
245 |    "metadata": {},
246 |    "outputs": [
247 |     {
248 |      "data": {
249 |       "text/plain": [
250 |        "array([ 1.32031013,  1.6798602 , -0.42985892, -1.53116655,  0.85753232,\n",
251 |        "        0.87339938,  0.35668636, -1.47491157,  0.15349697,  0.99530727,\n",
252 |        "       -0.94865451, -1.69174783,  1.57525349, -0.70615234,  0.10991879,\n",
253 |        "       -0.49478947,  1.08279872,  0.76488333, -2.3039931 ,  0.35401124,\n",
254 |        "       -0.45454399, -0.64754649, -0.29391671,  0.02339861,  0.38272124])"
255 |       ]
256 |      },
257 |      "execution_count": 33,
258 |      "metadata": {},
259 |      "output_type": "execute_result"
260 |     }
261 |    ],
262 |    "source": []
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {},
267 |    "source": [
268 |     "#### Create the following matrix:"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": 35,
274 |    "metadata": {},
275 |    "outputs": [
276 |     {
277 |      "data": {
278 |       "text/plain": [
279 |        "array([[ 0.01,  0.02,  0.03,  0.04,  0.05,  0.06,  0.07,  0.08,  0.09,  0.1 ],\n",
280 |        "       [ 0.11,  0.12,  0.13,  0.14,  0.15,  0.16,  0.17,  0.18,  0.19,  0.2 ],\n",
281 |        "       [ 0.21,  0.22,  0.23,  0.24,  0.25,  0.26,  0.27,  0.28,  0.29,  0.3 ],\n",
282 |        "       [ 0.31,  0.32,  0.33,  0.34,  0.35,  0.36,  0.37,  0.38,  0.39,  0.4 ],\n",
283 |        "       [ 0.41,  0.42,  0.43,  0.44,  0.45,  0.46,  0.47,  0.48,  0.49,  0.5 ],\n",
284 |        "       [ 0.51,  0.52,  0.53,  0.54,  0.55,  0.56,  0.57,  0.58,  0.59,  0.6 ],\n",
285 |        "       [ 0.61,  0.62,  0.63,  0.64,  0.65,  0.66,  0.67,  0.68,  0.69,  0.7 ],\n",
286 |        "       [ 0.71,  0.72,  0.73,  0.74,  0.75,  0.76,  0.77,  0.78,  0.79,  0.8 ],\n",
287 |        "       [ 0.81,  0.82,  0.83,  0.84,  0.85,  0.86,  0.87,  0.88,  0.89,  0.9 ],\n",
288 |        "       [ 0.91,  0.92,  0.93,  0.94,  0.95,  0.96,  0.97,  0.98,  0.99,  1.  ]])"
289 |       ]
290 |      },
291 |      "execution_count": 35,
292 |      "metadata": {},
293 |      "output_type": "execute_result"
294 |     }
295 |    ],
296 |    "source": []
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "#### Create an array of 20 linearly spaced points between 0 and 1:"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": 36,
308 |    "metadata": {},
309 |    "outputs": [
310 |     {
311 |      "data": {
312 |       "text/plain": [
313 |        "array([ 0.        ,  0.05263158,  0.10526316,  0.15789474,  0.21052632,\n",
314 |        "        0.26315789,  0.31578947,  0.36842105,  0.42105263,  0.47368421,\n",
315 |        "        0.52631579,  0.57894737,  0.63157895,  0.68421053,  0.73684211,\n",
316 |        "        0.78947368,  0.84210526,  0.89473684,  0.94736842,  1.        ])"
317 |       ]
318 |      },
319 |      "execution_count": 36,
320 |      "metadata": {},
321 |      "output_type": "execute_result"
322 |     }
323 |    ],
324 |    "source": []
325 |   },
326 |   {
327 |    "cell_type": "markdown",
328 |    "metadata": {},
329 |    "source": [
330 |     "## NumPy Indexing and Selection\n",
331 |     "\n",
332 |     "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": 38,
338 |    "metadata": {},
339 |    "outputs": [
340 |     {
341 |      "data": {
342 |       "text/plain": [
343 |        "array([[ 1,  2,  3,  4,  5],\n",
344 |        "       [ 6,  7,  8,  9, 10],\n",
345 |        "       [11, 12, 13, 14, 15],\n",
346 |        "       [16, 17, 18, 19, 20],\n",
347 |        "       [21, 22, 23, 24, 25]])"
348 |       ]
349 |      },
350 |      "execution_count": 38,
351 |      "metadata": {},
352 |      "output_type": "execute_result"
353 |     }
354 |    ],
355 |    "source": [
356 |     "mat = np.arange(1,26).reshape(5,5)\n",
357 |     "mat"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 39,
363 |    "metadata": {
364 |     "collapsed": true
365 |    },
366 |    "outputs": [],
367 |    "source": [
368 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
369 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
370 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "code",
375 |    "execution_count": 40,
376 |    "metadata": {},
377 |    "outputs": [
378 |     {
379 |      "data": {
380 |       "text/plain": [
381 |        "array([[12, 13, 14, 15],\n",
382 |        "       [17, 18, 19, 20],\n",
383 |        "       [22, 23, 24, 25]])"
384 |       ]
385 |      },
386 |      "execution_count": 40,
387 |      "metadata": {},
388 |      "output_type": "execute_result"
389 |     }
390 |    ],
391 |    "source": []
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 29,
396 |    "metadata": {
397 |     "collapsed": true
398 |    },
399 |    "outputs": [],
400 |    "source": [
401 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
402 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
403 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": 41,
409 |    "metadata": {},
410 |    "outputs": [
411 |     {
412 |      "data": {
413 |       "text/plain": [
414 |        "20"
415 |       ]
416 |      },
417 |      "execution_count": 41,
418 |      "metadata": {},
419 |      "output_type": "execute_result"
420 |     }
421 |    ],
422 |    "source": []
423 |   },
424 |   {
425 |    "cell_type": "code",
426 |    "execution_count": 30,
427 |    "metadata": {
428 |     "collapsed": true
429 |    },
430 |    "outputs": [],
431 |    "source": [
432 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
433 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
434 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": 42,
440 |    "metadata": {},
441 |    "outputs": [
442 |     {
443 |      "data": {
444 |       "text/plain": [
445 |        "array([[ 2],\n",
446 |        "       [ 7],\n",
447 |        "       [12]])"
448 |       ]
449 |      },
450 |      "execution_count": 42,
451 |      "metadata": {},
452 |      "output_type": "execute_result"
453 |     }
454 |    ],
455 |    "source": []
456 |   },
457 |   {
458 |    "cell_type": "code",
459 |    "execution_count": 31,
460 |    "metadata": {
461 |     "collapsed": true
462 |    },
463 |    "outputs": [],
464 |    "source": [
465 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
466 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
467 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
468 |    ]
469 |   },
470 |   {
471 |    "cell_type": "code",
472 |    "execution_count": 46,
473 |    "metadata": {},
474 |    "outputs": [
475 |     {
476 |      "data": {
477 |       "text/plain": [
478 |        "array([21, 22, 23, 24, 25])"
479 |       ]
480 |      },
481 |      "execution_count": 46,
482 |      "metadata": {},
483 |      "output_type": "execute_result"
484 |     }
485 |    ],
486 |    "source": []
487 |   },
488 |   {
489 |    "cell_type": "code",
490 |    "execution_count": 32,
491 |    "metadata": {
492 |     "collapsed": true
493 |    },
494 |    "outputs": [],
495 |    "source": [
496 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
497 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
498 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
499 |    ]
500 |   },
501 |   {
502 |    "cell_type": "code",
503 |    "execution_count": 49,
504 |    "metadata": {},
505 |    "outputs": [
506 |     {
507 |      "data": {
508 |       "text/plain": [
509 |        "array([[16, 17, 18, 19, 20],\n",
510 |        "       [21, 22, 23, 24, 25]])"
511 |       ]
512 |      },
513 |      "execution_count": 49,
514 |      "metadata": {},
515 |      "output_type": "execute_result"
516 |     }
517 |    ],
518 |    "source": []
519 |   },
520 |   {
521 |    "cell_type": "markdown",
522 |    "metadata": {},
523 |    "source": [
524 |     "### Now do the following"
525 |    ]
526 |   },
527 |   {
528 |    "cell_type": "markdown",
529 |    "metadata": {},
530 |    "source": [
531 |     "#### Get the sum of all the values in mat"
532 |    ]
533 |   },
534 |   {
535 |    "cell_type": "code",
536 |    "execution_count": 50,
537 |    "metadata": {},
538 |    "outputs": [
539 |     {
540 |      "data": {
541 |       "text/plain": [
542 |        "325"
543 |       ]
544 |      },
545 |      "execution_count": 50,
546 |      "metadata": {},
547 |      "output_type": "execute_result"
548 |     }
549 |    ],
550 |    "source": []
551 |   },
552 |   {
553 |    "cell_type": "markdown",
554 |    "metadata": {},
555 |    "source": [
556 |     "#### Get the standard deviation of the values in mat"
557 |    ]
558 |   },
559 |   {
560 |    "cell_type": "code",
561 |    "execution_count": 51,
562 |    "metadata": {},
563 |    "outputs": [
564 |     {
565 |      "data": {
566 |       "text/plain": [
567 |        "7.2111025509279782"
568 |       ]
569 |      },
570 |      "execution_count": 51,
571 |      "metadata": {},
572 |      "output_type": "execute_result"
573 |     }
574 |    ],
575 |    "source": []
576 |   },
577 |   {
578 |    "cell_type": "markdown",
579 |    "metadata": {},
580 |    "source": [
581 |     "#### Get the sum of all the columns in mat"
582 |    ]
583 |   },
584 |   {
585 |    "cell_type": "code",
586 |    "execution_count": 53,
587 |    "metadata": {},
588 |    "outputs": [
589 |     {
590 |      "data": {
591 |       "text/plain": [
592 |        "array([55, 60, 65, 70, 75])"
593 |       ]
594 |      },
595 |      "execution_count": 53,
596 |      "metadata": {},
597 |      "output_type": "execute_result"
598 |     }
599 |    ],
600 |    "source": []
601 |   },
602 |   {
603 |    "cell_type": "markdown",
604 |    "metadata": {
605 |     "collapsed": true
606 |    },
607 |    "source": [
608 |     "# Great Job!"
609 |    ]
610 |   }
611 |  ],
612 |  "metadata": {
613 |   "kernelspec": {
614 |    "display_name": "Python 3",
615 |    "language": "python",
616 |    "name": "python3"
617 |   },
618 |   "language_info": {
619 |    "codemirror_mode": {
620 |     "name": "ipython",
621 |     "version": 3
622 |    },
623 |    "file_extension": ".py",
624 |    "mimetype": "text/x-python",
625 |    "name": "python",
626 |    "nbconvert_exporter": "python",
627 |    "pygments_lexer": "ipython3",
628 |    "version": "3.7.6"
629 |   }
630 |  },
631 |  "nbformat": 4,
632 |  "nbformat_minor": 1
633 | }
634 | 


--------------------------------------------------------------------------------
/02-Python-for-Data-Analysis-NumPy/05-Numpy Exercises - Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# NumPy Exercises - Solutions\n",
  8 |     "\n",
  9 |     "Now that we've learned about NumPy, let's test your knowledge. We'll start off with a few simple tasks, and then you'll be asked some more complicated questions."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "#### Import NumPy as np"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {
 23 |     "collapsed": true
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "import numpy as np"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "#### Create an array of 10 zeros "
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 2,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "data": {
 44 |       "text/plain": [
 45 |        "array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])"
 46 |       ]
 47 |      },
 48 |      "execution_count": 2,
 49 |      "metadata": {},
 50 |      "output_type": "execute_result"
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "np.zeros(10)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "#### Create an array of 10 ones"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 3,
 67 |    "metadata": {},
 68 |    "outputs": [
 69 |     {
 70 |      "data": {
 71 |       "text/plain": [
 72 |        "array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])"
 73 |       ]
 74 |      },
 75 |      "execution_count": 3,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "np.ones(10)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "#### Create an array of 10 fives"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {},
 95 |    "outputs": [
 96 |     {
 97 |      "data": {
 98 |       "text/plain": [
 99 |        "array([ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.])"
100 |       ]
101 |      },
102 |      "execution_count": 4,
103 |      "metadata": {},
104 |      "output_type": "execute_result"
105 |     }
106 |    ],
107 |    "source": [
108 |     "np.ones(10) * 5"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "#### Create an array of the integers from 10 to 50"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 5,
121 |    "metadata": {},
122 |    "outputs": [
123 |     {
124 |      "data": {
125 |       "text/plain": [
126 |        "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n",
127 |        "       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n",
128 |        "       44, 45, 46, 47, 48, 49, 50])"
129 |       ]
130 |      },
131 |      "execution_count": 5,
132 |      "metadata": {},
133 |      "output_type": "execute_result"
134 |     }
135 |    ],
136 |    "source": [
137 |     "np.arange(10,51)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "#### Create an array of all the even integers from 10 to 50"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 6,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "data": {
154 |       "text/plain": [
155 |        "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n",
156 |        "       44, 46, 48, 50])"
157 |       ]
158 |      },
159 |      "execution_count": 6,
160 |      "metadata": {},
161 |      "output_type": "execute_result"
162 |     }
163 |    ],
164 |    "source": [
165 |     "np.arange(10,51,2)"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "metadata": {},
171 |    "source": [
172 |     "#### Create a 3x3 matrix with values ranging from 0 to 8"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 7,
178 |    "metadata": {},
179 |    "outputs": [
180 |     {
181 |      "data": {
182 |       "text/plain": [
183 |        "array([[0, 1, 2],\n",
184 |        "       [3, 4, 5],\n",
185 |        "       [6, 7, 8]])"
186 |       ]
187 |      },
188 |      "execution_count": 7,
189 |      "metadata": {},
190 |      "output_type": "execute_result"
191 |     }
192 |    ],
193 |    "source": [
194 |     "np.arange(9).reshape(3,3)"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "markdown",
199 |    "metadata": {},
200 |    "source": [
201 |     "#### Create a 3x3 identity matrix"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 8,
207 |    "metadata": {},
208 |    "outputs": [
209 |     {
210 |      "data": {
211 |       "text/plain": [
212 |        "array([[ 1.,  0.,  0.],\n",
213 |        "       [ 0.,  1.,  0.],\n",
214 |        "       [ 0.,  0.,  1.]])"
215 |       ]
216 |      },
217 |      "execution_count": 8,
218 |      "metadata": {},
219 |      "output_type": "execute_result"
220 |     }
221 |    ],
222 |    "source": [
223 |     "np.eye(3)"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "markdown",
228 |    "metadata": {},
229 |    "source": [
230 |     "#### Use NumPy to generate a random number between 0 and 1"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": 15,
236 |    "metadata": {},
237 |    "outputs": [
238 |     {
239 |      "data": {
240 |       "text/plain": [
241 |        "array([ 0.42829726])"
242 |       ]
243 |      },
244 |      "execution_count": 15,
245 |      "metadata": {},
246 |      "output_type": "execute_result"
247 |     }
248 |    ],
249 |    "source": [
250 |     "np.random.rand(1)"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "markdown",
255 |    "metadata": {},
256 |    "source": [
257 |     "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 33,
263 |    "metadata": {},
264 |    "outputs": [
265 |     {
266 |      "data": {
267 |       "text/plain": [
268 |        "array([ 1.32031013,  1.6798602 , -0.42985892, -1.53116655,  0.85753232,\n",
269 |        "        0.87339938,  0.35668636, -1.47491157,  0.15349697,  0.99530727,\n",
270 |        "       -0.94865451, -1.69174783,  1.57525349, -0.70615234,  0.10991879,\n",
271 |        "       -0.49478947,  1.08279872,  0.76488333, -2.3039931 ,  0.35401124,\n",
272 |        "       -0.45454399, -0.64754649, -0.29391671,  0.02339861,  0.38272124])"
273 |       ]
274 |      },
275 |      "execution_count": 33,
276 |      "metadata": {},
277 |      "output_type": "execute_result"
278 |     }
279 |    ],
280 |    "source": [
281 |     "np.random.randn(25)"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "#### Create the following matrix:"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 35,
294 |    "metadata": {},
295 |    "outputs": [
296 |     {
297 |      "data": {
298 |       "text/plain": [
299 |        "array([[ 0.01,  0.02,  0.03,  0.04,  0.05,  0.06,  0.07,  0.08,  0.09,  0.1 ],\n",
300 |        "       [ 0.11,  0.12,  0.13,  0.14,  0.15,  0.16,  0.17,  0.18,  0.19,  0.2 ],\n",
301 |        "       [ 0.21,  0.22,  0.23,  0.24,  0.25,  0.26,  0.27,  0.28,  0.29,  0.3 ],\n",
302 |        "       [ 0.31,  0.32,  0.33,  0.34,  0.35,  0.36,  0.37,  0.38,  0.39,  0.4 ],\n",
303 |        "       [ 0.41,  0.42,  0.43,  0.44,  0.45,  0.46,  0.47,  0.48,  0.49,  0.5 ],\n",
304 |        "       [ 0.51,  0.52,  0.53,  0.54,  0.55,  0.56,  0.57,  0.58,  0.59,  0.6 ],\n",
305 |        "       [ 0.61,  0.62,  0.63,  0.64,  0.65,  0.66,  0.67,  0.68,  0.69,  0.7 ],\n",
306 |        "       [ 0.71,  0.72,  0.73,  0.74,  0.75,  0.76,  0.77,  0.78,  0.79,  0.8 ],\n",
307 |        "       [ 0.81,  0.82,  0.83,  0.84,  0.85,  0.86,  0.87,  0.88,  0.89,  0.9 ],\n",
308 |        "       [ 0.91,  0.92,  0.93,  0.94,  0.95,  0.96,  0.97,  0.98,  0.99,  1.  ]])"
309 |       ]
310 |      },
311 |      "execution_count": 35,
312 |      "metadata": {},
313 |      "output_type": "execute_result"
314 |     }
315 |    ],
316 |    "source": [
317 |     "np.arange(1,101).reshape(10,10) / 100"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "markdown",
322 |    "metadata": {},
323 |    "source": [
324 |     "#### Create an array of 20 linearly spaced points between 0 and 1:"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 36,
330 |    "metadata": {},
331 |    "outputs": [
332 |     {
333 |      "data": {
334 |       "text/plain": [
335 |        "array([ 0.        ,  0.05263158,  0.10526316,  0.15789474,  0.21052632,\n",
336 |        "        0.26315789,  0.31578947,  0.36842105,  0.42105263,  0.47368421,\n",
337 |        "        0.52631579,  0.57894737,  0.63157895,  0.68421053,  0.73684211,\n",
338 |        "        0.78947368,  0.84210526,  0.89473684,  0.94736842,  1.        ])"
339 |       ]
340 |      },
341 |      "execution_count": 36,
342 |      "metadata": {},
343 |      "output_type": "execute_result"
344 |     }
345 |    ],
346 |    "source": [
347 |     "np.linspace(0,1,20)"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "## NumPy Indexing and Selection\n",
355 |     "\n",
356 |     "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "code",
361 |    "execution_count": 38,
362 |    "metadata": {},
363 |    "outputs": [
364 |     {
365 |      "data": {
366 |       "text/plain": [
367 |        "array([[ 1,  2,  3,  4,  5],\n",
368 |        "       [ 6,  7,  8,  9, 10],\n",
369 |        "       [11, 12, 13, 14, 15],\n",
370 |        "       [16, 17, 18, 19, 20],\n",
371 |        "       [21, 22, 23, 24, 25]])"
372 |       ]
373 |      },
374 |      "execution_count": 38,
375 |      "metadata": {},
376 |      "output_type": "execute_result"
377 |     }
378 |    ],
379 |    "source": [
380 |     "mat = np.arange(1,26).reshape(5,5)\n",
381 |     "mat"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "code",
386 |    "execution_count": 39,
387 |    "metadata": {
388 |     "collapsed": true
389 |    },
390 |    "outputs": [],
391 |    "source": [
392 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
393 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
394 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": 40,
400 |    "metadata": {},
401 |    "outputs": [
402 |     {
403 |      "data": {
404 |       "text/plain": [
405 |        "array([[12, 13, 14, 15],\n",
406 |        "       [17, 18, 19, 20],\n",
407 |        "       [22, 23, 24, 25]])"
408 |       ]
409 |      },
410 |      "execution_count": 40,
411 |      "metadata": {},
412 |      "output_type": "execute_result"
413 |     }
414 |    ],
415 |    "source": [
416 |     "mat[2:,1:]"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": 29,
422 |    "metadata": {
423 |     "collapsed": true
424 |    },
425 |    "outputs": [],
426 |    "source": [
427 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
428 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
429 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "code",
434 |    "execution_count": 41,
435 |    "metadata": {},
436 |    "outputs": [
437 |     {
438 |      "data": {
439 |       "text/plain": [
440 |        "20"
441 |       ]
442 |      },
443 |      "execution_count": 41,
444 |      "metadata": {},
445 |      "output_type": "execute_result"
446 |     }
447 |    ],
448 |    "source": [
449 |     "mat[3,4]"
450 |    ]
451 |   },
452 |   {
453 |    "cell_type": "code",
454 |    "execution_count": 30,
455 |    "metadata": {
456 |     "collapsed": true
457 |    },
458 |    "outputs": [],
459 |    "source": [
460 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
461 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
462 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "code",
467 |    "execution_count": 42,
468 |    "metadata": {},
469 |    "outputs": [
470 |     {
471 |      "data": {
472 |       "text/plain": [
473 |        "array([[ 2],\n",
474 |        "       [ 7],\n",
475 |        "       [12]])"
476 |       ]
477 |      },
478 |      "execution_count": 42,
479 |      "metadata": {},
480 |      "output_type": "execute_result"
481 |     }
482 |    ],
483 |    "source": [
484 |     "mat[:3,1:2]"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": 31,
490 |    "metadata": {
491 |     "collapsed": true
492 |    },
493 |    "outputs": [],
494 |    "source": [
495 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
496 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
497 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
498 |    ]
499 |   },
500 |   {
501 |    "cell_type": "code",
502 |    "execution_count": 46,
503 |    "metadata": {},
504 |    "outputs": [
505 |     {
506 |      "data": {
507 |       "text/plain": [
508 |        "array([21, 22, 23, 24, 25])"
509 |       ]
510 |      },
511 |      "execution_count": 46,
512 |      "metadata": {},
513 |      "output_type": "execute_result"
514 |     }
515 |    ],
516 |    "source": [
517 |     "mat[4,:]"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 32,
523 |    "metadata": {
524 |     "collapsed": true
525 |    },
526 |    "outputs": [],
527 |    "source": [
528 |     "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n",
529 |     "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n",
530 |     "# BE ABLE TO SEE THE OUTPUT ANY MORE"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "code",
535 |    "execution_count": 49,
536 |    "metadata": {},
537 |    "outputs": [
538 |     {
539 |      "data": {
540 |       "text/plain": [
541 |        "array([[16, 17, 18, 19, 20],\n",
542 |        "       [21, 22, 23, 24, 25]])"
543 |       ]
544 |      },
545 |      "execution_count": 49,
546 |      "metadata": {},
547 |      "output_type": "execute_result"
548 |     }
549 |    ],
550 |    "source": [
551 |     "mat[3:5,:]"
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "markdown",
556 |    "metadata": {},
557 |    "source": [
558 |     "### Now do the following"
559 |    ]
560 |   },
561 |   {
562 |    "cell_type": "markdown",
563 |    "metadata": {},
564 |    "source": [
565 |     "#### Get the sum of all the values in mat"
566 |    ]
567 |   },
568 |   {
569 |    "cell_type": "code",
570 |    "execution_count": 50,
571 |    "metadata": {},
572 |    "outputs": [
573 |     {
574 |      "data": {
575 |       "text/plain": [
576 |        "325"
577 |       ]
578 |      },
579 |      "execution_count": 50,
580 |      "metadata": {},
581 |      "output_type": "execute_result"
582 |     }
583 |    ],
584 |    "source": [
585 |     "mat.sum()"
586 |    ]
587 |   },
588 |   {
589 |    "cell_type": "markdown",
590 |    "metadata": {},
591 |    "source": [
592 |     "#### Get the standard deviation of the values in mat"
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": 51,
598 |    "metadata": {},
599 |    "outputs": [
600 |     {
601 |      "data": {
602 |       "text/plain": [
603 |        "7.2111025509279782"
604 |       ]
605 |      },
606 |      "execution_count": 51,
607 |      "metadata": {},
608 |      "output_type": "execute_result"
609 |     }
610 |    ],
611 |    "source": [
612 |     "mat.std()"
613 |    ]
614 |   },
615 |   {
616 |    "cell_type": "markdown",
617 |    "metadata": {},
618 |    "source": [
619 |     "#### Get the sum of all the columns in mat"
620 |    ]
621 |   },
622 |   {
623 |    "cell_type": "code",
624 |    "execution_count": 53,
625 |    "metadata": {},
626 |    "outputs": [
627 |     {
628 |      "data": {
629 |       "text/plain": [
630 |        "array([55, 60, 65, 70, 75])"
631 |       ]
632 |      },
633 |      "execution_count": 53,
634 |      "metadata": {},
635 |      "output_type": "execute_result"
636 |     }
637 |    ],
638 |    "source": [
639 |     "mat.sum(axis=0)"
640 |    ]
641 |   },
642 |   {
643 |    "cell_type": "markdown",
644 |    "metadata": {
645 |     "collapsed": true
646 |    },
647 |    "source": [
648 |     "# Great Job!"
649 |    ]
650 |   }
651 |  ],
652 |  "metadata": {
653 |   "kernelspec": {
654 |    "display_name": "Python 3",
655 |    "language": "python",
656 |    "name": "python3"
657 |   },
658 |   "language_info": {
659 |    "codemirror_mode": {
660 |     "name": "ipython",
661 |     "version": 3
662 |    },
663 |    "file_extension": ".py",
664 |    "mimetype": "text/x-python",
665 |    "name": "python",
666 |    "nbconvert_exporter": "python",
667 |    "pygments_lexer": "ipython3",
668 |    "version": "3.7.6"
669 |   }
670 |  },
671 |  "nbformat": 4,
672 |  "nbformat_minor": 1
673 | }
674 | 


--------------------------------------------------------------------------------
/02-Python-for-Data-Analysis-NumPy/02-Numpy Indexing and Selection.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# NumPy Indexing and Selection\n",
  8 |     "\n",
  9 |     "In this lecture we will discuss how to select elements or groups of elements from an array."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 3,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "#Creating sample array\n",
 28 |     "arr = np.arange(0,11)"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 4,
 34 |    "metadata": {},
 35 |    "outputs": [
 36 |     {
 37 |      "data": {
 38 |       "text/plain": [
 39 |        "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
 40 |       ]
 41 |      },
 42 |      "execution_count": 4,
 43 |      "metadata": {},
 44 |      "output_type": "execute_result"
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "#Show\n",
 49 |     "arr"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "## Bracket Indexing and Selection\n",
 57 |     "The simplest way to pick one or some elements of an array looks very similar to python lists:"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 5,
 63 |    "metadata": {},
 64 |    "outputs": [
 65 |     {
 66 |      "data": {
 67 |       "text/plain": [
 68 |        "8"
 69 |       ]
 70 |      },
 71 |      "execution_count": 5,
 72 |      "metadata": {},
 73 |      "output_type": "execute_result"
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "#Get a value at an index\n",
 78 |     "arr[8]"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 6,
 84 |    "metadata": {},
 85 |    "outputs": [
 86 |     {
 87 |      "data": {
 88 |       "text/plain": [
 89 |        "array([1, 2, 3, 4])"
 90 |       ]
 91 |      },
 92 |      "execution_count": 6,
 93 |      "metadata": {},
 94 |      "output_type": "execute_result"
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "#Get values in a range\n",
 99 |     "arr[1:5]"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 7,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "data": {
109 |       "text/plain": [
110 |        "array([0, 1, 2, 3, 4])"
111 |       ]
112 |      },
113 |      "execution_count": 7,
114 |      "metadata": {},
115 |      "output_type": "execute_result"
116 |     }
117 |    ],
118 |    "source": [
119 |     "#Get values in a range\n",
120 |     "arr[0:5]"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "## Broadcasting\n",
128 |     "\n",
129 |     "NumPy arrays differ from normal Python lists in their ability to broadcast:"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 8,
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "data": {
139 |       "text/plain": [
140 |        "array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])"
141 |       ]
142 |      },
143 |      "execution_count": 8,
144 |      "metadata": {},
145 |      "output_type": "execute_result"
146 |     }
147 |    ],
148 |    "source": [
149 |     "#Setting a value with index range (Broadcasting)\n",
150 |     "arr[0:5]=100\n",
151 |     "\n",
152 |     "#Show\n",
153 |     "arr"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 9,
159 |    "metadata": {},
160 |    "outputs": [
161 |     {
162 |      "data": {
163 |       "text/plain": [
164 |        "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
165 |       ]
166 |      },
167 |      "execution_count": 9,
168 |      "metadata": {},
169 |      "output_type": "execute_result"
170 |     }
171 |    ],
172 |    "source": [
173 |     "# Reset array, we'll see why I had to reset in a moment\n",
174 |     "arr = np.arange(0,11)\n",
175 |     "\n",
176 |     "#Show\n",
177 |     "arr"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 10,
183 |    "metadata": {},
184 |    "outputs": [
185 |     {
186 |      "data": {
187 |       "text/plain": [
188 |        "array([0, 1, 2, 3, 4, 5])"
189 |       ]
190 |      },
191 |      "execution_count": 10,
192 |      "metadata": {},
193 |      "output_type": "execute_result"
194 |     }
195 |    ],
196 |    "source": [
197 |     "#Important notes on Slices\n",
198 |     "slice_of_arr = arr[0:6]\n",
199 |     "\n",
200 |     "#Show slice\n",
201 |     "slice_of_arr"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 11,
207 |    "metadata": {},
208 |    "outputs": [
209 |     {
210 |      "data": {
211 |       "text/plain": [
212 |        "array([99, 99, 99, 99, 99, 99])"
213 |       ]
214 |      },
215 |      "execution_count": 11,
216 |      "metadata": {},
217 |      "output_type": "execute_result"
218 |     }
219 |    ],
220 |    "source": [
221 |     "#Change Slice\n",
222 |     "slice_of_arr[:]=99\n",
223 |     "\n",
224 |     "#Show Slice again\n",
225 |     "slice_of_arr"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "markdown",
230 |    "metadata": {},
231 |    "source": [
232 |     "Now note the changes also occur in our original array!"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": 12,
238 |    "metadata": {},
239 |    "outputs": [
240 |     {
241 |      "data": {
242 |       "text/plain": [
243 |        "array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])"
244 |       ]
245 |      },
246 |      "execution_count": 12,
247 |      "metadata": {},
248 |      "output_type": "execute_result"
249 |     }
250 |    ],
251 |    "source": [
252 |     "arr"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "markdown",
257 |    "metadata": {},
258 |    "source": [
259 |     "Data is not copied; a slice is a view of the original array! This avoids the memory cost of copying large arrays."
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 13,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "data": {
269 |       "text/plain": [
270 |        "array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])"
271 |       ]
272 |      },
273 |      "execution_count": 13,
274 |      "metadata": {},
275 |      "output_type": "execute_result"
276 |     }
277 |    ],
278 |    "source": [
279 |     "#To get a copy, need to be explicit\n",
280 |     "arr_copy = arr.copy()\n",
281 |     "\n",
282 |     "arr_copy"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "metadata": {},
288 |    "source": [
289 |     "## Indexing a 2D array (matrices)\n",
290 |     "\n",
291 |     "The general format is **arr_2d[row][col]** or **arr_2d[row,col]**. I usually recommend using the comma notation for clarity."
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 14,
297 |    "metadata": {},
298 |    "outputs": [
299 |     {
300 |      "data": {
301 |       "text/plain": [
302 |        "array([[ 5, 10, 15],\n",
303 |        "       [20, 25, 30],\n",
304 |        "       [35, 40, 45]])"
305 |       ]
306 |      },
307 |      "execution_count": 14,
308 |      "metadata": {},
309 |      "output_type": "execute_result"
310 |     }
311 |    ],
312 |    "source": [
313 |     "arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))\n",
314 |     "\n",
315 |     "#Show\n",
316 |     "arr_2d"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 15,
322 |    "metadata": {},
323 |    "outputs": [
324 |     {
325 |      "data": {
326 |       "text/plain": [
327 |        "array([20, 25, 30])"
328 |       ]
329 |      },
330 |      "execution_count": 15,
331 |      "metadata": {},
332 |      "output_type": "execute_result"
333 |     }
334 |    ],
335 |    "source": [
336 |     "#Indexing row\n",
337 |     "arr_2d[1]\n"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": 16,
343 |    "metadata": {},
344 |    "outputs": [
345 |     {
346 |      "data": {
347 |       "text/plain": [
348 |        "20"
349 |       ]
350 |      },
351 |      "execution_count": 16,
352 |      "metadata": {},
353 |      "output_type": "execute_result"
354 |     }
355 |    ],
356 |    "source": [
357 |     "# Format is arr_2d[row][col] or arr_2d[row,col]\n",
358 |     "\n",
359 |     "# Getting individual element value\n",
360 |     "arr_2d[1][0]"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": 17,
366 |    "metadata": {},
367 |    "outputs": [
368 |     {
369 |      "data": {
370 |       "text/plain": [
371 |        "20"
372 |       ]
373 |      },
374 |      "execution_count": 17,
375 |      "metadata": {},
376 |      "output_type": "execute_result"
377 |     }
378 |    ],
379 |    "source": [
380 |     "# Getting individual element value\n",
381 |     "arr_2d[1,0]"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "code",
386 |    "execution_count": 18,
387 |    "metadata": {},
388 |    "outputs": [
389 |     {
390 |      "data": {
391 |       "text/plain": [
392 |        "array([[10, 15],\n",
393 |        "       [25, 30]])"
394 |       ]
395 |      },
396 |      "execution_count": 18,
397 |      "metadata": {},
398 |      "output_type": "execute_result"
399 |     }
400 |    ],
401 |    "source": [
402 |     "# 2D array slicing\n",
403 |     "\n",
404 |     "#Shape (2,2) from top right corner\n",
405 |     "arr_2d[:2,1:]"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": 19,
411 |    "metadata": {},
412 |    "outputs": [
413 |     {
414 |      "data": {
415 |       "text/plain": [
416 |        "array([35, 40, 45])"
417 |       ]
418 |      },
419 |      "execution_count": 19,
420 |      "metadata": {},
421 |      "output_type": "execute_result"
422 |     }
423 |    ],
424 |    "source": [
425 |     "#Shape bottom row\n",
426 |     "arr_2d[2]"
427 |    ]
428 |   },
429 |   {
430 |    "cell_type": "code",
431 |    "execution_count": 20,
432 |    "metadata": {},
433 |    "outputs": [
434 |     {
435 |      "data": {
436 |       "text/plain": [
437 |        "array([35, 40, 45])"
438 |       ]
439 |      },
440 |      "execution_count": 20,
441 |      "metadata": {},
442 |      "output_type": "execute_result"
443 |     }
444 |    ],
445 |    "source": [
446 |     "#Shape bottom row\n",
447 |     "arr_2d[2,:]"
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "markdown",
452 |    "metadata": {},
453 |    "source": [
454 |     "### Fancy Indexing\n",
455 |     "\n",
456 |     "Fancy indexing allows you to select entire rows or columns out of order. To show this, let's quickly build out a numpy array:"
457 |    ]
458 |   },
459 |   {
460 |    "cell_type": "code",
461 |    "execution_count": 21,
462 |    "metadata": {},
463 |    "outputs": [],
464 |    "source": [
465 |     "#Set up matrix\n",
466 |     "arr2d = np.zeros((10,10))"
467 |    ]
468 |   },
469 |   {
470 |    "cell_type": "code",
471 |    "execution_count": 22,
472 |    "metadata": {},
473 |    "outputs": [],
474 |    "source": [
475 |     "#Length of array\n",
476 |     "arr_length = arr2d.shape[1]"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": 23,
482 |    "metadata": {},
483 |    "outputs": [
484 |     {
485 |      "data": {
486 |       "text/plain": [
487 |        "array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],\n",
488 |        "       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],\n",
489 |        "       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
490 |        "       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],\n",
491 |        "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
492 |        "       [ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.],\n",
493 |        "       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
494 |        "       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.],\n",
495 |        "       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.],\n",
496 |        "       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])"
497 |       ]
498 |      },
499 |      "execution_count": 23,
500 |      "metadata": {},
501 |      "output_type": "execute_result"
502 |     }
503 |    ],
504 |    "source": [
505 |     "#Set up array\n",
506 |     "\n",
507 |     "for i in range(arr_length):\n",
508 |     "    arr2d[i] = i\n",
509 |     "    \n",
510 |     "arr2d"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "markdown",
515 |    "metadata": {},
516 |    "source": [
517 |     "Fancy indexing allows the following:"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 24,
523 |    "metadata": {},
524 |    "outputs": [
525 |     {
526 |      "data": {
527 |       "text/plain": [
528 |        "array([[ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
529 |        "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
530 |        "       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
531 |        "       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.]])"
532 |       ]
533 |      },
534 |      "execution_count": 24,
535 |      "metadata": {},
536 |      "output_type": "execute_result"
537 |     }
538 |    ],
539 |    "source": [
540 |     "arr2d[[2,4,6,8]]"
541 |    ]
542 |   },
543 |   {
544 |    "cell_type": "code",
545 |    "execution_count": 25,
546 |    "metadata": {},
547 |    "outputs": [
548 |     {
549 |      "data": {
550 |       "text/plain": [
551 |        "array([[ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],\n",
552 |        "       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],\n",
553 |        "       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],\n",
554 |        "       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.]])"
555 |       ]
556 |      },
557 |      "execution_count": 25,
558 |      "metadata": {},
559 |      "output_type": "execute_result"
560 |     }
561 |    ],
562 |    "source": [
563 |     "#Allows in any order\n",
564 |     "arr2d[[6,4,2,7]]"
565 |    ]
566 |   },
567 |   {
568 |    "cell_type": "markdown",
569 |    "metadata": {},
570 |    "source": [
571 |     "## Selection\n",
572 |     "\n",
573 |     "Let's briefly go over how to use brackets for selection based off of comparison operators."
574 |    ]
575 |   },
576 |   {
577 |    "cell_type": "code",
578 |    "execution_count": 28,
579 |    "metadata": {},
580 |    "outputs": [
581 |     {
582 |      "data": {
583 |       "text/plain": [
584 |        "array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])"
585 |       ]
586 |      },
587 |      "execution_count": 28,
588 |      "metadata": {},
589 |      "output_type": "execute_result"
590 |     }
591 |    ],
592 |    "source": [
593 |     "arr = np.arange(1,11)\n",
594 |     "arr"
595 |    ]
596 |   },
597 |   {
598 |    "cell_type": "code",
599 |    "execution_count": 30,
600 |    "metadata": {},
601 |    "outputs": [
602 |     {
603 |      "data": {
604 |       "text/plain": [
605 |        "array([False, False, False, False,  True,  True,  True,  True,  True,  True], dtype=bool)"
606 |       ]
607 |      },
608 |      "execution_count": 30,
609 |      "metadata": {},
610 |      "output_type": "execute_result"
611 |     }
612 |    ],
613 |    "source": [
614 |     "arr > 4"
615 |    ]
616 |   },
617 |   {
618 |    "cell_type": "code",
619 |    "execution_count": 31,
620 |    "metadata": {
621 |     "collapsed": true
622 |    },
623 |    "outputs": [],
624 |    "source": [
625 |     "bool_arr = arr>4"
626 |    ]
627 |   },
628 |   {
629 |    "cell_type": "code",
630 |    "execution_count": 32,
631 |    "metadata": {},
632 |    "outputs": [
633 |     {
634 |      "data": {
635 |       "text/plain": [
636 |        "array([False, False, False, False,  True,  True,  True,  True,  True,  True], dtype=bool)"
637 |       ]
638 |      },
639 |      "execution_count": 32,
640 |      "metadata": {},
641 |      "output_type": "execute_result"
642 |     }
643 |    ],
644 |    "source": [
645 |     "bool_arr"
646 |    ]
647 |   },
648 |   {
649 |    "cell_type": "code",
650 |    "execution_count": 33,
651 |    "metadata": {},
652 |    "outputs": [
653 |     {
654 |      "data": {
655 |       "text/plain": [
656 |        "array([ 5,  6,  7,  8,  9, 10])"
657 |       ]
658 |      },
659 |      "execution_count": 33,
660 |      "metadata": {},
661 |      "output_type": "execute_result"
662 |     }
663 |    ],
664 |    "source": [
665 |     "arr[bool_arr]"
666 |    ]
667 |   },
668 |   {
669 |    "cell_type": "code",
670 |    "execution_count": 34,
671 |    "metadata": {},
672 |    "outputs": [
673 |     {
674 |      "data": {
675 |       "text/plain": [
676 |        "array([ 3,  4,  5,  6,  7,  8,  9, 10])"
677 |       ]
678 |      },
679 |      "execution_count": 34,
680 |      "metadata": {},
681 |      "output_type": "execute_result"
682 |     }
683 |    ],
684 |    "source": [
685 |     "arr[arr>2]"
686 |    ]
687 |   },
688 |   {
689 |    "cell_type": "code",
690 |    "execution_count": 37,
691 |    "metadata": {},
692 |    "outputs": [
693 |     {
694 |      "data": {
695 |       "text/plain": [
696 |        "array([ 3,  4,  5,  6,  7,  8,  9, 10])"
697 |       ]
698 |      },
699 |      "execution_count": 37,
700 |      "metadata": {},
701 |      "output_type": "execute_result"
702 |     }
703 |    ],
704 |    "source": [
705 |     "x = 2\n",
706 |     "arr[arr>x]"
707 |    ]
708 |   },
709 |   {
710 |    "cell_type": "markdown",
711 |    "metadata": {},
712 |    "source": [
713 |     "# Great Job!\n"
714 |    ]
715 |   }
716 |  ],
717 |  "metadata": {
718 |   "kernelspec": {
719 |    "display_name": "Python 3",
720 |    "language": "python",
721 |    "name": "python3"
722 |   },
723 |   "language_info": {
724 |    "codemirror_mode": {
725 |     "name": "ipython",
726 |     "version": 3
727 |    },
728 |    "file_extension": ".py",
729 |    "mimetype": "text/x-python",
730 |    "name": "python",
731 |    "nbconvert_exporter": "python",
732 |    "pygments_lexer": "ipython3",
733 |    "version": "3.7.6"
734 |   }
735 |  },
736 |  "nbformat": 4,
737 |  "nbformat_minor": 1
738 | }
739 | 


--------------------------------------------------------------------------------
/04-Pandas-Exercises/03-Ecommerce Purchases Exercise .ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "___\n",
  8 |     "\n",
  9 |     "<a href='http://www.pieriandata.com'> <img src='../../Pierian_Data_Logo.png' /></a>\n",
 10 |     "___\n",
 11 |     "# Ecommerce Purchases Exercise\n",
 12 |     "\n",
 13 |     "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. For the most part, the questions get progressively harder.\n",
 14 |     "\n",
 15 |     "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe; all the data is fake and made-up.\n",
 16 |     "\n",
 17 |     "Also note that all of these questions can be answered with one line of code.\n",
 18 |     "____\n",
 19 |     "**Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom.**"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 1,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import pandas as pd"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 86,
 34 |    "metadata": {
 35 |     "collapsed": true
 36 |    },
 37 |    "outputs": [],
 38 |    "source": []
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "**Check the head of the DataFrame.**"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 87,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "data": {
 54 |       "text/html": [
 55 |        "<div>\n",
 56 |        "<table border=\"1\" class=\"dataframe\">\n",
 57 |        "  <thead>\n",
 58 |        "    <tr style=\"text-align: right;\">\n",
 59 |        "      <th></th>\n",
 60 |        "      <th>Address</th>\n",
 61 |        "      <th>Lot</th>\n",
 62 |        "      <th>AM or PM</th>\n",
 63 |        "      <th>Browser Info</th>\n",
 64 |        "      <th>Company</th>\n",
 65 |        "      <th>Credit Card</th>\n",
 66 |        "      <th>CC Exp Date</th>\n",
 67 |        "      <th>CC Security Code</th>\n",
 68 |        "      <th>CC Provider</th>\n",
 69 |        "      <th>Email</th>\n",
 70 |        "      <th>Job</th>\n",
 71 |        "      <th>IP Address</th>\n",
 72 |        "      <th>Language</th>\n",
 73 |        "      <th>Purchase Price</th>\n",
 74 |        "    </tr>\n",
 75 |        "  </thead>\n",
 76 |        "  <tbody>\n",
 77 |        "    <tr>\n",
 78 |        "      <th>0</th>\n",
 79 |        "      <td>16629 Pace Camp Apt. 448\\nAlexisborough, NE 77...</td>\n",
 80 |        "      <td>46 in</td>\n",
 81 |        "      <td>PM</td>\n",
 82 |        "      <td>Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...</td>\n",
 83 |        "      <td>Martinez-Herman</td>\n",
 84 |        "      <td>6011929061123406</td>\n",
 85 |        "      <td>02/20</td>\n",
 86 |        "      <td>900</td>\n",
 87 |        "      <td>JCB 16 digit</td>\n",
 88 |        "      <td>pdunlap@yahoo.com</td>\n",
 89 |        "      <td>Scientist, product/process development</td>\n",
 90 |        "      <td>149.146.147.205</td>\n",
 91 |        "      <td>el</td>\n",
 92 |        "      <td>98.14</td>\n",
 93 |        "    </tr>\n",
 94 |        "    <tr>\n",
 95 |        "      <th>1</th>\n",
 96 |        "      <td>9374 Jasmine Spurs Suite 508\\nSouth John, TN 8...</td>\n",
 97 |        "      <td>28 rn</td>\n",
 98 |        "      <td>PM</td>\n",
 99 |        "      <td>Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...</td>\n",
100 |        "      <td>Fletcher, Richards and Whitaker</td>\n",
101 |        "      <td>3337758169645356</td>\n",
102 |        "      <td>11/18</td>\n",
103 |        "      <td>561</td>\n",
104 |        "      <td>Mastercard</td>\n",
105 |        "      <td>anthony41@reed.com</td>\n",
106 |        "      <td>Drilling engineer</td>\n",
107 |        "      <td>15.160.41.51</td>\n",
108 |        "      <td>fr</td>\n",
109 |        "      <td>70.73</td>\n",
110 |        "    </tr>\n",
111 |        "    <tr>\n",
112 |        "      <th>2</th>\n",
113 |        "      <td>Unit 0065 Box 5052\\nDPO AP 27450</td>\n",
114 |        "      <td>94 vE</td>\n",
115 |        "      <td>PM</td>\n",
116 |        "      <td>Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...</td>\n",
117 |        "      <td>Simpson, Williams and Pham</td>\n",
118 |        "      <td>675957666125</td>\n",
119 |        "      <td>08/19</td>\n",
120 |        "      <td>699</td>\n",
121 |        "      <td>JCB 16 digit</td>\n",
122 |        "      <td>amymiller@morales-harrison.com</td>\n",
123 |        "      <td>Customer service manager</td>\n",
124 |        "      <td>132.207.160.22</td>\n",
125 |        "      <td>de</td>\n",
126 |        "      <td>0.95</td>\n",
127 |        "    </tr>\n",
128 |        "    <tr>\n",
129 |        "      <th>3</th>\n",
130 |        "      <td>7780 Julia Fords\\nNew Stacy, WA 45798</td>\n",
131 |        "      <td>36 vm</td>\n",
132 |        "      <td>PM</td>\n",
133 |        "      <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...</td>\n",
134 |        "      <td>Williams, Marshall and Buchanan</td>\n",
135 |        "      <td>6011578504430710</td>\n",
136 |        "      <td>02/24</td>\n",
137 |        "      <td>384</td>\n",
138 |        "      <td>Discover</td>\n",
139 |        "      <td>brent16@olson-robinson.info</td>\n",
140 |        "      <td>Drilling engineer</td>\n",
141 |        "      <td>30.250.74.19</td>\n",
142 |        "      <td>es</td>\n",
143 |        "      <td>78.04</td>\n",
144 |        "    </tr>\n",
145 |        "    <tr>\n",
146 |        "      <th>4</th>\n",
147 |        "      <td>23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...</td>\n",
148 |        "      <td>20 IE</td>\n",
149 |        "      <td>AM</td>\n",
150 |        "      <td>Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2...</td>\n",
151 |        "      <td>Brown, Watson and Andrews</td>\n",
152 |        "      <td>6011456623207998</td>\n",
153 |        "      <td>10/25</td>\n",
154 |        "      <td>678</td>\n",
155 |        "      <td>Diners Club / Carte Blanche</td>\n",
156 |        "      <td>christopherwright@gmail.com</td>\n",
157 |        "      <td>Fine artist</td>\n",
158 |        "      <td>24.140.33.94</td>\n",
159 |        "      <td>es</td>\n",
160 |        "      <td>77.82</td>\n",
161 |        "    </tr>\n",
162 |        "  </tbody>\n",
163 |        "</table>\n",
164 |        "</div>"
165 |       ],
166 |       "text/plain": [
167 |        "                                             Address    Lot AM or PM  \\\n",
168 |        "0  16629 Pace Camp Apt. 448\\nAlexisborough, NE 77...  46 in       PM   \n",
169 |        "1  9374 Jasmine Spurs Suite 508\\nSouth John, TN 8...  28 rn       PM   \n",
170 |        "2                   Unit 0065 Box 5052\\nDPO AP 27450  94 vE       PM   \n",
171 |        "3              7780 Julia Fords\\nNew Stacy, WA 45798  36 vm       PM   \n",
172 |        "4  23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...  20 IE       AM   \n",
173 |        "\n",
174 |        "                                        Browser Info  \\\n",
175 |        "0  Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...   \n",
176 |        "1  Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...   \n",
177 |        "2  Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...   \n",
178 |        "3  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...   \n",
179 |        "4  Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2...   \n",
180 |        "\n",
181 |        "                           Company       Credit Card CC Exp Date  \\\n",
182 |        "0                  Martinez-Herman  6011929061123406       02/20   \n",
183 |        "1  Fletcher, Richards and Whitaker  3337758169645356       11/18   \n",
184 |        "2       Simpson, Williams and Pham      675957666125       08/19   \n",
185 |        "3  Williams, Marshall and Buchanan  6011578504430710       02/24   \n",
186 |        "4        Brown, Watson and Andrews  6011456623207998       10/25   \n",
187 |        "\n",
188 |        "   CC Security Code                  CC Provider  \\\n",
189 |        "0               900                 JCB 16 digit   \n",
190 |        "1               561                   Mastercard   \n",
191 |        "2               699                 JCB 16 digit   \n",
192 |        "3               384                     Discover   \n",
193 |        "4               678  Diners Club / Carte Blanche   \n",
194 |        "\n",
195 |        "                            Email                                     Job  \\\n",
196 |        "0               pdunlap@yahoo.com  Scientist, product/process development   \n",
197 |        "1              anthony41@reed.com                       Drilling engineer   \n",
198 |        "2  amymiller@morales-harrison.com                Customer service manager   \n",
199 |        "3     brent16@olson-robinson.info                       Drilling engineer   \n",
200 |        "4     christopherwright@gmail.com                             Fine artist   \n",
201 |        "\n",
202 |        "        IP Address Language  Purchase Price  \n",
203 |        "0  149.146.147.205       el           98.14  \n",
204 |        "1     15.160.41.51       fr           70.73  \n",
205 |        "2   132.207.160.22       de            0.95  \n",
206 |        "3     30.250.74.19       es           78.04  \n",
207 |        "4     24.140.33.94       es           77.82  "
208 |       ]
209 |      },
210 |      "execution_count": 87,
211 |      "metadata": {},
212 |      "output_type": "execute_result"
213 |     }
214 |    ],
215 |    "source": []
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {},
220 |    "source": [
221 |     "**How many rows and columns are there?**"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 88,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "name": "stdout",
231 |      "output_type": "stream",
232 |      "text": [
233 |       "<class 'pandas.core.frame.DataFrame'>\n",
234 |       "RangeIndex: 10000 entries, 0 to 9999\n",
235 |       "Data columns (total 14 columns):\n",
236 |       "Address             10000 non-null object\n",
237 |       "Lot                 10000 non-null object\n",
238 |       "AM or PM            10000 non-null object\n",
239 |       "Browser Info        10000 non-null object\n",
240 |       "Company             10000 non-null object\n",
241 |       "Credit Card         10000 non-null int64\n",
242 |       "CC Exp Date         10000 non-null object\n",
243 |       "CC Security Code    10000 non-null int64\n",
244 |       "CC Provider         10000 non-null object\n",
245 |       "Email               10000 non-null object\n",
246 |       "Job                 10000 non-null object\n",
247 |       "IP Address          10000 non-null object\n",
248 |       "Language            10000 non-null object\n",
249 |       "Purchase Price      10000 non-null float64\n",
250 |       "dtypes: float64(1), int64(2), object(11)\n",
251 |       "memory usage: 1.1+ MB\n"
252 |      ]
253 |     }
254 |    ],
255 |    "source": []
256 |   },
257 |   {
258 |    "cell_type": "markdown",
259 |    "metadata": {},
260 |    "source": [
261 |     "**What is the average Purchase Price?**"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 90,
267 |    "metadata": {},
268 |    "outputs": [
269 |     {
270 |      "data": {
271 |       "text/plain": [
272 |        "50.34730200000025"
273 |       ]
274 |      },
275 |      "execution_count": 90,
276 |      "metadata": {},
277 |      "output_type": "execute_result"
278 |     }
279 |    ],
280 |    "source": []
281 |   },
282 |   {
283 |    "cell_type": "markdown",
284 |    "metadata": {},
285 |    "source": [
286 |     "**What were the highest and lowest purchase prices?**"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 92,
292 |    "metadata": {},
293 |    "outputs": [
294 |     {
295 |      "data": {
296 |       "text/plain": [
297 |        "99.989999999999995"
298 |       ]
299 |      },
300 |      "execution_count": 92,
301 |      "metadata": {},
302 |      "output_type": "execute_result"
303 |     }
304 |    ],
305 |    "source": []
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 93,
310 |    "metadata": {},
311 |    "outputs": [
312 |     {
313 |      "data": {
314 |       "text/plain": [
315 |        "0.0"
316 |       ]
317 |      },
318 |      "execution_count": 93,
319 |      "metadata": {},
320 |      "output_type": "execute_result"
321 |     }
322 |    ],
323 |    "source": []
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {},
328 |    "source": [
329 |     "**How many people have English 'en' as their Language of choice on the website?**"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "code",
334 |    "execution_count": 94,
335 |    "metadata": {},
336 |    "outputs": [
337 |     {
338 |      "data": {
339 |       "text/plain": [
340 |        "Address             1098\n",
341 |        "Lot                 1098\n",
342 |        "AM or PM            1098\n",
343 |        "Browser Info        1098\n",
344 |        "Company             1098\n",
345 |        "Credit Card         1098\n",
346 |        "CC Exp Date         1098\n",
347 |        "CC Security Code    1098\n",
348 |        "CC Provider         1098\n",
349 |        "Email               1098\n",
350 |        "Job                 1098\n",
351 |        "IP Address          1098\n",
352 |        "Language            1098\n",
353 |        "Purchase Price      1098\n",
354 |        "dtype: int64"
355 |       ]
356 |      },
357 |      "execution_count": 94,
358 |      "metadata": {},
359 |      "output_type": "execute_result"
360 |     }
361 |    ],
362 |    "source": []
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {},
367 |    "source": [
368 |     "**How many people have the job title of \"Lawyer\"?**\n"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 95,
374 |    "metadata": {
375 |     "scrolled": true
376 |    },
377 |    "outputs": [
378 |     {
379 |      "name": "stdout",
380 |      "output_type": "stream",
381 |      "text": [
382 |       "<class 'pandas.core.frame.DataFrame'>\n",
383 |       "Int64Index: 30 entries, 470 to 9979\n",
384 |       "Data columns (total 14 columns):\n",
385 |       "Address             30 non-null object\n",
386 |       "Lot                 30 non-null object\n",
387 |       "AM or PM            30 non-null object\n",
388 |       "Browser Info        30 non-null object\n",
389 |       "Company             30 non-null object\n",
390 |       "Credit Card         30 non-null int64\n",
391 |       "CC Exp Date         30 non-null object\n",
392 |       "CC Security Code    30 non-null int64\n",
393 |       "CC Provider         30 non-null object\n",
394 |       "Email               30 non-null object\n",
395 |       "Job                 30 non-null object\n",
396 |       "IP Address          30 non-null object\n",
397 |       "Language            30 non-null object\n",
398 |       "Purchase Price      30 non-null float64\n",
399 |       "dtypes: float64(1), int64(2), object(11)\n",
400 |       "memory usage: 3.5+ KB\n"
401 |      ]
402 |     }
403 |    ],
404 |    "source": []
405 |   },
406 |   {
407 |    "cell_type": "markdown",
408 |    "metadata": {},
409 |    "source": [
410 |     "**How many people made the purchase during the AM and how many people made the purchase during PM?**\n",
411 |     "\n",
412 |     "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html))**"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "code",
417 |    "execution_count": 96,
418 |    "metadata": {},
419 |    "outputs": [
420 |     {
421 |      "data": {
422 |       "text/plain": [
423 |        "PM    5068\n",
424 |        "AM    4932\n",
425 |        "Name: AM or PM, dtype: int64"
426 |       ]
427 |      },
428 |      "execution_count": 96,
429 |      "metadata": {},
430 |      "output_type": "execute_result"
431 |     }
432 |    ],
433 |    "source": []
434 |   },
435 |   {
436 |    "cell_type": "markdown",
437 |    "metadata": {},
438 |    "source": [
439 |     "**What are the 5 most common Job Titles?**"
440 |    ]
441 |   },
442 |   {
443 |    "cell_type": "code",
444 |    "execution_count": 97,
445 |    "metadata": {},
446 |    "outputs": [
447 |     {
448 |      "data": {
449 |       "text/plain": [
450 |        "Interior and spatial designer    31\n",
451 |        "Lawyer                           30\n",
452 |        "Social researcher                28\n",
453 |        "Purchasing manager               27\n",
454 |        "Designer, jewellery              27\n",
455 |        "Name: Job, dtype: int64"
456 |       ]
457 |      },
458 |      "execution_count": 97,
459 |      "metadata": {},
460 |      "output_type": "execute_result"
461 |     }
462 |    ],
463 |    "source": []
464 |   },
465 |   {
466 |    "cell_type": "markdown",
467 |    "metadata": {},
468 |    "source": [
469 |     "**Someone made a purchase that came from Lot: \"90 WT\". What was the Purchase Price for this transaction?**"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "code",
474 |    "execution_count": 99,
475 |    "metadata": {},
476 |    "outputs": [
477 |     {
478 |      "data": {
479 |       "text/plain": [
480 |        "513    75.1\n",
481 |        "Name: Purchase Price, dtype: float64"
482 |       ]
483 |      },
484 |      "execution_count": 99,
485 |      "metadata": {},
486 |      "output_type": "execute_result"
487 |     }
488 |    ],
489 |    "source": []
490 |   },
491 |   {
492 |    "cell_type": "markdown",
493 |    "metadata": {},
494 |    "source": [
495 |     "**What is the email of the person with the following Credit Card Number: 4926535242672853?**"
496 |    ]
497 |   },
498 |   {
499 |    "cell_type": "code",
500 |    "execution_count": 100,
501 |    "metadata": {},
502 |    "outputs": [
503 |     {
504 |      "data": {
505 |       "text/plain": [
506 |        "1234    bondellen@williams-garza.com\n",
507 |        "Name: Email, dtype: object"
508 |       ]
509 |      },
510 |      "execution_count": 100,
511 |      "metadata": {},
512 |      "output_type": "execute_result"
513 |     }
514 |    ],
515 |    "source": []
516 |   },
517 |   {
518 |    "cell_type": "markdown",
519 |    "metadata": {},
520 |    "source": [
521 |     "**How many people have American Express as their Credit Card Provider *and* made a purchase above $95?**"
522 |    ]
523 |   },
524 |   {
525 |    "cell_type": "code",
526 |    "execution_count": 101,
527 |    "metadata": {},
528 |    "outputs": [
529 |     {
530 |      "data": {
531 |       "text/plain": [
532 |        "Address             39\n",
533 |        "Lot                 39\n",
534 |        "AM or PM            39\n",
535 |        "Browser Info        39\n",
536 |        "Company             39\n",
537 |        "Credit Card         39\n",
538 |        "CC Exp Date         39\n",
539 |        "CC Security Code    39\n",
540 |        "CC Provider         39\n",
541 |        "Email               39\n",
542 |        "Job                 39\n",
543 |        "IP Address          39\n",
544 |        "Language            39\n",
545 |        "Purchase Price      39\n",
546 |        "dtype: int64"
547 |       ]
548 |      },
549 |      "execution_count": 101,
550 |      "metadata": {},
551 |      "output_type": "execute_result"
552 |     }
553 |    ],
554 |    "source": []
555 |   },
556 |   {
557 |    "cell_type": "markdown",
558 |    "metadata": {},
559 |    "source": [
560 |     "**Hard: How many people have a credit card that expires in 2025?**"
561 |    ]
562 |   },
563 |   {
564 |    "cell_type": "code",
565 |    "execution_count": 102,
566 |    "metadata": {},
567 |    "outputs": [
568 |     {
569 |      "data": {
570 |       "text/plain": [
571 |        "1033"
572 |       ]
573 |      },
574 |      "execution_count": 102,
575 |      "metadata": {},
576 |      "output_type": "execute_result"
577 |     }
578 |    ],
579 |    "source": []
580 |   },
581 |   {
582 |    "cell_type": "markdown",
583 |    "metadata": {},
584 |    "source": [
585 |     "**Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc.)?**"
586 |    ]
587 |   },
588 |   {
589 |    "cell_type": "code",
590 |    "execution_count": 56,
591 |    "metadata": {},
592 |    "outputs": [
593 |     {
594 |      "data": {
595 |       "text/plain": [
596 |        "hotmail.com     1638\n",
597 |        "yahoo.com       1616\n",
598 |        "gmail.com       1605\n",
599 |        "smith.com         42\n",
600 |        "williams.com      37\n",
601 |        "Name: Email, dtype: int64"
602 |       ]
603 |      },
604 |      "execution_count": 56,
605 |      "metadata": {},
606 |      "output_type": "execute_result"
607 |     }
608 |    ],
609 |    "source": []
610 |   },
611 |   {
612 |    "cell_type": "markdown",
613 |    "metadata": {},
614 |    "source": [
615 |     "# Great Job!"
616 |    ]
617 |   }
618 |  ],
619 |  "metadata": {
620 |   "kernelspec": {
621 |    "display_name": "Python 3",
622 |    "language": "python",
623 |    "name": "python3"
624 |   },
625 |   "language_info": {
626 |    "codemirror_mode": {
627 |     "name": "ipython",
628 |     "version": 3
629 |    },
630 |    "file_extension": ".py",
631 |    "mimetype": "text/x-python",
632 |    "name": "python",
633 |    "nbconvert_exporter": "python",
634 |    "pygments_lexer": "ipython3",
635 |    "version": "3.7.6"
636 |   }
637 |  },
638 |  "nbformat": 4,
639 |  "nbformat_minor": 1
640 | }
641 | 


--------------------------------------------------------------------------------
/04-Pandas-Exercises/01-SF Salaries Exercise.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# SF Salaries Exercise \n",
  8 |     "\n",
  9 |     "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "**Import pandas as pd.**"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 6,
 22 |    "metadata": {
 23 |     "collapsed": true
 24 |    },
 25 |    "outputs": [],
 26 |    "source": []
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "**Read Salaries.csv as a DataFrame called sal.**"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 7,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": []
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "**Check the head of the DataFrame.**"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 8,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "data": {
 56 |       "text/html": [
 57 |        "<div>\n",
 58 |        "<table border=\"1\" class=\"dataframe\">\n",
 59 |        "  <thead>\n",
 60 |        "    <tr style=\"text-align: right;\">\n",
 61 |        "      <th></th>\n",
 62 |        "      <th>Id</th>\n",
 63 |        "      <th>EmployeeName</th>\n",
 64 |        "      <th>JobTitle</th>\n",
 65 |        "      <th>BasePay</th>\n",
 66 |        "      <th>OvertimePay</th>\n",
 67 |        "      <th>OtherPay</th>\n",
 68 |        "      <th>Benefits</th>\n",
 69 |        "      <th>TotalPay</th>\n",
 70 |        "      <th>TotalPayBenefits</th>\n",
 71 |        "      <th>Year</th>\n",
 72 |        "      <th>Notes</th>\n",
 73 |        "      <th>Agency</th>\n",
 74 |        "      <th>Status</th>\n",
 75 |        "    </tr>\n",
 76 |        "  </thead>\n",
 77 |        "  <tbody>\n",
 78 |        "    <tr>\n",
 79 |        "      <th>0</th>\n",
 80 |        "      <td>1</td>\n",
 81 |        "      <td>NATHANIEL FORD</td>\n",
 82 |        "      <td>GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY</td>\n",
 83 |        "      <td>167411.18</td>\n",
 84 |        "      <td>0.00</td>\n",
 85 |        "      <td>400184.25</td>\n",
 86 |        "      <td>NaN</td>\n",
 87 |        "      <td>567595.43</td>\n",
 88 |        "      <td>567595.43</td>\n",
 89 |        "      <td>2011</td>\n",
 90 |        "      <td>NaN</td>\n",
 91 |        "      <td>San Francisco</td>\n",
 92 |        "      <td>NaN</td>\n",
 93 |        "    </tr>\n",
 94 |        "    <tr>\n",
 95 |        "      <th>1</th>\n",
 96 |        "      <td>2</td>\n",
 97 |        "      <td>GARY JIMENEZ</td>\n",
 98 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
 99 |        "      <td>155966.02</td>\n",
100 |        "      <td>245131.88</td>\n",
101 |        "      <td>137811.38</td>\n",
102 |        "      <td>NaN</td>\n",
103 |        "      <td>538909.28</td>\n",
104 |        "      <td>538909.28</td>\n",
105 |        "      <td>2011</td>\n",
106 |        "      <td>NaN</td>\n",
107 |        "      <td>San Francisco</td>\n",
108 |        "      <td>NaN</td>\n",
109 |        "    </tr>\n",
110 |        "    <tr>\n",
111 |        "      <th>2</th>\n",
112 |        "      <td>3</td>\n",
113 |        "      <td>ALBERT PARDINI</td>\n",
114 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
115 |        "      <td>212739.13</td>\n",
116 |        "      <td>106088.18</td>\n",
117 |        "      <td>16452.60</td>\n",
118 |        "      <td>NaN</td>\n",
119 |        "      <td>335279.91</td>\n",
120 |        "      <td>335279.91</td>\n",
121 |        "      <td>2011</td>\n",
122 |        "      <td>NaN</td>\n",
123 |        "      <td>San Francisco</td>\n",
124 |        "      <td>NaN</td>\n",
125 |        "    </tr>\n",
126 |        "    <tr>\n",
127 |        "      <th>3</th>\n",
128 |        "      <td>4</td>\n",
129 |        "      <td>CHRISTOPHER CHONG</td>\n",
130 |        "      <td>WIRE ROPE CABLE MAINTENANCE MECHANIC</td>\n",
131 |        "      <td>77916.00</td>\n",
132 |        "      <td>56120.71</td>\n",
133 |        "      <td>198306.90</td>\n",
134 |        "      <td>NaN</td>\n",
135 |        "      <td>332343.61</td>\n",
136 |        "      <td>332343.61</td>\n",
137 |        "      <td>2011</td>\n",
138 |        "      <td>NaN</td>\n",
139 |        "      <td>San Francisco</td>\n",
140 |        "      <td>NaN</td>\n",
141 |        "    </tr>\n",
142 |        "    <tr>\n",
143 |        "      <th>4</th>\n",
144 |        "      <td>5</td>\n",
145 |        "      <td>PATRICK GARDNER</td>\n",
146 |        "      <td>DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)</td>\n",
147 |        "      <td>134401.60</td>\n",
148 |        "      <td>9737.00</td>\n",
149 |        "      <td>182234.59</td>\n",
150 |        "      <td>NaN</td>\n",
151 |        "      <td>326373.19</td>\n",
152 |        "      <td>326373.19</td>\n",
153 |        "      <td>2011</td>\n",
154 |        "      <td>NaN</td>\n",
155 |        "      <td>San Francisco</td>\n",
156 |        "      <td>NaN</td>\n",
157 |        "    </tr>\n",
158 |        "  </tbody>\n",
159 |        "</table>\n",
160 |        "</div>"
161 |       ],
162 |       "text/plain": [
163 |        "   Id       EmployeeName                                        JobTitle  \\\n",
164 |        "0   1     NATHANIEL FORD  GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY   \n",
165 |        "1   2       GARY JIMENEZ                 CAPTAIN III (POLICE DEPARTMENT)   \n",
166 |        "2   3     ALBERT PARDINI                 CAPTAIN III (POLICE DEPARTMENT)   \n",
167 |        "3   4  CHRISTOPHER CHONG            WIRE ROPE CABLE MAINTENANCE MECHANIC   \n",
168 |        "4   5    PATRICK GARDNER    DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)   \n",
169 |        "\n",
170 |        "     BasePay  OvertimePay   OtherPay  Benefits   TotalPay  TotalPayBenefits  \\\n",
171 |        "0  167411.18         0.00  400184.25       NaN  567595.43         567595.43   \n",
172 |        "1  155966.02    245131.88  137811.38       NaN  538909.28         538909.28   \n",
173 |        "2  212739.13    106088.18   16452.60       NaN  335279.91         335279.91   \n",
174 |        "3   77916.00     56120.71  198306.90       NaN  332343.61         332343.61   \n",
175 |        "4  134401.60      9737.00  182234.59       NaN  326373.19         326373.19   \n",
176 |        "\n",
177 |        "   Year  Notes         Agency  Status  \n",
178 |        "0  2011    NaN  San Francisco     NaN  \n",
179 |        "1  2011    NaN  San Francisco     NaN  \n",
180 |        "2  2011    NaN  San Francisco     NaN  \n",
181 |        "3  2011    NaN  San Francisco     NaN  \n",
182 |        "4  2011    NaN  San Francisco     NaN  "
183 |       ]
184 |      },
185 |      "execution_count": 8,
186 |      "metadata": {},
187 |      "output_type": "execute_result"
188 |     }
189 |    ],
190 |    "source": []
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "metadata": {},
195 |    "source": [
196 |     "**Use the .info() method to find out how many entries there are.**"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 9,
202 |    "metadata": {},
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "<class 'pandas.core.frame.DataFrame'>\n",
209 |       "RangeIndex: 148654 entries, 0 to 148653\n",
210 |       "Data columns (total 13 columns):\n",
211 |       "Id                  148654 non-null int64\n",
212 |       "EmployeeName        148654 non-null object\n",
213 |       "JobTitle            148654 non-null object\n",
214 |       "BasePay             148045 non-null float64\n",
215 |       "OvertimePay         148650 non-null float64\n",
216 |       "OtherPay            148650 non-null float64\n",
217 |       "Benefits            112491 non-null float64\n",
218 |       "TotalPay            148654 non-null float64\n",
219 |       "TotalPayBenefits    148654 non-null float64\n",
220 |       "Year                148654 non-null int64\n",
221 |       "Notes               0 non-null float64\n",
222 |       "Agency              148654 non-null object\n",
223 |       "Status              0 non-null float64\n",
224 |       "dtypes: float64(8), int64(2), object(3)\n",
225 |       "memory usage: 14.7+ MB\n"
226 |      ]
227 |     }
228 |    ],
229 |    "source": []
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "metadata": {},
234 |    "source": [
235 |     "**What is the average BasePay?**"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 10,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "66325.44884050643"
247 |       ]
248 |      },
249 |      "execution_count": 10,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": []
255 |   },
256 |   {
257 |    "cell_type": "markdown",
258 |    "metadata": {},
259 |    "source": [
260 |     "**What is the highest amount of OvertimePay in the dataset?**"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 11,
266 |    "metadata": {},
267 |    "outputs": [
268 |     {
269 |      "data": {
270 |       "text/plain": [
271 |        "245131.88"
272 |       ]
273 |      },
274 |      "execution_count": 11,
275 |      "metadata": {},
276 |      "output_type": "execute_result"
277 |     }
278 |    ],
279 |    "source": []
280 |   },
281 |   {
282 |    "cell_type": "markdown",
283 |    "metadata": {},
284 |    "source": [
285 |     "**What is the job title of JOSEPH DRISCOLL? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll).**"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": 12,
291 |    "metadata": {},
292 |    "outputs": [
293 |     {
294 |      "data": {
295 |       "text/plain": [
296 |        "24    CAPTAIN, FIRE SUPPRESSION\n",
297 |        "Name: JobTitle, dtype: object"
298 |       ]
299 |      },
300 |      "execution_count": 12,
301 |      "metadata": {},
302 |      "output_type": "execute_result"
303 |     }
304 |    ],
305 |    "source": []
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "metadata": {},
310 |    "source": [
311 |     "**How much does JOSEPH DRISCOLL make (including benefits)?**"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 13,
317 |    "metadata": {},
318 |    "outputs": [
319 |     {
320 |      "data": {
321 |       "text/plain": [
322 |        "24    270324.91\n",
323 |        "Name: TotalPayBenefits, dtype: float64"
324 |       ]
325 |      },
326 |      "execution_count": 13,
327 |      "metadata": {},
328 |      "output_type": "execute_result"
329 |     }
330 |    ],
331 |    "source": []
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "**What is the name of the highest paid person (including benefits)?**"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": 14,
343 |    "metadata": {},
344 |    "outputs": [
345 |     {
346 |      "data": {
347 |       "text/html": [
348 |        "<div>\n",
349 |        "<table border=\"1\" class=\"dataframe\">\n",
350 |        "  <thead>\n",
351 |        "    <tr style=\"text-align: right;\">\n",
352 |        "      <th></th>\n",
353 |        "      <th>Id</th>\n",
354 |        "      <th>EmployeeName</th>\n",
355 |        "      <th>JobTitle</th>\n",
356 |        "      <th>BasePay</th>\n",
357 |        "      <th>OvertimePay</th>\n",
358 |        "      <th>OtherPay</th>\n",
359 |        "      <th>Benefits</th>\n",
360 |        "      <th>TotalPay</th>\n",
361 |        "      <th>TotalPayBenefits</th>\n",
362 |        "      <th>Year</th>\n",
363 |        "      <th>Notes</th>\n",
364 |        "      <th>Agency</th>\n",
365 |        "      <th>Status</th>\n",
366 |        "    </tr>\n",
367 |        "  </thead>\n",
368 |        "  <tbody>\n",
369 |        "    <tr>\n",
370 |        "      <th>0</th>\n",
371 |        "      <td>1</td>\n",
372 |        "      <td>NATHANIEL FORD</td>\n",
373 |        "      <td>GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY</td>\n",
374 |        "      <td>167411.18</td>\n",
375 |        "      <td>0.0</td>\n",
376 |        "      <td>400184.25</td>\n",
377 |        "      <td>NaN</td>\n",
378 |        "      <td>567595.43</td>\n",
379 |        "      <td>567595.43</td>\n",
380 |        "      <td>2011</td>\n",
381 |        "      <td>NaN</td>\n",
382 |        "      <td>San Francisco</td>\n",
383 |        "      <td>NaN</td>\n",
384 |        "    </tr>\n",
385 |        "  </tbody>\n",
386 |        "</table>\n",
387 |        "</div>"
388 |       ],
389 |       "text/plain": [
390 |        "   Id    EmployeeName                                        JobTitle  \\\n",
391 |        "0   1  NATHANIEL FORD  GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY   \n",
392 |        "\n",
393 |        "     BasePay  OvertimePay   OtherPay  Benefits   TotalPay  TotalPayBenefits  \\\n",
394 |        "0  167411.18          0.0  400184.25       NaN  567595.43         567595.43   \n",
395 |        "\n",
396 |        "   Year  Notes         Agency  Status  \n",
397 |        "0  2011    NaN  San Francisco     NaN  "
398 |       ]
399 |      },
400 |      "execution_count": 14,
401 |      "metadata": {},
402 |      "output_type": "execute_result"
403 |     }
404 |    ],
405 |    "source": []
406 |   },
407 |   {
408 |    "cell_type": "markdown",
409 |    "metadata": {},
410 |    "source": [
411 |     "**What is the name of the lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": 15,
417 |    "metadata": {},
418 |    "outputs": [
419 |     {
420 |      "data": {
421 |       "text/html": [
422 |        "<div>\n",
423 |        "<table border=\"1\" class=\"dataframe\">\n",
424 |        "  <thead>\n",
425 |        "    <tr style=\"text-align: right;\">\n",
426 |        "      <th></th>\n",
427 |        "      <th>Id</th>\n",
428 |        "      <th>EmployeeName</th>\n",
429 |        "      <th>JobTitle</th>\n",
430 |        "      <th>BasePay</th>\n",
431 |        "      <th>OvertimePay</th>\n",
432 |        "      <th>OtherPay</th>\n",
433 |        "      <th>Benefits</th>\n",
434 |        "      <th>TotalPay</th>\n",
435 |        "      <th>TotalPayBenefits</th>\n",
436 |        "      <th>Year</th>\n",
437 |        "      <th>Notes</th>\n",
438 |        "      <th>Agency</th>\n",
439 |        "      <th>Status</th>\n",
440 |        "    </tr>\n",
441 |        "  </thead>\n",
442 |        "  <tbody>\n",
443 |        "    <tr>\n",
444 |        "      <th>148653</th>\n",
445 |        "      <td>148654</td>\n",
446 |        "      <td>Joe Lopez</td>\n",
447 |        "      <td>Counselor, Log Cabin Ranch</td>\n",
448 |        "      <td>0.0</td>\n",
449 |        "      <td>0.0</td>\n",
450 |        "      <td>-618.13</td>\n",
451 |        "      <td>0.0</td>\n",
452 |        "      <td>-618.13</td>\n",
453 |        "      <td>-618.13</td>\n",
454 |        "      <td>2014</td>\n",
455 |        "      <td>NaN</td>\n",
456 |        "      <td>San Francisco</td>\n",
457 |        "      <td>NaN</td>\n",
458 |        "    </tr>\n",
459 |        "  </tbody>\n",
460 |        "</table>\n",
461 |        "</div>"
462 |       ],
463 |       "text/plain": [
464 |        "            Id EmployeeName                    JobTitle  BasePay  OvertimePay  \\\n",
465 |        "148653  148654    Joe Lopez  Counselor, Log Cabin Ranch      0.0          0.0   \n",
466 |        "\n",
467 |        "        OtherPay  Benefits  TotalPay  TotalPayBenefits  Year  Notes  \\\n",
468 |        "148653   -618.13       0.0   -618.13           -618.13  2014    NaN   \n",
469 |        "\n",
470 |        "               Agency  Status  \n",
471 |        "148653  San Francisco     NaN  "
472 |       ]
473 |      },
474 |      "execution_count": 15,
475 |      "metadata": {},
476 |      "output_type": "execute_result"
477 |     }
478 |    ],
479 |    "source": []
480 |   },
481 |   {
482 |    "cell_type": "markdown",
483 |    "metadata": {},
484 |    "source": [
485 |     "**What was the average (mean) BasePay of all employees per year (2011-2014)?**"
486 |    ]
487 |   },
488 |   {
489 |    "cell_type": "code",
490 |    "execution_count": 16,
491 |    "metadata": {},
492 |    "outputs": [
493 |     {
494 |      "data": {
495 |       "text/plain": [
496 |        "Year\n",
497 |        "2011    63595.956517\n",
498 |        "2012    65436.406857\n",
499 |        "2013    69630.030216\n",
500 |        "2014    66564.421924\n",
501 |        "Name: BasePay, dtype: float64"
502 |       ]
503 |      },
504 |      "execution_count": 16,
505 |      "metadata": {},
506 |      "output_type": "execute_result"
507 |     }
508 |    ],
509 |    "source": []
510 |   },
511 |   {
512 |    "cell_type": "markdown",
513 |    "metadata": {},
514 |    "source": [
515 |     "**How many unique job titles are there?**"
516 |    ]
517 |   },
518 |   {
519 |    "cell_type": "code",
520 |    "execution_count": 17,
521 |    "metadata": {},
522 |    "outputs": [
523 |     {
524 |      "data": {
525 |       "text/plain": [
526 |        "2159"
527 |       ]
528 |      },
529 |      "execution_count": 17,
530 |      "metadata": {},
531 |      "output_type": "execute_result"
532 |     }
533 |    ],
534 |    "source": []
535 |   },
536 |   {
537 |    "cell_type": "markdown",
538 |    "metadata": {},
539 |    "source": [
540 |     "**What are the top 5 most common jobs?**"
541 |    ]
542 |   },
543 |   {
544 |    "cell_type": "code",
545 |    "execution_count": 18,
546 |    "metadata": {},
547 |    "outputs": [
548 |     {
549 |      "data": {
550 |       "text/plain": [
551 |        "Transit Operator                7036\n",
552 |        "Special Nurse                   4389\n",
553 |        "Registered Nurse                3736\n",
554 |        "Public Svc Aide-Public Works    2518\n",
555 |        "Police Officer 3                2421\n",
556 |        "Name: JobTitle, dtype: int64"
557 |       ]
558 |      },
559 |      "execution_count": 18,
560 |      "metadata": {},
561 |      "output_type": "execute_result"
562 |     }
563 |    ],
564 |    "source": []
565 |   },
566 |   {
567 |    "cell_type": "markdown",
568 |    "metadata": {},
569 |    "source": [
570 |     "**How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013)**"
571 |    ]
572 |   },
573 |   {
574 |    "cell_type": "code",
575 |    "execution_count": 19,
576 |    "metadata": {},
577 |    "outputs": [
578 |     {
579 |      "data": {
580 |       "text/plain": [
581 |        "202"
582 |       ]
583 |      },
584 |      "execution_count": 19,
585 |      "metadata": {},
586 |      "output_type": "execute_result"
587 |     }
588 |    ],
589 |    "source": []
590 |   },
591 |   {
592 |    "cell_type": "markdown",
593 |    "metadata": {},
594 |    "source": [
595 |     "**Bonus: Is there a correlation between the length of the Job Title string and Salary?**"
596 |    ]
597 |   },
598 |   {
599 |    "cell_type": "code",
600 |    "execution_count": 22,
601 |    "metadata": {},
602 |    "outputs": [],
603 |    "source": []
604 |   },
605 |   {
606 |    "cell_type": "code",
607 |    "execution_count": 23,
608 |    "metadata": {},
609 |    "outputs": [
610 |     {
611 |      "data": {
612 |       "text/html": [
613 |        "<div>\n",
614 |        "<table border=\"1\" class=\"dataframe\">\n",
615 |        "  <thead>\n",
616 |        "    <tr style=\"text-align: right;\">\n",
617 |        "      <th></th>\n",
618 |        "      <th>title_len</th>\n",
619 |        "      <th>TotalPayBenefits</th>\n",
620 |        "    </tr>\n",
621 |        "  </thead>\n",
622 |        "  <tbody>\n",
623 |        "    <tr>\n",
624 |        "      <th>title_len</th>\n",
625 |        "      <td>1.000000</td>\n",
626 |        "      <td>-0.036878</td>\n",
627 |        "    </tr>\n",
628 |        "    <tr>\n",
629 |        "      <th>TotalPayBenefits</th>\n",
630 |        "      <td>-0.036878</td>\n",
631 |        "      <td>1.000000</td>\n",
632 |        "    </tr>\n",
633 |        "  </tbody>\n",
634 |        "</table>\n",
635 |        "</div>"
636 |       ],
637 |       "text/plain": [
638 |        "                  title_len  TotalPayBenefits\n",
639 |        "title_len          1.000000         -0.036878\n",
640 |        "TotalPayBenefits  -0.036878          1.000000"
641 |       ]
642 |      },
643 |      "execution_count": 23,
644 |      "metadata": {},
645 |      "output_type": "execute_result"
646 |     }
647 |    ],
648 |    "source": []
649 |   },
650 |   {
651 |    "cell_type": "markdown",
652 |    "metadata": {},
653 |    "source": [
654 |     "# Great Job!"
655 |    ]
656 |   }
657 |  ],
658 |  "metadata": {
659 |   "kernelspec": {
660 |    "display_name": "Python 3",
661 |    "language": "python",
662 |    "name": "python3"
663 |   },
664 |   "language_info": {
665 |    "codemirror_mode": {
666 |     "name": "ipython",
667 |     "version": 3
668 |    },
669 |    "file_extension": ".py",
670 |    "mimetype": "text/x-python",
671 |    "name": "python",
672 |    "nbconvert_exporter": "python",
673 |    "pygments_lexer": "ipython3",
674 |    "version": "3.7.6"
675 |   }
676 |  },
677 |  "nbformat": 4,
678 |  "nbformat_minor": 1
679 | }
680 | 


--------------------------------------------------------------------------------
/04-Pandas-Exercises/04-Ecommerce Purchases Exercise - Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "___\n",
  8 |     "\n",
  9 |     "<a href='http://www.pieriandata.com'> <img src='../../Pierian_Data_Logo.png' /></a>\n",
 10 |     "___\n",
 11 |     "# Ecommerce Purchases Exercise - Solutions\n",
 12 |     "\n",
 13 |     "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. For the most part, the questions get progressively harder.\n",
 14 |     "\n",
 15 |     "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n",
 16 |     "\n",
 17 |     "Also note that all of these questions can be answered with one line of code.\n",
 18 |     "____\n",
 19 |     "**Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom.**"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 84,
 25 |    "metadata": {
 26 |     "collapsed": true
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import pandas as pd"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 86,
 36 |    "metadata": {
 37 |     "collapsed": true
 38 |    },
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "ecom = pd.read_csv('Ecommerce Purchases')"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "**Check the head of the DataFrame.**"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 87,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/html": [
 59 |        "<div>\n",
 60 |        "<table border=\"1\" class=\"dataframe\">\n",
 61 |        "  <thead>\n",
 62 |        "    <tr style=\"text-align: right;\">\n",
 63 |        "      <th></th>\n",
 64 |        "      <th>Address</th>\n",
 65 |        "      <th>Lot</th>\n",
 66 |        "      <th>AM or PM</th>\n",
 67 |        "      <th>Browser Info</th>\n",
 68 |        "      <th>Company</th>\n",
 69 |        "      <th>Credit Card</th>\n",
 70 |        "      <th>CC Exp Date</th>\n",
 71 |        "      <th>CC Security Code</th>\n",
 72 |        "      <th>CC Provider</th>\n",
 73 |        "      <th>Email</th>\n",
 74 |        "      <th>Job</th>\n",
 75 |        "      <th>IP Address</th>\n",
 76 |        "      <th>Language</th>\n",
 77 |        "      <th>Purchase Price</th>\n",
 78 |        "    </tr>\n",
 79 |        "  </thead>\n",
 80 |        "  <tbody>\n",
 81 |        "    <tr>\n",
 82 |        "      <th>0</th>\n",
 83 |        "      <td>16629 Pace Camp Apt. 448\\nAlexisborough, NE 77...</td>\n",
 84 |        "      <td>46 in</td>\n",
 85 |        "      <td>PM</td>\n",
 86 |        "      <td>Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...</td>\n",
 87 |        "      <td>Martinez-Herman</td>\n",
 88 |        "      <td>6011929061123406</td>\n",
 89 |        "      <td>02/20</td>\n",
 90 |        "      <td>900</td>\n",
 91 |        "      <td>JCB 16 digit</td>\n",
 92 |        "      <td>pdunlap@yahoo.com</td>\n",
 93 |        "      <td>Scientist, product/process development</td>\n",
 94 |        "      <td>149.146.147.205</td>\n",
 95 |        "      <td>el</td>\n",
 96 |        "      <td>98.14</td>\n",
 97 |        "    </tr>\n",
 98 |        "    <tr>\n",
 99 |        "      <th>1</th>\n",
100 |        "      <td>9374 Jasmine Spurs Suite 508\\nSouth John, TN 8...</td>\n",
101 |        "      <td>28 rn</td>\n",
102 |        "      <td>PM</td>\n",
103 |        "      <td>Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...</td>\n",
104 |        "      <td>Fletcher, Richards and Whitaker</td>\n",
105 |        "      <td>3337758169645356</td>\n",
106 |        "      <td>11/18</td>\n",
107 |        "      <td>561</td>\n",
108 |        "      <td>Mastercard</td>\n",
109 |        "      <td>anthony41@reed.com</td>\n",
110 |        "      <td>Drilling engineer</td>\n",
111 |        "      <td>15.160.41.51</td>\n",
112 |        "      <td>fr</td>\n",
113 |        "      <td>70.73</td>\n",
114 |        "    </tr>\n",
115 |        "    <tr>\n",
116 |        "      <th>2</th>\n",
117 |        "      <td>Unit 0065 Box 5052\\nDPO AP 27450</td>\n",
118 |        "      <td>94 vE</td>\n",
119 |        "      <td>PM</td>\n",
120 |        "      <td>Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...</td>\n",
121 |        "      <td>Simpson, Williams and Pham</td>\n",
122 |        "      <td>675957666125</td>\n",
123 |        "      <td>08/19</td>\n",
124 |        "      <td>699</td>\n",
125 |        "      <td>JCB 16 digit</td>\n",
126 |        "      <td>amymiller@morales-harrison.com</td>\n",
127 |        "      <td>Customer service manager</td>\n",
128 |        "      <td>132.207.160.22</td>\n",
129 |        "      <td>de</td>\n",
130 |        "      <td>0.95</td>\n",
131 |        "    </tr>\n",
132 |        "    <tr>\n",
133 |        "      <th>3</th>\n",
134 |        "      <td>7780 Julia Fords\\nNew Stacy, WA 45798</td>\n",
135 |        "      <td>36 vm</td>\n",
136 |        "      <td>PM</td>\n",
137 |        "      <td>Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...</td>\n",
138 |        "      <td>Williams, Marshall and Buchanan</td>\n",
139 |        "      <td>6011578504430710</td>\n",
140 |        "      <td>02/24</td>\n",
141 |        "      <td>384</td>\n",
142 |        "      <td>Discover</td>\n",
143 |        "      <td>brent16@olson-robinson.info</td>\n",
144 |        "      <td>Drilling engineer</td>\n",
145 |        "      <td>30.250.74.19</td>\n",
146 |        "      <td>es</td>\n",
147 |        "      <td>78.04</td>\n",
148 |        "    </tr>\n",
149 |        "    <tr>\n",
150 |        "      <th>4</th>\n",
151 |        "      <td>23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...</td>\n",
152 |        "      <td>20 IE</td>\n",
153 |        "      <td>AM</td>\n",
154 |        "      <td>Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2...</td>\n",
155 |        "      <td>Brown, Watson and Andrews</td>\n",
156 |        "      <td>6011456623207998</td>\n",
157 |        "      <td>10/25</td>\n",
158 |        "      <td>678</td>\n",
159 |        "      <td>Diners Club / Carte Blanche</td>\n",
160 |        "      <td>christopherwright@gmail.com</td>\n",
161 |        "      <td>Fine artist</td>\n",
162 |        "      <td>24.140.33.94</td>\n",
163 |        "      <td>es</td>\n",
164 |        "      <td>77.82</td>\n",
165 |        "    </tr>\n",
166 |        "  </tbody>\n",
167 |        "</table>\n",
168 |        "</div>"
169 |       ],
170 |       "text/plain": [
171 |        "                                             Address    Lot AM or PM  \\\n",
172 |        "0  16629 Pace Camp Apt. 448\\nAlexisborough, NE 77...  46 in       PM   \n",
173 |        "1  9374 Jasmine Spurs Suite 508\\nSouth John, TN 8...  28 rn       PM   \n",
174 |        "2                   Unit 0065 Box 5052\\nDPO AP 27450  94 vE       PM   \n",
175 |        "3              7780 Julia Fords\\nNew Stacy, WA 45798  36 vm       PM   \n",
176 |        "4  23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...  20 IE       AM   \n",
177 |        "\n",
178 |        "                                        Browser Info  \\\n",
179 |        "0  Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...   \n",
180 |        "1  Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...   \n",
181 |        "2  Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...   \n",
182 |        "3  Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...   \n",
183 |        "4  Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2...   \n",
184 |        "\n",
185 |        "                           Company       Credit Card CC Exp Date  \\\n",
186 |        "0                  Martinez-Herman  6011929061123406       02/20   \n",
187 |        "1  Fletcher, Richards and Whitaker  3337758169645356       11/18   \n",
188 |        "2       Simpson, Williams and Pham      675957666125       08/19   \n",
189 |        "3  Williams, Marshall and Buchanan  6011578504430710       02/24   \n",
190 |        "4        Brown, Watson and Andrews  6011456623207998       10/25   \n",
191 |        "\n",
192 |        "   CC Security Code                  CC Provider  \\\n",
193 |        "0               900                 JCB 16 digit   \n",
194 |        "1               561                   Mastercard   \n",
195 |        "2               699                 JCB 16 digit   \n",
196 |        "3               384                     Discover   \n",
197 |        "4               678  Diners Club / Carte Blanche   \n",
198 |        "\n",
199 |        "                            Email                                     Job  \\\n",
200 |        "0               pdunlap@yahoo.com  Scientist, product/process development   \n",
201 |        "1              anthony41@reed.com                       Drilling engineer   \n",
202 |        "2  amymiller@morales-harrison.com                Customer service manager   \n",
203 |        "3     brent16@olson-robinson.info                       Drilling engineer   \n",
204 |        "4     christopherwright@gmail.com                             Fine artist   \n",
205 |        "\n",
206 |        "        IP Address Language  Purchase Price  \n",
207 |        "0  149.146.147.205       el           98.14  \n",
208 |        "1     15.160.41.51       fr           70.73  \n",
209 |        "2   132.207.160.22       de            0.95  \n",
210 |        "3     30.250.74.19       es           78.04  \n",
211 |        "4     24.140.33.94       es           77.82  "
212 |       ]
213 |      },
214 |      "execution_count": 87,
215 |      "metadata": {},
216 |      "output_type": "execute_result"
217 |     }
218 |    ],
219 |    "source": [
220 |     "ecom.head()"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "**How many rows and columns are there?**"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 88,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "name": "stdout",
237 |      "output_type": "stream",
238 |      "text": [
239 |       "<class 'pandas.core.frame.DataFrame'>\n",
240 |       "RangeIndex: 10000 entries, 0 to 9999\n",
241 |       "Data columns (total 14 columns):\n",
242 |       "Address             10000 non-null object\n",
243 |       "Lot                 10000 non-null object\n",
244 |       "AM or PM            10000 non-null object\n",
245 |       "Browser Info        10000 non-null object\n",
246 |       "Company             10000 non-null object\n",
247 |       "Credit Card         10000 non-null int64\n",
248 |       "CC Exp Date         10000 non-null object\n",
249 |       "CC Security Code    10000 non-null int64\n",
250 |       "CC Provider         10000 non-null object\n",
251 |       "Email               10000 non-null object\n",
252 |       "Job                 10000 non-null object\n",
253 |       "IP Address          10000 non-null object\n",
254 |       "Language            10000 non-null object\n",
255 |       "Purchase Price      10000 non-null float64\n",
256 |       "dtypes: float64(1), int64(2), object(11)\n",
257 |       "memory usage: 1.1+ MB\n"
258 |      ]
259 |     }
260 |    ],
261 |    "source": [
262 |     "ecom.info()"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {},
268 |    "source": [
269 |     "**What is the average Purchase Price?**"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 90,
275 |    "metadata": {},
276 |    "outputs": [
277 |     {
278 |      "data": {
279 |       "text/plain": [
280 |        "50.34730200000025"
281 |       ]
282 |      },
283 |      "execution_count": 90,
284 |      "metadata": {},
285 |      "output_type": "execute_result"
286 |     }
287 |    ],
288 |    "source": [
289 |     "ecom['Purchase Price'].mean()"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "**What were the highest and lowest purchase prices?**"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 92,
302 |    "metadata": {},
303 |    "outputs": [
304 |     {
305 |      "data": {
306 |       "text/plain": [
307 |        "99.989999999999995"
308 |       ]
309 |      },
310 |      "execution_count": 92,
311 |      "metadata": {},
312 |      "output_type": "execute_result"
313 |     }
314 |    ],
315 |    "source": [
316 |     "ecom['Purchase Price'].max()"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 93,
322 |    "metadata": {},
323 |    "outputs": [
324 |     {
325 |      "data": {
326 |       "text/plain": [
327 |        "0.0"
328 |       ]
329 |      },
330 |      "execution_count": 93,
331 |      "metadata": {},
332 |      "output_type": "execute_result"
333 |     }
334 |    ],
335 |    "source": [
336 |     "ecom['Purchase Price'].min()"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "**How many people have English 'en' as their Language of choice on the website?**"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "code",
348 |    "execution_count": 94,
349 |    "metadata": {},
350 |    "outputs": [
351 |     {
352 |      "data": {
353 |       "text/plain": [
354 |        "Address             1098\n",
355 |        "Lot                 1098\n",
356 |        "AM or PM            1098\n",
357 |        "Browser Info        1098\n",
358 |        "Company             1098\n",
359 |        "Credit Card         1098\n",
360 |        "CC Exp Date         1098\n",
361 |        "CC Security Code    1098\n",
362 |        "CC Provider         1098\n",
363 |        "Email               1098\n",
364 |        "Job                 1098\n",
365 |        "IP Address          1098\n",
366 |        "Language            1098\n",
367 |        "Purchase Price      1098\n",
368 |        "dtype: int64"
369 |       ]
370 |      },
371 |      "execution_count": 94,
372 |      "metadata": {},
373 |      "output_type": "execute_result"
374 |     }
375 |    ],
376 |    "source": [
377 |     "ecom[ecom['Language']=='en'].count()"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "markdown",
382 |    "metadata": {},
383 |    "source": [
384 |     "**How many people have the job title of \"Lawyer\"?**\n"
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "code",
389 |    "execution_count": 95,
390 |    "metadata": {},
391 |    "outputs": [
392 |     {
393 |      "name": "stdout",
394 |      "output_type": "stream",
395 |      "text": [
396 |       "<class 'pandas.core.frame.DataFrame'>\n",
397 |       "Int64Index: 30 entries, 470 to 9979\n",
398 |       "Data columns (total 14 columns):\n",
399 |       "Address             30 non-null object\n",
400 |       "Lot                 30 non-null object\n",
401 |       "AM or PM            30 non-null object\n",
402 |       "Browser Info        30 non-null object\n",
403 |       "Company             30 non-null object\n",
404 |       "Credit Card         30 non-null int64\n",
405 |       "CC Exp Date         30 non-null object\n",
406 |       "CC Security Code    30 non-null int64\n",
407 |       "CC Provider         30 non-null object\n",
408 |       "Email               30 non-null object\n",
409 |       "Job                 30 non-null object\n",
410 |       "IP Address          30 non-null object\n",
411 |       "Language            30 non-null object\n",
412 |       "Purchase Price      30 non-null float64\n",
413 |       "dtypes: float64(1), int64(2), object(11)\n",
414 |       "memory usage: 3.5+ KB\n"
415 |      ]
416 |     }
417 |    ],
418 |    "source": [
419 |     "ecom[ecom['Job'] == 'Lawyer'].info()"
420 |    ]
421 |   },
422 |   {
423 |    "cell_type": "markdown",
424 |    "metadata": {},
425 |    "source": [
426 |     "**How many people made the purchase during the AM and how many people made the purchase during the PM?**\n",
427 |     "\n",
428 |     "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html))**"
429 |    ]
430 |   },
431 |   {
432 |    "cell_type": "code",
433 |    "execution_count": 96,
434 |    "metadata": {},
435 |    "outputs": [
436 |     {
437 |      "data": {
438 |       "text/plain": [
439 |        "PM    5068\n",
440 |        "AM    4932\n",
441 |        "Name: AM or PM, dtype: int64"
442 |       ]
443 |      },
444 |      "execution_count": 96,
445 |      "metadata": {},
446 |      "output_type": "execute_result"
447 |     }
448 |    ],
449 |    "source": [
450 |     "ecom['AM or PM'].value_counts()"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "markdown",
455 |    "metadata": {},
456 |    "source": [
457 |     "**What are the 5 most common Job Titles?**"
458 |    ]
459 |   },
460 |   {
461 |    "cell_type": "code",
462 |    "execution_count": 97,
463 |    "metadata": {},
464 |    "outputs": [
465 |     {
466 |      "data": {
467 |       "text/plain": [
468 |        "Interior and spatial designer    31\n",
469 |        "Lawyer                           30\n",
470 |        "Social researcher                28\n",
471 |        "Purchasing manager               27\n",
472 |        "Designer, jewellery              27\n",
473 |        "Name: Job, dtype: int64"
474 |       ]
475 |      },
476 |      "execution_count": 97,
477 |      "metadata": {},
478 |      "output_type": "execute_result"
479 |     }
480 |    ],
481 |    "source": [
482 |     "ecom['Job'].value_counts().head(5)"
483 |    ]
484 |   },
485 |   {
486 |    "cell_type": "markdown",
487 |    "metadata": {},
488 |    "source": [
489 |     "**Someone made a purchase that came from Lot \"90 WT\". What was the Purchase Price for this transaction?**"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": 99,
495 |    "metadata": {},
496 |    "outputs": [
497 |     {
498 |      "data": {
499 |       "text/plain": [
500 |        "513    75.1\n",
501 |        "Name: Purchase Price, dtype: float64"
502 |       ]
503 |      },
504 |      "execution_count": 99,
505 |      "metadata": {},
506 |      "output_type": "execute_result"
507 |     }
508 |    ],
509 |    "source": [
510 |     "ecom[ecom['Lot']=='90 WT']['Purchase Price']"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "markdown",
515 |    "metadata": {},
516 |    "source": [
517 |     "**What is the email of the person with the following Credit Card Number: 4926535242672853?**"
518 |    ]
519 |   },
520 |   {
521 |    "cell_type": "code",
522 |    "execution_count": 100,
523 |    "metadata": {},
524 |    "outputs": [
525 |     {
526 |      "data": {
527 |       "text/plain": [
528 |        "1234    bondellen@williams-garza.com\n",
529 |        "Name: Email, dtype: object"
530 |       ]
531 |      },
532 |      "execution_count": 100,
533 |      "metadata": {},
534 |      "output_type": "execute_result"
535 |     }
536 |    ],
537 |    "source": [
538 |     "ecom[ecom[\"Credit Card\"] == 4926535242672853]['Email'] "
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "markdown",
543 |    "metadata": {},
544 |    "source": [
545 |     "**How many people have American Express as their Credit Card Provider *and* made a purchase above $95?**"
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "code",
550 |    "execution_count": 101,
551 |    "metadata": {},
552 |    "outputs": [
553 |     {
554 |      "data": {
555 |       "text/plain": [
556 |        "Address             39\n",
557 |        "Lot                 39\n",
558 |        "AM or PM            39\n",
559 |        "Browser Info        39\n",
560 |        "Company             39\n",
561 |        "Credit Card         39\n",
562 |        "CC Exp Date         39\n",
563 |        "CC Security Code    39\n",
564 |        "CC Provider         39\n",
565 |        "Email               39\n",
566 |        "Job                 39\n",
567 |        "IP Address          39\n",
568 |        "Language            39\n",
569 |        "Purchase Price      39\n",
570 |        "dtype: int64"
571 |       ]
572 |      },
573 |      "execution_count": 101,
574 |      "metadata": {},
575 |      "output_type": "execute_result"
576 |     }
577 |    ],
578 |    "source": [
579 |     "ecom[(ecom['CC Provider']=='American Express') & (ecom['Purchase Price']>95)].count()"
580 |    ]
581 |   },
582 |   {
583 |    "cell_type": "markdown",
584 |    "metadata": {},
585 |    "source": [
586 |     "**Hard: How many people have a credit card that expires in 2025?**"
587 |    ]
588 |   },
589 |   {
590 |    "cell_type": "code",
591 |    "execution_count": 102,
592 |    "metadata": {},
593 |    "outputs": [
594 |     {
595 |      "data": {
596 |       "text/plain": [
597 |        "1033"
598 |       ]
599 |      },
600 |      "execution_count": 102,
601 |      "metadata": {},
602 |      "output_type": "execute_result"
603 |     }
604 |    ],
605 |    "source": [
606 |     "sum(ecom['CC Exp Date'].apply(lambda x: x[3:]) == '25')"
607 |    ]
608 |   },
609 |   {
610 |    "cell_type": "markdown",
611 |    "metadata": {},
612 |    "source": [
613 |     "**Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc.)?**"
614 |    ]
615 |   },
616 |   {
617 |    "cell_type": "code",
618 |    "execution_count": 56,
619 |    "metadata": {},
620 |    "outputs": [
621 |     {
622 |      "data": {
623 |       "text/plain": [
624 |        "hotmail.com     1638\n",
625 |        "yahoo.com       1616\n",
626 |        "gmail.com       1605\n",
627 |        "smith.com         42\n",
628 |        "williams.com      37\n",
629 |        "Name: Email, dtype: int64"
630 |       ]
631 |      },
632 |      "execution_count": 56,
633 |      "metadata": {},
634 |      "output_type": "execute_result"
635 |     }
636 |    ],
637 |    "source": [
638 |     "ecom['Email'].apply(lambda x: x.split('@')[1]).value_counts().head(5)"
639 |    ]
640 |   },
641 |   {
642 |    "cell_type": "markdown",
643 |    "metadata": {},
644 |    "source": [
645 |     "# Great Job!"
646 |    ]
647 |   }
648 |  ],
649 |  "metadata": {
650 |   "kernelspec": {
651 |    "display_name": "Python 3",
652 |    "language": "python",
653 |    "name": "python3"
654 |   },
655 |   "language_info": {
656 |    "codemirror_mode": {
657 |     "name": "ipython",
658 |     "version": 3
659 |    },
660 |    "file_extension": ".py",
661 |    "mimetype": "text/x-python",
662 |    "name": "python",
663 |    "nbconvert_exporter": "python",
664 |    "pygments_lexer": "ipython3",
665 |    "version": "3.7.6"
666 |   }
667 |  },
668 |  "nbformat": 4,
669 |  "nbformat_minor": 1
670 | }
671 | 


--------------------------------------------------------------------------------
/04-Pandas-Exercises/02-SF Salaries Exercise - Solutions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# SF Salaries Exercise - Solutions\n",
  8 |     "\n",
  9 |     "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "**Import pandas as pd.**"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 6,
 22 |    "metadata": {
 23 |     "collapsed": true
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "import pandas as pd"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "**Read Salaries.csv as a dataframe called sal.**"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 7,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "sal = pd.read_csv('Salaries.csv')"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "**Check the head of the DataFrame.**"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 8,
 56 |    "metadata": {},
 57 |    "outputs": [
 58 |     {
 59 |      "data": {
 60 |       "text/html": [
 61 |        "<div>\n",
 62 |        "<table border=\"1\" class=\"dataframe\">\n",
 63 |        "  <thead>\n",
 64 |        "    <tr style=\"text-align: right;\">\n",
 65 |        "      <th></th>\n",
 66 |        "      <th>Id</th>\n",
 67 |        "      <th>EmployeeName</th>\n",
 68 |        "      <th>JobTitle</th>\n",
 69 |        "      <th>BasePay</th>\n",
 70 |        "      <th>OvertimePay</th>\n",
 71 |        "      <th>OtherPay</th>\n",
 72 |        "      <th>Benefits</th>\n",
 73 |        "      <th>TotalPay</th>\n",
 74 |        "      <th>TotalPayBenefits</th>\n",
 75 |        "      <th>Year</th>\n",
 76 |        "      <th>Notes</th>\n",
 77 |        "      <th>Agency</th>\n",
 78 |        "      <th>Status</th>\n",
 79 |        "    </tr>\n",
 80 |        "  </thead>\n",
 81 |        "  <tbody>\n",
 82 |        "    <tr>\n",
 83 |        "      <th>0</th>\n",
 84 |        "      <td>1</td>\n",
 85 |        "      <td>NATHANIEL FORD</td>\n",
 86 |        "      <td>GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY</td>\n",
 87 |        "      <td>167411.18</td>\n",
 88 |        "      <td>0.00</td>\n",
 89 |        "      <td>400184.25</td>\n",
 90 |        "      <td>NaN</td>\n",
 91 |        "      <td>567595.43</td>\n",
 92 |        "      <td>567595.43</td>\n",
 93 |        "      <td>2011</td>\n",
 94 |        "      <td>NaN</td>\n",
 95 |        "      <td>San Francisco</td>\n",
 96 |        "      <td>NaN</td>\n",
 97 |        "    </tr>\n",
 98 |        "    <tr>\n",
 99 |        "      <th>1</th>\n",
100 |        "      <td>2</td>\n",
101 |        "      <td>GARY JIMENEZ</td>\n",
102 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
103 |        "      <td>155966.02</td>\n",
104 |        "      <td>245131.88</td>\n",
105 |        "      <td>137811.38</td>\n",
106 |        "      <td>NaN</td>\n",
107 |        "      <td>538909.28</td>\n",
108 |        "      <td>538909.28</td>\n",
109 |        "      <td>2011</td>\n",
110 |        "      <td>NaN</td>\n",
111 |        "      <td>San Francisco</td>\n",
112 |        "      <td>NaN</td>\n",
113 |        "    </tr>\n",
114 |        "    <tr>\n",
115 |        "      <th>2</th>\n",
116 |        "      <td>3</td>\n",
117 |        "      <td>ALBERT PARDINI</td>\n",
118 |        "      <td>CAPTAIN III (POLICE DEPARTMENT)</td>\n",
119 |        "      <td>212739.13</td>\n",
120 |        "      <td>106088.18</td>\n",
121 |        "      <td>16452.60</td>\n",
122 |        "      <td>NaN</td>\n",
123 |        "      <td>335279.91</td>\n",
124 |        "      <td>335279.91</td>\n",
125 |        "      <td>2011</td>\n",
126 |        "      <td>NaN</td>\n",
127 |        "      <td>San Francisco</td>\n",
128 |        "      <td>NaN</td>\n",
129 |        "    </tr>\n",
130 |        "    <tr>\n",
131 |        "      <th>3</th>\n",
132 |        "      <td>4</td>\n",
133 |        "      <td>CHRISTOPHER CHONG</td>\n",
134 |        "      <td>WIRE ROPE CABLE MAINTENANCE MECHANIC</td>\n",
135 |        "      <td>77916.00</td>\n",
136 |        "      <td>56120.71</td>\n",
137 |        "      <td>198306.90</td>\n",
138 |        "      <td>NaN</td>\n",
139 |        "      <td>332343.61</td>\n",
140 |        "      <td>332343.61</td>\n",
141 |        "      <td>2011</td>\n",
142 |        "      <td>NaN</td>\n",
143 |        "      <td>San Francisco</td>\n",
144 |        "      <td>NaN</td>\n",
145 |        "    </tr>\n",
146 |        "    <tr>\n",
147 |        "      <th>4</th>\n",
148 |        "      <td>5</td>\n",
149 |        "      <td>PATRICK GARDNER</td>\n",
150 |        "      <td>DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)</td>\n",
151 |        "      <td>134401.60</td>\n",
152 |        "      <td>9737.00</td>\n",
153 |        "      <td>182234.59</td>\n",
154 |        "      <td>NaN</td>\n",
155 |        "      <td>326373.19</td>\n",
156 |        "      <td>326373.19</td>\n",
157 |        "      <td>2011</td>\n",
158 |        "      <td>NaN</td>\n",
159 |        "      <td>San Francisco</td>\n",
160 |        "      <td>NaN</td>\n",
161 |        "    </tr>\n",
162 |        "  </tbody>\n",
163 |        "</table>\n",
164 |        "</div>"
165 |       ],
166 |       "text/plain": [
167 |        "   Id       EmployeeName                                        JobTitle  \\\n",
168 |        "0   1     NATHANIEL FORD  GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY   \n",
169 |        "1   2       GARY JIMENEZ                 CAPTAIN III (POLICE DEPARTMENT)   \n",
170 |        "2   3     ALBERT PARDINI                 CAPTAIN III (POLICE DEPARTMENT)   \n",
171 |        "3   4  CHRISTOPHER CHONG            WIRE ROPE CABLE MAINTENANCE MECHANIC   \n",
172 |        "4   5    PATRICK GARDNER    DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)   \n",
173 |        "\n",
174 |        "     BasePay  OvertimePay   OtherPay  Benefits   TotalPay  TotalPayBenefits  \\\n",
175 |        "0  167411.18         0.00  400184.25       NaN  567595.43         567595.43   \n",
176 |        "1  155966.02    245131.88  137811.38       NaN  538909.28         538909.28   \n",
177 |        "2  212739.13    106088.18   16452.60       NaN  335279.91         335279.91   \n",
178 |        "3   77916.00     56120.71  198306.90       NaN  332343.61         332343.61   \n",
179 |        "4  134401.60      9737.00  182234.59       NaN  326373.19         326373.19   \n",
180 |        "\n",
181 |        "   Year  Notes         Agency  Status  \n",
182 |        "0  2011    NaN  San Francisco     NaN  \n",
183 |        "1  2011    NaN  San Francisco     NaN  \n",
184 |        "2  2011    NaN  San Francisco     NaN  \n",
185 |        "3  2011    NaN  San Francisco     NaN  \n",
186 |        "4  2011    NaN  San Francisco     NaN  "
187 |       ]
188 |      },
189 |      "execution_count": 8,
190 |      "metadata": {},
191 |      "output_type": "execute_result"
192 |     }
193 |    ],
194 |    "source": [
195 |     "sal.head()"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {},
201 |    "source": [
202 |     "**Use the .info() method to find out how many entries there are.**"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 9,
208 |    "metadata": {},
209 |    "outputs": [
210 |     {
211 |      "name": "stdout",
212 |      "output_type": "stream",
213 |      "text": [
214 |       "<class 'pandas.core.frame.DataFrame'>\n",
215 |       "RangeIndex: 148654 entries, 0 to 148653\n",
216 |       "Data columns (total 13 columns):\n",
217 |       "Id                  148654 non-null int64\n",
218 |       "EmployeeName        148654 non-null object\n",
219 |       "JobTitle            148654 non-null object\n",
220 |       "BasePay             148045 non-null float64\n",
221 |       "OvertimePay         148650 non-null float64\n",
222 |       "OtherPay            148650 non-null float64\n",
223 |       "Benefits            112491 non-null float64\n",
224 |       "TotalPay            148654 non-null float64\n",
225 |       "TotalPayBenefits    148654 non-null float64\n",
226 |       "Year                148654 non-null int64\n",
227 |       "Notes               0 non-null float64\n",
228 |       "Agency              148654 non-null object\n",
229 |       "Status              0 non-null float64\n",
230 |       "dtypes: float64(8), int64(2), object(3)\n",
231 |       "memory usage: 14.7+ MB\n"
232 |      ]
233 |     }
234 |    ],
235 |    "source": [
236 |     "sal.info() # 148654 Entries"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "**What is the average BasePay?**"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": 10,
249 |    "metadata": {},
250 |    "outputs": [
251 |     {
252 |      "data": {
253 |       "text/plain": [
254 |        "66325.44884050643"
255 |       ]
256 |      },
257 |      "execution_count": 10,
258 |      "metadata": {},
259 |      "output_type": "execute_result"
260 |     }
261 |    ],
262 |    "source": [
263 |     "sal['BasePay'].mean()"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "metadata": {},
269 |    "source": [
270 |     "**What is the highest amount of OvertimePay in the dataset?**"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 11,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "text/plain": [
281 |        "245131.88"
282 |       ]
283 |      },
284 |      "execution_count": 11,
285 |      "metadata": {},
286 |      "output_type": "execute_result"
287 |     }
288 |    ],
289 |    "source": [
290 |     "sal['OvertimePay'].max()"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "**What is the job title of JOSEPH DRISCOLL? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll).**"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 12,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "data": {
307 |       "text/plain": [
308 |        "24    CAPTAIN, FIRE SUPPRESSION\n",
309 |        "Name: JobTitle, dtype: object"
310 |       ]
311 |      },
312 |      "execution_count": 12,
313 |      "metadata": {},
314 |      "output_type": "execute_result"
315 |     }
316 |    ],
317 |    "source": [
318 |     "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['JobTitle']"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "markdown",
323 |    "metadata": {},
324 |    "source": [
325 |     "**How much does JOSEPH DRISCOLL make (including benefits)?**"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 13,
331 |    "metadata": {},
332 |    "outputs": [
333 |     {
334 |      "data": {
335 |       "text/plain": [
336 |        "24    270324.91\n",
337 |        "Name: TotalPayBenefits, dtype: float64"
338 |       ]
339 |      },
340 |      "execution_count": 13,
341 |      "metadata": {},
342 |      "output_type": "execute_result"
343 |     }
344 |    ],
345 |    "source": [
346 |     "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['TotalPayBenefits']"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "markdown",
351 |    "metadata": {},
352 |    "source": [
353 |     "**What is the name of the highest paid person (including benefits)?**"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": 14,
359 |    "metadata": {},
360 |    "outputs": [
361 |     {
362 |      "data": {
363 |       "text/html": [
364 |        "<div>\n",
365 |        "<table border=\"1\" class=\"dataframe\">\n",
366 |        "  <thead>\n",
367 |        "    <tr style=\"text-align: right;\">\n",
368 |        "      <th></th>\n",
369 |        "      <th>Id</th>\n",
370 |        "      <th>EmployeeName</th>\n",
371 |        "      <th>JobTitle</th>\n",
372 |        "      <th>BasePay</th>\n",
373 |        "      <th>OvertimePay</th>\n",
374 |        "      <th>OtherPay</th>\n",
375 |        "      <th>Benefits</th>\n",
376 |        "      <th>TotalPay</th>\n",
377 |        "      <th>TotalPayBenefits</th>\n",
378 |        "      <th>Year</th>\n",
379 |        "      <th>Notes</th>\n",
380 |        "      <th>Agency</th>\n",
381 |        "      <th>Status</th>\n",
382 |        "    </tr>\n",
383 |        "  </thead>\n",
384 |        "  <tbody>\n",
385 |        "    <tr>\n",
386 |        "      <th>0</th>\n",
387 |        "      <td>1</td>\n",
388 |        "      <td>NATHANIEL FORD</td>\n",
389 |        "      <td>GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY</td>\n",
390 |        "      <td>167411.18</td>\n",
391 |        "      <td>0.0</td>\n",
392 |        "      <td>400184.25</td>\n",
393 |        "      <td>NaN</td>\n",
394 |        "      <td>567595.43</td>\n",
395 |        "      <td>567595.43</td>\n",
396 |        "      <td>2011</td>\n",
397 |        "      <td>NaN</td>\n",
398 |        "      <td>San Francisco</td>\n",
399 |        "      <td>NaN</td>\n",
400 |        "    </tr>\n",
401 |        "  </tbody>\n",
402 |        "</table>\n",
403 |        "</div>"
404 |       ],
405 |       "text/plain": [
406 |        "   Id    EmployeeName                                        JobTitle  \\\n",
407 |        "0   1  NATHANIEL FORD  GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY   \n",
408 |        "\n",
409 |        "     BasePay  OvertimePay   OtherPay  Benefits   TotalPay  TotalPayBenefits  \\\n",
410 |        "0  167411.18          0.0  400184.25       NaN  567595.43         567595.43   \n",
411 |        "\n",
412 |        "   Year  Notes         Agency  Status  \n",
413 |        "0  2011    NaN  San Francisco     NaN  "
414 |       ]
415 |      },
416 |      "execution_count": 14,
417 |      "metadata": {},
418 |      "output_type": "execute_result"
419 |     }
420 |    ],
421 |    "source": [
422 |     "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].max()] #['EmployeeName']\n",
423 |     "# or\n",
424 |     "# sal.loc[sal['TotalPayBenefits'].idxmax()]"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "markdown",
429 |    "metadata": {},
430 |    "source": [
431 |     "**What is the name of the lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 15,
437 |    "metadata": {},
438 |    "outputs": [
439 |     {
440 |      "data": {
441 |       "text/html": [
442 |        "<div>\n",
443 |        "<table border=\"1\" class=\"dataframe\">\n",
444 |        "  <thead>\n",
445 |        "    <tr style=\"text-align: right;\">\n",
446 |        "      <th></th>\n",
447 |        "      <th>Id</th>\n",
448 |        "      <th>EmployeeName</th>\n",
449 |        "      <th>JobTitle</th>\n",
450 |        "      <th>BasePay</th>\n",
451 |        "      <th>OvertimePay</th>\n",
452 |        "      <th>OtherPay</th>\n",
453 |        "      <th>Benefits</th>\n",
454 |        "      <th>TotalPay</th>\n",
455 |        "      <th>TotalPayBenefits</th>\n",
456 |        "      <th>Year</th>\n",
457 |        "      <th>Notes</th>\n",
458 |        "      <th>Agency</th>\n",
459 |        "      <th>Status</th>\n",
460 |        "    </tr>\n",
461 |        "  </thead>\n",
462 |        "  <tbody>\n",
463 |        "    <tr>\n",
464 |        "      <th>148653</th>\n",
465 |        "      <td>148654</td>\n",
466 |        "      <td>Joe Lopez</td>\n",
467 |        "      <td>Counselor, Log Cabin Ranch</td>\n",
468 |        "      <td>0.0</td>\n",
469 |        "      <td>0.0</td>\n",
470 |        "      <td>-618.13</td>\n",
471 |        "      <td>0.0</td>\n",
472 |        "      <td>-618.13</td>\n",
473 |        "      <td>-618.13</td>\n",
474 |        "      <td>2014</td>\n",
475 |        "      <td>NaN</td>\n",
476 |        "      <td>San Francisco</td>\n",
477 |        "      <td>NaN</td>\n",
478 |        "    </tr>\n",
479 |        "  </tbody>\n",
480 |        "</table>\n",
481 |        "</div>"
482 |       ],
483 |       "text/plain": [
484 |        "            Id EmployeeName                    JobTitle  BasePay  OvertimePay  \\\n",
485 |        "148653  148654    Joe Lopez  Counselor, Log Cabin Ranch      0.0          0.0   \n",
486 |        "\n",
487 |        "        OtherPay  Benefits  TotalPay  TotalPayBenefits  Year  Notes  \\\n",
488 |        "148653   -618.13       0.0   -618.13           -618.13  2014    NaN   \n",
489 |        "\n",
490 |        "               Agency  Status  \n",
491 |        "148653  San Francisco     NaN  "
492 |       ]
493 |      },
494 |      "execution_count": 15,
495 |      "metadata": {},
496 |      "output_type": "execute_result"
497 |     }
498 |    ],
499 |    "source": [
500 |     "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].min()] #['EmployeeName']\n",
501 |     "# or\n",
502 |     "# sal.loc[sal['TotalPayBenefits'].idxmin()]['EmployeeName']\n",
503 |     "\n",
504 |     "## ITS NEGATIVE!! VERY STRANGE"
505 |    ]
506 |   },
507 |   {
508 |    "cell_type": "markdown",
509 |    "metadata": {},
510 |    "source": [
511 |     "**What was the average (mean) BasePay of all employees per year (2011-2014)?**"
512 |    ]
513 |   },
514 |   {
515 |    "cell_type": "code",
516 |    "execution_count": 16,
517 |    "metadata": {},
518 |    "outputs": [
519 |     {
520 |      "data": {
521 |       "text/plain": [
522 |        "Year\n",
523 |        "2011    63595.956517\n",
524 |        "2012    65436.406857\n",
525 |        "2013    69630.030216\n",
526 |        "2014    66564.421924\n",
527 |        "Name: BasePay, dtype: float64"
528 |       ]
529 |      },
530 |      "execution_count": 16,
531 |      "metadata": {},
532 |      "output_type": "execute_result"
533 |     }
534 |    ],
535 |    "source": [
536 |     "sal.groupby('Year').mean()['BasePay']"
537 |    ]
538 |   },
539 |   {
540 |    "cell_type": "markdown",
541 |    "metadata": {},
542 |    "source": [
543 |     "**How many unique job titles are there?**"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "code",
548 |    "execution_count": 17,
549 |    "metadata": {},
550 |    "outputs": [
551 |     {
552 |      "data": {
553 |       "text/plain": [
554 |        "2159"
555 |       ]
556 |      },
557 |      "execution_count": 17,
558 |      "metadata": {},
559 |      "output_type": "execute_result"
560 |     }
561 |    ],
562 |    "source": [
563 |     "sal['JobTitle'].nunique()"
564 |    ]
565 |   },
566 |   {
567 |    "cell_type": "markdown",
568 |    "metadata": {},
569 |    "source": [
570 |     "**What are the top 5 most common jobs?**"
571 |    ]
572 |   },
573 |   {
574 |    "cell_type": "code",
575 |    "execution_count": 18,
576 |    "metadata": {},
577 |    "outputs": [
578 |     {
579 |      "data": {
580 |       "text/plain": [
581 |        "Transit Operator                7036\n",
582 |        "Special Nurse                   4389\n",
583 |        "Registered Nurse                3736\n",
584 |        "Public Svc Aide-Public Works    2518\n",
585 |        "Police Officer 3                2421\n",
586 |        "Name: JobTitle, dtype: int64"
587 |       ]
588 |      },
589 |      "execution_count": 18,
590 |      "metadata": {},
591 |      "output_type": "execute_result"
592 |     }
593 |    ],
594 |    "source": [
595 |     "sal['JobTitle'].value_counts().head(5)"
596 |    ]
597 |   },
598 |   {
599 |    "cell_type": "markdown",
600 |    "metadata": {},
601 |    "source": [
602 |     "**How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013?)**"
603 |    ]
604 |   },
605 |   {
606 |    "cell_type": "code",
607 |    "execution_count": 19,
608 |    "metadata": {},
609 |    "outputs": [
610 |     {
611 |      "data": {
612 |       "text/plain": [
613 |        "202"
614 |       ]
615 |      },
616 |      "execution_count": 19,
617 |      "metadata": {},
618 |      "output_type": "execute_result"
619 |     }
620 |    ],
621 |    "source": [
622 |     "sum(sal[sal['Year']==2013]['JobTitle'].value_counts() == 1) # pretty tricky way to do this..."
623 |    ]
624 |   },
625 |   {
626 |    "cell_type": "markdown",
627 |    "metadata": {},
628 |    "source": [
629 |     "**Bonus: Is there a correlation between length of the Job Title string and Salary?**"
630 |    ]
631 |   },
632 |   {
633 |    "cell_type": "code",
634 |    "execution_count": 22,
635 |    "metadata": {},
636 |    "outputs": [],
637 |    "source": [
638 |     "sal['title_len'] = sal['JobTitle'].apply(len)"
639 |    ]
640 |   },
641 |   {
642 |    "cell_type": "code",
643 |    "execution_count": 23,
644 |    "metadata": {},
645 |    "outputs": [
646 |     {
647 |      "data": {
648 |       "text/html": [
649 |        "<div>\n",
650 |        "<table border=\"1\" class=\"dataframe\">\n",
651 |        "  <thead>\n",
652 |        "    <tr style=\"text-align: right;\">\n",
653 |        "      <th></th>\n",
654 |        "      <th>title_len</th>\n",
655 |        "      <th>TotalPayBenefits</th>\n",
656 |        "    </tr>\n",
657 |        "  </thead>\n",
658 |        "  <tbody>\n",
659 |        "    <tr>\n",
660 |        "      <th>title_len</th>\n",
661 |        "      <td>1.000000</td>\n",
662 |        "      <td>-0.036878</td>\n",
663 |        "    </tr>\n",
664 |        "    <tr>\n",
665 |        "      <th>TotalPayBenefits</th>\n",
666 |        "      <td>-0.036878</td>\n",
667 |        "      <td>1.000000</td>\n",
668 |        "    </tr>\n",
669 |        "  </tbody>\n",
670 |        "</table>\n",
671 |        "</div>"
672 |       ],
673 |       "text/plain": [
674 |        "                  title_len  TotalPayBenefits\n",
675 |        "title_len          1.000000         -0.036878\n",
676 |        "TotalPayBenefits  -0.036878          1.000000"
677 |       ]
678 |      },
679 |      "execution_count": 23,
680 |      "metadata": {},
681 |      "output_type": "execute_result"
682 |     }
683 |    ],
684 |    "source": [
685 |     "sal[['title_len','TotalPayBenefits']].corr() # No correlation."
686 |    ]
687 |   },
688 |   {
689 |    "cell_type": "markdown",
690 |    "metadata": {},
691 |    "source": [
692 |     "# Great Job!"
693 |    ]
694 |   }
695 |  ],
696 |  "metadata": {
697 |   "kernelspec": {
698 |    "display_name": "Python 3",
699 |    "language": "python",
700 |    "name": "python3"
701 |   },
702 |   "language_info": {
703 |    "codemirror_mode": {
704 |     "name": "ipython",
705 |     "version": 3
706 |    },
707 |    "file_extension": ".py",
708 |    "mimetype": "text/x-python",
709 |    "name": "python",
710 |    "nbconvert_exporter": "python",
711 |    "pygments_lexer": "ipython3",
712 |    "version": "3.7.6"
713 |   }
714 |  },
715 |  "nbformat": 4,
716 |  "nbformat_minor": 1
717 | }
718 | 


--------------------------------------------------------------------------------
/02-Python-for-Data-Analysis-NumPy/01-NumPy Arrays.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# NumPy \n",
  8 |     "\n",
  9 |     "NumPy (or Numpy) is a Linear Algebra Library for Python. The reason it is so important for Data Science with Python is that almost all of the libraries in the PyData Ecosystem rely on NumPy as one of their main building blocks.\n",
 10 |     "\n",
 11 |     "Numpy is also incredibly fast, as it has bindings to C libraries. For more info on why you would want to use Arrays instead of lists, check out this great [StackOverflow post](http://stackoverflow.com/questions/993984/why-numpy-instead-of-python-lists).\n",
 12 |     "\n",
 13 |     "We will only learn the basics of NumPy. To get started, we need to install it!"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "## Installation Instructions\n",
 21 |     "\n",
 22 |     "**It is highly recommended you install Python using the Anaconda distribution to make sure all underlying dependencies (such as Linear Algebra libraries) all sync up with the use of a conda install. If you have Anaconda, install NumPy by going to your terminal or command prompt and typing:**\n",
 23 |     "    \n",
 24 |     "    conda install numpy\n",
 25 |     "    \n",
 26 |     "**If you do not have Anaconda and cannot install it, please refer to [Numpy's official documentation on various installation instructions.](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html)**"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "## Using NumPy\n",
 34 |     "\n",
 35 |     "Once you've installed NumPy you can import it as a library:"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 1,
 41 |    "metadata": {
 42 |     "collapsed": true
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "import numpy as np"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "Numpy has many built-in functions and capabilities. We won't cover them all but instead we will focus on some of the most important aspects of Numpy: vectors, arrays, matrices, and number generation. Let's start by discussing arrays.\n",
 54 |     "\n",
 55 |     "# Numpy Arrays\n",
 56 |     "\n",
 57 |     "NumPy arrays are the main way we will use Numpy throughout the course. Numpy arrays essentially come in two flavors: vectors and matrices. Vectors are strictly 1-d arrays and matrices are 2-d (but you should note a matrix can still have only one row or one column).\n",
 58 |     "\n",
 59 |     "Let's begin our introduction by exploring how to create NumPy arrays.\n",
 60 |     "\n",
 61 |     "## Creating NumPy Arrays\n",
 62 |     "\n",
 63 |     "### From a Python List\n",
 64 |     "\n",
 65 |     "We can create an array by directly converting a list or list of lists:"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 19,
 71 |    "metadata": {},
 72 |    "outputs": [
 73 |     {
 74 |      "data": {
 75 |       "text/plain": [
 76 |        "[1, 2, 3]"
 77 |       ]
 78 |      },
 79 |      "execution_count": 19,
 80 |      "metadata": {},
 81 |      "output_type": "execute_result"
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "my_list = [1,2,3]\n",
 86 |     "my_list"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 16,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/plain": [
 97 |        "array([1, 2, 3])"
 98 |       ]
 99 |      },
100 |      "execution_count": 16,
101 |      "metadata": {},
102 |      "output_type": "execute_result"
103 |     }
104 |    ],
105 |    "source": [
106 |     "np.array(my_list)"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 20,
112 |    "metadata": {},
113 |    "outputs": [
114 |     {
115 |      "data": {
116 |       "text/plain": [
117 |        "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]"
118 |       ]
119 |      },
120 |      "execution_count": 20,
121 |      "metadata": {},
122 |      "output_type": "execute_result"
123 |     }
124 |    ],
125 |    "source": ["my_matrix = [[1,2,3],[4,5,6],[7,8,9]]\n", "my_matrix"]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 21,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/plain": [
135 |        "array([[1, 2, 3],\n",
136 |        "       [4, 5, 6],\n",
137 |        "       [7, 8, 9]])"
138 |       ]
139 |      },
140 |      "execution_count": 21,
141 |      "metadata": {},
142 |      "output_type": "execute_result"
143 |     }
144 |    ],
145 |    "source": [
146 |     "np.array(my_matrix)"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "## Built-in Methods\n",
154 |     "\n",
155 |     "There are lots of built-in ways to generate Arrays"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "### arange\n",
163 |     "\n",
164 |     "Return evenly spaced values within a given interval."
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 22,
170 |    "metadata": {},
171 |    "outputs": [
172 |     {
173 |      "data": {
174 |       "text/plain": [
175 |        "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
176 |       ]
177 |      },
178 |      "execution_count": 22,
179 |      "metadata": {},
180 |      "output_type": "execute_result"
181 |     }
182 |    ],
183 |    "source": [
184 |     "np.arange(0,10)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 23,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "data": {
194 |       "text/plain": [
195 |        "array([ 0,  2,  4,  6,  8, 10])"
196 |       ]
197 |      },
198 |      "execution_count": 23,
199 |      "metadata": {},
200 |      "output_type": "execute_result"
201 |     }
202 |    ],
203 |    "source": [
204 |     "np.arange(0,11,2)"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {},
210 |    "source": [
211 |     "### zeros and ones\n",
212 |     "\n",
213 |     "Generate arrays of zeros or ones"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 24,
219 |    "metadata": {},
220 |    "outputs": [
221 |     {
222 |      "data": {
223 |       "text/plain": [
224 |        "array([ 0.,  0.,  0.])"
225 |       ]
226 |      },
227 |      "execution_count": 24,
228 |      "metadata": {},
229 |      "output_type": "execute_result"
230 |     }
231 |    ],
232 |    "source": [
233 |     "np.zeros(3)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 26,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "data": {
243 |       "text/plain": [
244 |        "array([[ 0.,  0.,  0.,  0.,  0.],\n",
245 |        "       [ 0.,  0.,  0.,  0.,  0.],\n",
246 |        "       [ 0.,  0.,  0.,  0.,  0.],\n",
247 |        "       [ 0.,  0.,  0.,  0.,  0.],\n",
248 |        "       [ 0.,  0.,  0.,  0.,  0.]])"
249 |       ]
250 |      },
251 |      "execution_count": 26,
252 |      "metadata": {},
253 |      "output_type": "execute_result"
254 |     }
255 |    ],
256 |    "source": [
257 |     "np.zeros((5,5))"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 27,
263 |    "metadata": {},
264 |    "outputs": [
265 |     {
266 |      "data": {
267 |       "text/plain": [
268 |        "array([ 1.,  1.,  1.])"
269 |       ]
270 |      },
271 |      "execution_count": 27,
272 |      "metadata": {},
273 |      "output_type": "execute_result"
274 |     }
275 |    ],
276 |    "source": [
277 |     "np.ones(3)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 28,
283 |    "metadata": {},
284 |    "outputs": [
285 |     {
286 |      "data": {
287 |       "text/plain": [
288 |        "array([[ 1.,  1.,  1.],\n",
289 |        "       [ 1.,  1.,  1.],\n",
290 |        "       [ 1.,  1.,  1.]])"
291 |       ]
292 |      },
293 |      "execution_count": 28,
294 |      "metadata": {},
295 |      "output_type": "execute_result"
296 |     }
297 |    ],
298 |    "source": [
299 |     "np.ones((3,3))"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "markdown",
304 |    "metadata": {},
305 |    "source": [
306 |     "### linspace\n",
307 |     "Return evenly spaced numbers over a specified interval."
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": 29,
313 |    "metadata": {},
314 |    "outputs": [
315 |     {
316 |      "data": {
317 |       "text/plain": [
318 |        "array([  0.,   5.,  10.])"
319 |       ]
320 |      },
321 |      "execution_count": 29,
322 |      "metadata": {},
323 |      "output_type": "execute_result"
324 |     }
325 |    ],
326 |    "source": [
327 |     "np.linspace(0,10,3)"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": 31,
333 |    "metadata": {},
334 |    "outputs": [
335 |     {
336 |      "data": {
337 |       "text/plain": [
338 |        "array([  0.        ,   0.20408163,   0.40816327,   0.6122449 ,\n",
339 |        "         0.81632653,   1.02040816,   1.2244898 ,   1.42857143,\n",
340 |        "         1.63265306,   1.83673469,   2.04081633,   2.24489796,\n",
341 |        "         2.44897959,   2.65306122,   2.85714286,   3.06122449,\n",
342 |        "         3.26530612,   3.46938776,   3.67346939,   3.87755102,\n",
343 |        "         4.08163265,   4.28571429,   4.48979592,   4.69387755,\n",
344 |        "         4.89795918,   5.10204082,   5.30612245,   5.51020408,\n",
345 |        "         5.71428571,   5.91836735,   6.12244898,   6.32653061,\n",
346 |        "         6.53061224,   6.73469388,   6.93877551,   7.14285714,\n",
347 |        "         7.34693878,   7.55102041,   7.75510204,   7.95918367,\n",
348 |        "         8.16326531,   8.36734694,   8.57142857,   8.7755102 ,\n",
349 |        "         8.97959184,   9.18367347,   9.3877551 ,   9.59183673,\n",
350 |        "         9.79591837,  10.        ])"
351 |       ]
352 |      },
353 |      "execution_count": 31,
354 |      "metadata": {},
355 |      "output_type": "execute_result"
356 |     }
357 |    ],
358 |    "source": [
359 |     "np.linspace(0,10,50)"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "markdown",
364 |    "metadata": {},
365 |    "source": [
366 |     "## eye\n",
367 |     "\n",
368 |     "Creates an identity matrix"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 37,
374 |    "metadata": {},
375 |    "outputs": [
376 |     {
377 |      "data": {
378 |       "text/plain": [
379 |        "array([[ 1.,  0.,  0.,  0.],\n",
380 |        "       [ 0.,  1.,  0.,  0.],\n",
381 |        "       [ 0.,  0.,  1.,  0.],\n",
382 |        "       [ 0.,  0.,  0.,  1.]])"
383 |       ]
384 |      },
385 |      "execution_count": 37,
386 |      "metadata": {},
387 |      "output_type": "execute_result"
388 |     }
389 |    ],
390 |    "source": [
391 |     "np.eye(4)"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "markdown",
396 |    "metadata": {},
397 |    "source": [
398 |     "## Random \n",
399 |     "\n",
400 |     "Numpy also has lots of ways to create random number arrays:\n",
401 |     "\n",
402 |     "### rand\n",
403 |     "Create an array of the given shape and populate it with\n",
404 |     "random samples from a uniform distribution\n",
405 |     "over ``[0, 1)``."
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": 47,
411 |    "metadata": {},
412 |    "outputs": [
413 |     {
414 |      "data": {
415 |       "text/plain": [
416 |        "array([ 0.11570539,  0.35279769])"
417 |       ]
418 |      },
419 |      "execution_count": 47,
420 |      "metadata": {},
421 |      "output_type": "execute_result"
422 |     }
423 |    ],
424 |    "source": [
425 |     "np.random.rand(2)"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": 46,
431 |    "metadata": {},
432 |    "outputs": [
433 |     {
434 |      "data": {
435 |       "text/plain": [
436 |        "array([[ 0.66660768,  0.87589888,  0.12421056,  0.65074126,  0.60260888],\n",
437 |        "       [ 0.70027668,  0.85572434,  0.8464595 ,  0.2735416 ,  0.10955384],\n",
438 |        "       [ 0.0670566 ,  0.83267738,  0.9082729 ,  0.58249129,  0.12305748],\n",
439 |        "       [ 0.27948423,  0.66422017,  0.95639833,  0.34238788,  0.9578872 ],\n",
440 |        "       [ 0.72155386,  0.3035422 ,  0.85249683,  0.30414307,  0.79718816]])"
441 |       ]
442 |      },
443 |      "execution_count": 46,
444 |      "metadata": {},
445 |      "output_type": "execute_result"
446 |     }
447 |    ],
448 |    "source": [
449 |     "np.random.rand(5,5)"
450 |    ]
451 |   },
452 |   {
453 |    "cell_type": "markdown",
454 |    "metadata": {},
455 |    "source": [
456 |     "### randn\n",
457 |     "\n",
458 |     "Return a sample (or samples) from the \"standard normal\" distribution, unlike rand, which samples from a uniform distribution:"
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": 48,
464 |    "metadata": {},
465 |    "outputs": [
466 |     {
467 |      "data": {
468 |       "text/plain": [
469 |        "array([-0.27954018,  0.90078368])"
470 |       ]
471 |      },
472 |      "execution_count": 48,
473 |      "metadata": {},
474 |      "output_type": "execute_result"
475 |     }
476 |    ],
477 |    "source": [
478 |     "np.random.randn(2)"
479 |    ]
480 |   },
481 |   {
482 |    "cell_type": "code",
483 |    "execution_count": 45,
484 |    "metadata": {},
485 |    "outputs": [
486 |     {
487 |      "data": {
488 |       "text/plain": [
489 |        "array([[ 0.70154515,  0.22441999,  1.33563186,  0.82872577, -0.28247509],\n",
490 |        "       [ 0.64489788,  0.61815094, -0.81693168, -0.30102424, -0.29030574],\n",
491 |        "       [ 0.8695976 ,  0.413755  ,  2.20047208,  0.17955692, -0.82159344],\n",
492 |        "       [ 0.59264235,  1.29869894, -1.18870241,  0.11590888, -0.09181687],\n",
493 |        "       [-0.96924265, -1.62888685, -2.05787102, -0.29705576,  0.68915542]])"
494 |       ]
495 |      },
496 |      "execution_count": 45,
497 |      "metadata": {},
498 |      "output_type": "execute_result"
499 |     }
500 |    ],
501 |    "source": [
502 |     "np.random.randn(5,5)"
503 |    ]
504 |   },
505 |   {
506 |    "cell_type": "markdown",
507 |    "metadata": {},
508 |    "source": [
509 |     "### randint\n",
510 |     "Return random integers from `low` (inclusive) to `high` (exclusive)."
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": 50,
516 |    "metadata": {},
517 |    "outputs": [
518 |     {
519 |      "data": {
520 |       "text/plain": [
521 |        "44"
522 |       ]
523 |      },
524 |      "execution_count": 50,
525 |      "metadata": {},
526 |      "output_type": "execute_result"
527 |     }
528 |    ],
529 |    "source": [
530 |     "np.random.randint(1,100)"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "code",
535 |    "execution_count": 51,
536 |    "metadata": {},
537 |    "outputs": [
538 |     {
539 |      "data": {
540 |       "text/plain": [
541 |        "array([13, 64, 27, 63, 46, 68, 92, 10, 58, 24])"
542 |       ]
543 |      },
544 |      "execution_count": 51,
545 |      "metadata": {},
546 |      "output_type": "execute_result"
547 |     }
548 |    ],
549 |    "source": [
550 |     "np.random.randint(1,100,10)"
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "markdown",
555 |    "metadata": {},
556 |    "source": [
557 |     "## Array Attributes and Methods\n",
558 |     "\n",
559 |     "Let's discuss some useful attributes and methods of an array:"
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": 55,
565 |    "metadata": {
566 |     "collapsed": true
567 |    },
568 |    "outputs": [],
569 |    "source": [
570 |     "arr = np.arange(25)\n",
571 |     "ranarr = np.random.randint(0,50,10)"
572 |    ]
573 |   },
574 |   {
575 |    "cell_type": "code",
576 |    "execution_count": 56,
577 |    "metadata": {},
578 |    "outputs": [
579 |     {
580 |      "data": {
581 |       "text/plain": [
582 |        "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
583 |        "       17, 18, 19, 20, 21, 22, 23, 24])"
584 |       ]
585 |      },
586 |      "execution_count": 56,
587 |      "metadata": {},
588 |      "output_type": "execute_result"
589 |     }
590 |    ],
591 |    "source": [
592 |     "arr"
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": 57,
598 |    "metadata": {},
599 |    "outputs": [
600 |     {
601 |      "data": {
602 |       "text/plain": [
603 |        "array([10, 12, 41, 17, 49,  2, 46,  3, 19, 39])"
604 |       ]
605 |      },
606 |      "execution_count": 57,
607 |      "metadata": {},
608 |      "output_type": "execute_result"
609 |     }
610 |    ],
611 |    "source": [
612 |     "ranarr"
613 |    ]
614 |   },
615 |   {
616 |    "cell_type": "markdown",
617 |    "metadata": {},
618 |    "source": [
619 |     "## Reshape\n",
620 |     "Returns an array containing the same data with a new shape."
621 |    ]
622 |   },
623 |   {
624 |    "cell_type": "code",
625 |    "execution_count": 54,
626 |    "metadata": {},
627 |    "outputs": [
628 |     {
629 |      "data": {
630 |       "text/plain": [
631 |        "array([[ 0,  1,  2,  3,  4],\n",
632 |        "       [ 5,  6,  7,  8,  9],\n",
633 |        "       [10, 11, 12, 13, 14],\n",
634 |        "       [15, 16, 17, 18, 19],\n",
635 |        "       [20, 21, 22, 23, 24]])"
636 |       ]
637 |      },
638 |      "execution_count": 54,
639 |      "metadata": {},
640 |      "output_type": "execute_result"
641 |     }
642 |    ],
643 |    "source": [
644 |     "arr.reshape(5,5)"
645 |    ]
646 |   },
647 |   {
648 |    "cell_type": "markdown",
649 |    "metadata": {},
650 |    "source": [
651 |     "### max,min,argmax,argmin\n",
652 |     "\n",
653 |     "These are useful methods for finding max or min values, or for finding their index locations using argmin or argmax."
654 |    ]
655 |   },
656 |   {
657 |    "cell_type": "code",
658 |    "execution_count": 64,
659 |    "metadata": {},
660 |    "outputs": [
661 |     {
662 |      "data": {
663 |       "text/plain": [
664 |        "array([10, 12, 41, 17, 49,  2, 46,  3, 19, 39])"
665 |       ]
666 |      },
667 |      "execution_count": 64,
668 |      "metadata": {},
669 |      "output_type": "execute_result"
670 |     }
671 |    ],
672 |    "source": [
673 |     "ranarr"
674 |    ]
675 |   },
676 |   {
677 |    "cell_type": "code",
678 |    "execution_count": 61,
679 |    "metadata": {},
680 |    "outputs": [
681 |     {
682 |      "data": {
683 |       "text/plain": [
684 |        "49"
685 |       ]
686 |      },
687 |      "execution_count": 61,
688 |      "metadata": {},
689 |      "output_type": "execute_result"
690 |     }
691 |    ],
692 |    "source": [
693 |     "ranarr.max()"
694 |    ]
695 |   },
696 |   {
697 |    "cell_type": "code",
698 |    "execution_count": 62,
699 |    "metadata": {},
700 |    "outputs": [
701 |     {
702 |      "data": {
703 |       "text/plain": [
704 |        "4"
705 |       ]
706 |      },
707 |      "execution_count": 62,
708 |      "metadata": {},
709 |      "output_type": "execute_result"
710 |     }
711 |    ],
712 |    "source": [
713 |     "ranarr.argmax()"
714 |    ]
715 |   },
716 |   {
717 |    "cell_type": "code",
718 |    "execution_count": 63,
719 |    "metadata": {},
720 |    "outputs": [
721 |     {
722 |      "data": {
723 |       "text/plain": [
724 |        "2"
725 |       ]
726 |      },
727 |      "execution_count": 63,
728 |      "metadata": {},
729 |      "output_type": "execute_result"
730 |     }
731 |    ],
732 |    "source": [
733 |     "ranarr.min()"
734 |    ]
735 |   },
736 |   {
737 |    "cell_type": "code",
738 |    "execution_count": 60,
739 |    "metadata": {},
740 |    "outputs": [
741 |     {
742 |      "data": {
743 |       "text/plain": [
744 |        "5"
745 |       ]
746 |      },
747 |      "execution_count": 60,
748 |      "metadata": {},
749 |      "output_type": "execute_result"
750 |     }
751 |    ],
752 |    "source": [
753 |     "ranarr.argmin()"
754 |    ]
755 |   },
756 |   {
757 |    "cell_type": "markdown",
758 |    "metadata": {},
759 |    "source": [
760 |     "## Shape\n",
761 |     "\n",
762 |     "Shape is an attribute that arrays have (not a method):"
763 |    ]
764 |   },
765 |   {
766 |    "cell_type": "code",
767 |    "execution_count": 65,
768 |    "metadata": {},
769 |    "outputs": [
770 |     {
771 |      "data": {
772 |       "text/plain": [
773 |        "(25,)"
774 |       ]
775 |      },
776 |      "execution_count": 65,
777 |      "metadata": {},
778 |      "output_type": "execute_result"
779 |     }
780 |    ],
781 |    "source": [
782 |     "# Vector\n",
783 |     "arr.shape"
784 |    ]
785 |   },
786 |   {
787 |    "cell_type": "code",
788 |    "execution_count": 66,
789 |    "metadata": {},
790 |    "outputs": [
791 |     {
792 |      "data": {
793 |       "text/plain": [
794 |        "array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
795 |        "        17, 18, 19, 20, 21, 22, 23, 24]])"
796 |       ]
797 |      },
798 |      "execution_count": 66,
799 |      "metadata": {},
800 |      "output_type": "execute_result"
801 |     }
802 |    ],
803 |    "source": [
804 |     "# Notice the two sets of brackets\n",
805 |     "arr.reshape(1,25)"
806 |    ]
807 |   },
808 |   {
809 |    "cell_type": "code",
810 |    "execution_count": 69,
811 |    "metadata": {},
812 |    "outputs": [
813 |     {
814 |      "data": {
815 |       "text/plain": [
816 |        "(1, 25)"
817 |       ]
818 |      },
819 |      "execution_count": 69,
820 |      "metadata": {},
821 |      "output_type": "execute_result"
822 |     }
823 |    ],
824 |    "source": [
825 |     "arr.reshape(1,25).shape"
826 |    ]
827 |   },
828 |   {
829 |    "cell_type": "code",
830 |    "execution_count": 70,
831 |    "metadata": {},
832 |    "outputs": [
833 |     {
834 |      "data": {
835 |       "text/plain": [
836 |        "array([[ 0],\n",
837 |        "       [ 1],\n",
838 |        "       [ 2],\n",
839 |        "       [ 3],\n",
840 |        "       [ 4],\n",
841 |        "       [ 5],\n",
842 |        "       [ 6],\n",
843 |        "       [ 7],\n",
844 |        "       [ 8],\n",
845 |        "       [ 9],\n",
846 |        "       [10],\n",
847 |        "       [11],\n",
848 |        "       [12],\n",
849 |        "       [13],\n",
850 |        "       [14],\n",
851 |        "       [15],\n",
852 |        "       [16],\n",
853 |        "       [17],\n",
854 |        "       [18],\n",
855 |        "       [19],\n",
856 |        "       [20],\n",
857 |        "       [21],\n",
858 |        "       [22],\n",
859 |        "       [23],\n",
860 |        "       [24]])"
861 |       ]
862 |      },
863 |      "execution_count": 70,
864 |      "metadata": {},
865 |      "output_type": "execute_result"
866 |     }
867 |    ],
868 |    "source": [
869 |     "arr.reshape(25,1)"
870 |    ]
871 |   },
872 |   {
873 |    "cell_type": "code",
874 |    "execution_count": 76,
875 |    "metadata": {},
876 |    "outputs": [
877 |     {
878 |      "data": {
879 |       "text/plain": [
880 |        "(25, 1)"
881 |       ]
882 |      },
883 |      "execution_count": 76,
884 |      "metadata": {},
885 |      "output_type": "execute_result"
886 |     }
887 |    ],
888 |    "source": [
889 |     "arr.reshape(25,1).shape"
890 |    ]
891 |   },
892 |   {
893 |    "cell_type": "markdown",
894 |    "metadata": {},
895 |    "source": [
896 |     "### dtype\n",
897 |     "\n",
898 |     "You can also grab the data type of the object in the array:"
899 |    ]
900 |   },
901 |   {
902 |    "cell_type": "code",
903 |    "execution_count": 75,
904 |    "metadata": {},
905 |    "outputs": [
906 |     {
907 |      "data": {
908 |       "text/plain": [
909 |        "dtype('int64')"
910 |       ]
911 |      },
912 |      "execution_count": 75,
913 |      "metadata": {},
914 |      "output_type": "execute_result"
915 |     }
916 |    ],
917 |    "source": [
918 |     "arr.dtype"
919 |    ]
920 |   },
921 |   {
922 |    "cell_type": "markdown",
923 |    "metadata": {},
924 |    "source": [
925 |     "# Great Job!"
926 |    ]
927 |   }
928 |  ],
929 |  "metadata": {
930 |   "kernelspec": {
931 |    "display_name": "Python 3",
932 |    "language": "python",
933 |    "name": "python3"
934 |   },
935 |   "language_info": {
936 |    "codemirror_mode": {
937 |     "name": "ipython",
938 |     "version": 3
939 |    },
940 |    "file_extension": ".py",
941 |    "mimetype": "text/x-python",
942 |    "name": "python",
943 |    "nbconvert_exporter": "python",
944 |    "pygments_lexer": "ipython3",
945 |    "version": "3.7.6"
946 |   }
947 |  },
948 |  "nbformat": 4,
949 |  "nbformat_minor": 1
950 | }
951 | 


--------------------------------------------------------------------------------
/03-Python-for-Data-Analysis-Pandas/05-Groupby.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# GroupBy"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 31,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import pandas as pd\n",
 19 |     "# Create dataframe\n",
 20 |     "data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],\n",
 21 |     "       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],\n",
 22 |     "       'Sales':[200,120,340,124,243,350]}"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 32,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "df = pd.DataFrame(data)"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 33,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "data": {
 41 |       "text/html": [
 42 |        "<div>\n",
 43 |        "<table border=\"1\" class=\"dataframe\">\n",
 44 |        "  <thead>\n",
 45 |        "    <tr style=\"text-align: right;\">\n",
 46 |        "      <th></th>\n",
 47 |        "      <th>Company</th>\n",
 48 |        "      <th>Person</th>\n",
 49 |        "      <th>Sales</th>\n",
 50 |        "    </tr>\n",
 51 |        "  </thead>\n",
 52 |        "  <tbody>\n",
 53 |        "    <tr>\n",
 54 |        "      <th>0</th>\n",
 55 |        "      <td>GOOG</td>\n",
 56 |        "      <td>Sam</td>\n",
 57 |        "      <td>200</td>\n",
 58 |        "    </tr>\n",
 59 |        "    <tr>\n",
 60 |        "      <th>1</th>\n",
 61 |        "      <td>GOOG</td>\n",
 62 |        "      <td>Charlie</td>\n",
 63 |        "      <td>120</td>\n",
 64 |        "    </tr>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>2</th>\n",
 67 |        "      <td>MSFT</td>\n",
 68 |        "      <td>Amy</td>\n",
 69 |        "      <td>340</td>\n",
 70 |        "    </tr>\n",
 71 |        "    <tr>\n",
 72 |        "      <th>3</th>\n",
 73 |        "      <td>MSFT</td>\n",
 74 |        "      <td>Vanessa</td>\n",
 75 |        "      <td>124</td>\n",
 76 |        "    </tr>\n",
 77 |        "    <tr>\n",
 78 |        "      <th>4</th>\n",
 79 |        "      <td>FB</td>\n",
 80 |        "      <td>Carl</td>\n",
 81 |        "      <td>243</td>\n",
 82 |        "    </tr>\n",
 83 |        "    <tr>\n",
 84 |        "      <th>5</th>\n",
 85 |        "      <td>FB</td>\n",
 86 |        "      <td>Sarah</td>\n",
 87 |        "      <td>350</td>\n",
 88 |        "    </tr>\n",
 89 |        "  </tbody>\n",
 90 |        "</table>\n",
 91 |        "</div>"
 92 |       ],
 93 |       "text/plain": [
 94 |        "  Company   Person  Sales\n",
 95 |        "0    GOOG      Sam    200\n",
 96 |        "1    GOOG  Charlie    120\n",
 97 |        "2    MSFT      Amy    340\n",
 98 |        "3    MSFT  Vanessa    124\n",
 99 |        "4      FB     Carl    243\n",
100 |        "5      FB    Sarah    350"
101 |       ]
102 |      },
103 |      "execution_count": 33,
104 |      "metadata": {},
105 |      "output_type": "execute_result"
106 |     }
107 |    ],
108 |    "source": [
109 |     "df"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "**Now you can use the .groupby() method to group rows together based on a column name. For instance, let's group by Company. This will create a DataFrameGroupBy object:**"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 34,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "<pandas.core.groupby.DataFrameGroupBy object at 0x113014128>"
128 |       ]
129 |      },
130 |      "execution_count": 34,
131 |      "metadata": {},
132 |      "output_type": "execute_result"
133 |     }
134 |    ],
135 |    "source": [
136 |     "df.groupby('Company')"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "You can save this object as a new variable:"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 35,
149 |    "metadata": {
150 |     "collapsed": true
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "by_comp = df.groupby(\"Company\")"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "Then call aggregate methods on the object:"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 36,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "data": {
171 |       "text/html": [
172 |        "<div>\n",
173 |        "<table border=\"1\" class=\"dataframe\">\n",
174 |        "  <thead>\n",
175 |        "    <tr style=\"text-align: right;\">\n",
176 |        "      <th></th>\n",
177 |        "      <th>Sales</th>\n",
178 |        "    </tr>\n",
179 |        "    <tr>\n",
180 |        "      <th>Company</th>\n",
181 |        "      <th></th>\n",
182 |        "    </tr>\n",
183 |        "  </thead>\n",
184 |        "  <tbody>\n",
185 |        "    <tr>\n",
186 |        "      <th>FB</th>\n",
187 |        "      <td>296.5</td>\n",
188 |        "    </tr>\n",
189 |        "    <tr>\n",
190 |        "      <th>GOOG</th>\n",
191 |        "      <td>160.0</td>\n",
192 |        "    </tr>\n",
193 |        "    <tr>\n",
194 |        "      <th>MSFT</th>\n",
195 |        "      <td>232.0</td>\n",
196 |        "    </tr>\n",
197 |        "  </tbody>\n",
198 |        "</table>\n",
199 |        "</div>"
200 |       ],
201 |       "text/plain": [
202 |        "         Sales\n",
203 |        "Company       \n",
204 |        "FB       296.5\n",
205 |        "GOOG     160.0\n",
206 |        "MSFT     232.0"
207 |       ]
208 |      },
209 |      "execution_count": 36,
210 |      "metadata": {},
211 |      "output_type": "execute_result"
212 |     }
213 |    ],
214 |    "source": [
215 |     "by_comp.mean()"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 37,
221 |    "metadata": {},
222 |    "outputs": [
223 |     {
224 |      "data": {
225 |       "text/html": [
226 |        "<div>\n",
227 |        "<table border=\"1\" class=\"dataframe\">\n",
228 |        "  <thead>\n",
229 |        "    <tr style=\"text-align: right;\">\n",
230 |        "      <th></th>\n",
231 |        "      <th>Sales</th>\n",
232 |        "    </tr>\n",
233 |        "    <tr>\n",
234 |        "      <th>Company</th>\n",
235 |        "      <th></th>\n",
236 |        "    </tr>\n",
237 |        "  </thead>\n",
238 |        "  <tbody>\n",
239 |        "    <tr>\n",
240 |        "      <th>FB</th>\n",
241 |        "      <td>296.5</td>\n",
242 |        "    </tr>\n",
243 |        "    <tr>\n",
244 |        "      <th>GOOG</th>\n",
245 |        "      <td>160.0</td>\n",
246 |        "    </tr>\n",
247 |        "    <tr>\n",
248 |        "      <th>MSFT</th>\n",
249 |        "      <td>232.0</td>\n",
250 |        "    </tr>\n",
251 |        "  </tbody>\n",
252 |        "</table>\n",
253 |        "</div>"
254 |       ],
255 |       "text/plain": [
256 |        "         Sales\n",
257 |        "Company       \n",
258 |        "FB       296.5\n",
259 |        "GOOG     160.0\n",
260 |        "MSFT     232.0"
261 |       ]
262 |      },
263 |      "execution_count": 37,
264 |      "metadata": {},
265 |      "output_type": "execute_result"
266 |     }
267 |    ],
268 |    "source": [
269 |     "df.groupby('Company').mean()"
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "markdown",
274 |    "metadata": {},
275 |    "source": [
276 |     "More examples of aggregate methods:"
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": 38,
282 |    "metadata": {},
283 |    "outputs": [
284 |     {
285 |      "data": {
286 |       "text/html": [
287 |        "<div>\n",
288 |        "<table border=\"1\" class=\"dataframe\">\n",
289 |        "  <thead>\n",
290 |        "    <tr style=\"text-align: right;\">\n",
291 |        "      <th></th>\n",
292 |        "      <th>Sales</th>\n",
293 |        "    </tr>\n",
294 |        "    <tr>\n",
295 |        "      <th>Company</th>\n",
296 |        "      <th></th>\n",
297 |        "    </tr>\n",
298 |        "  </thead>\n",
299 |        "  <tbody>\n",
300 |        "    <tr>\n",
301 |        "      <th>FB</th>\n",
302 |        "      <td>75.660426</td>\n",
303 |        "    </tr>\n",
304 |        "    <tr>\n",
305 |        "      <th>GOOG</th>\n",
306 |        "      <td>56.568542</td>\n",
307 |        "    </tr>\n",
308 |        "    <tr>\n",
309 |        "      <th>MSFT</th>\n",
310 |        "      <td>152.735065</td>\n",
311 |        "    </tr>\n",
312 |        "  </tbody>\n",
313 |        "</table>\n",
314 |        "</div>"
315 |       ],
316 |       "text/plain": [
317 |        "              Sales\n",
318 |        "Company            \n",
319 |        "FB        75.660426\n",
320 |        "GOOG      56.568542\n",
321 |        "MSFT     152.735065"
322 |       ]
323 |      },
324 |      "execution_count": 38,
325 |      "metadata": {},
326 |      "output_type": "execute_result"
327 |     }
328 |    ],
329 |    "source": [
330 |     "by_comp.std()"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 39,
336 |    "metadata": {},
337 |    "outputs": [
338 |     {
339 |      "data": {
340 |       "text/html": [
341 |        "<div>\n",
342 |        "<table border=\"1\" class=\"dataframe\">\n",
343 |        "  <thead>\n",
344 |        "    <tr style=\"text-align: right;\">\n",
345 |        "      <th></th>\n",
346 |        "      <th>Person</th>\n",
347 |        "      <th>Sales</th>\n",
348 |        "    </tr>\n",
349 |        "    <tr>\n",
350 |        "      <th>Company</th>\n",
351 |        "      <th></th>\n",
352 |        "      <th></th>\n",
353 |        "    </tr>\n",
354 |        "  </thead>\n",
355 |        "  <tbody>\n",
356 |        "    <tr>\n",
357 |        "      <th>FB</th>\n",
358 |        "      <td>Carl</td>\n",
359 |        "      <td>243</td>\n",
360 |        "    </tr>\n",
361 |        "    <tr>\n",
362 |        "      <th>GOOG</th>\n",
363 |        "      <td>Charlie</td>\n",
364 |        "      <td>120</td>\n",
365 |        "    </tr>\n",
366 |        "    <tr>\n",
367 |        "      <th>MSFT</th>\n",
368 |        "      <td>Amy</td>\n",
369 |        "      <td>124</td>\n",
370 |        "    </tr>\n",
371 |        "  </tbody>\n",
372 |        "</table>\n",
373 |        "</div>"
374 |       ],
375 |       "text/plain": [
376 |        "          Person  Sales\n",
377 |        "Company                \n",
378 |        "FB          Carl    243\n",
379 |        "GOOG     Charlie    120\n",
380 |        "MSFT         Amy    124"
381 |       ]
382 |      },
383 |      "execution_count": 39,
384 |      "metadata": {},
385 |      "output_type": "execute_result"
386 |     }
387 |    ],
388 |    "source": [
389 |     "by_comp.min()"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": 40,
395 |    "metadata": {},
396 |    "outputs": [
397 |     {
398 |      "data": {
399 |       "text/html": [
400 |        "<div>\n",
401 |        "<table border=\"1\" class=\"dataframe\">\n",
402 |        "  <thead>\n",
403 |        "    <tr style=\"text-align: right;\">\n",
404 |        "      <th></th>\n",
405 |        "      <th>Person</th>\n",
406 |        "      <th>Sales</th>\n",
407 |        "    </tr>\n",
408 |        "    <tr>\n",
409 |        "      <th>Company</th>\n",
410 |        "      <th></th>\n",
411 |        "      <th></th>\n",
412 |        "    </tr>\n",
413 |        "  </thead>\n",
414 |        "  <tbody>\n",
415 |        "    <tr>\n",
416 |        "      <th>FB</th>\n",
417 |        "      <td>Sarah</td>\n",
418 |        "      <td>350</td>\n",
419 |        "    </tr>\n",
420 |        "    <tr>\n",
421 |        "      <th>GOOG</th>\n",
422 |        "      <td>Sam</td>\n",
423 |        "      <td>200</td>\n",
424 |        "    </tr>\n",
425 |        "    <tr>\n",
426 |        "      <th>MSFT</th>\n",
427 |        "      <td>Vanessa</td>\n",
428 |        "      <td>340</td>\n",
429 |        "    </tr>\n",
430 |        "  </tbody>\n",
431 |        "</table>\n",
432 |        "</div>"
433 |       ],
434 |       "text/plain": [
435 |        "          Person  Sales\n",
436 |        "Company                \n",
437 |        "FB         Sarah    350\n",
438 |        "GOOG         Sam    200\n",
439 |        "MSFT     Vanessa    340"
440 |       ]
441 |      },
442 |      "execution_count": 40,
443 |      "metadata": {},
444 |      "output_type": "execute_result"
445 |     }
446 |    ],
447 |    "source": [
448 |     "by_comp.max()"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": 41,
454 |    "metadata": {},
455 |    "outputs": [
456 |     {
457 |      "data": {
458 |       "text/html": [
459 |        "<div>\n",
460 |        "<table border=\"1\" class=\"dataframe\">\n",
461 |        "  <thead>\n",
462 |        "    <tr style=\"text-align: right;\">\n",
463 |        "      <th></th>\n",
464 |        "      <th>Person</th>\n",
465 |        "      <th>Sales</th>\n",
466 |        "    </tr>\n",
467 |        "    <tr>\n",
468 |        "      <th>Company</th>\n",
469 |        "      <th></th>\n",
470 |        "      <th></th>\n",
471 |        "    </tr>\n",
472 |        "  </thead>\n",
473 |        "  <tbody>\n",
474 |        "    <tr>\n",
475 |        "      <th>FB</th>\n",
476 |        "      <td>2</td>\n",
477 |        "      <td>2</td>\n",
478 |        "    </tr>\n",
479 |        "    <tr>\n",
480 |        "      <th>GOOG</th>\n",
481 |        "      <td>2</td>\n",
482 |        "      <td>2</td>\n",
483 |        "    </tr>\n",
484 |        "    <tr>\n",
485 |        "      <th>MSFT</th>\n",
486 |        "      <td>2</td>\n",
487 |        "      <td>2</td>\n",
488 |        "    </tr>\n",
489 |        "  </tbody>\n",
490 |        "</table>\n",
491 |        "</div>"
492 |       ],
493 |       "text/plain": [
494 |        "         Person  Sales\n",
495 |        "Company               \n",
496 |        "FB            2      2\n",
497 |        "GOOG          2      2\n",
498 |        "MSFT          2      2"
499 |       ]
500 |      },
501 |      "execution_count": 41,
502 |      "metadata": {},
503 |      "output_type": "execute_result"
504 |     }
505 |    ],
506 |    "source": [
507 |     "by_comp.count()"
508 |    ]
509 |   },
510 |   {
511 |    "cell_type": "code",
512 |    "execution_count": 42,
513 |    "metadata": {},
514 |    "outputs": [
515 |     {
516 |      "data": {
517 |       "text/html": [
518 |        "<div>\n",
519 |        "<table border=\"1\" class=\"dataframe\">\n",
520 |        "  <thead>\n",
521 |        "    <tr style=\"text-align: right;\">\n",
522 |        "      <th></th>\n",
523 |        "      <th></th>\n",
524 |        "      <th>Sales</th>\n",
525 |        "    </tr>\n",
526 |        "    <tr>\n",
527 |        "      <th>Company</th>\n",
528 |        "      <th></th>\n",
529 |        "      <th></th>\n",
530 |        "    </tr>\n",
531 |        "  </thead>\n",
532 |        "  <tbody>\n",
533 |        "    <tr>\n",
534 |        "      <th rowspan=\"8\" valign=\"top\">FB</th>\n",
535 |        "      <th>count</th>\n",
536 |        "      <td>2.000000</td>\n",
537 |        "    </tr>\n",
538 |        "    <tr>\n",
539 |        "      <th>mean</th>\n",
540 |        "      <td>296.500000</td>\n",
541 |        "    </tr>\n",
542 |        "    <tr>\n",
543 |        "      <th>std</th>\n",
544 |        "      <td>75.660426</td>\n",
545 |        "    </tr>\n",
546 |        "    <tr>\n",
547 |        "      <th>min</th>\n",
548 |        "      <td>243.000000</td>\n",
549 |        "    </tr>\n",
550 |        "    <tr>\n",
551 |        "      <th>25%</th>\n",
552 |        "      <td>269.750000</td>\n",
553 |        "    </tr>\n",
554 |        "    <tr>\n",
555 |        "      <th>50%</th>\n",
556 |        "      <td>296.500000</td>\n",
557 |        "    </tr>\n",
558 |        "    <tr>\n",
559 |        "      <th>75%</th>\n",
560 |        "      <td>323.250000</td>\n",
561 |        "    </tr>\n",
562 |        "    <tr>\n",
563 |        "      <th>max</th>\n",
564 |        "      <td>350.000000</td>\n",
565 |        "    </tr>\n",
566 |        "    <tr>\n",
567 |        "      <th rowspan=\"8\" valign=\"top\">GOOG</th>\n",
568 |        "      <th>count</th>\n",
569 |        "      <td>2.000000</td>\n",
570 |        "    </tr>\n",
571 |        "    <tr>\n",
572 |        "      <th>mean</th>\n",
573 |        "      <td>160.000000</td>\n",
574 |        "    </tr>\n",
575 |        "    <tr>\n",
576 |        "      <th>std</th>\n",
577 |        "      <td>56.568542</td>\n",
578 |        "    </tr>\n",
579 |        "    <tr>\n",
580 |        "      <th>min</th>\n",
581 |        "      <td>120.000000</td>\n",
582 |        "    </tr>\n",
583 |        "    <tr>\n",
584 |        "      <th>25%</th>\n",
585 |        "      <td>140.000000</td>\n",
586 |        "    </tr>\n",
587 |        "    <tr>\n",
588 |        "      <th>50%</th>\n",
589 |        "      <td>160.000000</td>\n",
590 |        "    </tr>\n",
591 |        "    <tr>\n",
592 |        "      <th>75%</th>\n",
593 |        "      <td>180.000000</td>\n",
594 |        "    </tr>\n",
595 |        "    <tr>\n",
596 |        "      <th>max</th>\n",
597 |        "      <td>200.000000</td>\n",
598 |        "    </tr>\n",
599 |        "    <tr>\n",
600 |        "      <th rowspan=\"8\" valign=\"top\">MSFT</th>\n",
601 |        "      <th>count</th>\n",
602 |        "      <td>2.000000</td>\n",
603 |        "    </tr>\n",
604 |        "    <tr>\n",
605 |        "      <th>mean</th>\n",
606 |        "      <td>232.000000</td>\n",
607 |        "    </tr>\n",
608 |        "    <tr>\n",
609 |        "      <th>std</th>\n",
610 |        "      <td>152.735065</td>\n",
611 |        "    </tr>\n",
612 |        "    <tr>\n",
613 |        "      <th>min</th>\n",
614 |        "      <td>124.000000</td>\n",
615 |        "    </tr>\n",
616 |        "    <tr>\n",
617 |        "      <th>25%</th>\n",
618 |        "      <td>178.000000</td>\n",
619 |        "    </tr>\n",
620 |        "    <tr>\n",
621 |        "      <th>50%</th>\n",
622 |        "      <td>232.000000</td>\n",
623 |        "    </tr>\n",
624 |        "    <tr>\n",
625 |        "      <th>75%</th>\n",
626 |        "      <td>286.000000</td>\n",
627 |        "    </tr>\n",
628 |        "    <tr>\n",
629 |        "      <th>max</th>\n",
630 |        "      <td>340.000000</td>\n",
631 |        "    </tr>\n",
632 |        "  </tbody>\n",
633 |        "</table>\n",
634 |        "</div>"
635 |       ],
636 |       "text/plain": [
637 |        "                    Sales\n",
638 |        "Company                  \n",
639 |        "FB      count    2.000000\n",
640 |        "        mean   296.500000\n",
641 |        "        std     75.660426\n",
642 |        "        min    243.000000\n",
643 |        "        25%    269.750000\n",
644 |        "        50%    296.500000\n",
645 |        "        75%    323.250000\n",
646 |        "        max    350.000000\n",
647 |        "GOOG    count    2.000000\n",
648 |        "        mean   160.000000\n",
649 |        "        std     56.568542\n",
650 |        "        min    120.000000\n",
651 |        "        25%    140.000000\n",
652 |        "        50%    160.000000\n",
653 |        "        75%    180.000000\n",
654 |        "        max    200.000000\n",
655 |        "MSFT    count    2.000000\n",
656 |        "        mean   232.000000\n",
657 |        "        std    152.735065\n",
658 |        "        min    124.000000\n",
659 |        "        25%    178.000000\n",
660 |        "        50%    232.000000\n",
661 |        "        75%    286.000000\n",
662 |        "        max    340.000000"
663 |       ]
664 |      },
665 |      "execution_count": 42,
666 |      "metadata": {},
667 |      "output_type": "execute_result"
668 |     }
669 |    ],
670 |    "source": [
671 |     "by_comp.describe()"
672 |    ]
673 |   },
674 |   {
675 |    "cell_type": "code",
676 |    "execution_count": 43,
677 |    "metadata": {},
678 |    "outputs": [
679 |     {
680 |      "data": {
681 |       "text/html": [
682 |        "<div>\n",
683 |        "<table border=\"1\" class=\"dataframe\">\n",
684 |        "  <thead>\n",
685 |        "    <tr>\n",
686 |        "      <th>Company</th>\n",
687 |        "      <th colspan=\"8\" halign=\"left\">FB</th>\n",
688 |        "      <th colspan=\"5\" halign=\"left\">GOOG</th>\n",
689 |        "      <th colspan=\"8\" halign=\"left\">MSFT</th>\n",
690 |        "    </tr>\n",
691 |        "    <tr>\n",
692 |        "      <th></th>\n",
693 |        "      <th>count</th>\n",
694 |        "      <th>mean</th>\n",
695 |        "      <th>std</th>\n",
696 |        "      <th>min</th>\n",
697 |        "      <th>25%</th>\n",
698 |        "      <th>50%</th>\n",
699 |        "      <th>75%</th>\n",
700 |        "      <th>max</th>\n",
701 |        "      <th>count</th>\n",
702 |        "      <th>mean</th>\n",
703 |        "      <th>...</th>\n",
704 |        "      <th>75%</th>\n",
705 |        "      <th>max</th>\n",
706 |        "      <th>count</th>\n",
707 |        "      <th>mean</th>\n",
708 |        "      <th>std</th>\n",
709 |        "      <th>min</th>\n",
710 |        "      <th>25%</th>\n",
711 |        "      <th>50%</th>\n",
712 |        "      <th>75%</th>\n",
713 |        "      <th>max</th>\n",
714 |        "    </tr>\n",
715 |        "  </thead>\n",
716 |        "  <tbody>\n",
717 |        "    <tr>\n",
718 |        "      <th>Sales</th>\n",
719 |        "      <td>2.0</td>\n",
720 |        "      <td>296.5</td>\n",
721 |        "      <td>75.660426</td>\n",
722 |        "      <td>243.0</td>\n",
723 |        "      <td>269.75</td>\n",
724 |        "      <td>296.5</td>\n",
725 |        "      <td>323.25</td>\n",
726 |        "      <td>350.0</td>\n",
727 |        "      <td>2.0</td>\n",
728 |        "      <td>160.0</td>\n",
729 |        "      <td>...</td>\n",
730 |        "      <td>180.0</td>\n",
731 |        "      <td>200.0</td>\n",
732 |        "      <td>2.0</td>\n",
733 |        "      <td>232.0</td>\n",
734 |        "      <td>152.735065</td>\n",
735 |        "      <td>124.0</td>\n",
736 |        "      <td>178.0</td>\n",
737 |        "      <td>232.0</td>\n",
738 |        "      <td>286.0</td>\n",
739 |        "      <td>340.0</td>\n",
740 |        "    </tr>\n",
741 |        "  </tbody>\n",
742 |        "</table>\n",
743 |        "<p>1 rows × 24 columns</p>\n",
744 |        "</div>"
745 |       ],
746 |       "text/plain": [
747 |        "Company    FB                                                         GOOG  \\\n",
748 |        "        count   mean        std    min     25%    50%     75%    max count   \n",
749 |        "Sales     2.0  296.5  75.660426  243.0  269.75  296.5  323.25  350.0   2.0   \n",
750 |        "\n",
751 |        "Company         ...                  MSFT                                   \\\n",
752 |        "          mean  ...      75%    max count   mean         std    min    25%   \n",
753 |        "Sales    160.0  ...    180.0  200.0   2.0  232.0  152.735065  124.0  178.0   \n",
754 |        "\n",
755 |        "Company                       \n",
756 |        "           50%    75%    max  \n",
757 |        "Sales    232.0  286.0  340.0  \n",
758 |        "\n",
759 |        "[1 rows x 24 columns]"
760 |       ]
761 |      },
762 |      "execution_count": 43,
763 |      "metadata": {},
764 |      "output_type": "execute_result"
765 |     }
766 |    ],
767 |    "source": [
768 |     "by_comp.describe().transpose()"
769 |    ]
770 |   },
771 |   {
772 |    "cell_type": "code",
773 |    "execution_count": 44,
774 |    "metadata": {},
775 |    "outputs": [
776 |     {
777 |      "data": {
778 |       "text/html": [
779 |        "<div>\n",
780 |        "<table border=\"1\" class=\"dataframe\">\n",
781 |        "  <thead>\n",
782 |        "    <tr style=\"text-align: right;\">\n",
783 |        "      <th></th>\n",
784 |        "      <th>count</th>\n",
785 |        "      <th>mean</th>\n",
786 |        "      <th>std</th>\n",
787 |        "      <th>min</th>\n",
788 |        "      <th>25%</th>\n",
789 |        "      <th>50%</th>\n",
790 |        "      <th>75%</th>\n",
791 |        "      <th>max</th>\n",
792 |        "    </tr>\n",
793 |        "  </thead>\n",
794 |        "  <tbody>\n",
795 |        "    <tr>\n",
796 |        "      <th>Sales</th>\n",
797 |        "      <td>2.0</td>\n",
798 |        "      <td>160.0</td>\n",
799 |        "      <td>56.568542</td>\n",
800 |        "      <td>120.0</td>\n",
801 |        "      <td>140.0</td>\n",
802 |        "      <td>160.0</td>\n",
803 |        "      <td>180.0</td>\n",
804 |        "      <td>200.0</td>\n",
805 |        "    </tr>\n",
806 |        "  </tbody>\n",
807 |        "</table>\n",
808 |        "</div>"
809 |       ],
810 |       "text/plain": [
811 |        "       count   mean        std    min    25%    50%    75%    max\n",
812 |        "Sales    2.0  160.0  56.568542  120.0  140.0  160.0  180.0  200.0"
813 |       ]
814 |      },
815 |      "execution_count": 44,
816 |      "metadata": {},
817 |      "output_type": "execute_result"
818 |     }
819 |    ],
820 |    "source": [
821 |     "by_comp.describe().transpose()['GOOG']"
822 |    ]
823 |   },
824 |   {
825 |    "cell_type": "markdown",
826 |    "metadata": {},
827 |    "source": [
828 |     "# Great Job!"
829 |    ]
830 |   }
831 |  ],
832 |  "metadata": {
833 |   "kernelspec": {
834 |    "display_name": "Python 3",
835 |    "language": "python",
836 |    "name": "python3"
837 |   },
838 |   "language_info": {
839 |    "codemirror_mode": {
840 |     "name": "ipython",
841 |     "version": 3
842 |    },
843 |    "file_extension": ".py",
844 |    "mimetype": "text/x-python",
845 |    "name": "python",
846 |    "nbconvert_exporter": "python",
847 |    "pygments_lexer": "ipython3",
848 |    "version": "3.7.6"
849 |   }
850 |  },
851 |  "nbformat": 4,
852 |  "nbformat_minor": 1
853 | }
854 | 


--------------------------------------------------------------------------------