├── 03-Python-for-Data-Analysis-Pandas ├── example ├── Excel_Sample.xlsx ├── multi_index_example ├── 01-Introduction to Pandas.ipynb ├── 04-Missing Data.ipynb ├── 02-Series.ipynb └── 05-Groupby.ipynb ├── 05-Data-Visualization-with-Matplotlib └── LWM.png ├── 07-Pandas-Built-in-Data-Viz └── df2 ├── 02-Python-for-Data-Analysis-NumPy ├── 03-Numpy Operations.ipynb ├── 04-Numpy Exercises.ipynb ├── 05-Numpy Exercises - Solutions.ipynb ├── 02-Numpy Indexing and Selection.ipynb └── 01-NumPy Arrays.ipynb └── 04-Pandas-Exercises ├── practice.ipynb ├── 03-Ecommerce Purchases Exercise .ipynb ├── 01-SF Salaries Exercise.ipynb ├── 04-Ecommerce Purchases Exercise - Solutions.ipynb └── 02-SF Salaries Exercise - Solutions.ipynb /03-Python-for-Data-Analysis-Pandas/example: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0,1,2,3 3 | 4,5,6,7 4 | 8,9,10,11 5 | 12,13,14,15 6 | -------------------------------------------------------------------------------- /05-Data-Visualization-with-Matplotlib/LWM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnwithme4998/Data-analysis-and-Data-Analytics/HEAD/05-Data-Visualization-with-Matplotlib/LWM.png -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/Excel_Sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnwithme4998/Data-analysis-and-Data-Analytics/HEAD/03-Python-for-Data-Analysis-Pandas/Excel_Sample.xlsx -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/multi_index_example: -------------------------------------------------------------------------------- 1 | first,bar,bar,baz,baz,foo,foo,qux,qux 2 | second,one,two,one,two,one,two,one,two 3 | ,,,,,,,, 4 | 
A,1.025984152081572,-0.1565979042889875,-0.031579143908112575,0.6498258334908454,2.154846443259472,-0.6102588558227414,-0.755325340010558,-0.34641850351854453 5 | B,0.1470267713241236,-0.47944803904109595,0.558769406443067,1.0248102783372157,-0.925874258809907,1.8628641384939535,-1.1338171615837889,0.6104779075384634 6 | C,0.3860303121135517,2.084018530338962,-0.37651867524923904,0.23033634359240704,0.6812092925867574,1.0351250747739213,-0.031160481493099617,1.9399323109926203 7 | -------------------------------------------------------------------------------- /07-Pandas-Built-in-Data-Viz/df2: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 0.039761986133905136,0.2185172274750622,0.10342298051665423,0.9579042338107532 3 | 0.9372879037285884,0.04156728027953449,0.8991254222382951,0.9776795571253272 4 | 0.7805044779316328,0.008947537857148302,0.5578084027546968,0.7975104497549266 5 | 0.6727174963492204,0.24786984946279625,0.2640713103088026,0.44435791644122935 6 | 0.05382860859967886,0.5201244020579979,0.5522642392797277,0.19000759632053632 7 | 0.2860433671280178,0.5934650440000543,0.9073072637456548,0.6378977150631427 8 | 0.4304355863327313,0.16623013749421356,0.4693825447762464,0.4977008828313123 9 | 0.3122955538295512,0.5028232900921878,0.8066087010958843,0.8505190941429479 10 | 0.1877648514121828,0.9970746427719338,0.8959552961495315,0.530390137569463 11 | 0.9081621790575398,0.23272641071536715,0.4141382611943452,0.4320069001558664 12 | -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/01-Introduction to Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Introduction to Pandas\n", 10 | "\n", 11 | " we will learn how to use pandas for data analysis. 
You can think of pandas as an extremely powerful version of Excel, with a lot more features. In this section of the course, you should go through the notebooks in this order:\n", 12 | "\n", 13 | "* Introduction to Pandas\n", 14 | "* Series\n", 15 | "* DataFrames\n", 16 | "* Missing Data\n", 17 | "* GroupBy\n", 18 | "* Merging, Joining, and Concatenating\n", 19 | "* Operations\n", 20 | "* Data Input and Output" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "___" 28 | ] 29 | } 30 | ], 31 | "metadata": { 32 | "kernelspec": { 33 | "display_name": "Python 3", 34 | "language": "python", 35 | "name": "python3" 36 | }, 37 | "language_info": { 38 | "codemirror_mode": { 39 | "name": "ipython", 40 | "version": 3 41 | }, 42 | "file_extension": ".py", 43 | "mimetype": "text/x-python", 44 | "name": "python", 45 | "nbconvert_exporter": "python", 46 | "pygments_lexer": "ipython3", 47 | "version": "3.7.6" 48 | } 49 | }, 50 | "nbformat": 4, 51 | "nbformat_minor": 1 52 | } 53 | -------------------------------------------------------------------------------- /02-Python-for-Data-Analysis-NumPy/03-Numpy Operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# NumPy Operations" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Arithmetic\n", 17 | "\n", 18 | "You can easily perform array with array arithmetic, or scalar with array arithmetic. 
Let's see some examples:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "arr = np.arange(0,10)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "arr + arr" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])" 62 | ] 63 | }, 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "arr * arr" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 82 | ] 83 | }, 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "arr - arr" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stderr", 100 | "output_type": "stream", 101 | "text": [ 102 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in true_divide\n", 103 | " if __name__ == '__main__':\n" 104 | ] 105 | }, 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "array([ nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 110 | ] 111 | }, 112 | "execution_count": 5, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "# Warning on division by zero, but not an 
error!\n", 119 | "# Just replaced with nan\n", 120 | "arr/arr" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 6, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stderr", 130 | "output_type": "stream", 131 | "text": [ 132 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in true_divide\n", 133 | " if __name__ == '__main__':\n" 134 | ] 135 | }, 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "array([ inf, 1. , 0.5 , 0.33333333, 0.25 ,\n", 140 | " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111])" 141 | ] 142 | }, 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "# Also a warning, but not an error; the result is infinity instead\n", 150 | "1/arr" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 10, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729])" 162 | ] 163 | }, 164 | "execution_count": 10, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "arr**3" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Universal Array Functions\n", 178 | "\n", 179 | "Numpy comes with many [universal array functions](http://docs.scipy.org/doc/numpy/reference/ufuncs.html), which are essentially just mathematical operations you can use to perform the operation across the array. Let's show some common ones:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 12, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "array([ 0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 191 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. 
])" 192 | ] 193 | }, 194 | "execution_count": 12, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "#Taking Square Roots\n", 201 | "np.sqrt(arr)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 13, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "array([ 1.00000000e+00, 2.71828183e+00, 7.38905610e+00,\n", 213 | " 2.00855369e+01, 5.45981500e+01, 1.48413159e+02,\n", 214 | " 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,\n", 215 | " 8.10308393e+03])" 216 | ] 217 | }, 218 | "execution_count": 13, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "#Calculating exponential (e^)\n", 225 | "np.exp(arr)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 14, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "9" 237 | ] 238 | }, 239 | "execution_count": 14, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "np.max(arr) #same as arr.max()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 15, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "array([ 0. 
, 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", 257 | " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849])" 258 | ] 259 | }, 260 | "execution_count": 15, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "np.sin(arr)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 16, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stderr", 276 | "output_type": "stream", 277 | "text": [ 278 | "/Users/marci/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log\n", 279 | " if __name__ == '__main__':\n" 280 | ] 281 | }, 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436,\n", 286 | " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])" 287 | ] 288 | }, 289 | "execution_count": 16, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "np.log(arr)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "# Great Job!\n", 303 | "\n", 304 | "That's all we need to know for now!" 
305 | ] 306 | } 307 | ], 308 | "metadata": { 309 | "kernelspec": { 310 | "display_name": "Python 3", 311 | "language": "python", 312 | "name": "python3" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.7.6" 325 | } 326 | }, 327 | "nbformat": 4, 328 | "nbformat_minor": 1 329 | } 330 | -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/04-Missing Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Missing Data\n", 8 | "\n", 9 | "Let's show a few convenient methods to deal with Missing Data in pandas:" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "import pandas as pd" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 9, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "df = pd.DataFrame({'A':[1,2,np.nan],\n", 33 | " 'B':[5,np.nan,np.nan],\n", 34 | " 'C':[1,2,3]})" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 10, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "
\n", 46 | "\n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | "
ABC
01.05.01
12.0NaN2
2NaNNaN3
\n", 76 | "
" 77 | ], 78 | "text/plain": [ 79 | " A B C\n", 80 | "0 1.0 5.0 1\n", 81 | "1 2.0 NaN 2\n", 82 | "2 NaN NaN 3" 83 | ] 84 | }, 85 | "execution_count": 10, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "df" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 12, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "
\n", 103 | "\n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | "
ABC
01.05.01
\n", 121 | "
" 122 | ], 123 | "text/plain": [ 124 | " A B C\n", 125 | "0 1.0 5.0 1" 126 | ] 127 | }, 128 | "execution_count": 12, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "df.dropna()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 13, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/html": [ 145 | "
\n", 146 | "\n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
C
01
12
23
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " C\n", 172 | "0 1\n", 173 | "1 2\n", 174 | "2 3" 175 | ] 176 | }, 177 | "execution_count": 13, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "df.dropna(axis=1)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 14, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/html": [ 194 | "
\n", 195 | "\n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | "
ABC
01.05.01
12.0NaN2
\n", 219 | "
" 220 | ], 221 | "text/plain": [ 222 | " A B C\n", 223 | "0 1.0 5.0 1\n", 224 | "1 2.0 NaN 2" 225 | ] 226 | }, 227 | "execution_count": 14, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "df.dropna(thresh=2)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 15, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/html": [ 244 | "
\n", 245 | "\n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | "
ABC
0151
12FILL VALUE2
2FILL VALUEFILL VALUE3
\n", 275 | "
" 276 | ], 277 | "text/plain": [ 278 | " A B C\n", 279 | "0 1 5 1\n", 280 | "1 2 FILL VALUE 2\n", 281 | "2 FILL VALUE FILL VALUE 3" 282 | ] 283 | }, 284 | "execution_count": 15, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "df.fillna(value='FILL VALUE')" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 17, 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/plain": [ 301 | "0 1.0\n", 302 | "1 2.0\n", 303 | "2 1.5\n", 304 | "Name: A, dtype: float64" 305 | ] 306 | }, 307 | "execution_count": 17, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "df['A'].fillna(value=df['A'].mean())" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "# Great Job!" 321 | ] 322 | } 323 | ], 324 | "metadata": { 325 | "kernelspec": { 326 | "display_name": "Python 3", 327 | "language": "python", 328 | "name": "python3" 329 | }, 330 | "language_info": { 331 | "codemirror_mode": { 332 | "name": "ipython", 333 | "version": 3 334 | }, 335 | "file_extension": ".py", 336 | "mimetype": "text/x-python", 337 | "name": "python", 338 | "nbconvert_exporter": "python", 339 | "pygments_lexer": "ipython3", 340 | "version": "3.7.6" 341 | } 342 | }, 343 | "nbformat": 4, 344 | "nbformat_minor": 1 345 | } 346 | -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/02-Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "# Series" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "The first main data type we will learn about for pandas is the Series data type. 
Let's import Pandas and explore the Series object.\n", 16 | "\n", 17 | "A Series is very similar to a NumPy array (in fact it is built on top of the NumPy array object). What differentiates the NumPy array from a Series, is that a Series can have axis labels, meaning it can be indexed by a label, instead of just a number location. It also doesn't need to hold numeric data, it can hold any arbitrary Python Object.\n", 18 | "\n", 19 | "Let's explore this concept through some examples:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import numpy as np\n", 29 | "import pandas as pd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Creating a Series\n", 37 | "\n", 38 | "You can convert a list, NumPy array, or dictionary to a Series:" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "labels = ['a','b','c']\n", 50 | "my_list = [10,20,30]\n", 51 | "arr = np.array([10,20,30])\n", 52 | "d = {'a':10,'b':20,'c':30}" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "**Using Lists**" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "0 10\n", 71 | "1 20\n", 72 | "2 30\n", 73 | "dtype: int64" 74 | ] 75 | }, 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "pd.Series(data=my_list)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "a 10\n", 94 | "b 20\n", 95 | "c 30\n", 96 | "dtype: int64" 97 | ] 98 | }, 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "output_type": 
"execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "pd.Series(data=my_list,index=labels)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "a 10\n", 117 | "b 20\n", 118 | "c 30\n", 119 | "dtype: int64" 120 | ] 121 | }, 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "pd.Series(my_list,labels)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**NumPy Arrays**" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "0 10\n", 147 | "1 20\n", 148 | "2 30\n", 149 | "dtype: int64" 150 | ] 151 | }, 152 | "execution_count": 7, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "pd.Series(arr)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 8, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "a 10\n", 170 | "b 20\n", 171 | "c 30\n", 172 | "dtype: int64" 173 | ] 174 | }, 175 | "execution_count": 8, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "pd.Series(arr,labels)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "**Dictionary**" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "a 10\n", 200 | "b 20\n", 201 | "c 30\n", 202 | "dtype: int64" 203 | ] 204 | }, 205 | "execution_count": 9, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "pd.Series(d)" 212 | ] 213 | }, 214 | { 215 | "cell_type": 
"markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "### Data in a Series\n", 219 | "\n", 220 | "A pandas Series can hold a variety of object types:" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "0 a\n", 232 | "1 b\n", 233 | "2 c\n", 234 | "dtype: object" 235 | ] 236 | }, 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "pd.Series(data=labels)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 11, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "0 \n", 255 | "1 \n", 256 | "2 \n", 257 | "dtype: object" 258 | ] 259 | }, 260 | "execution_count": 11, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "# Even functions (although unlikely that you will use this)\n", 267 | "pd.Series([sum,print,len])" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "## Using an Index\n", 275 | "\n", 276 | "The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look ups of information (works like a hash table or dictionary).\n", 277 | "\n", 278 | "Let's see some examples of how to grab information from a Series. 
Let us create two Series, ser1 and ser2:" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 12, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "ser1 = pd.Series([1,2,3,4],index = ['USA', 'Germany','USSR', 'Japan']) " 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 13, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "USA 1\n", 299 | "Germany 2\n", 300 | "USSR 3\n", 301 | "Japan 4\n", 302 | "dtype: int64" 303 | ] 304 | }, 305 | "execution_count": 13, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "ser1" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 2, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "ser2 = pd.Series([1,2,5,4],index = ['USA', 'Germany','Italy', 'Japan']) " 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 15, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "data": { 330 | "text/plain": [ 331 | "USA 1\n", 332 | "Germany 2\n", 333 | "Italy 5\n", 334 | "Japan 4\n", 335 | "dtype: int64" 336 | ] 337 | }, 338 | "execution_count": 15, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "ser2" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 16, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "1" 356 | ] 357 | }, 358 | "execution_count": 16, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "ser1['USA']" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "Operations are then also done based off of index:" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 17, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "Germany 
4.0\n", 383 | "Italy NaN\n", 384 | "Japan 8.0\n", 385 | "USA 2.0\n", 386 | "USSR NaN\n", 387 | "dtype: float64" 388 | ] 389 | }, 390 | "execution_count": 17, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "ser1 + ser2" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "Let's stop here for now and move on to DataFrames, which will expand on the concept of Series!\n", 404 | "# Great Job!" 405 | ] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 | "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.7.6" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 1 429 | } 430 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 55, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "df = pd.read_csv('Salaries.csv')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 59, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
\n", 113 | "
" 114 | ], 115 | "text/plain": [ 116 | " Id EmployeeName JobTitle \\\n", 117 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 118 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 119 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 120 | "\n", 121 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 122 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 123 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 124 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 125 | "\n", 126 | " Year Notes Agency Status \n", 127 | "0 2011 NaN San Francisco NaN \n", 128 | "1 2011 NaN San Francisco NaN \n", 129 | "2 2011 NaN San Francisco NaN " 130 | ] 131 | }, 132 | "execution_count": 59, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "df.head(3)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 60, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "\n", 151 | "RangeIndex: 148654 entries, 0 to 148653\n", 152 | "Data columns (total 13 columns):\n", 153 | " # Column Non-Null Count Dtype \n", 154 | "--- ------ -------------- ----- \n", 155 | " 0 Id 148654 non-null int64 \n", 156 | " 1 EmployeeName 148654 non-null object \n", 157 | " 2 JobTitle 148654 non-null object \n", 158 | " 3 BasePay 148045 non-null float64\n", 159 | " 4 OvertimePay 148650 non-null float64\n", 160 | " 5 OtherPay 148650 non-null float64\n", 161 | " 6 Benefits 112491 non-null float64\n", 162 | " 7 TotalPay 148654 non-null float64\n", 163 | " 8 TotalPayBenefits 148654 non-null float64\n", 164 | " 9 Year 148654 non-null int64 \n", 165 | " 10 Notes 0 non-null float64\n", 166 | " 11 Agency 148654 non-null object \n", 167 | " 12 Status 0 non-null float64\n", 168 | "dtypes: float64(8), int64(2), object(3)\n", 169 | "memory usage: 14.7+ MB\n" 170 | ] 171 | } 172 | ], 
173 | "source": [ 174 | "df.info()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 61, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "66325.44884050643" 186 | ] 187 | }, 188 | "execution_count": 61, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "df['BasePay'].mean()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 62, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "245131.88" 206 | ] 207 | }, 208 | "execution_count": 62, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "df['OvertimePay'].max()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 65, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "24 270324.91\n", 226 | "Name: TotalPayBenefits, dtype: float64" 227 | ] 228 | }, 229 | "execution_count": 65, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "df[df['EmployeeName'] == 'JOSEPH DRISCOLL']['TotalPayBenefits']" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 67, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 260 | "\n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 298 | "
" 299 | ], 300 | "text/plain": [ 301 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 302 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 303 | "\n", 304 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 305 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 306 | "\n", 307 | " Agency Status \n", 308 | "148653 San Francisco NaN " 309 | ] 310 | }, 311 | "execution_count": 67, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "df[df['TotalPayBenefits'] == df['TotalPayBenefits'].min()]" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 70, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "Year\n", 329 | "2011 63595.956517\n", 330 | "2012 65436.406857\n", 331 | "2013 69630.030216\n", 332 | "2014 66564.421924\n", 333 | "Name: BasePay, dtype: float64" 334 | ] 335 | }, 336 | "execution_count": 70, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "df.groupby('Year').mean()['BasePay']" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 71, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": [ 353 | "2159" 354 | ] 355 | }, 356 | "execution_count": 71, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "df['JobTitle'].nunique()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 73, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "Transit Operator 7036\n", 374 | "Special Nurse 4389\n", 375 | "Registered Nurse 3736\n", 376 | "Public Svc Aide-Public Works 2518\n", 377 | "Police Officer 3 2421\n", 378 | "Name: JobTitle, dtype: int64" 379 | ] 380 | }, 381 | "execution_count": 73, 382 | "metadata": {}, 383 | "output_type": "execute_result" 384 | } 385 | ], 386 | "source": [ 387 | 
"df['JobTitle'].value_counts().head()" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 77, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/plain": [ 398 | "202" 399 | ] 400 | }, 401 | "execution_count": 77, 402 | "metadata": {}, 403 | "output_type": "execute_result" 404 | } 405 | ], 406 | "source": [ 407 | "sum(df[df['Year'] == 2013]['JobTitle'].value_counts() == 1)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 80, 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "df['len'] = df['JobTitle'].apply(len)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 83, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/html": [ 427 | "
\n", 428 | "\n", 441 | "\n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | "
lenTotalPayBenefits
len1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 462 | "
" 463 | ], 464 | "text/plain": [ 465 | " len TotalPayBenefits\n", 466 | "len 1.000000 -0.036878\n", 467 | "TotalPayBenefits -0.036878 1.000000" 468 | ] 469 | }, 470 | "execution_count": 83, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "df[['len','TotalPayBenefits']].corr()" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.7.6" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 4 522 | } 523 | -------------------------------------------------------------------------------- /02-Python-for-Data-Analysis-NumPy/04-Numpy Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Exercises \n", 8 | "\n", 9 | "Now that we've learned about NumPy let's test your knowledge. We'll start off with a few simple tasks, and then you'll be asked some more complicated questions." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Import NumPy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "#### Create an array of 10 zeros " 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 44 | ] 45 | }, 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "#### Create an array of 10 ones" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "#### Create an array of 10 fives" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])" 94 | ] 95 | }, 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### Create an array of the integers from 10 to 50" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | 
"text/plain": [ 118 | "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", 119 | " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", 120 | " 44, 45, 46, 47, 48, 49, 50])" 121 | ] 122 | }, 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "#### Create an array of all the even integers from 10 to 50" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n", 146 | " 44, 46, 48, 50])" 147 | ] 148 | }, 149 | "execution_count": 6, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "#### Create a 3x3 matrix with values ranging from 0 to 8" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 7, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "array([[0, 1, 2],\n", 172 | " [3, 4, 5],\n", 173 | " [6, 7, 8]])" 174 | ] 175 | }, 176 | "execution_count": 7, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "#### Create a 3x3 identity matrix" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 8, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "array([[ 1., 0., 0.],\n", 199 | " [ 0., 1., 0.],\n", 200 | " [ 0., 0., 1.]])" 201 | ] 202 | }, 203 | "execution_count": 8, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [] 209 | 
}, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "#### Use NumPy to generate a random number between 0 and 1" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 15, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "array([ 0.42829726])" 226 | ] 227 | }, 228 | "execution_count": 15, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 33, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "array([ 1.32031013, 1.6798602 , -0.42985892, -1.53116655, 0.85753232,\n", 251 | " 0.87339938, 0.35668636, -1.47491157, 0.15349697, 0.99530727,\n", 252 | " -0.94865451, -1.69174783, 1.57525349, -0.70615234, 0.10991879,\n", 253 | " -0.49478947, 1.08279872, 0.76488333, -2.3039931 , 0.35401124,\n", 254 | " -0.45454399, -0.64754649, -0.29391671, 0.02339861, 0.38272124])" 255 | ] 256 | }, 257 | "execution_count": 33, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "#### Create the following matrix:" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 35, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "array([[ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],\n", 280 | " [ 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],\n", 281 | " [ 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],\n", 282 | " [ 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],\n", 283 | " [ 0.41, 0.42, 
0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],\n", 284 | " [ 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],\n", 285 | " [ 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],\n", 286 | " [ 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],\n", 287 | " [ 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],\n", 288 | " [ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]])" 289 | ] 290 | }, 291 | "execution_count": 35, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "#### Create an array of 20 linearly spaced points between 0 and 1:" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 36, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "array([ 0. , 0.05263158, 0.10526316, 0.15789474, 0.21052632,\n", 314 | " 0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,\n", 315 | " 0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,\n", 316 | " 0.78947368, 0.84210526, 0.89473684, 0.94736842, 1. 
])" 317 | ] 318 | }, 319 | "execution_count": 36, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "## Numpy Indexing and Selection\n", 331 | "\n", 332 | "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 38, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "array([[ 1, 2, 3, 4, 5],\n", 344 | " [ 6, 7, 8, 9, 10],\n", 345 | " [11, 12, 13, 14, 15],\n", 346 | " [16, 17, 18, 19, 20],\n", 347 | " [21, 22, 23, 24, 25]])" 348 | ] 349 | }, 350 | "execution_count": 38, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "mat = np.arange(1,26).reshape(5,5)\n", 357 | "mat" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 39, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 369 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 370 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 40, 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "data": { 380 | "text/plain": [ 381 | "array([[12, 13, 14, 15],\n", 382 | " [17, 18, 19, 20],\n", 383 | " [22, 23, 24, 25]])" 384 | ] 385 | }, 386 | "execution_count": 40, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 29, 396 | "metadata": { 397 | "collapsed": true 398 | }, 399 | "outputs": [], 400 | "source": [ 401 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 402 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU 
WON'T\n", 403 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 41, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "20" 415 | ] 416 | }, 417 | "execution_count": 41, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 30, 427 | "metadata": { 428 | "collapsed": true 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 433 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 434 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 42, 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/plain": [ 445 | "array([[ 2],\n", 446 | " [ 7],\n", 447 | " [12]])" 448 | ] 449 | }, 450 | "execution_count": 42, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": 31, 460 | "metadata": { 461 | "collapsed": true 462 | }, 463 | "outputs": [], 464 | "source": [ 465 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 466 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 467 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 46, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "array([21, 22, 23, 24, 25])" 479 | ] 480 | }, 481 | "execution_count": 46, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 32, 491 | "metadata": { 492 | "collapsed": true 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "# WRITE CODE HERE THAT 
REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 497 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 498 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": 49, 504 | "metadata": {}, 505 | "outputs": [ 506 | { 507 | "data": { 508 | "text/plain": [ 509 | "array([[16, 17, 18, 19, 20],\n", 510 | " [21, 22, 23, 24, 25]])" 511 | ] 512 | }, 513 | "execution_count": 49, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "### Now do the following" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": {}, 530 | "source": [ 531 | "#### Get the sum of all the values in mat" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 50, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | "text/plain": [ 542 | "325" 543 | ] 544 | }, 545 | "execution_count": 50, 546 | "metadata": {}, 547 | "output_type": "execute_result" 548 | } 549 | ], 550 | "source": [] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": {}, 555 | "source": [ 556 | "#### Get the standard deviation of the values in mat" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 51, 562 | "metadata": {}, 563 | "outputs": [ 564 | { 565 | "data": { 566 | "text/plain": [ 567 | "7.2111025509279782" 568 | ] 569 | }, 570 | "execution_count": 51, 571 | "metadata": {}, 572 | "output_type": "execute_result" 573 | } 574 | ], 575 | "source": [] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "#### Get the sum of all the columns in mat" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 53, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "array([55, 60, 65, 70, 75])" 593 | ] 594 | }, 595 | 
"execution_count": 53, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [] 601 | }, 602 | { 603 | "cell_type": "markdown", 604 | "metadata": { 605 | "collapsed": true 606 | }, 607 | "source": [ 608 | "# Great Job!" 609 | ] 610 | } 611 | ], 612 | "metadata": { 613 | "kernelspec": { 614 | "display_name": "Python 3", 615 | "language": "python", 616 | "name": "python3" 617 | }, 618 | "language_info": { 619 | "codemirror_mode": { 620 | "name": "ipython", 621 | "version": 3 622 | }, 623 | "file_extension": ".py", 624 | "mimetype": "text/x-python", 625 | "name": "python", 626 | "nbconvert_exporter": "python", 627 | "pygments_lexer": "ipython3", 628 | "version": "3.7.6" 629 | } 630 | }, 631 | "nbformat": 4, 632 | "nbformat_minor": 1 633 | } 634 | -------------------------------------------------------------------------------- /02-Python-for-Data-Analysis-NumPy/05-Numpy Exercises - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Exercises - Solutions\n", 8 | "\n", 9 | "Now that we've learned about NumPy let's test your knowledge. We'll start off with a few simple tasks and then you'll be asked some more complicated questions." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Import NumPy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import numpy as np" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "#### Create an array of 10 zeros " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/plain": [ 45 | "array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 46 | ] 47 | }, 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "np.zeros(10)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "#### Create an array of 10 ones" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 73 | ] 74 | }, 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "np.ones(10)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "#### Create an array of 10 fives" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])" 100 | ] 101 | }, 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "np.ones(10) * 5" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "#### Create an array of the integers from 10 to 50" 116 | ] 117 | }, 118 | { 119 | 
"cell_type": "code", 120 | "execution_count": 5, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", 127 | " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", 128 | " 44, 45, 46, 47, 48, 49, 50])" 129 | ] 130 | }, 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "np.arange(10,51)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "#### Create an array of all the even integers from 10 to 50" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 6, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n", 156 | " 44, 46, 48, 50])" 157 | ] 158 | }, 159 | "execution_count": 6, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "np.arange(10,51,2)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "#### Create a 3x3 matrix with values ranging from 0 to 8" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "array([[0, 1, 2],\n", 184 | " [3, 4, 5],\n", 185 | " [6, 7, 8]])" 186 | ] 187 | }, 188 | "execution_count": 7, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "np.arange(9).reshape(3,3)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "#### Create a 3x3 identity matrix" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 
| "array([[ 1., 0., 0.],\n", 213 | " [ 0., 1., 0.],\n", 214 | " [ 0., 0., 1.]])" 215 | ] 216 | }, 217 | "execution_count": 8, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "np.eye(3)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "#### Use NumPy to generate a random number between 0 and 1" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 15, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([ 0.42829726])" 242 | ] 243 | }, 244 | "execution_count": 15, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "np.random.rand(1)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 33, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "array([ 1.32031013, 1.6798602 , -0.42985892, -1.53116655, 0.85753232,\n", 269 | " 0.87339938, 0.35668636, -1.47491157, 0.15349697, 0.99530727,\n", 270 | " -0.94865451, -1.69174783, 1.57525349, -0.70615234, 0.10991879,\n", 271 | " -0.49478947, 1.08279872, 0.76488333, -2.3039931 , 0.35401124,\n", 272 | " -0.45454399, -0.64754649, -0.29391671, 0.02339861, 0.38272124])" 273 | ] 274 | }, 275 | "execution_count": 33, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "np.random.randn(25)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "#### Create the following matrix:" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 35, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | 
"array([[ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],\n", 300 | " [ 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],\n", 301 | " [ 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],\n", 302 | " [ 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],\n", 303 | " [ 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],\n", 304 | " [ 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],\n", 305 | " [ 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],\n", 306 | " [ 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],\n", 307 | " [ 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],\n", 308 | " [ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]])" 309 | ] 310 | }, 311 | "execution_count": 35, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "np.arange(1,101).reshape(10,10) / 100" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "#### Create an array of 20 linearly spaced points between 0 and 1:" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 36, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/plain": [ 335 | "array([ 0. , 0.05263158, 0.10526316, 0.15789474, 0.21052632,\n", 336 | " 0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,\n", 337 | " 0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,\n", 338 | " 0.78947368, 0.84210526, 0.89473684, 0.94736842, 1. 
])" 339 | ] 340 | }, 341 | "execution_count": 36, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "np.linspace(0,1,20)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "## Numpy Indexing and Selection\n", 355 | "\n", 356 | "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 38, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "array([[ 1, 2, 3, 4, 5],\n", 368 | " [ 6, 7, 8, 9, 10],\n", 369 | " [11, 12, 13, 14, 15],\n", 370 | " [16, 17, 18, 19, 20],\n", 371 | " [21, 22, 23, 24, 25]])" 372 | ] 373 | }, 374 | "execution_count": 38, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "mat = np.arange(1,26).reshape(5,5)\n", 381 | "mat" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 39, 387 | "metadata": { 388 | "collapsed": true 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 393 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 394 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 40, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "array([[12, 13, 14, 15],\n", 406 | " [17, 18, 19, 20],\n", 407 | " [22, 23, 24, 25]])" 408 | ] 409 | }, 410 | "execution_count": 40, 411 | "metadata": {}, 412 | "output_type": "execute_result" 413 | } 414 | ], 415 | "source": [ 416 | "mat[2:,1:]" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 29, 422 | "metadata": { 423 | "collapsed": true 424 | }, 425 | "outputs": [], 426 | "source": [ 427 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 428 | 
"# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 429 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 41, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/plain": [ 440 | "20" 441 | ] 442 | }, 443 | "execution_count": 41, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "mat[3,4]" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 30, 455 | "metadata": { 456 | "collapsed": true 457 | }, 458 | "outputs": [], 459 | "source": [ 460 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 461 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 462 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 42, 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": [ 473 | "array([[ 2],\n", 474 | " [ 7],\n", 475 | " [12]])" 476 | ] 477 | }, 478 | "execution_count": 42, 479 | "metadata": {}, 480 | "output_type": "execute_result" 481 | } 482 | ], 483 | "source": [ 484 | "mat[:3,1:2]" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 31, 490 | "metadata": { 491 | "collapsed": true 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 496 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 497 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 46, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "array([21, 22, 23, 24, 25])" 509 | ] 510 | }, 511 | "execution_count": 46, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "mat[4,:]" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 
32, 523 | "metadata": { 524 | "collapsed": true 525 | }, 526 | "outputs": [], 527 | "source": [ 528 | "# WRITE CODE HERE THAT REPRODUCES THE OUTPUT OF THE CELL BELOW\n", 529 | "# BE CAREFUL NOT TO RUN THE CELL BELOW, OTHERWISE YOU WON'T\n", 530 | "# BE ABLE TO SEE THE OUTPUT ANY MORE" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 49, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "data": { 540 | "text/plain": [ 541 | "array([[16, 17, 18, 19, 20],\n", 542 | " [21, 22, 23, 24, 25]])" 543 | ] 544 | }, 545 | "execution_count": 49, 546 | "metadata": {}, 547 | "output_type": "execute_result" 548 | } 549 | ], 550 | "source": [ 551 | "mat[3:5,:]" 552 | ] 553 | }, 554 | { 555 | "cell_type": "markdown", 556 | "metadata": {}, 557 | "source": [ 558 | "### Now do the following" 559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "#### Get the sum of all the values in mat" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 50, 571 | "metadata": {}, 572 | "outputs": [ 573 | { 574 | "data": { 575 | "text/plain": [ 576 | "325" 577 | ] 578 | }, 579 | "execution_count": 50, 580 | "metadata": {}, 581 | "output_type": "execute_result" 582 | } 583 | ], 584 | "source": [ 585 | "mat.sum()" 586 | ] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "#### Get the standard deviation of the values in mat" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 51, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "data": { 602 | "text/plain": [ 603 | "7.2111025509279782" 604 | ] 605 | }, 606 | "execution_count": 51, 607 | "metadata": {}, 608 | "output_type": "execute_result" 609 | } 610 | ], 611 | "source": [ 612 | "mat.std()" 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": {}, 618 | "source": [ 619 | "#### Get the sum of all the columns in mat" 620 | ] 621 | }, 622 | { 623 | 
"cell_type": "code", 624 | "execution_count": 53, 625 | "metadata": {}, 626 | "outputs": [ 627 | { 628 | "data": { 629 | "text/plain": [ 630 | "array([55, 60, 65, 70, 75])" 631 | ] 632 | }, 633 | "execution_count": 53, 634 | "metadata": {}, 635 | "output_type": "execute_result" 636 | } 637 | ], 638 | "source": [ 639 | "mat.sum(axis=0)" 640 | ] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": { 645 | "collapsed": true 646 | }, 647 | "source": [ 648 | "# Great Job!" 649 | ] 650 | } 651 | ], 652 | "metadata": { 653 | "kernelspec": { 654 | "display_name": "Python 3", 655 | "language": "python", 656 | "name": "python3" 657 | }, 658 | "language_info": { 659 | "codemirror_mode": { 660 | "name": "ipython", 661 | "version": 3 662 | }, 663 | "file_extension": ".py", 664 | "mimetype": "text/x-python", 665 | "name": "python", 666 | "nbconvert_exporter": "python", 667 | "pygments_lexer": "ipython3", 668 | "version": "3.7.6" 669 | } 670 | }, 671 | "nbformat": 4, 672 | "nbformat_minor": 1 673 | } 674 | -------------------------------------------------------------------------------- /02-Python-for-Data-Analysis-NumPy/02-Numpy Indexing and Selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Indexing and Selection\n", 8 | "\n", 9 | "In this lecture we will discuss how to select elements or groups of elements from an array." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "#Creating sample array\n", 28 | "arr = np.arange(0,11)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 4, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 40 | ] 41 | }, 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "#Show\n", 49 | "arr" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Bracket Indexing and Selection\n", 57 | "The simplest way to pick one or some elements of an array looks very similar to python lists:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 5, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "8" 69 | ] 70 | }, 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "#Get a value at an index\n", 78 | "arr[8]" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 6, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "array([1, 2, 3, 4])" 90 | ] 91 | }, 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "#Get values in a range\n", 99 | "arr[1:5]" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "array([0, 1, 2, 3, 4])" 111 | ] 112 | }, 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | 
"source": [ 119 | "#Get values in a range\n", 120 | "arr[0:5]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Broadcasting\n", 128 | "\n", 129 | "Numpy arrays differ from a normal Python list because of their ability to broadcast:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "array([100, 100, 100, 100, 100, 5, 6, 7, 8, 9, 10])" 141 | ] 142 | }, 143 | "execution_count": 8, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "#Setting a value with index range (Broadcasting)\n", 150 | "arr[0:5]=100\n", 151 | "\n", 152 | "#Show\n", 153 | "arr" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 9, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 165 | ] 166 | }, 167 | "execution_count": 9, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "# Reset array, we'll see why I had to reset in a moment\n", 174 | "arr = np.arange(0,11)\n", 175 | "\n", 176 | "#Show\n", 177 | "arr" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 10, 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "array([0, 1, 2, 3, 4, 5])" 189 | ] 190 | }, 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "#Important notes on Slices\n", 198 | "slice_of_arr = arr[0:6]\n", 199 | "\n", 200 | "#Show slice\n", 201 | "slice_of_arr" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 11, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "array([99, 99, 99, 99, 99, 99])" 213 | ] 214 | }, 215 | 
"execution_count": 11, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "#Change Slice\n", 222 | "slice_of_arr[:]=99\n", 223 | "\n", 224 | "#Show Slice again\n", 225 | "slice_of_arr" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "Now note that the changes also occur in our original array!" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 12, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 244 | ] 245 | }, 246 | "execution_count": 12, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "arr" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "The data is not copied; the slice is a view of the original array! This avoids memory problems!" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 13, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 271 | ] 272 | }, 273 | "execution_count": 13, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "#To get a copy, need to be explicit\n", 280 | "arr_copy = arr.copy()\n", 281 | "\n", 282 | "arr_copy" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## Indexing a 2D array (matrices)\n", 290 | "\n", 291 | "The general format is **arr_2d[row][col]** or **arr_2d[row,col]**. I usually recommend using the comma notation for clarity." 
292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 14, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "array([[ 5, 10, 15],\n", 303 | " [20, 25, 30],\n", 304 | " [35, 40, 45]])" 305 | ] 306 | }, 307 | "execution_count": 14, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))\n", 314 | "\n", 315 | "#Show\n", 316 | "arr_2d" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 15, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "array([20, 25, 30])" 328 | ] 329 | }, 330 | "execution_count": 15, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "#Indexing row\n", 337 | "arr_2d[1]\n" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 16, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "data": { 347 | "text/plain": [ 348 | "20" 349 | ] 350 | }, 351 | "execution_count": 16, 352 | "metadata": {}, 353 | "output_type": "execute_result" 354 | } 355 | ], 356 | "source": [ 357 | "# Format is arr_2d[row][col] or arr_2d[row,col]\n", 358 | "\n", 359 | "# Getting individual element value\n", 360 | "arr_2d[1][0]" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 17, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/plain": [ 371 | "20" 372 | ] 373 | }, 374 | "execution_count": 17, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "# Getting individual element value\n", 381 | "arr_2d[1,0]" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 18, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "data": { 391 | "text/plain": [ 392 | "array([[10, 15],\n", 393 | " [25, 30]])" 394 | ] 395 | }, 396 | "execution_count": 18, 397 | 
"metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": [ 402 | "# 2D array slicing\n", 403 | "\n", 404 | "#Shape (2,2) from top right corner\n", 405 | "arr_2d[:2,1:]" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 19, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "array([35, 40, 45])" 417 | ] 418 | }, 419 | "execution_count": 19, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "#Shape bottom row\n", 426 | "arr_2d[2]" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 20, 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "data": { 436 | "text/plain": [ 437 | "array([35, 40, 45])" 438 | ] 439 | }, 440 | "execution_count": 20, 441 | "metadata": {}, 442 | "output_type": "execute_result" 443 | } 444 | ], 445 | "source": [ 446 | "#Shape bottom row\n", 447 | "arr_2d[2,:]" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": {}, 453 | "source": [ 454 | "### Fancy Indexing\n", 455 | "\n", 456 | "Fancy indexing allows you to select entire rows or columns out of order. To show this, let's quickly build out a NumPy array:" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 21, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "#Set up matrix\n", 466 | "arr2d = np.zeros((10,10))" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 22, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "#Length of array\n", 476 | "arr_length = arr2d.shape[1]" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 23, 482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "data": { 486 | "text/plain": [ 487 | "array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", 488 | " [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n", 489 | " [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],\n", 490 | " [ 
3., 3., 3., 3., 3., 3., 3., 3., 3., 3.],\n", 491 | " [ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],\n", 492 | " [ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],\n", 493 | " [ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],\n", 494 | " [ 7., 7., 7., 7., 7., 7., 7., 7., 7., 7.],\n", 495 | " [ 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.],\n", 496 | " [ 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.]])" 497 | ] 498 | }, 499 | "execution_count": 23, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "#Set up array\n", 506 | "\n", 507 | "for i in range(arr_length):\n", 508 | " arr2d[i] = i\n", 509 | " \n", 510 | "arr2d" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "Fancy indexing allows the following" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 24, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "data": { 527 | "text/plain": [ 528 | "array([[ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],\n", 529 | " [ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],\n", 530 | " [ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],\n", 531 | " [ 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.]])" 532 | ] 533 | }, 534 | "execution_count": 24, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | } 538 | ], 539 | "source": [ 540 | "arr2d[[2,4,6,8]]" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 25, 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "array([[ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],\n", 552 | " [ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],\n", 553 | " [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],\n", 554 | " [ 7., 7., 7., 7., 7., 7., 7., 7., 7., 7.]])" 555 | ] 556 | }, 557 | "execution_count": 25, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "#Allows in any order\n", 564 | "arr2d[[6,4,2,7]]" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | 
"metadata": {}, 570 | "source": [ 571 | "## Selection\n", 572 | "\n", 573 | "Let's briefly go over how to use brackets for selection based on comparison operators." 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 28, 579 | "metadata": {}, 580 | "outputs": [ 581 | { 582 | "data": { 583 | "text/plain": [ 584 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 585 | ] 586 | }, 587 | "execution_count": 28, 588 | "metadata": {}, 589 | "output_type": "execute_result" 590 | } 591 | ], 592 | "source": [ 593 | "arr = np.arange(1,11)\n", 594 | "arr" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 30, 600 | "metadata": {}, 601 | "outputs": [ 602 | { 603 | "data": { 604 | "text/plain": [ 605 | "array([False, False, False, False, True, True, True, True, True, True], dtype=bool)" 606 | ] 607 | }, 608 | "execution_count": 30, 609 | "metadata": {}, 610 | "output_type": "execute_result" 611 | } 612 | ], 613 | "source": [ 614 | "arr > 4" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 31, 620 | "metadata": { 621 | "collapsed": true 622 | }, 623 | "outputs": [], 624 | "source": [ 625 | "bool_arr = arr>4" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 32, 631 | "metadata": {}, 632 | "outputs": [ 633 | { 634 | "data": { 635 | "text/plain": [ 636 | "array([False, False, False, False, True, True, True, True, True, True], dtype=bool)" 637 | ] 638 | }, 639 | "execution_count": 32, 640 | "metadata": {}, 641 | "output_type": "execute_result" 642 | } 643 | ], 644 | "source": [ 645 | "bool_arr" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": 33, 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "array([ 5, 6, 7, 8, 9, 10])" 657 | ] 658 | }, 659 | "execution_count": 33, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "arr[bool_arr]" 666 | ] 667 | }, 668 | { 669 
| "cell_type": "code", 670 | "execution_count": 34, 671 | "metadata": {}, 672 | "outputs": [ 673 | { 674 | "data": { 675 | "text/plain": [ 676 | "array([ 3, 4, 5, 6, 7, 8, 9, 10])" 677 | ] 678 | }, 679 | "execution_count": 34, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "arr[arr>2]" 686 | ] 687 | }, 688 | { 689 | "cell_type": "code", 690 | "execution_count": 37, 691 | "metadata": {}, 692 | "outputs": [ 693 | { 694 | "data": { 695 | "text/plain": [ 696 | "array([ 3, 4, 5, 6, 7, 8, 9, 10])" 697 | ] 698 | }, 699 | "execution_count": 37, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "x = 2\n", 706 | "arr[arr>x]" 707 | ] 708 | }, 709 | { 710 | "cell_type": "markdown", 711 | "metadata": {}, 712 | "source": [ 713 | "# Great Job!\n" 714 | ] 715 | } 716 | ], 717 | "metadata": { 718 | "kernelspec": { 719 | "display_name": "Python 3", 720 | "language": "python", 721 | "name": "python3" 722 | }, 723 | "language_info": { 724 | "codemirror_mode": { 725 | "name": "ipython", 726 | "version": 3 727 | }, 728 | "file_extension": ".py", 729 | "mimetype": "text/x-python", 730 | "name": "python", 731 | "nbconvert_exporter": "python", 732 | "pygments_lexer": "ipython3", 733 | "version": "3.7.6" 734 | } 735 | }, 736 | "nbformat": 4, 737 | "nbformat_minor": 1 738 | } 739 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/03-Ecommerce Purchases Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Ecommerce Purchases Exercise\n", 12 | "\n", 13 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. 
Feel free to reference the solutions. Most of the tasks can be solved in different ways. For the most part, the questions get progressively harder.\n", 14 | "\n", 15 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 16 | "\n", 17 | "Also note that all of these questions can be answered with one line of code.\n", 18 | "____\n", 19 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. **" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 86, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "**Check the head of the DataFrame.**" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 87, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " Address Lot AM or PM \\\n", 168 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 169 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 170 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 171 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 172 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 173 | "\n", 174 | " Browser Info \\\n", 175 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 176 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 177 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 178 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 179 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 180 | "\n", 181 | " Company Credit Card CC Exp Date \\\n", 182 | "0 Martinez-Herman 6011929061123406 02/20 \n", 183 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 184 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 185 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 186 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 187 | "\n", 188 | " CC Security Code CC Provider \\\n", 189 | "0 900 JCB 16 digit \n", 190 | "1 561 Mastercard \n", 191 | "2 699 JCB 16 digit \n", 192 | "3 384 Discover \n", 193 | "4 678 Diners Club / Carte Blanche \n", 194 | "\n", 195 | " Email Job \\\n", 196 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 197 | "1 anthony41@reed.com Drilling engineer \n", 198 | "2 amymiller@morales-harrison.com Customer service manager \n", 199 | "3 brent16@olson-robinson.info Drilling engineer \n", 200 | "4 christopherwright@gmail.com Fine artist \n", 201 | "\n", 202 | " IP Address Language Purchase Price \n", 203 | "0 149.146.147.205 el 98.14 \n", 204 | "1 15.160.41.51 fr 70.73 \n", 205 | "2 132.207.160.22 de 0.95 \n", 206 | "3 30.250.74.19 es 78.04 \n", 207 | "4 24.140.33.94 es 77.82 " 208 | ] 209 | }, 210 | "execution_count": 87, 211 | 
"metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "** How many rows and columns are there? **" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 88, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "\n", 234 | "RangeIndex: 10000 entries, 0 to 9999\n", 235 | "Data columns (total 14 columns):\n", 236 | "Address 10000 non-null object\n", 237 | "Lot 10000 non-null object\n", 238 | "AM or PM 10000 non-null object\n", 239 | "Browser Info 10000 non-null object\n", 240 | "Company 10000 non-null object\n", 241 | "Credit Card 10000 non-null int64\n", 242 | "CC Exp Date 10000 non-null object\n", 243 | "CC Security Code 10000 non-null int64\n", 244 | "CC Provider 10000 non-null object\n", 245 | "Email 10000 non-null object\n", 246 | "Job 10000 non-null object\n", 247 | "IP Address 10000 non-null object\n", 248 | "Language 10000 non-null object\n", 249 | "Purchase Price 10000 non-null float64\n", 250 | "dtypes: float64(1), int64(2), object(11)\n", 251 | "memory usage: 1.1+ MB\n" 252 | ] 253 | } 254 | ], 255 | "source": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "** What is the average Purchase Price? **" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 90, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "50.34730200000025" 273 | ] 274 | }, 275 | "execution_count": 90, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "** What were the highest and lowest purchase prices? 
**" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 92, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "99.989999999999995" 298 | ] 299 | }, 300 | "execution_count": 92, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 93, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "0.0" 316 | ] 317 | }, 318 | "execution_count": 93, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "** How many people have English 'en' as their Language of choice on the website? **" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 94, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": [ 340 | "Address 1098\n", 341 | "Lot 1098\n", 342 | "AM or PM 1098\n", 343 | "Browser Info 1098\n", 344 | "Company 1098\n", 345 | "Credit Card 1098\n", 346 | "CC Exp Date 1098\n", 347 | "CC Security Code 1098\n", 348 | "CC Provider 1098\n", 349 | "Email 1098\n", 350 | "Job 1098\n", 351 | "IP Address 1098\n", 352 | "Language 1098\n", 353 | "Purchase Price 1098\n", 354 | "dtype: int64" 355 | ] 356 | }, 357 | "execution_count": 94, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 95, 374 | "metadata": { 375 | "scrolled": true 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "\n", 383 | "Int64Index: 30 entries, 470 to 9979\n", 384 | "Data columns (total 14 columns):\n", 385 | "Address 30 non-null object\n", 386 | "Lot 30 non-null object\n", 387 | "AM or PM 30 non-null object\n", 388 | "Browser Info 30 non-null object\n", 389 | "Company 30 non-null object\n", 390 | "Credit Card 30 non-null int64\n", 391 | "CC Exp Date 30 non-null object\n", 392 | "CC Security Code 30 non-null int64\n", 393 | "CC Provider 30 non-null object\n", 394 | "Email 30 non-null object\n", 395 | "Job 30 non-null object\n", 396 | "IP Address 30 non-null object\n", 397 | "Language 30 non-null object\n", 398 | "Purchase Price 30 non-null float64\n", 399 | "dtypes: float64(1), int64(2), object(11)\n", 400 | "memory usage: 3.5+ KB\n" 401 | ] 402 | } 403 | ], 404 | "source": [] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 411 | "\n", 412 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 96, 418 | "metadata": {}, 419 | "outputs": [ 420 | { 421 | "data": { 422 | "text/plain": [ 423 | "PM 5068\n", 424 | "AM 4932\n", 425 | "Name: AM or PM, dtype: int64" 426 | ] 427 | }, 428 | "execution_count": 96, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "** What are the 5 most common Job Titles? 
**" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 97, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "data": { 449 | "text/plain": [ 450 | "Interior and spatial designer 31\n", 451 | "Lawyer 30\n", 452 | "Social researcher 28\n", 453 | "Purchasing manager 27\n", 454 | "Designer, jewellery 27\n", 455 | "Name: Job, dtype: int64" 456 | ] 457 | }, 458 | "execution_count": 97, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": [ 469 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 99, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/plain": [ 480 | "513 75.1\n", 481 | "Name: Purchase Price, dtype: float64" 482 | ] 483 | }, 484 | "execution_count": 99, 485 | "metadata": {}, 486 | "output_type": "execute_result" 487 | } 488 | ], 489 | "source": [] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 100, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "1234 bondellen@williams-garza.com\n", 507 | "Name: Email, dtype: object" 508 | ] 509 | }, 510 | "execution_count": 100, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "** How many people have American Express as their Credit Card Provider *and* made a purchase above $95 ?**" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 101, 527 | "metadata": {}, 528 | "outputs": 
[ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "Address 39\n", 533 | "Lot 39\n", 534 | "AM or PM 39\n", 535 | "Browser Info 39\n", 536 | "Company 39\n", 537 | "Credit Card 39\n", 538 | "CC Exp Date 39\n", 539 | "CC Security Code 39\n", 540 | "CC Provider 39\n", 541 | "Email 39\n", 542 | "Job 39\n", 543 | "IP Address 39\n", 544 | "Language 39\n", 545 | "Purchase Price 39\n", 546 | "dtype: int64" 547 | ] 548 | }, 549 | "execution_count": 101, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "** Hard: How many people have a credit card that expires in 2025? **" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 102, 566 | "metadata": {}, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "text/plain": [ 571 | "1033" 572 | ] 573 | }, 574 | "execution_count": 102, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | "source": [] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "metadata": {}, 584 | "source": [ 585 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) **" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": 56, 591 | "metadata": {}, 592 | "outputs": [ 593 | { 594 | "data": { 595 | "text/plain": [ 596 | "hotmail.com 1638\n", 597 | "yahoo.com 1616\n", 598 | "gmail.com 1605\n", 599 | "smith.com 42\n", 600 | "williams.com 37\n", 601 | "Name: Email, dtype: int64" 602 | ] 603 | }, 604 | "execution_count": 56, 605 | "metadata": {}, 606 | "output_type": "execute_result" 607 | } 608 | ], 609 | "source": [] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "# Great Job!" 
616 | ] 617 | } 618 | ], 619 | "metadata": { 620 | "kernelspec": { 621 | "display_name": "Python 3", 622 | "language": "python", 623 | "name": "python3" 624 | }, 625 | "language_info": { 626 | "codemirror_mode": { 627 | "name": "ipython", 628 | "version": 3 629 | }, 630 | "file_extension": ".py", 631 | "mimetype": "text/x-python", 632 | "name": "python", 633 | "nbconvert_exporter": "python", 634 | "pygments_lexer": "ipython3", 635 | "version": "3.7.6" 636 | } 637 | }, 638 | "nbformat": 4, 639 | "nbformat_minor": 1 640 | } 641 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/01-SF Salaries Exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SF Salaries Exercise \n", 8 | "\n", 9 | "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "** Import pandas as pd.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 6, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "** Read Salaries.csv as a dataframe called sal.**" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 7, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "** Check the head of the DataFrame. 
**" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 8, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", 160 | "
" 161 | ], 162 | "text/plain": [ 163 | " Id EmployeeName JobTitle \\\n", 164 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 165 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 166 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 167 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 168 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 169 | "\n", 170 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 171 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 172 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 173 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 174 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 175 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 176 | "\n", 177 | " Year Notes Agency Status \n", 178 | "0 2011 NaN San Francisco NaN \n", 179 | "1 2011 NaN San Francisco NaN \n", 180 | "2 2011 NaN San Francisco NaN \n", 181 | "3 2011 NaN San Francisco NaN \n", 182 | "4 2011 NaN San Francisco NaN " 183 | ] 184 | }, 185 | "execution_count": 8, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "** Use the .info() method to find out how many entries there are.**" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 9, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "\n", 209 | "RangeIndex: 148654 entries, 0 to 148653\n", 210 | "Data columns (total 13 columns):\n", 211 | "Id 148654 non-null int64\n", 212 | "EmployeeName 148654 non-null object\n", 213 | "JobTitle 148654 non-null object\n", 214 | "BasePay 148045 non-null float64\n", 215 | "OvertimePay 148650 non-null float64\n", 216 | "OtherPay 148650 non-null float64\n", 217 | "Benefits 112491 non-null 
float64\n", 218 | "TotalPay 148654 non-null float64\n", 219 | "TotalPayBenefits 148654 non-null float64\n", 220 | "Year 148654 non-null int64\n", 221 | "Notes 0 non-null float64\n", 222 | "Agency 148654 non-null object\n", 223 | "Status 0 non-null float64\n", 224 | "dtypes: float64(8), int64(2), object(3)\n", 225 | "memory usage: 14.7+ MB\n" 226 | ] 227 | } 228 | ], 229 | "source": [] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "**What is the average BasePay ?**" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 10, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "66325.44884050643" 247 | ] 248 | }, 249 | "execution_count": 10, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "** What is the highest amount of OvertimePay in the dataset ? **" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 11, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/plain": [ 271 | "245131.88" 272 | ] 273 | }, 274 | "execution_count": 11, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). 
**" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 12, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "data": { 295 | "text/plain": [ 296 | "24 CAPTAIN, FIRE SUPPRESSION\n", 297 | "Name: JobTitle, dtype: object" 298 | ] 299 | }, 300 | "execution_count": 12, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "**How much does JOSEPH DRISCOLL make (including benefits)?**" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 13, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "24 270324.91\n", 323 | "Name: TotalPayBenefits, dtype: float64" 324 | ] 325 | }, 326 | "execution_count": 13, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "**What is the name of the highest paid person (including benefits)?**" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 14, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "data": { 347 | "text/html": [ 348 | "
\n", 349 | "\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 387 | "
" 388 | ], 389 | "text/plain": [ 390 | " Id EmployeeName JobTitle \\\n", 391 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 392 | "\n", 393 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 394 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 395 | "\n", 396 | " Year Notes Agency Status \n", 397 | "0 2011 NaN San Francisco NaN " 398 | ] 399 | }, 400 | "execution_count": 14, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "**What is the name of the lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 15, 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/html": [ 422 | "
\n", 423 | "\n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 461 | "
" 462 | ], 463 | "text/plain": [ 464 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 465 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 466 | "\n", 467 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 468 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 469 | "\n", 470 | " Agency Status \n", 471 | "148653 San Francisco NaN " 472 | ] 473 | }, 474 | "execution_count": 15, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? **" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 16, 491 | "metadata": {}, 492 | "outputs": [ 493 | { 494 | "data": { 495 | "text/plain": [ 496 | "Year\n", 497 | "2011 63595.956517\n", 498 | "2012 65436.406857\n", 499 | "2013 69630.030216\n", 500 | "2014 66564.421924\n", 501 | "Name: BasePay, dtype: float64" 502 | ] 503 | }, 504 | "execution_count": 16, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "** How many unique job titles are there? **" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 17, 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "2159" 527 | ] 528 | }, 529 | "execution_count": 17, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "** What are the top 5 most common jobs? 
**" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 18, 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "Transit Operator 7036\n", 552 | "Special Nurse 4389\n", 553 | "Registered Nurse 3736\n", 554 | "Public Svc Aide-Public Works 2518\n", 555 | "Police Officer 3 2421\n", 556 | "Name: JobTitle, dtype: int64" 557 | ] 558 | }, 559 | "execution_count": 18, 560 | "metadata": {}, 561 | "output_type": "execute_result" 562 | } 563 | ], 564 | "source": [] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "**How many Job Titles were represented by only one person in 2013? (i.e. Job Titles with only one occurrence in 2013?)**" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 19, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "data": { 580 | "text/plain": [ 581 | "202" 582 | ] 583 | }, 584 | "execution_count": 19, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": {}, 594 | "source": [ 595 | "**Bonus: Is there a correlation between the length of the Job Title string and Salary?**" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 22, 601 | "metadata": {}, 602 | "outputs": [], 603 | "source": [] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 23, 608 | "metadata": {}, 609 | "outputs": [ 610 | { 611 | "data": { 612 | "text/html": [ 613 | "
\n", 614 | "\n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | "
title_lenTotalPayBenefits
title_len1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 635 | "
" 636 | ], 637 | "text/plain": [ 638 | " title_len TotalPayBenefits\n", 639 | "title_len 1.000000 -0.036878\n", 640 | "TotalPayBenefits -0.036878 1.000000" 641 | ] 642 | }, 643 | "execution_count": 23, 644 | "metadata": {}, 645 | "output_type": "execute_result" 646 | } 647 | ], 648 | "source": [] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "# Great Job!" 655 | ] 656 | } 657 | ], 658 | "metadata": { 659 | "kernelspec": { 660 | "display_name": "Python 3", 661 | "language": "python", 662 | "name": "python3" 663 | }, 664 | "language_info": { 665 | "codemirror_mode": { 666 | "name": "ipython", 667 | "version": 3 668 | }, 669 | "file_extension": ".py", 670 | "mimetype": "text/x-python", 671 | "name": "python", 672 | "nbconvert_exporter": "python", 673 | "pygments_lexer": "ipython3", 674 | "version": "3.7.6" 675 | } 676 | }, 677 | "nbformat": 4, 678 | "nbformat_minor": 1 679 | } 680 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/04-Ecommerce Purchases Exercise - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Ecommerce Purchases Exercise - Solutions\n", 12 | "\n", 13 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. 
For the most part, the questions get progressively harder.\n", 14 | "\n", 15 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 16 | "\n", 17 | "Also note that all of these questions can be answered with one line of code.\n", 18 | "____\n", 19 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. **" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 84, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 86, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "ecom = pd.read_csv('Ecommerce Purchases')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "**Check the head of the DataFrame.**" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 87, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " Address Lot AM or PM \\\n", 172 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 173 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 174 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 175 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 176 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 177 | "\n", 178 | " Browser Info \\\n", 179 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 180 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 181 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 182 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 183 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 184 | "\n", 185 | " Company Credit Card CC Exp Date \\\n", 186 | "0 Martinez-Herman 6011929061123406 02/20 \n", 187 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 188 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 189 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 190 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 191 | "\n", 192 | " CC Security Code CC Provider \\\n", 193 | "0 900 JCB 16 digit \n", 194 | "1 561 Mastercard \n", 195 | "2 699 JCB 16 digit \n", 196 | "3 384 Discover \n", 197 | "4 678 Diners Club / Carte Blanche \n", 198 | "\n", 199 | " Email Job \\\n", 200 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 201 | "1 anthony41@reed.com Drilling engineer \n", 202 | "2 amymiller@morales-harrison.com Customer service manager \n", 203 | "3 brent16@olson-robinson.info Drilling engineer \n", 204 | "4 christopherwright@gmail.com Fine artist \n", 205 | "\n", 206 | " IP Address Language Purchase Price \n", 207 | "0 149.146.147.205 el 98.14 \n", 208 | "1 15.160.41.51 fr 70.73 \n", 209 | "2 132.207.160.22 de 0.95 \n", 210 | "3 30.250.74.19 es 78.04 \n", 211 | "4 24.140.33.94 es 77.82 " 212 | ] 213 | }, 214 | "execution_count": 87, 215 | 
"metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "ecom.head()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "** How many rows and columns are there? **" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 88, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "\n", 240 | "RangeIndex: 10000 entries, 0 to 9999\n", 241 | "Data columns (total 14 columns):\n", 242 | "Address 10000 non-null object\n", 243 | "Lot 10000 non-null object\n", 244 | "AM or PM 10000 non-null object\n", 245 | "Browser Info 10000 non-null object\n", 246 | "Company 10000 non-null object\n", 247 | "Credit Card 10000 non-null int64\n", 248 | "CC Exp Date 10000 non-null object\n", 249 | "CC Security Code 10000 non-null int64\n", 250 | "CC Provider 10000 non-null object\n", 251 | "Email 10000 non-null object\n", 252 | "Job 10000 non-null object\n", 253 | "IP Address 10000 non-null object\n", 254 | "Language 10000 non-null object\n", 255 | "Purchase Price 10000 non-null float64\n", 256 | "dtypes: float64(1), int64(2), object(11)\n", 257 | "memory usage: 1.1+ MB\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "ecom.info()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "** What is the average Purchase Price? **" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 90, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "50.34730200000025" 281 | ] 282 | }, 283 | "execution_count": 90, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "ecom['Purchase Price'].mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "** What were the highest and lowest purchase prices? 
**" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 92, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "99.989999999999995" 308 | ] 309 | }, 310 | "execution_count": 92, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "ecom['Purchase Price'].max()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 93, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "0.0" 328 | ] 329 | }, 330 | "execution_count": 93, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ecom['Purchase Price'].min()" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "** How many people have English 'en' as their Language of choice on the website? **" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 94, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "Address 1098\n", 355 | "Lot 1098\n", 356 | "AM or PM 1098\n", 357 | "Browser Info 1098\n", 358 | "Company 1098\n", 359 | "Credit Card 1098\n", 360 | "CC Exp Date 1098\n", 361 | "CC Security Code 1098\n", 362 | "CC Provider 1098\n", 363 | "Email 1098\n", 364 | "Job 1098\n", 365 | "IP Address 1098\n", 366 | "Language 1098\n", 367 | "Purchase Price 1098\n", 368 | "dtype: int64" 369 | ] 370 | }, 371 | "execution_count": 94, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "ecom[ecom['Language']=='en'].count()" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 95, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "\n", 397 | "Int64Index: 30 entries, 470 to 9979\n", 398 | "Data columns (total 14 columns):\n", 399 | "Address 30 non-null object\n", 400 | "Lot 30 non-null object\n", 401 | "AM or PM 30 non-null object\n", 402 | "Browser Info 30 non-null object\n", 403 | "Company 30 non-null object\n", 404 | "Credit Card 30 non-null int64\n", 405 | "CC Exp Date 30 non-null object\n", 406 | "CC Security Code 30 non-null int64\n", 407 | "CC Provider 30 non-null object\n", 408 | "Email 30 non-null object\n", 409 | "Job 30 non-null object\n", 410 | "IP Address 30 non-null object\n", 411 | "Language 30 non-null object\n", 412 | "Purchase Price 30 non-null float64\n", 413 | "dtypes: float64(1), int64(2), object(11)\n", 414 | "memory usage: 3.5+ KB\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "ecom[ecom['Job'] == 'Lawyer'].info()" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 427 | "\n", 428 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 96, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "PM 5068\n", 440 | "AM 4932\n", 441 | "Name: AM or PM, dtype: int64" 442 | ] 443 | }, 444 | "execution_count": 96, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "ecom['AM or PM'].value_counts()" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "** What are the 5 most common Job Titles? 
**" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 97, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "Interior and spatial designer 31\n", 469 | "Lawyer 30\n", 470 | "Social researcher 28\n", 471 | "Purchasing manager 27\n", 472 | "Designer, jewellery 27\n", 473 | "Name: Job, dtype: int64" 474 | ] 475 | }, 476 | "execution_count": 97, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "ecom['Job'].value_counts().head(5)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 99, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "513 75.1\n", 501 | "Name: Purchase Price, dtype: float64" 502 | ] 503 | }, 504 | "execution_count": 99, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "ecom[ecom['Lot']=='90 WT']['Purchase Price']" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 100, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "data": { 527 | "text/plain": [ 528 | "1234 bondellen@williams-garza.com\n", 529 | "Name: Email, dtype: object" 530 | ] 531 | }, 532 | "execution_count": 100, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "ecom[ecom[\"Credit Card\"] == 4926535242672853]['Email'] " 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "** How many people have American Express as 
their Credit Card Provider *and* made a purchase above $95 ?**" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 101, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/plain": [ 556 | "Address 39\n", 557 | "Lot 39\n", 558 | "AM or PM 39\n", 559 | "Browser Info 39\n", 560 | "Company 39\n", 561 | "Credit Card 39\n", 562 | "CC Exp Date 39\n", 563 | "CC Security Code 39\n", 564 | "CC Provider 39\n", 565 | "Email 39\n", 566 | "Job 39\n", 567 | "IP Address 39\n", 568 | "Language 39\n", 569 | "Purchase Price 39\n", 570 | "dtype: int64" 571 | ] 572 | }, 573 | "execution_count": 101, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "ecom[(ecom['CC Provider']=='American Express') & (ecom['Purchase Price']>95)].count()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": {}, 585 | "source": [ 586 | "** Hard: How many people have a credit card that expires in 2025? **" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 102, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "1033" 598 | ] 599 | }, 600 | "execution_count": 102, 601 | "metadata": {}, 602 | "output_type": "execute_result" 603 | } 604 | ], 605 | "source": [ 606 | "sum(ecom['CC Exp Date'].apply(lambda x: x[3:]) == '25')" 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": {}, 612 | "source": [ 613 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) 
**" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 56, 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "data": { 623 | "text/plain": [ 624 | "hotmail.com 1638\n", 625 | "yahoo.com 1616\n", 626 | "gmail.com 1605\n", 627 | "smith.com 42\n", 628 | "williams.com 37\n", 629 | "Name: Email, dtype: int64" 630 | ] 631 | }, 632 | "execution_count": 56, 633 | "metadata": {}, 634 | "output_type": "execute_result" 635 | } 636 | ], 637 | "source": [ 638 | "ecom['Email'].apply(lambda x: x.split('@')[1]).value_counts().head(5)" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": {}, 644 | "source": [ 645 | "# Great Job!" 646 | ] 647 | } 648 | ], 649 | "metadata": { 650 | "kernelspec": { 651 | "display_name": "Python 3", 652 | "language": "python", 653 | "name": "python3" 654 | }, 655 | "language_info": { 656 | "codemirror_mode": { 657 | "name": "ipython", 658 | "version": 3 659 | }, 660 | "file_extension": ".py", 661 | "mimetype": "text/x-python", 662 | "name": "python", 663 | "nbconvert_exporter": "python", 664 | "pygments_lexer": "ipython3", 665 | "version": "3.7.6" 666 | } 667 | }, 668 | "nbformat": 4, 669 | "nbformat_minor": 1 670 | } 671 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/02-SF Salaries Exercise - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SF Salaries Exercise - Solutions\n", 8 | "\n", 9 | "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "** Import pandas as pd.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 6, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "** Read Salaries.csv as a dataframe called sal.**" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 7, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "sal = pd.read_csv('Salaries.csv')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "** Check the head of the DataFrame. **" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 8, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/html": [ 61 | "
\n", 62 | "\n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", 164 | "
" 165 | ], 166 | "text/plain": [ 167 | " Id EmployeeName JobTitle \\\n", 168 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 169 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 170 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 171 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 172 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 173 | "\n", 174 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 175 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 176 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 177 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 178 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 179 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 180 | "\n", 181 | " Year Notes Agency Status \n", 182 | "0 2011 NaN San Francisco NaN \n", 183 | "1 2011 NaN San Francisco NaN \n", 184 | "2 2011 NaN San Francisco NaN \n", 185 | "3 2011 NaN San Francisco NaN \n", 186 | "4 2011 NaN San Francisco NaN " 187 | ] 188 | }, 189 | "execution_count": 8, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "sal.head()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "** Use the .info() method to find out how many entries there are.**" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "\n", 215 | "RangeIndex: 148654 entries, 0 to 148653\n", 216 | "Data columns (total 13 columns):\n", 217 | "Id 148654 non-null int64\n", 218 | "EmployeeName 148654 non-null object\n", 219 | "JobTitle 148654 non-null object\n", 220 | "BasePay 148045 non-null float64\n", 221 | "OvertimePay 148650 non-null float64\n", 222 | "OtherPay 148650 non-null float64\n", 223 | 
"Benefits 112491 non-null float64\n", 224 | "TotalPay 148654 non-null float64\n", 225 | "TotalPayBenefits 148654 non-null float64\n", 226 | "Year 148654 non-null int64\n", 227 | "Notes 0 non-null float64\n", 228 | "Agency 148654 non-null object\n", 229 | "Status 0 non-null float64\n", 230 | "dtypes: float64(8), int64(2), object(3)\n", 231 | "memory usage: 14.7+ MB\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "sal.info() # 148654 Entries" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "**What is the average BasePay ?**" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 10, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "text/plain": [ 254 | "66325.44884050643" 255 | ] 256 | }, 257 | "execution_count": 10, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "sal['BasePay'].mean()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "** What is the highest amount of OvertimePay in the dataset ? **" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 11, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "245131.88" 282 | ] 283 | }, 284 | "execution_count": 11, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "sal['OvertimePay'].max()" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). 
**" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 12, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "24 CAPTAIN, FIRE SUPPRESSION\n", 309 | "Name: JobTitle, dtype: object" 310 | ] 311 | }, 312 | "execution_count": 12, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['JobTitle']" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "** How much does JOSEPH DRISCOLL make (including benefits)? **" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 13, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/plain": [ 336 | "24 270324.91\n", 337 | "Name: TotalPayBenefits, dtype: float64" 338 | ] 339 | }, 340 | "execution_count": 13, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['TotalPayBenefits']" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "** What is the name of highest paid person (including benefits)?**" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 14, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/html": [ 364 | "
\n", 365 | "\n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 403 | "
" 404 | ], 405 | "text/plain": [ 406 | " Id EmployeeName JobTitle \\\n", 407 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 408 | "\n", 409 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 410 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 411 | "\n", 412 | " Year Notes Agency Status \n", 413 | "0 2011 NaN San Francisco NaN " 414 | ] 415 | }, 416 | "execution_count": 14, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].max()] #['EmployeeName']\n", 423 | "# or\n", 424 | "# sal.loc[sal['TotalPayBenefits'].idxmax()]" 425 | ] 426 | }, 427 | { 428 | "cell_type": "markdown", 429 | "metadata": {}, 430 | "source": [ 431 | "** What is the name of lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 15, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/html": [ 442 | "
\n", 443 | "\n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 481 | "
" 482 | ], 483 | "text/plain": [ 484 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 485 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 486 | "\n", 487 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 488 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 489 | "\n", 490 | " Agency Status \n", 491 | "148653 San Francisco NaN " 492 | ] 493 | }, 494 | "execution_count": 15, 495 | "metadata": {}, 496 | "output_type": "execute_result" 497 | } 498 | ], 499 | "source": [ 500 | "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].min()] #['EmployeeName']\n", 501 | "# or\n", 502 | "# sal.loc[sal['TotalPayBenefits'].idxmin()]['EmployeeName']\n", 503 | "\n", 504 | "## ITS NEGATIVE!! VERY STRANGE" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": {}, 510 | "source": [ 511 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? **" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 16, 517 | "metadata": {}, 518 | "outputs": [ 519 | { 520 | "data": { 521 | "text/plain": [ 522 | "Year\n", 523 | "2011 63595.956517\n", 524 | "2012 65436.406857\n", 525 | "2013 69630.030216\n", 526 | "2014 66564.421924\n", 527 | "Name: BasePay, dtype: float64" 528 | ] 529 | }, 530 | "execution_count": 16, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "sal.groupby('Year').mean()['BasePay']" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "** How many unique job titles are there? 
**" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 17, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "data": { 553 | "text/plain": [ 554 | "2159" 555 | ] 556 | }, 557 | "execution_count": 17, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "sal['JobTitle'].nunique()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "** What are the top 5 most common jobs? **" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 18, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "data": { 580 | "text/plain": [ 581 | "Transit Operator 7036\n", 582 | "Special Nurse 4389\n", 583 | "Registered Nurse 3736\n", 584 | "Public Svc Aide-Public Works 2518\n", 585 | "Police Officer 3 2421\n", 586 | "Name: JobTitle, dtype: int64" 587 | ] 588 | }, 589 | "execution_count": 18, 590 | "metadata": {}, 591 | "output_type": "execute_result" 592 | } 593 | ], 594 | "source": [ 595 | "sal['JobTitle'].value_counts().head(5)" 596 | ] 597 | }, 598 | { 599 | "cell_type": "markdown", 600 | "metadata": {}, 601 | "source": [ 602 | "** How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013?) **" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 19, 608 | "metadata": {}, 609 | "outputs": [ 610 | { 611 | "data": { 612 | "text/plain": [ 613 | "202" 614 | ] 615 | }, 616 | "execution_count": 19, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "sum(sal[sal['Year']==2013]['JobTitle'].value_counts() == 1) # pretty tricky way to do this..." 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": {}, 628 | "source": [ 629 | "** Bonus: Is there a correlation between length of the Job Title string and Salary? 
**" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 22, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "sal['title_len'] = sal['JobTitle'].apply(len)" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 23, 644 | "metadata": {}, 645 | "outputs": [ 646 | { 647 | "data": { 648 | "text/html": [ 649 | "
\n", 650 | "\n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | "
title_lenTotalPayBenefits
title_len1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 671 | "
" 672 | ], 673 | "text/plain": [ 674 | " title_len TotalPayBenefits\n", 675 | "title_len 1.000000 -0.036878\n", 676 | "TotalPayBenefits -0.036878 1.000000" 677 | ] 678 | }, 679 | "execution_count": 23, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "sal[['title_len','TotalPayBenefits']].corr() # No correlation." 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "# Great Job!" 693 | ] 694 | } 695 | ], 696 | "metadata": { 697 | "kernelspec": { 698 | "display_name": "Python 3", 699 | "language": "python", 700 | "name": "python3" 701 | }, 702 | "language_info": { 703 | "codemirror_mode": { 704 | "name": "ipython", 705 | "version": 3 706 | }, 707 | "file_extension": ".py", 708 | "mimetype": "text/x-python", 709 | "name": "python", 710 | "nbconvert_exporter": "python", 711 | "pygments_lexer": "ipython3", 712 | "version": "3.7.6" 713 | } 714 | }, 715 | "nbformat": 4, 716 | "nbformat_minor": 1 717 | } 718 | -------------------------------------------------------------------------------- /02-Python-for-Data-Analysis-NumPy/01-NumPy Arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy \n", 8 | "\n", 9 | "NumPy (or Numpy) is a Linear Algebra Library for Python, the reason it is so important for Data Science with Python is that almost all of the libraries in the PyData Ecosystem rely on NumPy as one of their main building blocks.\n", 10 | "\n", 11 | "Numpy is also incredibly fast, as it has bindings to C libraries. For more info on why you would want to use Arrays instead of lists, check out this great [StackOverflow post](http://stackoverflow.com/questions/993984/why-numpy-instead-of-python-lists).\n", 12 | "\n", 13 | "We will only learn the basics of NumPy, to get started we need to install it!" 
14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Installation Instructions\n", 21 | "\n", 22 | "**It is highly recommended you install Python using the Anaconda distribution to make sure all underlying dependencies (such as Linear Algebra libraries) all sync up with the use of a conda install. If you have Anaconda, install NumPy by going to your terminal or command prompt and typing:**\n", 23 | " \n", 24 | " conda install numpy\n", 25 | " \n", 26 | "**If you do not have Anaconda and can not install it, please refer to [Numpy's official documentation on various installation instructions.](http://docs.scipy.org/doc/numpy-1.10.1/user/install.html)**" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Using NumPy\n", 34 | "\n", 35 | "Once you've installed NumPy you can import it as a library:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 1, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import numpy as np" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Numpy has many built-in functions and capabilities. We won't cover them all but instead we will focus on some of the most important aspects of Numpy: vectors,arrays,matrices, and number generation. Let's start by discussing arrays.\n", 54 | "\n", 55 | "# Numpy Arrays\n", 56 | "\n", 57 | "NumPy arrays are the main way we will use Numpy throughout the course. Numpy arrays essentially come in two flavors: vectors and matrices. 
Vectors are strictly 1-d arrays and matrices are 2-d (but you should note a matrix can still have only one row or one column).\n", 58 | "\n", 59 | "Let's begin our introduction by exploring how to create NumPy arrays.\n", 60 | "\n", 61 | "## Creating NumPy Arrays\n", 62 | "\n", 63 | "### From a Python List\n", 64 | "\n", 65 | "We can create an array by directly converting a list or list of lists:" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 19, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "[1, 2, 3]" 77 | ] 78 | }, 79 | "execution_count": 19, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "my_list = [1,2,3]\n", 86 | "my_list" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 16, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([1, 2, 3])" 98 | ] 99 | }, 100 | "execution_count": 16, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "np.array(my_list)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 20, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]" 118 | ] 119 | }, 120 | "execution_count": 20, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": ["my_matrix = [[1,2,3],[4,5,6],[7,8,9]]\n", "my_matrix"] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 21, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "array([[1, 2, 3],\n", 136 | " [4, 5, 6],\n", 137 | " [7, 8, 9]])" 138 | ] 139 | }, 140 | "execution_count": 21, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "np.array(my_matrix)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": 
[ 153 | "## Built-in Methods\n", 154 | "\n", 155 | 
"There are lots of built-in ways to generate Arrays" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### arange\n", 163 | "\n", 164 | "Return evenly spaced values within a given interval." 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 22, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 176 | ] 177 | }, 178 | "execution_count": 22, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "np.arange(0,10)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 23, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "array([ 0, 2, 4, 6, 8, 10])" 196 | ] 197 | }, 198 | "execution_count": 23, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "np.arange(0,11,2)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "### zeros and ones\n", 212 | "\n", 213 | "Generate arrays of zeros or ones" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 24, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "array([ 0., 0., 0.])" 225 | ] 226 | }, 227 | "execution_count": 24, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "np.zeros(3)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 26, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "array([[ 0., 0., 0., 0., 0.],\n", 245 | " [ 0., 0., 0., 0., 0.],\n", 246 | " [ 0., 0., 0., 0., 0.],\n", 247 | " [ 0., 0., 0., 0., 0.],\n", 248 | " [ 0., 0., 0., 0., 0.]])" 249 | ] 250 | }, 251 | "execution_count": 26, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | 
], 256 | "source": [ 257 | "np.zeros((5,5))" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 27, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "array([ 1., 1., 1.])" 269 | ] 270 | }, 271 | "execution_count": 27, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "np.ones(3)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 28, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "array([[ 1., 1., 1.],\n", 289 | " [ 1., 1., 1.],\n", 290 | " [ 1., 1., 1.]])" 291 | ] 292 | }, 293 | "execution_count": 28, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "np.ones((3,3))" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "### linspace\n", 307 | "Return evenly spaced numbers over a specified interval." 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 29, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([ 0., 5., 10.])" 319 | ] 320 | }, 321 | "execution_count": 29, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "np.linspace(0,10,3)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 31, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "array([ 0. 
, 0.20408163, 0.40816327, 0.6122449 ,\n", 339 | " 0.81632653, 1.02040816, 1.2244898 , 1.42857143,\n", 340 | " 1.63265306, 1.83673469, 2.04081633, 2.24489796,\n", 341 | " 2.44897959, 2.65306122, 2.85714286, 3.06122449,\n", 342 | " 3.26530612, 3.46938776, 3.67346939, 3.87755102,\n", 343 | " 4.08163265, 4.28571429, 4.48979592, 4.69387755,\n", 344 | " 4.89795918, 5.10204082, 5.30612245, 5.51020408,\n", 345 | " 5.71428571, 5.91836735, 6.12244898, 6.32653061,\n", 346 | " 6.53061224, 6.73469388, 6.93877551, 7.14285714,\n", 347 | " 7.34693878, 7.55102041, 7.75510204, 7.95918367,\n", 348 | " 8.16326531, 8.36734694, 8.57142857, 8.7755102 ,\n", 349 | " 8.97959184, 9.18367347, 9.3877551 , 9.59183673,\n", 350 | " 9.79591837, 10. ])" 351 | ] 352 | }, 353 | "execution_count": 31, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "np.linspace(0,10,50)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "## eye\n", 367 | "\n", 368 | "Creates an identity matrix" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 37, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/plain": [ 379 | "array([[ 1., 0., 0., 0.],\n", 380 | " [ 0., 1., 0., 0.],\n", 381 | " [ 0., 0., 1., 0.],\n", 382 | " [ 0., 0., 0., 1.]])" 383 | ] 384 | }, 385 | "execution_count": 37, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "np.eye(4)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "## Random \n", 399 | "\n", 400 | "Numpy also has lots of ways to create random number arrays:\n", 401 | "\n", 402 | "### rand\n", 403 | "Create an array of the given shape and populate it with\n", 404 | "random samples from a uniform distribution\n", 405 | "over ``[0, 1)``." 
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 47, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "array([ 0.11570539, 0.35279769])" 417 | ] 418 | }, 419 | "execution_count": 47, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "np.random.rand(2)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 46, 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "array([[ 0.66660768, 0.87589888, 0.12421056, 0.65074126, 0.60260888],\n", 437 | " [ 0.70027668, 0.85572434, 0.8464595 , 0.2735416 , 0.10955384],\n", 438 | " [ 0.0670566 , 0.83267738, 0.9082729 , 0.58249129, 0.12305748],\n", 439 | " [ 0.27948423, 0.66422017, 0.95639833, 0.34238788, 0.9578872 ],\n", 440 | " [ 0.72155386, 0.3035422 , 0.85249683, 0.30414307, 0.79718816]])" 441 | ] 442 | }, 443 | "execution_count": 46, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "np.random.rand(5,5)" 450 | ] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "### randn\n", 457 | "\n", 458 | "Return a sample (or samples) from the \"standard normal\" distribution. 
Unlike rand which is uniform:" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 48, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/plain": [ 469 | "array([-0.27954018, 0.90078368])" 470 | ] 471 | }, 472 | "execution_count": 48, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "np.random.randn(2)" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 45, 484 | "metadata": {}, 485 | "outputs": [ 486 | { 487 | "data": { 488 | "text/plain": [ 489 | "array([[ 0.70154515, 0.22441999, 1.33563186, 0.82872577, -0.28247509],\n", 490 | " [ 0.64489788, 0.61815094, -0.81693168, -0.30102424, -0.29030574],\n", 491 | " [ 0.8695976 , 0.413755 , 2.20047208, 0.17955692, -0.82159344],\n", 492 | " [ 0.59264235, 1.29869894, -1.18870241, 0.11590888, -0.09181687],\n", 493 | " [-0.96924265, -1.62888685, -2.05787102, -0.29705576, 0.68915542]])" 494 | ] 495 | }, 496 | "execution_count": 45, 497 | "metadata": {}, 498 | "output_type": "execute_result" 499 | } 500 | ], 501 | "source": [ 502 | "np.random.randn(5,5)" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "### randint\n", 510 | "Return random integers from `low` (inclusive) to `high` (exclusive)." 
511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 50, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "44" 522 | ] 523 | }, 524 | "execution_count": 50, 525 | "metadata": {}, 526 | "output_type": "execute_result" 527 | } 528 | ], 529 | "source": [ 530 | "np.random.randint(1,100)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 51, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "data": { 540 | "text/plain": [ 541 | "array([13, 64, 27, 63, 46, 68, 92, 10, 58, 24])" 542 | ] 543 | }, 544 | "execution_count": 51, 545 | "metadata": {}, 546 | "output_type": "execute_result" 547 | } 548 | ], 549 | "source": [ 550 | "np.random.randint(1,100,10)" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "## Array Attributes and Methods\n", 558 | "\n", 559 | "Let's discuss some useful attributes and methods of an array:" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 55, 565 | "metadata": { 566 | "collapsed": true 567 | }, 568 | "outputs": [], 569 | "source": [ 570 | "arr = np.arange(25)\n", 571 | "ranarr = np.random.randint(0,50,10)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 56, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "data": { 581 | "text/plain": [ 582 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 583 | " 17, 18, 19, 20, 21, 22, 23, 24])" 584 | ] 585 | }, 586 | "execution_count": 56, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "arr" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 57, 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "data": { 602 | "text/plain": [ 603 | "array([10, 12, 41, 17, 49, 2, 46, 3, 19, 39])" 604 | ] 605 | }, 606 | "execution_count": 57, 607 | "metadata": {}, 608 | "output_type": "execute_result" 609 | } 
610 | ], 611 | "source": [ 612 | "ranarr" 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": {}, 618 | "source": [ 619 | "## Reshape\n", 620 | "Returns an array containing the same data with a new shape." 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 54, 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/plain": [ 631 | "array([[ 0, 1, 2, 3, 4],\n", 632 | " [ 5, 6, 7, 8, 9],\n", 633 | " [10, 11, 12, 13, 14],\n", 634 | " [15, 16, 17, 18, 19],\n", 635 | " [20, 21, 22, 23, 24]])" 636 | ] 637 | }, 638 | "execution_count": 54, 639 | "metadata": {}, 640 | "output_type": "execute_result" 641 | } 642 | ], 643 | "source": [ 644 | "arr.reshape(5,5)" 645 | ] 646 | }, 647 | { 648 | "cell_type": "markdown", 649 | "metadata": {}, 650 | "source": [ 651 | "### max,min,argmax,argmin\n", 652 | "\n", 653 | "These are useful methods for finding max or min values, or for finding their index locations using argmin or argmax." 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 64, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "data": { 663 | "text/plain": [ 664 | "array([10, 12, 41, 17, 49, 2, 46, 3, 19, 39])" 665 | ] 666 | }, 667 | "execution_count": 64, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "ranarr" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": 61, 679 | "metadata": {}, 680 | "outputs": [ 681 | { 682 | "data": { 683 | "text/plain": [ 684 | "49" 685 | ] 686 | }, 687 | "execution_count": 61, 688 | "metadata": {}, 689 | "output_type": "execute_result" 690 | } 691 | ], 692 | "source": [ 693 | "ranarr.max()" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": 62, 699 | "metadata": {}, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/plain": [ 704 | "4" 705 | ] 706 | }, 707 | "execution_count": 62, 708 | "metadata": {}, 709 | "output_type": "execute_result" 710 
| } 711 | ], 712 | "source": [ 713 | "ranarr.argmax()" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 63, 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "data": { 723 | "text/plain": [ 724 | "2" 725 | ] 726 | }, 727 | "execution_count": 63, 728 | "metadata": {}, 729 | "output_type": "execute_result" 730 | } 731 | ], 732 | "source": [ 733 | "ranarr.min()" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 60, 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "data": { 743 | "text/plain": [ 744 | "5" 745 | ] 746 | }, 747 | "execution_count": 60, 748 | "metadata": {}, 749 | "output_type": "execute_result" 750 | } 751 | ], 752 | "source": [ 753 | "ranarr.argmin()" 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "metadata": {}, 759 | "source": [ 760 | "## Shape\n", 761 | "\n", 762 | "Shape is an attribute that arrays have (not a method):" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 65, 768 | "metadata": {}, 769 | "outputs": [ 770 | { 771 | "data": { 772 | "text/plain": [ 773 | "(25,)" 774 | ] 775 | }, 776 | "execution_count": 65, 777 | "metadata": {}, 778 | "output_type": "execute_result" 779 | } 780 | ], 781 | "source": [ 782 | "# Vector\n", 783 | "arr.shape" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 66, 789 | "metadata": {}, 790 | "outputs": [ 791 | { 792 | "data": { 793 | "text/plain": [ 794 | "array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 795 | " 17, 18, 19, 20, 21, 22, 23, 24]])" 796 | ] 797 | }, 798 | "execution_count": 66, 799 | "metadata": {}, 800 | "output_type": "execute_result" 801 | } 802 | ], 803 | "source": [ 804 | "# Notice the two sets of brackets\n", 805 | "arr.reshape(1,25)" 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": 69, 811 | "metadata": {}, 812 | "outputs": [ 813 | { 814 | "data": { 815 | "text/plain": [ 816 | "(1, 25)" 817 | ] 818 | }, 819 | 
"execution_count": 69, 820 | "metadata": {}, 821 | "output_type": "execute_result" 822 | } 823 | ], 824 | "source": [ 825 | "arr.reshape(1,25).shape" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "execution_count": 70, 831 | "metadata": {}, 832 | "outputs": [ 833 | { 834 | "data": { 835 | "text/plain": [ 836 | "array([[ 0],\n", 837 | " [ 1],\n", 838 | " [ 2],\n", 839 | " [ 3],\n", 840 | " [ 4],\n", 841 | " [ 5],\n", 842 | " [ 6],\n", 843 | " [ 7],\n", 844 | " [ 8],\n", 845 | " [ 9],\n", 846 | " [10],\n", 847 | " [11],\n", 848 | " [12],\n", 849 | " [13],\n", 850 | " [14],\n", 851 | " [15],\n", 852 | " [16],\n", 853 | " [17],\n", 854 | " [18],\n", 855 | " [19],\n", 856 | " [20],\n", 857 | " [21],\n", 858 | " [22],\n", 859 | " [23],\n", 860 | " [24]])" 861 | ] 862 | }, 863 | "execution_count": 70, 864 | "metadata": {}, 865 | "output_type": "execute_result" 866 | } 867 | ], 868 | "source": [ 869 | "arr.reshape(25,1)" 870 | ] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "execution_count": 76, 875 | "metadata": {}, 876 | "outputs": [ 877 | { 878 | "data": { 879 | "text/plain": [ 880 | "(25, 1)" 881 | ] 882 | }, 883 | "execution_count": 76, 884 | "metadata": {}, 885 | "output_type": "execute_result" 886 | } 887 | ], 888 | "source": [ 889 | "arr.reshape(25,1).shape" 890 | ] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 | "metadata": {}, 895 | "source": [ 896 | "### dtype\n", 897 | "\n", 898 | "You can also grab the data type of the elements in the array:" 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": 75, 904 | "metadata": {}, 905 | "outputs": [ 906 | { 907 | "data": { 908 | "text/plain": [ 909 | "dtype('int64')" 910 | ] 911 | }, 912 | "execution_count": 75, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "arr.dtype" 919 | ] 920 | }, 921 | { 922 | "cell_type": "markdown", 923 | "metadata": {}, 924 | "source": [ 925 | "# Great Job!"
926 | ] 927 | } 928 | ], 929 | "metadata": { 930 | "kernelspec": { 931 | "display_name": "Python 3", 932 | "language": "python", 933 | "name": "python3" 934 | }, 935 | "language_info": { 936 | "codemirror_mode": { 937 | "name": "ipython", 938 | "version": 3 939 | }, 940 | "file_extension": ".py", 941 | "mimetype": "text/x-python", 942 | "name": "python", 943 | "nbconvert_exporter": "python", 944 | "pygments_lexer": "ipython3", 945 | "version": "3.7.6" 946 | } 947 | }, 948 | "nbformat": 4, 949 | "nbformat_minor": 1 950 | } 951 | -------------------------------------------------------------------------------- /03-Python-for-Data-Analysis-Pandas/05-Groupby.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# GroupBy" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 31, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "# Create dataframe\n", 20 | "data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],\n", 21 | " 'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],\n", 22 | " 'Sales':[200,120,340,124,243,350]}" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 32, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "df = pd.DataFrame(data)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 33, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
CompanyPersonSales
0GOOGSam200
1GOOGCharlie120
2MSFTAmy340
3MSFTVanessa124
4FBCarl243
5FBSarah350
\n", 91 | "
" 92 | ], 93 | "text/plain": [ 94 | " Company Person Sales\n", 95 | "0 GOOG Sam 200\n", 96 | "1 GOOG Charlie 120\n", 97 | "2 MSFT Amy 340\n", 98 | "3 MSFT Vanessa 124\n", 99 | "4 FB Carl 243\n", 100 | "5 FB Sarah 350" 101 | ] 102 | }, 103 | "execution_count": 33, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "**Now you can use the .groupby() method to group rows together based on a column name. For instance, let's group by Company. This will create a DataFrameGroupBy object:**" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 34, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "" 128 | ] 129 | }, 130 | "execution_count": 34, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "df.groupby('Company')" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "You can save this object as a new variable:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 35, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "by_comp = df.groupby(\"Company\")" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "And then call aggregate methods off the object:" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 36, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/html": [ 172 | "
\n", 173 | "\n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
Sales
Company
FB296.5
GOOG160.0
MSFT232.0
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | " Sales\n", 203 | "Company \n", 204 | "FB 296.5\n", 205 | "GOOG 160.0\n", 206 | "MSFT 232.0" 207 | ] 208 | }, 209 | "execution_count": 36, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "by_comp.mean()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 37, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/html": [ 226 | "
\n", 227 | "\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | "
Sales
Company
FB296.5
GOOG160.0
MSFT232.0
\n", 253 | "
" 254 | ], 255 | "text/plain": [ 256 | " Sales\n", 257 | "Company \n", 258 | "FB 296.5\n", 259 | "GOOG 160.0\n", 260 | "MSFT 232.0" 261 | ] 262 | }, 263 | "execution_count": 37, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "df.groupby('Company').mean()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "More examples of aggregate methods:" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 38, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/html": [ 287 | "
\n", 288 | "\n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | "
Sales
Company
FB75.660426
GOOG56.568542
MSFT152.735065
\n", 314 | "
" 315 | ], 316 | "text/plain": [ 317 | " Sales\n", 318 | "Company \n", 319 | "FB 75.660426\n", 320 | "GOOG 56.568542\n", 321 | "MSFT 152.735065" 322 | ] 323 | }, 324 | "execution_count": 38, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | "by_comp.std()" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 39, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/html": [ 341 | "
\n", 342 | "\n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | "
PersonSales
Company
FBCarl243
GOOGCharlie120
MSFTAmy124
\n", 373 | "
" 374 | ], 375 | "text/plain": [ 376 | " Person Sales\n", 377 | "Company \n", 378 | "FB Carl 243\n", 379 | "GOOG Charlie 120\n", 380 | "MSFT Amy 124" 381 | ] 382 | }, 383 | "execution_count": 39, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "by_comp.min()" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 40, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/html": [ 400 | "
\n", 401 | "\n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | "
PersonSales
Company
FBSarah350
GOOGSam200
MSFTVanessa340
\n", 432 | "
" 433 | ], 434 | "text/plain": [ 435 | " Person Sales\n", 436 | "Company \n", 437 | "FB Sarah 350\n", 438 | "GOOG Sam 200\n", 439 | "MSFT Vanessa 340" 440 | ] 441 | }, 442 | "execution_count": 40, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "by_comp.max()" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 41, 454 | "metadata": {}, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/html": [ 459 | "
\n", 460 | "\n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | "
PersonSales
Company
FB22
GOOG22
MSFT22
\n", 491 | "
" 492 | ], 493 | "text/plain": [ 494 | " Person Sales\n", 495 | "Company \n", 496 | "FB 2 2\n", 497 | "GOOG 2 2\n", 498 | "MSFT 2 2" 499 | ] 500 | }, 501 | "execution_count": 41, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "by_comp.count()" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 42, 513 | "metadata": {}, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/html": [ 518 | "
\n", 519 | "\n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | "
Sales
Company
FBcount2.000000
mean296.500000
std75.660426
min243.000000
25%269.750000
50%296.500000
75%323.250000
max350.000000
GOOGcount2.000000
mean160.000000
std56.568542
min120.000000
25%140.000000
50%160.000000
75%180.000000
max200.000000
MSFTcount2.000000
mean232.000000
std152.735065
min124.000000
25%178.000000
50%232.000000
75%286.000000
max340.000000
\n", 634 | "
" 635 | ], 636 | "text/plain": [ 637 | " Sales\n", 638 | "Company \n", 639 | "FB count 2.000000\n", 640 | " mean 296.500000\n", 641 | " std 75.660426\n", 642 | " min 243.000000\n", 643 | " 25% 269.750000\n", 644 | " 50% 296.500000\n", 645 | " 75% 323.250000\n", 646 | " max 350.000000\n", 647 | "GOOG count 2.000000\n", 648 | " mean 160.000000\n", 649 | " std 56.568542\n", 650 | " min 120.000000\n", 651 | " 25% 140.000000\n", 652 | " 50% 160.000000\n", 653 | " 75% 180.000000\n", 654 | " max 200.000000\n", 655 | "MSFT count 2.000000\n", 656 | " mean 232.000000\n", 657 | " std 152.735065\n", 658 | " min 124.000000\n", 659 | " 25% 178.000000\n", 660 | " 50% 232.000000\n", 661 | " 75% 286.000000\n", 662 | " max 340.000000" 663 | ] 664 | }, 665 | "execution_count": 42, 666 | "metadata": {}, 667 | "output_type": "execute_result" 668 | } 669 | ], 670 | "source": [ 671 | "by_comp.describe()" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 43, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "data": { 681 | "text/html": [ 682 | "
\n", 683 | "\n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | "
CompanyFBGOOGMSFT
countmeanstdmin25%50%75%maxcountmean...75%maxcountmeanstdmin25%50%75%max
Sales2.0296.575.660426243.0269.75296.5323.25350.02.0160.0...180.0200.02.0232.0152.735065124.0178.0232.0286.0340.0
\n", 743 | "

1 rows × 24 columns

\n", 744 | "
" 745 | ], 746 | "text/plain": [ 747 | "Company FB GOOG \\\n", 748 | " count mean std min 25% 50% 75% max count \n", 749 | "Sales 2.0 296.5 75.660426 243.0 269.75 296.5 323.25 350.0 2.0 \n", 750 | "\n", 751 | "Company ... MSFT \\\n", 752 | " mean ... 75% max count mean std min 25% \n", 753 | "Sales 160.0 ... 180.0 200.0 2.0 232.0 152.735065 124.0 178.0 \n", 754 | "\n", 755 | "Company \n", 756 | " 50% 75% max \n", 757 | "Sales 232.0 286.0 340.0 \n", 758 | "\n", 759 | "[1 rows x 24 columns]" 760 | ] 761 | }, 762 | "execution_count": 43, 763 | "metadata": {}, 764 | "output_type": "execute_result" 765 | } 766 | ], 767 | "source": [ 768 | "by_comp.describe().transpose()" 769 | ] 770 | }, 771 | { 772 | "cell_type": "code", 773 | "execution_count": 44, 774 | "metadata": {}, 775 | "outputs": [ 776 | { 777 | "data": { 778 | "text/html": [ 779 | "
\n", 780 | "\n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | "
countmeanstdmin25%50%75%max
Sales2.0160.056.568542120.0140.0160.0180.0200.0
\n", 808 | "
" 809 | ], 810 | "text/plain": [ 811 | " count mean std min 25% 50% 75% max\n", 812 | "Sales 2.0 160.0 56.568542 120.0 140.0 160.0 180.0 200.0" 813 | ] 814 | }, 815 | "execution_count": 44, 816 | "metadata": {}, 817 | "output_type": "execute_result" 818 | } 819 | ], 820 | "source": [ 821 | "by_comp.describe().transpose()['GOOG']" 822 | ] 823 | }, 824 | { 825 | "cell_type": "markdown", 826 | "metadata": {}, 827 | "source": [ 828 | "# Great Job!" 829 | ] 830 | } 831 | ], 832 | "metadata": { 833 | "kernelspec": { 834 | "display_name": "Python 3", 835 | "language": "python", 836 | "name": "python3" 837 | }, 838 | "language_info": { 839 | "codemirror_mode": { 840 | "name": "ipython", 841 | "version": 3 842 | }, 843 | "file_extension": ".py", 844 | "mimetype": "text/x-python", 845 | "name": "python", 846 | "nbconvert_exporter": "python", 847 | "pygments_lexer": "ipython3", 848 | "version": "3.7.6" 849 | } 850 | }, 851 | "nbformat": 4, 852 | "nbformat_minor": 1 853 | } 854 | --------------------------------------------------------------------------------