├── .DS_Store ├── .ipynb_checkpoints ├── Matplotlib Visualization-checkpoint.ipynb ├── Numpy - Indexing and Selection-checkpoint.ipynb ├── Numpy_Arrays-checkpoint.ipynb └── Pandas-checkpoint.ipynb ├── 04-Numpy Exercises.ipynb ├── 04-Pandas-Exercises ├── .DS_Store ├── .ipynb_checkpoints │ ├── 01-SF Salaries Exercise-checkpoint.ipynb │ ├── 02-SF Salaries Exercise - Solutions-checkpoint.ipynb │ ├── 03-Ecommerce Purchases Exercise -checkpoint.ipynb │ └── 04-Ecommerce Purchases Exercise - Solutions-checkpoint.ipynb ├── 01-SF Salaries Exercise.ipynb ├── 03-Ecommerce Purchases Exercise .ipynb ├── Ecommerce Purchases └── Salaries.csv ├── Matplotlib Visualization.ipynb ├── Numpy - Indexing and Selection.ipynb ├── Numpy_Arrays.ipynb └── Pandas.ipynb /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pksvv/25JPandasNumpy/d8630eaa416a0160c7e719fbbee90f192376d708/.DS_Store -------------------------------------------------------------------------------- /.ipynb_checkpoints/Numpy - Indexing and Selection-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import numpy as np\n", 21 | "\n", 22 | "arr = np.arange(1,11)\n", 23 | "arr" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Bracket indexing and selection" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "10" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 
| } 48 | ], 49 | "source": [ 50 | "arr[9]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Slicing array from index 1 up to (but not including) index 5" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([2, 3, 4, 5])" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "arr[1:5]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([[ 0, 1, 2, 3, 4, 5],\n", 89 | " [ 6, 7, 8, 9, 10, 11],\n", 90 | " [12, 13, 14, 15, 16, 17],\n", 91 | " [18, 19, 20, 21, 22, 23],\n", 92 | " [24, 25, 26, 27, 28, 29],\n", 93 | " [30, 31, 32, 33, 34, 35]])" 94 | ] 95 | }, 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "mat = np.arange(36).reshape(6,6)\n", 103 | "mat" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "array([0, 1, 2, 3, 4, 5])" 115 | ] 116 | }, 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "mat[0]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "[[1, 2],\n", 131 | " [7, 8]]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "array([[1, 2],\n", 143 | " [7, 8]])" 144 | ] 145 | }, 146 | "execution_count": 11, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "mat[:2,1:3]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | 
"metadata": {}, 158 | "source": [ 159 | "[[15,16,17],\n", 160 | " [21,22,23],\n", 161 | " [27,28,29],\n", 162 | " [33,34,35]\n", 163 | " ] " 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 14, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "array([[15, 16, 17],\n", 175 | " [21, 22, 23],\n", 176 | " [27, 28, 29],\n", 177 | " [33, 34, 35]])" 178 | ] 179 | }, 180 | "execution_count": 14, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "mat[2:,3:]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "### Selection" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 15, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 205 | ] 206 | }, 207 | "execution_count": 15, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "arr" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 18, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "array([False, False, False, False, True, True, True, True, True,\n", 225 | " True])" 226 | ] 227 | }, 228 | "execution_count": 18, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "arr>4" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 19, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "array([ 5, 6, 7, 8, 9, 10])" 246 | ] 247 | }, 248 | "execution_count": 19, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "arr[arr>4]" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 20, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | 
"text/plain": [ 265 | "array([ True, True, True, True, False, False, False, False, False,\n", 266 | " False])" 267 | ] 268 | }, 269 | "execution_count": 20, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "arr<5" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 21, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "array([1, 2, 3, 4])" 287 | ] 288 | }, 289 | "execution_count": 21, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "arr[arr<5]" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "# Great Job !" 303 | ] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.7.4" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Numpy_Arrays-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The fundamental package for scientific computing with Python\n", 15 | "\n", 16 | "https://numpy.org/\n", 17 | "\n", 18 | "* powerful N-dim array object\n", 19 | "\n", 20 | "* broadcasting functions\n", 21 | "\n", 22 | "* LinAlg, Matrix Manipulation, Fourier Series" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | 
"execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "Requirement already satisfied: numpy in /Users/vipulgaur/opt/anaconda3/lib/python3.7/site-packages (1.17.2)\r\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "!pip install numpy" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import numpy as np" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Numpy Arrays\n", 56 | "\n", 57 | "* Vector - 1D array\n", 58 | "\n", 59 | "\n", 60 | "* Matrix - 2D array" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "mylist = [1,2,3]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "list" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "type(mylist)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "numpy.ndarray" 101 | ] 102 | }, 103 | "execution_count": 6, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "type(np.array(mylist))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]" 121 | ] 122 | }, 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "mymatrix = [[1,2,3],[4,5,6],[7,8,9]]\n", 130 | "mymatrix" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | 
"execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "list" 142 | ] 143 | }, 144 | "execution_count": 8, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "type(mymatrix)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 9, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "numpy.ndarray" 162 | ] 163 | }, 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "type(np.array(mymatrix))" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 10, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "array([[1, 2, 3],\n", 182 | " [4, 5, 6],\n", 183 | " [7, 8, 9]])" 184 | ] 185 | }, 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "np.array(mymatrix)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Built In Methods" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "#### arange -\n", 207 | "Returns evenly spaced values in given interval" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 11, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 219 | ] 220 | }, 221 | "execution_count": 11, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "np.arange(0,10)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([0, 2, 4, 6, 8])" 239 | ] 240 | }, 241 | "execution_count": 12, 242 | "metadata": 
{}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "np.arange(0,10,2)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "#### Zeros and Ones" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 13, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([0., 0., 0.])" 266 | ] 267 | }, 268 | "execution_count": 13, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "np.zeros(3)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 14, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "array([1., 1., 1.])" 286 | ] 287 | }, 288 | "execution_count": 14, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "np.ones(3)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 15, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "array([[0., 0., 0., 0.],\n", 306 | " [0., 0., 0., 0.],\n", 307 | " [0., 0., 0., 0.],\n", 308 | " [0., 0., 0., 0.]])" 309 | ] 310 | }, 311 | "execution_count": 15, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "np.zeros((4,4))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 16, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "array([[1., 1., 1., 1.],\n", 329 | " [1., 1., 1., 1.],\n", 330 | " [1., 1., 1., 1.],\n", 331 | " [1., 1., 1., 1.]])" 332 | ] 333 | }, 334 | "execution_count": 16, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "np.ones((4,4))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "#### Linspace -\n", 348 | "\n", 349 | 
"Returns evenly spaced numbers over a specified interval" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 50, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "array([ 0., 5., 10., 15., 20.])" 361 | ] 362 | }, 363 | "execution_count": 50, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "np.linspace(0,20,5)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "#### Identity Matrix" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 21, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "array([[1., 0., 0., 0.],\n", 388 | " [0., 1., 0., 0.],\n", 389 | " [0., 0., 1., 0.],\n", 390 | " [0., 0., 0., 1.]])" 391 | ] 392 | }, 393 | "execution_count": 21, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "np.eye(4)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "### Random Number Generation" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "#### rand\n", 414 | "\n", 415 | "Creates an array of the given shape and populates it with random samples from a uniform distribution over [0, 1)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 23, 421 | "metadata": {}, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": [ 426 | "array([0.60646952, 0.48400759])" 427 | ] 428 | }, 429 | "execution_count": 23, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "np.random.rand(2)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "#### randn\n", 443 | "Returns a sample (or samples) from the standard normal distribution" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | 
"execution_count": 24, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "array([ 1.16581461, -0.3645305 , 1.02588802, 0.38129504])" 455 | ] 456 | }, 457 | "execution_count": 24, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "np.random.randn(4)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 26, 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "data": { 473 | "text/plain": [ 474 | "array([[ 0.04504038, 0.26234797, 0.87563777, 0.62370201, 1.85319986],\n", 475 | " [ 0.23401525, 0.82444257, 0.5612748 , -0.94721164, -0.51689866],\n", 476 | " [ 1.01065937, 0.64561756, -1.11634715, 2.03449087, 0.21399304],\n", 477 | " [ 0.09878095, 0.27902561, -0.99533311, 0.98662053, 0.14694491],\n", 478 | " [-0.26181701, -0.46646171, -0.99481831, -0.54899662, 0.85076694]])" 479 | ] 480 | }, 481 | "execution_count": 26, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "np.random.randn(5,5)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "#### randint" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 30, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "array([89, 80, 79, 28, 62, 2, 50, 94, 67, 42])" 506 | ] 507 | }, 508 | "execution_count": 30, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "np.random.randint(1,100,10)" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "### Array attributes and methods" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 32, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 533 | " 17, 18, 19, 20, 21, 22, 
23, 24])" 534 | ] 535 | }, 536 | "execution_count": 32, 537 | "metadata": {}, 538 | "output_type": "execute_result" 539 | } 540 | ], 541 | "source": [ 542 | "arr = np.arange(25)\n", 543 | "arr" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 34, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "data": { 553 | "text/plain": [ 554 | "array([13, 33, 3, 37, 1, 9, 1, 36, 47, 20])" 555 | ] 556 | }, 557 | "execution_count": 34, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "ranarr = np.random.randint(0,50,10)\n", 564 | "ranarr" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "#### Reshape" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 39, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "data": { 581 | "text/plain": [ 582 | "array([[ 0, 1, 2, 3, 4],\n", 583 | " [ 5, 6, 7, 8, 9],\n", 584 | " [10, 11, 12, 13, 14],\n", 585 | " [15, 16, 17, 18, 19],\n", 586 | " [20, 21, 22, 23, 24]])" 587 | ] 588 | }, 589 | "execution_count": 39, 590 | "metadata": {}, 591 | "output_type": "execute_result" 592 | } 593 | ], 594 | "source": [ 595 | "arr.reshape(5,5)#.shape" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 37, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "data": { 605 | "text/plain": [ 606 | "(25,)" 607 | ] 608 | }, 609 | "execution_count": 37, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "arr.shape" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "#### max, min, argmax, argmin" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 40, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "data": { 632 | "text/plain": [ 633 | "array([13, 33, 3, 37, 1, 9, 1, 36, 47, 20])" 634 | ] 635 | }, 636 | "execution_count": 40, 637 | "metadata": 
{}, 638 | "output_type": "execute_result" 639 | } 640 | ], 641 | "source": [ 642 | "ranarr" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": 44, 648 | "metadata": {}, 649 | "outputs": [ 650 | { 651 | "data": { 652 | "text/plain": [ 653 | "1" 654 | ] 655 | }, 656 | "execution_count": 44, 657 | "metadata": {}, 658 | "output_type": "execute_result" 659 | } 660 | ], 661 | "source": [ 662 | "ranarr.min()" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 45, 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": [ 673 | "4" 674 | ] 675 | }, 676 | "execution_count": 45, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "ranarr.argmin()" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 42, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/plain": [ 693 | "47" 694 | ] 695 | }, 696 | "execution_count": 42, 697 | "metadata": {}, 698 | "output_type": "execute_result" 699 | } 700 | ], 701 | "source": [ 702 | "ranarr.max()" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": 43, 708 | "metadata": {}, 709 | "outputs": [ 710 | { 711 | "data": { 712 | "text/plain": [ 713 | "8" 714 | ] 715 | }, 716 | "execution_count": 43, 717 | "metadata": {}, 718 | "output_type": "execute_result" 719 | } 720 | ], 721 | "source": [ 722 | "ranarr.argmax()" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "metadata": {}, 728 | "source": [ 729 | "# Great Job !" 
730 | ] 731 | } 732 | ], 733 | "metadata": { 734 | "kernelspec": { 735 | "display_name": "Python 3", 736 | "language": "python", 737 | "name": "python3" 738 | }, 739 | "language_info": { 740 | "codemirror_mode": { 741 | "name": "ipython", 742 | "version": 3 743 | }, 744 | "file_extension": ".py", 745 | "mimetype": "text/x-python", 746 | "name": "python", 747 | "nbconvert_exporter": "python", 748 | "pygments_lexer": "ipython3", 749 | "version": "3.7.4" 750 | } 751 | }, 752 | "nbformat": 4, 753 | "nbformat_minor": 2 754 | } 755 | -------------------------------------------------------------------------------- /04-Numpy Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Exercises " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "#### Import NumPy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "#### Create an array of 10 zeros " 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "#### Create an array of 10 ones" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 67 | ] 68 | }, 69 | "execution_count": 3, 70 | "metadata": {}, 71 | 
"output_type": "execute_result" 72 | } 73 | ], 74 | "source": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "#### Create an array of 10 fives" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "array([ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])" 92 | ] 93 | }, 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "#### Create an array of the integers from 10 to 50" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,\n", 117 | " 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,\n", 118 | " 44, 45, 46, 47, 48, 49, 50])" 119 | ] 120 | }, 121 | "execution_count": 5, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "#### Create an array of all the even integers from 10 to 50" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,\n", 144 | " 44, 46, 48, 50])" 145 | ] 146 | }, 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "#### Create a 3x3 matrix with values ranging from 0 to 8" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 
7, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "array([[0, 1, 2],\n", 170 | " [3, 4, 5],\n", 171 | " [6, 7, 8]])" 172 | ] 173 | }, 174 | "execution_count": 7, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "#### Create a 3x3 identity matrix" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 8, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "array([[ 1., 0., 0.],\n", 197 | " [ 0., 1., 0.],\n", 198 | " [ 0., 0., 1.]])" 199 | ] 200 | }, 201 | "execution_count": 8, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "#### Use NumPy to generate a random number between 0 and 1" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 15, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "array([ 0.42829726])" 224 | ] 225 | }, 226 | "execution_count": 15, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "#### Use NumPy to generate an array of 25 random numbers sampled from a standard normal distribution" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 33, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "array([ 1.32031013, 1.6798602 , -0.42985892, -1.53116655, 0.85753232,\n", 249 | " 0.87339938, 0.35668636, -1.47491157, 0.15349697, 0.99530727,\n", 250 | " -0.94865451, -1.69174783, 1.57525349, -0.70615234, 0.10991879,\n", 251 | " -0.49478947, 1.08279872, 0.76488333, -2.3039931 , 0.35401124,\n", 252 | " 
-0.45454399, -0.64754649, -0.29391671, 0.02339861, 0.38272124])" 253 | ] 254 | }, 255 | "execution_count": 33, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "#### Create the following matrix:" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 35, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "array([[ 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ],\n", 278 | " [ 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 ],\n", 279 | " [ 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ],\n", 280 | " [ 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ],\n", 281 | " [ 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 ],\n", 282 | " [ 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 ],\n", 283 | " [ 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 ],\n", 284 | " [ 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 ],\n", 285 | " [ 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 ],\n", 286 | " [ 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]])" 287 | ] 288 | }, 289 | "execution_count": 35, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### Create an array of 20 linearly spaced points between 0 and 1:" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 36, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "array([ 0. 
, 0.05263158, 0.10526316, 0.15789474, 0.21052632,\n", 312 | " 0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,\n", 313 | " 0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,\n", 314 | " 0.78947368, 0.84210526, 0.89473684, 0.94736842, 1. ])" 315 | ] 316 | }, 317 | "execution_count": 36, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## Numpy Indexing and Selection\n", 329 | "\n", 330 | "Now you will be given a few matrices, and be asked to replicate the resulting matrix outputs:" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 38, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "array([[ 1, 2, 3, 4, 5],\n", 342 | " [ 6, 7, 8, 9, 10],\n", 343 | " [11, 12, 13, 14, 15],\n", 344 | " [16, 17, 18, 19, 20],\n", 345 | " [21, 22, 23, 24, 25]])" 346 | ] 347 | }, 348 | "execution_count": 38, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "mat = np.arange(1,26).reshape(5,5)\n", 355 | "mat" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 40, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "array([[12, 13, 14, 15],\n", 367 | " [17, 18, 19, 20],\n", 368 | " [22, 23, 24, 25]])" 369 | ] 370 | }, 371 | "execution_count": 40, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 41, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "20" 387 | ] 388 | }, 389 | "execution_count": 41, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 42, 399 | "metadata": {}, 400 | 
"outputs": [ 401 | { 402 | "data": { 403 | "text/plain": [ 404 | "array([[ 2],\n", 405 | " [ 7],\n", 406 | " [12]])" 407 | ] 408 | }, 409 | "execution_count": 42, 410 | "metadata": {}, 411 | "output_type": "execute_result" 412 | } 413 | ], 414 | "source": [] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 46, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/plain": [ 424 | "array([21, 22, 23, 24, 25])" 425 | ] 426 | }, 427 | "execution_count": 46, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 49, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "array([[16, 17, 18, 19, 20],\n", 443 | " [21, 22, 23, 24, 25]])" 444 | ] 445 | }, 446 | "execution_count": 49, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "#### Get the sum of all the values in mat" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 50, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "325" 469 | ] 470 | }, 471 | "execution_count": 50, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": {}, 481 | "source": [ 482 | "#### Get the standard deviation of the values in mat" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 51, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "data": { 492 | "text/plain": [ 493 | "7.2111025509279782" 494 | ] 495 | }, 496 | "execution_count": 51, 497 | "metadata": {}, 498 | "output_type": "execute_result" 499 | } 500 | ], 501 | "source": [] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "metadata": {}, 506 | 
"source": [ 507 | "#### Get the sum of all the columns in mat" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 53, 513 | "metadata": {}, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/plain": [ 518 | "array([55, 60, 65, 70, 75])" 519 | ] 520 | }, 521 | "execution_count": 53, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [] 527 | } 528 | ], 529 | "metadata": { 530 | "kernelspec": { 531 | "display_name": "Python 3", 532 | "language": "python", 533 | "name": "python3" 534 | }, 535 | "language_info": { 536 | "codemirror_mode": { 537 | "name": "ipython", 538 | "version": 3 539 | }, 540 | "file_extension": ".py", 541 | "mimetype": "text/x-python", 542 | "name": "python", 543 | "nbconvert_exporter": "python", 544 | "pygments_lexer": "ipython3", 545 | "version": "3.7.4" 546 | } 547 | }, 548 | "nbformat": 4, 549 | "nbformat_minor": 1 550 | } 551 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pksvv/25JPandasNumpy/d8630eaa416a0160c7e719fbbee90f192376d708/04-Pandas-Exercises/.DS_Store -------------------------------------------------------------------------------- /04-Pandas-Exercises/.ipynb_checkpoints/01-SF Salaries Exercise-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SF Salaries Exercise \n", 8 | "\n", 9 | "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "** Import pandas as pd.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "** Read Salaries.csv as a dataframe called sal.**" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "\u001b[31m01-SF Salaries Exercise.ipynb\u001b[m\u001b[m* \u001b[31mEcommerce Purchases\u001b[m\u001b[m*\r\n", 45 | "\u001b[31m03-Ecommerce Purchases Exercise .ipynb\u001b[m\u001b[m* \u001b[31mSalaries.csv\u001b[m\u001b[m*\r\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "ls" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "df = pd.read_csv('Salaries.csv')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "** Check the head of the DataFrame. **" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 8, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/html": [ 77 | "
\n", 78 | "\n", 91 | "\n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
56DAVID SULLIVANASSISTANT DEPUTY CHIEF II118602.008601.00189082.74NaN316285.74316285.742011NaNSan FranciscoNaN
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " Id EmployeeName JobTitle \\\n", 213 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 214 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 215 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 216 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 217 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 218 | "5 6 DAVID SULLIVAN ASSISTANT DEPUTY CHIEF II \n", 219 | "\n", 220 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 221 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 222 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 223 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 224 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 225 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 226 | "5 118602.00 8601.00 189082.74 NaN 316285.74 316285.74 \n", 227 | "\n", 228 | " Year Notes Agency Status \n", 229 | "0 2011 NaN San Francisco NaN \n", 230 | "1 2011 NaN San Francisco NaN \n", 231 | "2 2011 NaN San Francisco NaN \n", 232 | "3 2011 NaN San Francisco NaN \n", 233 | "4 2011 NaN San Francisco NaN \n", 234 | "5 2011 NaN San Francisco NaN " 235 | ] 236 | }, 237 | "execution_count": 8, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "df.head(6)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "** Use the .info() method to find out how many entries there are.**" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "\n", 263 | "RangeIndex: 148654 entries, 0 to 148653\n", 264 | "Data columns (total 13 columns):\n", 265 | "Id 148654 non-null int64\n", 266 | "EmployeeName 148654 non-null object\n", 267 | "JobTitle 
148654 non-null object\n", 268 | "BasePay 148045 non-null float64\n", 269 | "OvertimePay 148650 non-null float64\n", 270 | "OtherPay 148650 non-null float64\n", 271 | "Benefits 112491 non-null float64\n", 272 | "TotalPay 148654 non-null float64\n", 273 | "TotalPayBenefits 148654 non-null float64\n", 274 | "Year 148654 non-null int64\n", 275 | "Notes 0 non-null float64\n", 276 | "Agency 148654 non-null object\n", 277 | "Status 0 non-null float64\n", 278 | "dtypes: float64(8), int64(2), object(3)\n", 279 | "memory usage: 14.7+ MB\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "df.info()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 10, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "'/Users/vipulgaur/Documents/Training/advanced-ML/25Jul/DataWrangling/04-Pandas-Exercises'" 296 | ] 297 | }, 298 | "execution_count": 10, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "pwd" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "**What is the average BasePay ?**" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 10, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "66325.44884050643" 323 | ] 324 | }, 325 | "execution_count": 10, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "** What is the highest amount of OvertimePay in the dataset ? 
**" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 11, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "245131.88" 348 | ] 349 | }, 350 | "execution_count": 11, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). **" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 12, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "24 CAPTAIN, FIRE SUPPRESSION\n", 373 | "Name: JobTitle, dtype: object" 374 | ] 375 | }, 376 | "execution_count": 12, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "** How much does JOSEPH DRISCOLL make (including benefits)? **" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 13, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "data": { 397 | "text/plain": [ 398 | "24 270324.91\n", 399 | "Name: TotalPayBenefits, dtype: float64" 400 | ] 401 | }, 402 | "execution_count": 13, 403 | "metadata": {}, 404 | "output_type": "execute_result" 405 | } 406 | ], 407 | "source": [] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "** What is the name of highest paid person (including benefits)?**" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 14, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/html": [ 424 | "
\n", 425 | "\n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 463 | "
" 464 | ], 465 | "text/plain": [ 466 | " Id EmployeeName JobTitle \\\n", 467 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 468 | "\n", 469 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 470 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 471 | "\n", 472 | " Year Notes Agency Status \n", 473 | "0 2011 NaN San Francisco NaN " 474 | ] 475 | }, 476 | "execution_count": 14, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "** What is the name of lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 15, 493 | "metadata": {}, 494 | "outputs": [ 495 | { 496 | "data": { 497 | "text/html": [ 498 | "
\n", 499 | "\n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 537 | "
" 538 | ], 539 | "text/plain": [ 540 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 541 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 542 | "\n", 543 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 544 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 545 | "\n", 546 | " Agency Status \n", 547 | "148653 San Francisco NaN " 548 | ] 549 | }, 550 | "execution_count": 15, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? **" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 16, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/plain": [ 572 | "Year\n", 573 | "2011 63595.956517\n", 574 | "2012 65436.406857\n", 575 | "2013 69630.030216\n", 576 | "2014 66564.421924\n", 577 | "Name: BasePay, dtype: float64" 578 | ] 579 | }, 580 | "execution_count": 16, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "** How many unique job titles are there? **" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 17, 597 | "metadata": {}, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/plain": [ 602 | "2159" 603 | ] 604 | }, 605 | "execution_count": 17, 606 | "metadata": {}, 607 | "output_type": "execute_result" 608 | } 609 | ], 610 | "source": [] 611 | }, 612 | { 613 | "cell_type": "markdown", 614 | "metadata": {}, 615 | "source": [ 616 | "** What are the top 5 most common jobs? 
**" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 18, 622 | "metadata": {}, 623 | "outputs": [ 624 | { 625 | "data": { 626 | "text/plain": [ 627 | "Transit Operator 7036\n", 628 | "Special Nurse 4389\n", 629 | "Registered Nurse 3736\n", 630 | "Public Svc Aide-Public Works 2518\n", 631 | "Police Officer 3 2421\n", 632 | "Name: JobTitle, dtype: int64" 633 | ] 634 | }, 635 | "execution_count": 18, 636 | "metadata": {}, 637 | "output_type": "execute_result" 638 | } 639 | ], 640 | "source": [] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": {}, 645 | "source": [ 646 | "** How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013?) **" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 19, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "202" 658 | ] 659 | }, 660 | "execution_count": 19, 661 | "metadata": {}, 662 | "output_type": "execute_result" 663 | } 664 | ], 665 | "source": [] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "** How many people have the word Chief in their job title? (This is pretty tricky) **" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 20, 677 | "metadata": { 678 | "collapsed": true 679 | }, 680 | "outputs": [], 681 | "source": [] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": 21, 686 | "metadata": {}, 687 | "outputs": [ 688 | { 689 | "data": { 690 | "text/plain": [ 691 | "477" 692 | ] 693 | }, 694 | "execution_count": 21, 695 | "metadata": {}, 696 | "output_type": "execute_result" 697 | } 698 | ], 699 | "source": [] 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": [ 705 | "** Bonus: Is there a correlation between length of the Job Title string and Salary? 
**" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 22, 711 | "metadata": {}, 712 | "outputs": [], 713 | "source": [] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 23, 718 | "metadata": {}, 719 | "outputs": [ 720 | { 721 | "data": { 722 | "text/html": [ 723 | "
\n", 724 | "\n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | "
title_lenTotalPayBenefits
title_len1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 745 | "
" 746 | ], 747 | "text/plain": [ 748 | " title_len TotalPayBenefits\n", 749 | "title_len 1.000000 -0.036878\n", 750 | "TotalPayBenefits -0.036878 1.000000" 751 | ] 752 | }, 753 | "execution_count": 23, 754 | "metadata": {}, 755 | "output_type": "execute_result" 756 | } 757 | ], 758 | "source": [] 759 | }, 760 | { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "# Great Job!" 765 | ] 766 | } 767 | ], 768 | "metadata": { 769 | "kernelspec": { 770 | "display_name": "Python 3", 771 | "language": "python", 772 | "name": "python3" 773 | }, 774 | "language_info": { 775 | "codemirror_mode": { 776 | "name": "ipython", 777 | "version": 3 778 | }, 779 | "file_extension": ".py", 780 | "mimetype": "text/x-python", 781 | "name": "python", 782 | "nbconvert_exporter": "python", 783 | "pygments_lexer": "ipython3", 784 | "version": "3.7.4" 785 | } 786 | }, 787 | "nbformat": 4, 788 | "nbformat_minor": 1 789 | } 790 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/.ipynb_checkpoints/02-SF Salaries Exercise - Solutions-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# SF Salaries Exercise - Solutions\n", 18 | "\n", 19 | "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "** Import pandas as pd.**" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 6, 32 | "metadata": { 33 | "collapsed": true 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import pandas as pd" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "** Read Salaries.csv as a dataframe called sal.**" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 7, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "sal = pd.read_csv('Salaries.csv')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "** Check the head of the DataFrame. **" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 8, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/html": [ 71 | "
\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", 174 | "
" 175 | ], 176 | "text/plain": [ 177 | " Id EmployeeName JobTitle \\\n", 178 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 179 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 180 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 181 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 182 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 183 | "\n", 184 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 185 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 186 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 187 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 188 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 189 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 190 | "\n", 191 | " Year Notes Agency Status \n", 192 | "0 2011 NaN San Francisco NaN \n", 193 | "1 2011 NaN San Francisco NaN \n", 194 | "2 2011 NaN San Francisco NaN \n", 195 | "3 2011 NaN San Francisco NaN \n", 196 | "4 2011 NaN San Francisco NaN " 197 | ] 198 | }, 199 | "execution_count": 8, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "sal.head()" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "** Use the .info() method to find out how many entries there are.**" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 9, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "\n", 225 | "RangeIndex: 148654 entries, 0 to 148653\n", 226 | "Data columns (total 13 columns):\n", 227 | "Id 148654 non-null int64\n", 228 | "EmployeeName 148654 non-null object\n", 229 | "JobTitle 148654 non-null object\n", 230 | "BasePay 148045 non-null float64\n", 231 | "OvertimePay 148650 non-null float64\n", 232 | "OtherPay 148650 non-null float64\n", 233 | 
"Benefits 112491 non-null float64\n", 234 | "TotalPay 148654 non-null float64\n", 235 | "TotalPayBenefits 148654 non-null float64\n", 236 | "Year 148654 non-null int64\n", 237 | "Notes 0 non-null float64\n", 238 | "Agency 148654 non-null object\n", 239 | "Status 0 non-null float64\n", 240 | "dtypes: float64(8), int64(2), object(3)\n", 241 | "memory usage: 14.7+ MB\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "sal.info() # 148654 Entries" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "**What is the average BasePay ?**" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 10, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "66325.44884050643" 265 | ] 266 | }, 267 | "execution_count": 10, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "sal['BasePay'].mean()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "** What is the highest amount of OvertimePay in the dataset ? **" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 11, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "245131.88" 292 | ] 293 | }, 294 | "execution_count": 11, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "sal['OvertimePay'].max()" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). 
**" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 12, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "24 CAPTAIN, FIRE SUPPRESSION\n", 319 | "Name: JobTitle, dtype: object" 320 | ] 321 | }, 322 | "execution_count": 12, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['JobTitle']" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "** How much does JOSEPH DRISCOLL make (including benefits)? **" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 13, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "24 270324.91\n", 347 | "Name: TotalPayBenefits, dtype: float64" 348 | ] 349 | }, 350 | "execution_count": 13, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "sal[sal['EmployeeName']=='JOSEPH DRISCOLL']['TotalPayBenefits']" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "** What is the name of highest paid person (including benefits)?**" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 14, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/html": [ 374 | "
\n", 375 | "\n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 413 | "
" 414 | ], 415 | "text/plain": [ 416 | " Id EmployeeName JobTitle \\\n", 417 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 418 | "\n", 419 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 420 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 421 | "\n", 422 | " Year Notes Agency Status \n", 423 | "0 2011 NaN San Francisco NaN " 424 | ] 425 | }, 426 | "execution_count": 14, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].max()] #['EmployeeName']\n", 433 | "# or\n", 434 | "# sal.loc[sal['TotalPayBenefits'].idxmax()]" 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "** What is the name of lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 15, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/html": [ 452 | "
\n", 453 | "\n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 491 | 
" 492 | ], 493 | "text/plain": [ 494 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 495 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 496 | "\n", 497 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 498 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 499 | "\n", 500 | " Agency Status \n", 501 | "148653 San Francisco NaN " 502 | ] 503 | }, 504 | "execution_count": 15, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "sal[sal['TotalPayBenefits']== sal['TotalPayBenefits'].min()] #['EmployeeName']\n", 511 | "# or\n", 512 | "# sal.loc[sal['TotalPayBenefits'].idxmin()]['EmployeeName']\n", 513 | "\n", 514 | "## IT'S NEGATIVE!! VERY STRANGE" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? **" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 16, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "Year\n", 533 | "2011 63595.956517\n", 534 | "2012 65436.406857\n", 535 | "2013 69630.030216\n", 536 | "2014 66564.421924\n", 537 | "Name: BasePay, dtype: float64" 538 | ] 539 | }, 540 | "execution_count": 16, 541 | "metadata": {}, 542 | "output_type": "execute_result" 543 | } 544 | ], 545 | "source": [ 546 | "sal.groupby('Year').mean()['BasePay']" 547 | ] 548 | }, 549 | { 550 | "cell_type": "markdown", 551 | "metadata": {}, 552 | "source": [ 553 | "** How many unique job titles are there? 
**" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 17, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "data": { 563 | "text/plain": [ 564 | "2159" 565 | ] 566 | }, 567 | "execution_count": 17, 568 | "metadata": {}, 569 | "output_type": "execute_result" 570 | } 571 | ], 572 | "source": [ 573 | "sal['JobTitle'].nunique()" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "** What are the top 5 most common jobs? **" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 18, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "Transit Operator 7036\n", 592 | "Special Nurse 4389\n", 593 | "Registered Nurse 3736\n", 594 | "Public Svc Aide-Public Works 2518\n", 595 | "Police Officer 3 2421\n", 596 | "Name: JobTitle, dtype: int64" 597 | ] 598 | }, 599 | "execution_count": 18, 600 | "metadata": {}, 601 | "output_type": "execute_result" 602 | } 603 | ], 604 | "source": [ 605 | "sal['JobTitle'].value_counts().head(5)" 606 | ] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "metadata": {}, 611 | "source": [ 612 | "** How many Job Titles were represented by only one person in 2013? (i.e. Job Titles with only one occurrence in 2013?) **" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": 19, 618 | "metadata": {}, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/plain": [ 623 | "202" 624 | ] 625 | }, 626 | "execution_count": 19, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "sum(sal[sal['Year']==2013]['JobTitle'].value_counts() == 1) # pretty tricky way to do this..." 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "** How many people have the word Chief in their job title? 
(This is pretty tricky) **" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 20, 645 | "metadata": { 646 | "collapsed": true 647 | }, 648 | "outputs": [], 649 | "source": [ 650 | "def chief_string(title):\n", 651 | " if 'chief' in title.lower():\n", 652 | " return True\n", 653 | " else:\n", 654 | " return False" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": 21, 660 | "metadata": {}, 661 | "outputs": [ 662 | { 663 | "data": { 664 | "text/plain": [ 665 | "477" 666 | ] 667 | }, 668 | "execution_count": 21, 669 | "metadata": {}, 670 | "output_type": "execute_result" 671 | } 672 | ], 673 | "source": [ 674 | "sum(sal['JobTitle'].apply(lambda x: chief_string(x)))" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "** Bonus: Is there a correlation between length of the Job Title string and Salary? **" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 22, 687 | "metadata": {}, 688 | "outputs": [], 689 | "source": [ 690 | "sal['title_len'] = sal['JobTitle'].apply(len)" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 23, 696 | "metadata": {}, 697 | "outputs": [ 698 | { 699 | "data": { 700 | "text/html": [ 701 | "
\n", 702 | "\n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | "
title_lenTotalPayBenefits
title_len1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 723 | "
" 724 | ], 725 | "text/plain": [ 726 | " title_len TotalPayBenefits\n", 727 | "title_len 1.000000 -0.036878\n", 728 | "TotalPayBenefits -0.036878 1.000000" 729 | ] 730 | }, 731 | "execution_count": 23, 732 | "metadata": {}, 733 | "output_type": "execute_result" 734 | } 735 | ], 736 | "source": [ 737 | "sal[['title_len','TotalPayBenefits']].corr() # No correlation." 738 | ] 739 | }, 740 | { 741 | "cell_type": "markdown", 742 | "metadata": {}, 743 | "source": [ 744 | "# Great Job!" 745 | ] 746 | } 747 | ], 748 | "metadata": { 749 | "kernelspec": { 750 | "display_name": "Python 3", 751 | "language": "python", 752 | "name": "python3" 753 | }, 754 | "language_info": { 755 | "codemirror_mode": { 756 | "name": "ipython", 757 | "version": 3 758 | }, 759 | "file_extension": ".py", 760 | "mimetype": "text/x-python", 761 | "name": "python", 762 | "nbconvert_exporter": "python", 763 | "pygments_lexer": "ipython3", 764 | "version": "3.7.4" 765 | } 766 | }, 767 | "nbformat": 4, 768 | "nbformat_minor": 1 769 | } 770 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/.ipynb_checkpoints/03-Ecommerce Purchases Exercise -checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ecommerce Purchases Exercise\n", 8 | "\n", 9 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. 
For the most part, the questions get progressively harder.\n", 10 | "\n", 11 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 12 | "\n", 13 | "Also note that all of these questions can be answered with one line of code.\n", 14 | "____\n", 15 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. **" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 84, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 86, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "**Check the head of the DataFrame.**" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 87, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 160 | "
" 161 | ], 162 | "text/plain": [ 163 | " Address Lot AM or PM \\\n", 164 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 165 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 166 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 167 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 168 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 169 | "\n", 170 | " Browser Info \\\n", 171 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 172 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 173 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 174 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 175 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 176 | "\n", 177 | " Company Credit Card CC Exp Date \\\n", 178 | "0 Martinez-Herman 6011929061123406 02/20 \n", 179 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 180 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 181 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 182 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 183 | "\n", 184 | " CC Security Code CC Provider \\\n", 185 | "0 900 JCB 16 digit \n", 186 | "1 561 Mastercard \n", 187 | "2 699 JCB 16 digit \n", 188 | "3 384 Discover \n", 189 | "4 678 Diners Club / Carte Blanche \n", 190 | "\n", 191 | " Email Job \\\n", 192 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 193 | "1 anthony41@reed.com Drilling engineer \n", 194 | "2 amymiller@morales-harrison.com Customer service manager \n", 195 | "3 brent16@olson-robinson.info Drilling engineer \n", 196 | "4 christopherwright@gmail.com Fine artist \n", 197 | "\n", 198 | " IP Address Language Purchase Price \n", 199 | "0 149.146.147.205 el 98.14 \n", 200 | "1 15.160.41.51 fr 70.73 \n", 201 | "2 132.207.160.22 de 0.95 \n", 202 | "3 30.250.74.19 es 78.04 \n", 203 | "4 24.140.33.94 es 77.82 " 204 | ] 205 | }, 206 | "execution_count": 87, 207 | 
"metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "** How many rows and columns are there? **" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 88, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "\n", 230 | "RangeIndex: 10000 entries, 0 to 9999\n", 231 | "Data columns (total 14 columns):\n", 232 | "Address 10000 non-null object\n", 233 | "Lot 10000 non-null object\n", 234 | "AM or PM 10000 non-null object\n", 235 | "Browser Info 10000 non-null object\n", 236 | "Company 10000 non-null object\n", 237 | "Credit Card 10000 non-null int64\n", 238 | "CC Exp Date 10000 non-null object\n", 239 | "CC Security Code 10000 non-null int64\n", 240 | "CC Provider 10000 non-null object\n", 241 | "Email 10000 non-null object\n", 242 | "Job 10000 non-null object\n", 243 | "IP Address 10000 non-null object\n", 244 | "Language 10000 non-null object\n", 245 | "Purchase Price 10000 non-null float64\n", 246 | "dtypes: float64(1), int64(2), object(11)\n", 247 | "memory usage: 1.1+ MB\n" 248 | ] 249 | } 250 | ], 251 | "source": [] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "** What is the average Purchase Price? **" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 90, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "50.34730200000025" 269 | ] 270 | }, 271 | "execution_count": 90, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "** What were the highest and lowest purchase prices? 
**" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 92, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "99.989999999999995" 294 | ] 295 | }, 296 | "execution_count": 92, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 93, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "0.0" 312 | ] 313 | }, 314 | "execution_count": 93, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "** How many people have English 'en' as their Language of choice on the website? **" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 94, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/plain": [ 336 | "Address 1098\n", 337 | "Lot 1098\n", 338 | "AM or PM 1098\n", 339 | "Browser Info 1098\n", 340 | "Company 1098\n", 341 | "Credit Card 1098\n", 342 | "CC Exp Date 1098\n", 343 | "CC Security Code 1098\n", 344 | "CC Provider 1098\n", 345 | "Email 1098\n", 346 | "Job 1098\n", 347 | "IP Address 1098\n", 348 | "Language 1098\n", 349 | "Purchase Price 1098\n", 350 | "dtype: int64" 351 | ] 352 | }, 353 | "execution_count": 94, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 95, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "name": "stdout", 374 | "output_type": "stream", 375 | "text": [ 376 | "\n", 377 | "Int64Index: 30 entries, 470 to 9979\n", 378 | "Data columns (total 14 columns):\n", 379 | "Address 30 non-null object\n", 380 | "Lot 30 non-null object\n", 381 | "AM or PM 30 non-null object\n", 382 | "Browser Info 30 non-null object\n", 383 | "Company 30 non-null object\n", 384 | "Credit Card 30 non-null int64\n", 385 | "CC Exp Date 30 non-null object\n", 386 | "CC Security Code 30 non-null int64\n", 387 | "CC Provider 30 non-null object\n", 388 | "Email 30 non-null object\n", 389 | "Job 30 non-null object\n", 390 | "IP Address 30 non-null object\n", 391 | "Language 30 non-null object\n", 392 | "Purchase Price 30 non-null float64\n", 393 | "dtypes: float64(1), int64(2), object(11)\n", 394 | "memory usage: 3.5+ KB\n" 395 | ] 396 | } 397 | ], 398 | "source": [] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 405 | "\n", 406 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 96, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "PM 5068\n", 418 | "AM 4932\n", 419 | "Name: AM or PM, dtype: int64" 420 | ] 421 | }, 422 | "execution_count": 96, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "** What are the 5 most common Job Titles? 
**" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 97, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "Interior and spatial designer 31\n", 445 | "Lawyer 30\n", 446 | "Social researcher 28\n", 447 | "Purchasing manager 27\n", 448 | "Designer, jewellery 27\n", 449 | "Name: Job, dtype: int64" 450 | ] 451 | }, 452 | "execution_count": 97, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 99, 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "data": { 473 | "text/plain": [ 474 | "513 75.1\n", 475 | "Name: Purchase Price, dtype: float64" 476 | ] 477 | }, 478 | "execution_count": 99, 479 | "metadata": {}, 480 | "output_type": "execute_result" 481 | } 482 | ], 483 | "source": [] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 100, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "1234 bondellen@williams-garza.com\n", 501 | "Name: Email, dtype: object" 502 | ] 503 | }, 504 | "execution_count": 100, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "** How many people have American Express as their Credit Card Provider *and* made a purchase above $95 ?**" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 101, 521 | "metadata": {}, 522 | "outputs": 
[ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "Address 39\n", 527 | "Lot 39\n", 528 | "AM or PM 39\n", 529 | "Browser Info 39\n", 530 | "Company 39\n", 531 | "Credit Card 39\n", 532 | "CC Exp Date 39\n", 533 | "CC Security Code 39\n", 534 | "CC Provider 39\n", 535 | "Email 39\n", 536 | "Job 39\n", 537 | "IP Address 39\n", 538 | "Language 39\n", 539 | "Purchase Price 39\n", 540 | "dtype: int64" 541 | ] 542 | }, 543 | "execution_count": 101, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "** Hard: How many people have a credit card that expires in 2025? **" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 102, 560 | "metadata": {}, 561 | "outputs": [ 562 | { 563 | "data": { 564 | "text/plain": [ 565 | "1033" 566 | ] 567 | }, 568 | "execution_count": 102, 569 | "metadata": {}, 570 | "output_type": "execute_result" 571 | } 572 | ], 573 | "source": [] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) **" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 56, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/plain": [ 590 | "hotmail.com 1638\n", 591 | "yahoo.com 1616\n", 592 | "gmail.com 1605\n", 593 | "smith.com 42\n", 594 | "williams.com 37\n", 595 | "Name: Email, dtype: int64" 596 | ] 597 | }, 598 | "execution_count": 56, 599 | "metadata": {}, 600 | "output_type": "execute_result" 601 | } 602 | ], 603 | "source": [] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "# Great Job!" 
610 | ] 611 | } 612 | ], 613 | "metadata": { 614 | "kernelspec": { 615 | "display_name": "Python 3", 616 | "language": "python", 617 | "name": "python3" 618 | }, 619 | "language_info": { 620 | "codemirror_mode": { 621 | "name": "ipython", 622 | "version": 3 623 | }, 624 | "file_extension": ".py", 625 | "mimetype": "text/x-python", 626 | "name": "python", 627 | "nbconvert_exporter": "python", 628 | "pygments_lexer": "ipython3", 629 | "version": "3.7.4" 630 | } 631 | }, 632 | "nbformat": 4, 633 | "nbformat_minor": 1 634 | } 635 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/.ipynb_checkpoints/04-Ecommerce Purchases Exercise - Solutions-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Ecommerce Purchases Exercise - Solutions\n", 12 | "\n", 13 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. For the most part, the questions get progressively harder.\n", 14 | "\n", 15 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 16 | "\n", 17 | "Also note that all of these questions can be answered with one line of code.\n", 18 | "____\n", 19 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. 
**" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 84, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 86, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "ecom = pd.read_csv('Ecommerce Purchases')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "**Check the head of the DataFrame.**" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 87, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " Address Lot AM or PM \\\n", 172 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 173 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 174 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 175 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 176 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 177 | "\n", 178 | " Browser Info \\\n", 179 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 180 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 181 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 182 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 183 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 184 | "\n", 185 | " Company Credit Card CC Exp Date \\\n", 186 | "0 Martinez-Herman 6011929061123406 02/20 \n", 187 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 188 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 189 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 190 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 191 | "\n", 192 | " CC Security Code CC Provider \\\n", 193 | "0 900 JCB 16 digit \n", 194 | "1 561 Mastercard \n", 195 | "2 699 JCB 16 digit \n", 196 | "3 384 Discover \n", 197 | "4 678 Diners Club / Carte Blanche \n", 198 | "\n", 199 | " Email Job \\\n", 200 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 201 | "1 anthony41@reed.com Drilling engineer \n", 202 | "2 amymiller@morales-harrison.com Customer service manager \n", 203 | "3 brent16@olson-robinson.info Drilling engineer \n", 204 | "4 christopherwright@gmail.com Fine artist \n", 205 | "\n", 206 | " IP Address Language Purchase Price \n", 207 | "0 149.146.147.205 el 98.14 \n", 208 | "1 15.160.41.51 fr 70.73 \n", 209 | "2 132.207.160.22 de 0.95 \n", 210 | "3 30.250.74.19 es 78.04 \n", 211 | "4 24.140.33.94 es 77.82 " 212 | ] 213 | }, 214 | "execution_count": 87, 215 | 
"metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "ecom.head()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "** How many rows and columns are there? **" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 88, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "\n", 240 | "RangeIndex: 10000 entries, 0 to 9999\n", 241 | "Data columns (total 14 columns):\n", 242 | "Address 10000 non-null object\n", 243 | "Lot 10000 non-null object\n", 244 | "AM or PM 10000 non-null object\n", 245 | "Browser Info 10000 non-null object\n", 246 | "Company 10000 non-null object\n", 247 | "Credit Card 10000 non-null int64\n", 248 | "CC Exp Date 10000 non-null object\n", 249 | "CC Security Code 10000 non-null int64\n", 250 | "CC Provider 10000 non-null object\n", 251 | "Email 10000 non-null object\n", 252 | "Job 10000 non-null object\n", 253 | "IP Address 10000 non-null object\n", 254 | "Language 10000 non-null object\n", 255 | "Purchase Price 10000 non-null float64\n", 256 | "dtypes: float64(1), int64(2), object(11)\n", 257 | "memory usage: 1.1+ MB\n" 258 | ] 259 | } 260 | ], 261 | "source": [ 262 | "ecom.info()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "** What is the average Purchase Price? **" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 90, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "50.34730200000025" 281 | ] 282 | }, 283 | "execution_count": 90, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "ecom['Purchase Price'].mean()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "** What were the highest and lowest purchase prices? 
**" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 92, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "99.989999999999995" 308 | ] 309 | }, 310 | "execution_count": 92, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "ecom['Purchase Price'].max()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 93, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": [ 327 | "0.0" 328 | ] 329 | }, 330 | "execution_count": 93, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [ 336 | "ecom['Purchase Price'].min()" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "** How many people have English 'en' as their Language of choice on the website? **" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 94, 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "Address 1098\n", 355 | "Lot 1098\n", 356 | "AM or PM 1098\n", 357 | "Browser Info 1098\n", 358 | "Company 1098\n", 359 | "Credit Card 1098\n", 360 | "CC Exp Date 1098\n", 361 | "CC Security Code 1098\n", 362 | "CC Provider 1098\n", 363 | "Email 1098\n", 364 | "Job 1098\n", 365 | "IP Address 1098\n", 366 | "Language 1098\n", 367 | "Purchase Price 1098\n", 368 | "dtype: int64" 369 | ] 370 | }, 371 | "execution_count": 94, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "ecom[ecom['Language']=='en'].count()" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 95, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "\n", 397 | "Int64Index: 30 entries, 470 to 9979\n", 398 | "Data columns (total 14 columns):\n", 399 | "Address 30 non-null object\n", 400 | "Lot 30 non-null object\n", 401 | "AM or PM 30 non-null object\n", 402 | "Browser Info 30 non-null object\n", 403 | "Company 30 non-null object\n", 404 | "Credit Card 30 non-null int64\n", 405 | "CC Exp Date 30 non-null object\n", 406 | "CC Security Code 30 non-null int64\n", 407 | "CC Provider 30 non-null object\n", 408 | "Email 30 non-null object\n", 409 | "Job 30 non-null object\n", 410 | "IP Address 30 non-null object\n", 411 | "Language 30 non-null object\n", 412 | "Purchase Price 30 non-null float64\n", 413 | "dtypes: float64(1), int64(2), object(11)\n", 414 | "memory usage: 3.5+ KB\n" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "ecom[ecom['Job'] == 'Lawyer'].info()" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": {}, 425 | "source": [ 426 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 427 | "\n", 428 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 96, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "PM 5068\n", 440 | "AM 4932\n", 441 | "Name: AM or PM, dtype: int64" 442 | ] 443 | }, 444 | "execution_count": 96, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "ecom['AM or PM'].value_counts()" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "** What are the 5 most common Job Titles? 
**" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 97, 463 | "metadata": {}, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "Interior and spatial designer 31\n", 469 | "Lawyer 30\n", 470 | "Social researcher 28\n", 471 | "Purchasing manager 27\n", 472 | "Designer, jewellery 27\n", 473 | "Name: Job, dtype: int64" 474 | ] 475 | }, 476 | "execution_count": 97, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "ecom['Job'].value_counts().head(5)" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 99, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "513 75.1\n", 501 | "Name: Purchase Price, dtype: float64" 502 | ] 503 | }, 504 | "execution_count": 99, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "ecom[ecom['Lot']=='90 WT']['Purchase Price']" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 100, 523 | "metadata": {}, 524 | "outputs": [ 525 | { 526 | "data": { 527 | "text/plain": [ 528 | "1234 bondellen@williams-garza.com\n", 529 | "Name: Email, dtype: object" 530 | ] 531 | }, 532 | "execution_count": 100, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "ecom[ecom[\"Credit Card\"] == 4926535242672853]['Email'] " 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "** How many people have American Express as 
their Credit Card Provider *and* made a purchase above $95 ?**" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 101, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/plain": [ 556 | "Address 39\n", 557 | "Lot 39\n", 558 | "AM or PM 39\n", 559 | "Browser Info 39\n", 560 | "Company 39\n", 561 | "Credit Card 39\n", 562 | "CC Exp Date 39\n", 563 | "CC Security Code 39\n", 564 | "CC Provider 39\n", 565 | "Email 39\n", 566 | "Job 39\n", 567 | "IP Address 39\n", 568 | "Language 39\n", 569 | "Purchase Price 39\n", 570 | "dtype: int64" 571 | ] 572 | }, 573 | "execution_count": 101, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "ecom[(ecom['CC Provider']=='American Express') & (ecom['Purchase Price']>95)].count()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "markdown", 584 | "metadata": {}, 585 | "source": [ 586 | "** Hard: How many people have a credit card that expires in 2025? **" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 102, 592 | "metadata": {}, 593 | "outputs": [ 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "1033" 598 | ] 599 | }, 600 | "execution_count": 102, 601 | "metadata": {}, 602 | "output_type": "execute_result" 603 | } 604 | ], 605 | "source": [ 606 | "sum(ecom['CC Exp Date'].apply(lambda x: x[3:]) == '25')" 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": {}, 612 | "source": [ 613 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) 
**" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 56, 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "data": { 623 | "text/plain": [ 624 | "hotmail.com 1638\n", 625 | "yahoo.com 1616\n", 626 | "gmail.com 1605\n", 627 | "smith.com 42\n", 628 | "williams.com 37\n", 629 | "Name: Email, dtype: int64" 630 | ] 631 | }, 632 | "execution_count": 56, 633 | "metadata": {}, 634 | "output_type": "execute_result" 635 | } 636 | ], 637 | "source": [ 638 | "ecom['Email'].apply(lambda x: x.split('@')[1]).value_counts().head(5)" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": {}, 644 | "source": [ 645 | "# Great Job!" 646 | ] 647 | } 648 | ], 649 | "metadata": { 650 | "kernelspec": { 651 | "display_name": "Python 3", 652 | "language": "python", 653 | "name": "python3" 654 | }, 655 | "language_info": { 656 | "codemirror_mode": { 657 | "name": "ipython", 658 | "version": 3 659 | }, 660 | "file_extension": ".py", 661 | "mimetype": "text/x-python", 662 | "name": "python", 663 | "nbconvert_exporter": "python", 664 | "pygments_lexer": "ipython3", 665 | "version": "3.7.4" 666 | } 667 | }, 668 | "nbformat": 4, 669 | "nbformat_minor": 1 670 | } 671 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/01-SF Salaries Exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SF Salaries Exercise \n", 8 | "\n", 9 | "Welcome to a quick exercise for you to practice your pandas skills! We will be using the [SF Salaries Dataset](https://www.kaggle.com/kaggle/sf-salaries) from Kaggle! Just follow along and complete the tasks outlined in bold below. The tasks will get harder and harder as you go along." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "** Import pandas as pd.**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "** Read Salaries.csv as a dataframe called sal.**" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "\u001b[31m01-SF Salaries Exercise.ipynb\u001b[m\u001b[m* \u001b[31mEcommerce Purchases\u001b[m\u001b[m*\r\n", 45 | "\u001b[31m03-Ecommerce Purchases Exercise .ipynb\u001b[m\u001b[m* \u001b[31mSalaries.csv\u001b[m\u001b[m*\r\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "ls" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "df = pd.read_csv('Salaries.csv')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "** Check the head of the DataFrame. **" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 8, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/html": [ 77 | "
\n", 78 | "\n", 91 | "\n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
56DAVID SULLIVANASSISTANT DEPUTY CHIEF II118602.008601.00189082.74NaN316285.74316285.742011NaNSan FranciscoNaN
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " Id EmployeeName JobTitle \\\n", 213 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 214 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 215 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 216 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 217 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 218 | "5 6 DAVID SULLIVAN ASSISTANT DEPUTY CHIEF II \n", 219 | "\n", 220 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 221 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 222 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 223 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 224 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 225 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 226 | "5 118602.00 8601.00 189082.74 NaN 316285.74 316285.74 \n", 227 | "\n", 228 | " Year Notes Agency Status \n", 229 | "0 2011 NaN San Francisco NaN \n", 230 | "1 2011 NaN San Francisco NaN \n", 231 | "2 2011 NaN San Francisco NaN \n", 232 | "3 2011 NaN San Francisco NaN \n", 233 | "4 2011 NaN San Francisco NaN \n", 234 | "5 2011 NaN San Francisco NaN " 235 | ] 236 | }, 237 | "execution_count": 8, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "df.head(6)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "** Use the .info() method to find out how many entries there are.**" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "\n", 263 | "RangeIndex: 148654 entries, 0 to 148653\n", 264 | "Data columns (total 13 columns):\n", 265 | "Id 148654 non-null int64\n", 266 | "EmployeeName 148654 non-null object\n", 267 | "JobTitle 
148654 non-null object\n", 268 | "BasePay 148045 non-null float64\n", 269 | "OvertimePay 148650 non-null float64\n", 270 | "OtherPay 148650 non-null float64\n", 271 | "Benefits 112491 non-null float64\n", 272 | "TotalPay 148654 non-null float64\n", 273 | "TotalPayBenefits 148654 non-null float64\n", 274 | "Year 148654 non-null int64\n", 275 | "Notes 0 non-null float64\n", 276 | "Agency 148654 non-null object\n", 277 | "Status 0 non-null float64\n", 278 | "dtypes: float64(8), int64(2), object(3)\n", 279 | "memory usage: 14.7+ MB\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "df.info()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 10, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/plain": [ 295 | "'/Users/vipulgaur/Documents/Training/advanced-ML/25Jul/DataWrangling/04-Pandas-Exercises'" 296 | ] 297 | }, 298 | "execution_count": 10, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "pwd" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "**What is the average BasePay ?**" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 11, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", 323 | " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", 324 | " 'Status'],\n", 325 | " dtype='object')" 326 | ] 327 | }, 328 | "execution_count": 11, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "df.columns" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 13, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "66325.44884050643" 346 | ] 347 | }, 348 | "execution_count": 13, 349 | "metadata": {}, 350 | "output_type": 
"execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "df['BasePay'].mean()" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "** What is the highest amount of OvertimePay in the dataset ? **" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 14, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "245131.88" 373 | ] 374 | }, 375 | "execution_count": 14, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "df['OvertimePay'].max()" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "** What is the job title of JOSEPH DRISCOLL ? Note: Use all caps, otherwise you may get an answer that doesn't match up (there is also a lowercase Joseph Driscoll). **" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 17, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "24 CAPTAIN, FIRE SUPPRESSION\n", 400 | "Name: JobTitle, dtype: object" 401 | ] 402 | }, 403 | "execution_count": 17, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "df[(df['EmployeeName']=='JOSEPH DRISCOLL')]['JobTitle']" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "** How much does JOSEPH DRISCOLL make (including benefits)? 
**" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 18, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/plain": [ 427 | "24 270324.91\n", 428 | "Name: TotalPayBenefits, dtype: float64" 429 | ] 430 | }, 431 | "execution_count": 18, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "df[(df['EmployeeName']=='JOSEPH DRISCOLL')]['TotalPayBenefits']" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "** What is the name of highest paid person (including benefits)?**" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 19, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/html": [ 455 | "
\n", 456 | "\n", 469 | "\n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.0400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
\n", 507 | "
" 508 | ], 509 | "text/plain": [ 510 | " Id EmployeeName JobTitle \\\n", 511 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 512 | "\n", 513 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 514 | "0 167411.18 0.0 400184.25 NaN 567595.43 567595.43 \n", 515 | "\n", 516 | " Year Notes Agency Status \n", 517 | "0 2011 NaN San Francisco NaN " 518 | ] 519 | }, 520 | "execution_count": 19, 521 | "metadata": {}, 522 | "output_type": "execute_result" 523 | } 524 | ], 525 | "source": [ 526 | "df[df['TotalPayBenefits']==df['TotalPayBenefits'].max()]" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "** What is the name of lowest paid person (including benefits)? Do you notice something strange about how much he or she is paid?**" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 20, 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "data": { 543 | "text/html": [ 544 | "
\n", 545 | "\n", 558 | "\n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
148653148654Joe LopezCounselor, Log Cabin Ranch0.00.0-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", 596 | "
" 597 | ], 598 | "text/plain": [ 599 | " Id EmployeeName JobTitle BasePay OvertimePay \\\n", 600 | "148653 148654 Joe Lopez Counselor, Log Cabin Ranch 0.0 0.0 \n", 601 | "\n", 602 | " OtherPay Benefits TotalPay TotalPayBenefits Year Notes \\\n", 603 | "148653 -618.13 0.0 -618.13 -618.13 2014 NaN \n", 604 | "\n", 605 | " Agency Status \n", 606 | "148653 San Francisco NaN " 607 | ] 608 | }, 609 | "execution_count": 20, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "df[df['TotalPayBenefits']==df['TotalPayBenefits'].min()]" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "** What was the average (mean) BasePay of all employees per year? (2011-2014) ? **" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 23, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "data": { 632 | "text/plain": [ 633 | "Year\n", 634 | "2011 63595.956517\n", 635 | "2012 65436.406857\n", 636 | "2013 69630.030216\n", 637 | "2014 66564.421924\n", 638 | "Name: BasePay, dtype: float64" 639 | ] 640 | }, 641 | "execution_count": 23, 642 | "metadata": {}, 643 | "output_type": "execute_result" 644 | } 645 | ], 646 | "source": [ 647 | "df.groupby('Year').mean()['BasePay']" 648 | ] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "** How many unique job titles are there? **" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": 24, 660 | "metadata": {}, 661 | "outputs": [ 662 | { 663 | "data": { 664 | "text/plain": [ 665 | "2159" 666 | ] 667 | }, 668 | "execution_count": 24, 669 | "metadata": {}, 670 | "output_type": "execute_result" 671 | } 672 | ], 673 | "source": [ 674 | "df['JobTitle'].nunique()" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "** What are the top 5 most common jobs? 
**" 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": 26, 687 | "metadata": {}, 688 | "outputs": [ 689 | { 690 | "data": { 691 | "text/plain": [ 692 | "Transit Operator 7036\n", 693 | "Special Nurse 4389\n", 694 | "Registered Nurse 3736\n", 695 | "Public Svc Aide-Public Works 2518\n", 696 | "Police Officer 3 2421\n", 697 | "Name: JobTitle, dtype: int64" 698 | ] 699 | }, 700 | "execution_count": 26, 701 | "metadata": {}, 702 | "output_type": "execute_result" 703 | } 704 | ], 705 | "source": [ 706 | "df['JobTitle'].value_counts().head()" 707 | ] 708 | }, 709 | { 710 | "cell_type": "markdown", 711 | "metadata": {}, 712 | "source": [ 713 | "** How many Job Titles were represented by only one person in 2013? (e.g. Job Titles with only one occurrence in 2013?) **" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 38, 719 | "metadata": {}, 720 | "outputs": [ 721 | { 722 | "data": { 723 | "text/plain": [ 724 | "202" 725 | ] 726 | }, 727 | "execution_count": 38, 728 | "metadata": {}, 729 | "output_type": "execute_result" 730 | } 731 | ], 732 | "source": [ 733 | "sum(df[df['Year']==2013]['JobTitle'].value_counts()==1)" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": 33, 739 | "metadata": {}, 740 | "outputs": [ 741 | { 742 | "data": { 743 | "text/plain": [ 744 | "202" 745 | ] 746 | }, 747 | "execution_count": 33, 748 | "metadata": {}, 749 | "output_type": "execute_result" 750 | } 751 | ], 752 | "source": [ 753 | "sum(df[df['Year']==2013]['JobTitle'].value_counts()==1)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "markdown", 758 | "metadata": {}, 759 | "source": [ 760 | "** How many people have the word Chief in their job title? 
(This is pretty tricky) **" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 42, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/plain": [ 771 | "423" 772 | ] 773 | }, 774 | "execution_count": 42, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "sum(df['JobTitle'].str.contains(\"Chief\"))" 781 | ] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "execution_count": 41, 786 | "metadata": {}, 787 | "outputs": [ 788 | { 789 | "data": { 790 | "text/plain": [ 791 | "627" 792 | ] 793 | }, 794 | "execution_count": 41, 795 | "metadata": {}, 796 | "output_type": "execute_result" 797 | } 798 | ], 799 | "source": [ 800 | "len(list(filter(lambda x: 'chief' in x.lower(), df['JobTitle'])))" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": 50, 806 | "metadata": {}, 807 | "outputs": [ 808 | { 809 | "data": { 810 | "text/plain": [ 811 | "True" 812 | ] 813 | }, 814 | "execution_count": 50, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "title = \"CChief Fire Assitant\"\n", 821 | "\n", 822 | "def ch(title):\n", 823 | " if 'chief' in title.lower():\n", 824 | " return True\n", 825 | " else:\n", 826 | " return False\n", 827 | " \n", 828 | "ch(title)" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 48, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "data": { 838 | "text/plain": [ 839 | "627" 840 | ] 841 | }, 842 | "execution_count": 48, 843 | "metadata": {}, 844 | "output_type": "execute_result" 845 | } 846 | ], 847 | "source": [ 848 | "sum(df['JobTitle'].apply(lambda x: ch(x)))" 849 | ] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": {}, 854 | "source": [ 855 | "### cchief must return false" 856 | ] 857 | }, 858 | { 859 | "cell_type": "markdown", 860 | "metadata": {}, 861 | "source": [ 862 | "** Bonus: Is there a correlation between length of 
the Job Title string and Salary? **" 863 | ] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": 51, 868 | "metadata": {}, 869 | "outputs": [ 870 | { 871 | "data": { 872 | "text/html": [ 873 | "
\n", 874 | "\n", 887 | "\n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatuslength_title
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN46
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN31
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN31
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN36
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN44
\n", 995 | "
" 996 | ], 997 | "text/plain": [ 998 | " Id EmployeeName JobTitle \\\n", 999 | "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", 1000 | "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", 1001 | "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", 1002 | "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", 1003 | "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", 1004 | "\n", 1005 | " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", 1006 | "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", 1007 | "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", 1008 | "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", 1009 | "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", 1010 | "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", 1011 | "\n", 1012 | " Year Notes Agency Status length_title \n", 1013 | "0 2011 NaN San Francisco NaN 46 \n", 1014 | "1 2011 NaN San Francisco NaN 31 \n", 1015 | "2 2011 NaN San Francisco NaN 31 \n", 1016 | "3 2011 NaN San Francisco NaN 36 \n", 1017 | "4 2011 NaN San Francisco NaN 44 " 1018 | ] 1019 | }, 1020 | "execution_count": 51, 1021 | "metadata": {}, 1022 | "output_type": "execute_result" 1023 | } 1024 | ], 1025 | "source": [ 1026 | "df['length_title']=df['JobTitle'].apply(len)\n", 1027 | "df.head()" 1028 | ] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "execution_count": 52, 1033 | "metadata": {}, 1034 | "outputs": [ 1035 | { 1036 | "data": { 1037 | "text/html": [ 1038 | "
\n", 1039 | "\n", 1052 | "\n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | "
length_titleTotalPayBenefits
length_title1.000000-0.036878
TotalPayBenefits-0.0368781.000000
\n", 1073 | "
" 1074 | ], 1075 | "text/plain": [ 1076 | " length_title TotalPayBenefits\n", 1077 | "length_title 1.000000 -0.036878\n", 1078 | "TotalPayBenefits -0.036878 1.000000" 1079 | ] 1080 | }, 1081 | "execution_count": 52, 1082 | "metadata": {}, 1083 | "output_type": "execute_result" 1084 | } 1085 | ], 1086 | "source": [ 1087 | "df[['length_title','TotalPayBenefits']].corr()" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "markdown", 1092 | "metadata": {}, 1093 | "source": [ 1094 | "# Great Job!" 1095 | ] 1096 | } 1097 | ], 1098 | "metadata": { 1099 | "kernelspec": { 1100 | "display_name": "Python 3", 1101 | "language": "python", 1102 | "name": "python3" 1103 | }, 1104 | "language_info": { 1105 | "codemirror_mode": { 1106 | "name": "ipython", 1107 | "version": 3 1108 | }, 1109 | "file_extension": ".py", 1110 | "mimetype": "text/x-python", 1111 | "name": "python", 1112 | "nbconvert_exporter": "python", 1113 | "pygments_lexer": "ipython3", 1114 | "version": "3.7.4" 1115 | } 1116 | }, 1117 | "nbformat": 4, 1118 | "nbformat_minor": 1 1119 | } 1120 | -------------------------------------------------------------------------------- /04-Pandas-Exercises/03-Ecommerce Purchases Exercise .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Ecommerce Purchases Exercise\n", 8 | "\n", 9 | "In this Exercise you will be given some Fake Data about some purchases done through Amazon! Just go ahead and follow the directions and try your best to answer the questions and complete the tasks. Feel free to reference the solutions. Most of the tasks can be solved in different ways. 
For the most part, the questions get progressively harder.\n", 10 | "\n", 11 | "Please excuse anything that doesn't make \"Real-World\" sense in the dataframe, all the data is fake and made-up.\n", 12 | "\n", 13 | "Also note that all of these questions can be answered with one line of code.\n", 14 | "____\n", 15 | "** Import pandas and read in the Ecommerce Purchases csv file and set it to a DataFrame called ecom. **" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 84, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 86, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "**Check the head of the DataFrame.**" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 87, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
AddressLotAM or PMBrowser InfoCompanyCredit CardCC Exp DateCC Security CodeCC ProviderEmailJobIP AddressLanguagePurchase Price
016629 Pace Camp Apt. 448\\nAlexisborough, NE 77...46 inPMOpera/9.56.(X11; Linux x86_64; sl-SI) Presto/2...Martinez-Herman601192906112340602/20900JCB 16 digitpdunlap@yahoo.comScientist, product/process development149.146.147.205el98.14
19374 Jasmine Spurs Suite 508\\nSouth John, TN 8...28 rnPMOpera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr...Fletcher, Richards and Whitaker333775816964535611/18561Mastercardanthony41@reed.comDrilling engineer15.160.41.51fr70.73
2Unit 0065 Box 5052\\nDPO AP 2745094 vEPMMozilla/5.0 (compatible; MSIE 9.0; Windows NT ...Simpson, Williams and Pham67595766612508/19699JCB 16 digitamymiller@morales-harrison.comCustomer service manager132.207.160.22de0.95
37780 Julia Fords\\nNew Stacy, WA 4579836 vmPMMozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ...Williams, Marshall and Buchanan601157850443071002/24384Discoverbrent16@olson-robinson.infoDrilling engineer30.250.74.19es78.04
423012 Munoz Drive Suite 337\\nNew Cynthia, TX 5...20 IEAMOpera/9.58.(X11; Linux x86_64; it-IT) Presto/2...Brown, Watson and Andrews601145662320799810/25678Diners Club / Carte Blanchechristopherwright@gmail.comFine artist24.140.33.94es77.82
\n", 160 | "
" 161 | ], 162 | "text/plain": [ 163 | " Address Lot AM or PM \\\n", 164 | "0 16629 Pace Camp Apt. 448\\nAlexisborough, NE 77... 46 in PM \n", 165 | "1 9374 Jasmine Spurs Suite 508\\nSouth John, TN 8... 28 rn PM \n", 166 | "2 Unit 0065 Box 5052\\nDPO AP 27450 94 vE PM \n", 167 | "3 7780 Julia Fords\\nNew Stacy, WA 45798 36 vm PM \n", 168 | "4 23012 Munoz Drive Suite 337\\nNew Cynthia, TX 5... 20 IE AM \n", 169 | "\n", 170 | " Browser Info \\\n", 171 | "0 Opera/9.56.(X11; Linux x86_64; sl-SI) Presto/2... \n", 172 | "1 Opera/8.93.(Windows 98; Win 9x 4.90; en-US) Pr... \n", 173 | "2 Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... \n", 174 | "3 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0 ... \n", 175 | "4 Opera/9.58.(X11; Linux x86_64; it-IT) Presto/2... \n", 176 | "\n", 177 | " Company Credit Card CC Exp Date \\\n", 178 | "0 Martinez-Herman 6011929061123406 02/20 \n", 179 | "1 Fletcher, Richards and Whitaker 3337758169645356 11/18 \n", 180 | "2 Simpson, Williams and Pham 675957666125 08/19 \n", 181 | "3 Williams, Marshall and Buchanan 6011578504430710 02/24 \n", 182 | "4 Brown, Watson and Andrews 6011456623207998 10/25 \n", 183 | "\n", 184 | " CC Security Code CC Provider \\\n", 185 | "0 900 JCB 16 digit \n", 186 | "1 561 Mastercard \n", 187 | "2 699 JCB 16 digit \n", 188 | "3 384 Discover \n", 189 | "4 678 Diners Club / Carte Blanche \n", 190 | "\n", 191 | " Email Job \\\n", 192 | "0 pdunlap@yahoo.com Scientist, product/process development \n", 193 | "1 anthony41@reed.com Drilling engineer \n", 194 | "2 amymiller@morales-harrison.com Customer service manager \n", 195 | "3 brent16@olson-robinson.info Drilling engineer \n", 196 | "4 christopherwright@gmail.com Fine artist \n", 197 | "\n", 198 | " IP Address Language Purchase Price \n", 199 | "0 149.146.147.205 el 98.14 \n", 200 | "1 15.160.41.51 fr 70.73 \n", 201 | "2 132.207.160.22 de 0.95 \n", 202 | "3 30.250.74.19 es 78.04 \n", 203 | "4 24.140.33.94 es 77.82 " 204 | ] 205 | }, 206 | "execution_count": 87, 207 | 
"metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "** How many rows and columns are there? **" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 88, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "\n", 230 | "RangeIndex: 10000 entries, 0 to 9999\n", 231 | "Data columns (total 14 columns):\n", 232 | "Address 10000 non-null object\n", 233 | "Lot 10000 non-null object\n", 234 | "AM or PM 10000 non-null object\n", 235 | "Browser Info 10000 non-null object\n", 236 | "Company 10000 non-null object\n", 237 | "Credit Card 10000 non-null int64\n", 238 | "CC Exp Date 10000 non-null object\n", 239 | "CC Security Code 10000 non-null int64\n", 240 | "CC Provider 10000 non-null object\n", 241 | "Email 10000 non-null object\n", 242 | "Job 10000 non-null object\n", 243 | "IP Address 10000 non-null object\n", 244 | "Language 10000 non-null object\n", 245 | "Purchase Price 10000 non-null float64\n", 246 | "dtypes: float64(1), int64(2), object(11)\n", 247 | "memory usage: 1.1+ MB\n" 248 | ] 249 | } 250 | ], 251 | "source": [] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "** What is the average Purchase Price? **" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 90, 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "50.34730200000025" 269 | ] 270 | }, 271 | "execution_count": 90, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "** What were the highest and lowest purchase prices? 
**" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 92, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "99.989999999999995" 294 | ] 295 | }, 296 | "execution_count": 92, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 93, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "0.0" 312 | ] 313 | }, 314 | "execution_count": 93, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "** How many people have English 'en' as their Language of choice on the website? **" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 94, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/plain": [ 336 | "Address 1098\n", 337 | "Lot 1098\n", 338 | "AM or PM 1098\n", 339 | "Browser Info 1098\n", 340 | "Company 1098\n", 341 | "Credit Card 1098\n", 342 | "CC Exp Date 1098\n", 343 | "CC Security Code 1098\n", 344 | "CC Provider 1098\n", 345 | "Email 1098\n", 346 | "Job 1098\n", 347 | "IP Address 1098\n", 348 | "Language 1098\n", 349 | "Purchase Price 1098\n", 350 | "dtype: int64" 351 | ] 352 | }, 353 | "execution_count": 94, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "** How many people have the job title of \"Lawyer\" ? 
**\n" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 95, 370 | "metadata": {}, 371 | "outputs": [ 372 | { 373 | "name": "stdout", 374 | "output_type": "stream", 375 | "text": [ 376 | "\n", 377 | "Int64Index: 30 entries, 470 to 9979\n", 378 | "Data columns (total 14 columns):\n", 379 | "Address 30 non-null object\n", 380 | "Lot 30 non-null object\n", 381 | "AM or PM 30 non-null object\n", 382 | "Browser Info 30 non-null object\n", 383 | "Company 30 non-null object\n", 384 | "Credit Card 30 non-null int64\n", 385 | "CC Exp Date 30 non-null object\n", 386 | "CC Security Code 30 non-null int64\n", 387 | "CC Provider 30 non-null object\n", 388 | "Email 30 non-null object\n", 389 | "Job 30 non-null object\n", 390 | "IP Address 30 non-null object\n", 391 | "Language 30 non-null object\n", 392 | "Purchase Price 30 non-null float64\n", 393 | "dtypes: float64(1), int64(2), object(11)\n", 394 | "memory usage: 3.5+ KB\n" 395 | ] 396 | } 397 | ], 398 | "source": [] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "** How many people made the purchase during the AM and how many people made the purchase during PM ? **\n", 405 | "\n", 406 | "**(Hint: Check out [value_counts()](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.value_counts.html) ) **" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 96, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "PM 5068\n", 418 | "AM 4932\n", 419 | "Name: AM or PM, dtype: int64" 420 | ] 421 | }, 422 | "execution_count": 96, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "** What are the 5 most common Job Titles? 
**" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 97, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "Interior and spatial designer 31\n", 445 | "Lawyer 30\n", 446 | "Social researcher 28\n", 447 | "Purchasing manager 27\n", 448 | "Designer, jewellery 27\n", 449 | "Name: Job, dtype: int64" 450 | ] 451 | }, 452 | "execution_count": 97, 453 | "metadata": {}, 454 | "output_type": "execute_result" 455 | } 456 | ], 457 | "source": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "** Someone made a purchase that came from Lot: \"90 WT\" , what was the Purchase Price for this transaction? **" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 99, 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "data": { 473 | "text/plain": [ 474 | "513 75.1\n", 475 | "Name: Purchase Price, dtype: float64" 476 | ] 477 | }, 478 | "execution_count": 99, 479 | "metadata": {}, 480 | "output_type": "execute_result" 481 | } 482 | ], 483 | "source": [] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "** What is the email of the person with the following Credit Card Number: 4926535242672853 **" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 100, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "text/plain": [ 500 | "1234 bondellen@williams-garza.com\n", 501 | "Name: Email, dtype: object" 502 | ] 503 | }, 504 | "execution_count": 100, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "** How many people have American Express as their Credit Card Provider *and* made a purchase above $95 ?**" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 101, 521 | "metadata": {}, 522 | "outputs": 
[ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "Address 39\n", 527 | "Lot 39\n", 528 | "AM or PM 39\n", 529 | "Browser Info 39\n", 530 | "Company 39\n", 531 | "Credit Card 39\n", 532 | "CC Exp Date 39\n", 533 | "CC Security Code 39\n", 534 | "CC Provider 39\n", 535 | "Email 39\n", 536 | "Job 39\n", 537 | "IP Address 39\n", 538 | "Language 39\n", 539 | "Purchase Price 39\n", 540 | "dtype: int64" 541 | ] 542 | }, 543 | "execution_count": 101, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "** Hard: How many people have a credit card that expires in 2025? **" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 102, 560 | "metadata": {}, 561 | "outputs": [ 562 | { 563 | "data": { 564 | "text/plain": [ 565 | "1033" 566 | ] 567 | }, 568 | "execution_count": 102, 569 | "metadata": {}, 570 | "output_type": "execute_result" 571 | } 572 | ], 573 | "source": [] 574 | }, 575 | { 576 | "cell_type": "markdown", 577 | "metadata": {}, 578 | "source": [ 579 | "** Hard: What are the top 5 most popular email providers/hosts (e.g. gmail.com, yahoo.com, etc...) **" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 56, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/plain": [ 590 | "hotmail.com 1638\n", 591 | "yahoo.com 1616\n", 592 | "gmail.com 1605\n", 593 | "smith.com 42\n", 594 | "williams.com 37\n", 595 | "Name: Email, dtype: int64" 596 | ] 597 | }, 598 | "execution_count": 56, 599 | "metadata": {}, 600 | "output_type": "execute_result" 601 | } 602 | ], 603 | "source": [] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "# Great Job!" 
610 | ] 611 | } 612 | ], 613 | "metadata": { 614 | "kernelspec": { 615 | "display_name": "Python 3", 616 | "language": "python", 617 | "name": "python3" 618 | }, 619 | "language_info": { 620 | "codemirror_mode": { 621 | "name": "ipython", 622 | "version": 3 623 | }, 624 | "file_extension": ".py", 625 | "mimetype": "text/x-python", 626 | "name": "python", 627 | "nbconvert_exporter": "python", 628 | "pygments_lexer": "ipython3", 629 | "version": "3.7.4" 630 | } 631 | }, 632 | "nbformat": 4, 633 | "nbformat_minor": 1 634 | } 635 | -------------------------------------------------------------------------------- /Numpy - Indexing and Selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import numpy as np\n", 21 | "\n", 22 | "arr = np.arange(1,11)\n", 23 | "arr" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Bracket indexing and selection" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "10" 42 | ] 43 | }, 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "arr[9]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Slicing array from 1 to 5" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "array([2, 3, 4, 5])" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": 
"execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "arr[1:5]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "array([[ 0, 1, 2, 3, 4, 5],\n", 89 | " [ 6, 7, 8, 9, 10, 11],\n", 90 | " [12, 13, 14, 15, 16, 17],\n", 91 | " [18, 19, 20, 21, 22, 23],\n", 92 | " [24, 25, 26, 27, 28, 29],\n", 93 | " [30, 31, 32, 33, 34, 35]])" 94 | ] 95 | }, 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "mat = np.arange(36).reshape(6,6)\n", 103 | "mat" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "array([0, 1, 2, 3, 4, 5])" 115 | ] 116 | }, 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "mat[0]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "[[1, 2],\n", 131 | " [7, 8]]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "array([[1, 2],\n", 143 | " [7, 8]])" 144 | ] 145 | }, 146 | "execution_count": 11, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "mat[:2,1:3]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "[[15,16,17],\n", 160 | " [21,22,23],\n", 161 | " [27,28,29],\n", 162 | " [33,34,35]\n", 163 | " ] " 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 14, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "array([[15, 16, 17],\n", 175 | " [21, 22, 23],\n", 176 | " [27, 28, 29],\n", 177 | " [33, 34, 35]])" 178 | ] 179 | }, 180 | 
"execution_count": 14, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "mat[2:,3:]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "### Selection" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 15, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 205 | ] 206 | }, 207 | "execution_count": 15, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "arr" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 18, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "array([False, False, False, False, True, True, True, True, True,\n", 225 | " True])" 226 | ] 227 | }, 228 | "execution_count": 18, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "arr>4" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 19, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "array([ 5, 6, 7, 8, 9, 10])" 246 | ] 247 | }, 248 | "execution_count": 19, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "arr[arr>4]" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 20, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([ True, True, True, True, False, False, False, False, False,\n", 266 | " False])" 267 | ] 268 | }, 269 | "execution_count": 20, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "arr<5" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 21, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | 
"array([1, 2, 3, 4])" 287 | ] 288 | }, 289 | "execution_count": 21, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "arr[arr<5]" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "# Great Job !" 303 | ] 304 | } 305 | ], 306 | "metadata": { 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.7.4" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /Numpy_Arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The fundamental package for scientific computing with Python\n", 15 | "\n", 16 | "https://numpy.org/\n", 17 | "\n", 18 | "* powerful N-dim array object\n", 19 | "\n", 20 | "* broadcasting functions\n", 21 | "\n", 22 | "* LinAlg, Matrix Manipulation, Fourier Series" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "Requirement already satisfied: numpy in /Users/vipulgaur/opt/anaconda3/lib/python3.7/site-packages (1.17.2)\r\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "!pip install numpy" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import 
numpy as np" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Numpy Arrays\n", 56 | "\n", 57 | "* Vector - 1D array\n", 58 | "\n", 59 | "\n", 60 | "* Matrix - 2D array" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "mylist = [1,2,3]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "list" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "type(mylist)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "numpy.ndarray" 101 | ] 102 | }, 103 | "execution_count": 6, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "type(np.array(mylist))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]" 121 | ] 122 | }, 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "mymatrix = [[1,2,3],[4,5,6],[7,8,9]]\n", 130 | "mymatrix" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "list" 142 | ] 143 | }, 144 | "execution_count": 8, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "type(mymatrix)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 9, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "numpy.ndarray" 162 | 
] 163 | }, 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "type(np.array(mymatrix))" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 10, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "array([[1, 2, 3],\n", 182 | " [4, 5, 6],\n", 183 | " [7, 8, 9]])" 184 | ] 185 | }, 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "np.array(mymatrix)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Built In Methods" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "#### arange -\n", 207 | "Returns evenly spaced values within a given interval (the stop value is excluded)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 11, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 219 | ] 220 | }, 221 | "execution_count": 11, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "np.arange(0,10)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "array([0, 2, 4, 6, 8])" 239 | ] 240 | }, 241 | "execution_count": 12, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "np.arange(0,10,2)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "#### Zeros and Ones" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 13, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "array([0., 0., 0.])" 266 | ] 267 | }, 268 | "execution_count": 13, 269 | 
"metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "np.zeros(3)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 14, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "array([1., 1., 1.])" 286 | ] 287 | }, 288 | "execution_count": 14, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "np.ones(3)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 15, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "text/plain": [ 305 | "array([[0., 0., 0., 0.],\n", 306 | " [0., 0., 0., 0.],\n", 307 | " [0., 0., 0., 0.],\n", 308 | " [0., 0., 0., 0.]])" 309 | ] 310 | }, 311 | "execution_count": 15, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "np.zeros((4,4))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 16, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "array([[1., 1., 1., 1.],\n", 329 | " [1., 1., 1., 1.],\n", 330 | " [1., 1., 1., 1.],\n", 331 | " [1., 1., 1., 1.]])" 332 | ] 333 | }, 334 | "execution_count": 16, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "np.ones((4,4))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "#### Linspace -\n", 348 | "\n", 349 | "Returns evenly spaced numbers over a specified interval" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 50, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "array([ 0., 5., 10., 15., 20.])" 361 | ] 362 | }, 363 | "execution_count": 50, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "np.linspace(0,20,5)" 370 | ] 371 | }, 372 | { 373 | "cell_type": 
"markdown", 374 | "metadata": {}, 375 | "source": [ 376 | "#### Identity Matrix" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 21, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "array([[1., 0., 0., 0.],\n", 388 | " [0., 1., 0., 0.],\n", 389 | " [0., 0., 1., 0.],\n", 390 | " [0., 0., 0., 1.]])" 391 | ] 392 | }, 393 | "execution_count": 21, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "np.eye(4)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "### Random Number Generation" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "#### rand\n", 414 | "\n", 415 | "Creates an array of the given shape and populates it with random samples from a uniform distribution over [0, 1)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 23, 421 | "metadata": {}, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": [ 426 | "array([0.60646952, 0.48400759])" 427 | ] 428 | }, 429 | "execution_count": 23, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "np.random.rand(2)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "#### randn\n", 443 | "Returns a sample (or an array of samples) from the standard normal distribution" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 24, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "array([ 1.16581461, -0.3645305 , 1.02588802, 0.38129504])" 455 | ] 456 | }, 457 | "execution_count": 24, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "np.random.randn(4)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 26, 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "data": { 473 | 
"text/plain": [ 474 | "array([[ 0.04504038, 0.26234797, 0.87563777, 0.62370201, 1.85319986],\n", 475 | " [ 0.23401525, 0.82444257, 0.5612748 , -0.94721164, -0.51689866],\n", 476 | " [ 1.01065937, 0.64561756, -1.11634715, 2.03449087, 0.21399304],\n", 477 | " [ 0.09878095, 0.27902561, -0.99533311, 0.98662053, 0.14694491],\n", 478 | " [-0.26181701, -0.46646171, -0.99481831, -0.54899662, 0.85076694]])" 479 | ] 480 | }, 481 | "execution_count": 26, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "np.random.randn(5,5)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "#### randint" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 30, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "array([89, 80, 79, 28, 62, 2, 50, 94, 67, 42])" 506 | ] 507 | }, 508 | "execution_count": 30, 509 | "metadata": {}, 510 | "output_type": "execute_result" 511 | } 512 | ], 513 | "source": [ 514 | "np.random.randint(1,100,10)" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "### Array attributes and methods" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 32, 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/plain": [ 532 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", 533 | " 17, 18, 19, 20, 21, 22, 23, 24])" 534 | ] 535 | }, 536 | "execution_count": 32, 537 | "metadata": {}, 538 | "output_type": "execute_result" 539 | } 540 | ], 541 | "source": [ 542 | "arr = np.arange(25)\n", 543 | "arr" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 34, 549 | "metadata": {}, 550 | "outputs": [ 551 | { 552 | "data": { 553 | "text/plain": [ 554 | "array([13, 33, 3, 37, 1, 9, 1, 36, 47, 20])" 555 | ] 556 | }, 557 | "execution_count": 34, 558 | "metadata": {}, 559 | 
"output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "ranarr = np.random.randint(0,50,10)\n", 564 | "ranarr" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "#### Reshape" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 39, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "data": { 581 | "text/plain": [ 582 | "array([[ 0, 1, 2, 3, 4],\n", 583 | " [ 5, 6, 7, 8, 9],\n", 584 | " [10, 11, 12, 13, 14],\n", 585 | " [15, 16, 17, 18, 19],\n", 586 | " [20, 21, 22, 23, 24]])" 587 | ] 588 | }, 589 | "execution_count": 39, 590 | "metadata": {}, 591 | "output_type": "execute_result" 592 | } 593 | ], 594 | "source": [ 595 | "arr.reshape(5,5)#.shape" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 37, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "data": { 605 | "text/plain": [ 606 | "(25,)" 607 | ] 608 | }, 609 | "execution_count": 37, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "arr.shape" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "#### max, min, argmax, argmin" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 40, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "data": { 632 | "text/plain": [ 633 | "array([13, 33, 3, 37, 1, 9, 1, 36, 47, 20])" 634 | ] 635 | }, 636 | "execution_count": 40, 637 | "metadata": {}, 638 | "output_type": "execute_result" 639 | } 640 | ], 641 | "source": [ 642 | "ranarr" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": 44, 648 | "metadata": {}, 649 | "outputs": [ 650 | { 651 | "data": { 652 | "text/plain": [ 653 | "1" 654 | ] 655 | }, 656 | "execution_count": 44, 657 | "metadata": {}, 658 | "output_type": "execute_result" 659 | } 660 | ], 661 | "source": [ 662 | "ranarr.min()" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 
| "execution_count": 45, 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": [ 673 | "4" 674 | ] 675 | }, 676 | "execution_count": 45, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "ranarr.argmin()" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": 42, 688 | "metadata": {}, 689 | "outputs": [ 690 | { 691 | "data": { 692 | "text/plain": [ 693 | "47" 694 | ] 695 | }, 696 | "execution_count": 42, 697 | "metadata": {}, 698 | "output_type": "execute_result" 699 | } 700 | ], 701 | "source": [ 702 | "ranarr.max()" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": 43, 708 | "metadata": {}, 709 | "outputs": [ 710 | { 711 | "data": { 712 | "text/plain": [ 713 | "8" 714 | ] 715 | }, 716 | "execution_count": 43, 717 | "metadata": {}, 718 | "output_type": "execute_result" 719 | } 720 | ], 721 | "source": [ 722 | "ranarr.argmax()" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "metadata": {}, 728 | "source": [ 729 | "# Great Job !" 730 | ] 731 | } 732 | ], 733 | "metadata": { 734 | "kernelspec": { 735 | "display_name": "Python 3", 736 | "language": "python", 737 | "name": "python3" 738 | }, 739 | "language_info": { 740 | "codemirror_mode": { 741 | "name": "ipython", 742 | "version": 3 743 | }, 744 | "file_extension": ".py", 745 | "mimetype": "text/x-python", 746 | "name": "python", 747 | "nbconvert_exporter": "python", 748 | "pygments_lexer": "ipython3", 749 | "version": "3.7.4" 750 | } 751 | }, 752 | "nbformat": 4, 753 | "nbformat_minor": 2 754 | } 755 | --------------------------------------------------------------------------------