├── .gitignore ├── 01-Python-Crash-Course ├── 01-Python Crash Course.ipynb ├── 02-Python Crash Course Exercises.ipynb └── 03-Python Crash Course Exercises - Solutions.ipynb ├── 02-Numpy ├── 00-NumPy-Arrays.ipynb ├── 01-NumPy-Indexing-and-Selection.ipynb ├── 02-NumPy-Operations.ipynb ├── 03-NumPy-Exercises.ipynb ├── 04-NumPy-Exercises-Solutions.ipynb ├── NumPy Basics.ipynb ├── NumPy Indexing and Selection.ipynb ├── NumPy Operations.ipynb ├── Untitled.ipynb ├── Untitled1.ipynb ├── Untitled2.ipynb ├── axis_logic.png └── numpy_indexing.png ├── 03-Pandas ├── 00-Series.ipynb ├── 01-DataFrames.ipynb ├── 02-Conditional-Filtering.ipynb ├── 03-Useful-Methods.ipynb ├── 04-Missing-Data.ipynb ├── 05-Groupby-Operations-and-MultiIndex.ipynb ├── 06-Combining-DataFrames.ipynb ├── 07-Text-Methods.ipynb ├── 08-Time-Methods.ipynb ├── 09-Inputs-and-Outputs.ipynb ├── 10-Pivot-Tables.ipynb ├── 11-Pandas-Project-Exercise .ipynb ├── 12-Pandas-Project-Exercise-Solution.ipynb ├── Concatenation.ipynb ├── Conditional Filtering.ipynb ├── Data Input and Output.ipynb ├── DataFrames.ipynb ├── Date and Time Methods.ipynb ├── DropBy Operations.ipynb ├── Merging.ipynb ├── Missing Data.ipynb ├── Pandas Basics.ipynb ├── Pivot Tables.ipynb ├── Text Methods.ipynb ├── Untitled.ipynb ├── Useful Methods.ipynb ├── __pycache__ │ └── pydbgen.cpython-36.pyc ├── example.xlsx ├── my_excel_file.xlsx ├── new_workbook.xlsx ├── newexcelfile.xlsx ├── reshaping_pivot.png ├── sample_table.html └── simple.html ├── 04-Matplotlib ├── 00-Matplotlib-Basics.ipynb ├── 01-Matplotlib-Figures.ipynb ├── 02-Matplotlib-SubPlots.ipynb ├── 03-Matplotlib-Styling-Plots.ipynb ├── 04-Matplotlib Exercises .ipynb ├── 05-Matplotlib Exercises - Solutions.ipynb ├── 06-Additional-Matplotlib-Commands-NO_VIDEO.ipynb ├── Matplotlib Basics.ipynb ├── Matplotlib Styling.ipynb ├── Matplotlib Subplots.ipynb ├── Untitled.ipynb ├── example.png ├── figure.png ├── myfirstplot.png ├── new_figure.png ├── new_subplots.png ├── subplots.png └── test.png ├── 
05-Seaborn ├── 00-Scatter-Plots.ipynb ├── 01-Distribution-Plots.ipynb ├── 02-Categorical-Plots-Stat-Estimation.ipynb ├── 03-Categorical-Plots-Distributions.ipynb ├── 04-Comparison-Plots.ipynb ├── 05-Seaborn-Grids.ipynb ├── 06-Matrix-Plots.ipynb ├── 07-Seaborn-Exercise.ipynb ├── 08-Seaborn-Exercise-Solutions.ipynb ├── Categorical Plots.ipynb ├── Comparison Plots.ipynb ├── DistPlot_solution.png ├── Distribution Plots.ipynb ├── Distribution within Categories.ipynb ├── Matrix Plots.ipynb ├── Seaborn Basics.ipynb ├── Seaborn Grids.ipynb ├── Seabron Basics.ipynb ├── catplot_solution.png ├── example_scatter.jpg ├── heatmap_solution.png ├── my_plot.jpg └── task_one.jpg ├── 06-Capstone-Project ├── 00-Capstone-Project.ipynb └── 01-Capstone-Project-Solutions .ipynb ├── 07-Overview-of-Machine-Learning ├── ML Basics.ipynb └── Template.ipynb ├── 08-Linear-Regression-Models ├── 00-Intro-to-Simple-Linear-Regression.ipynb ├── 01-Linear-Regression-with-Scitkit-Learn.ipynb ├── 02-Polynomial-Regression.ipynb ├── 03-Regularization-Ridge-Lasso-ElasticNet.ipynb ├── 04-Linear-Regression-Project-DataSet .ipynb ├── Bias-Variance Trade Off.ipynb ├── Cross-Validation.ipynb ├── Elastic Net.ipynb ├── Linear Regression Basics.ipynb ├── Polynomial Regression.ipynb ├── Regularisation for Linear Regression.ipynb ├── Regularisation.ipynb ├── Scikit-Learn Basics.ipynb ├── Simple Linear Regression.ipynb ├── final_converter.joblib ├── final_poly_converter.joblib ├── final_poly_model.joblib ├── final_sales_model.joblib ├── poly_converter.joblib ├── sales_model.joblib └── sales_poly_model.joblib ├── 09-Feature-Engineering ├── 00-Dealing-with-Outliers.ipynb ├── 01-Dealing-with-Missing-Data.ipynb ├── 02-Dealing-with-Categorical-Data.ipynb ├── Categorical Feature Encoding.ipynb └── Feature Engineering Basics.ipynb ├── 10-Cross-Val-and-LinReg-Project ├── 00-Cross-Validation.ipynb ├── 01-Grid-Search.ipynb ├── 02-Linear-Regression-Project-Exercise.ipynb ├── 03-Linear-Regression-Project-Exercise-Solutions.ipynb 
├── Cross Validation.ipynb └── grid_search_cross_validation.png ├── 11-Logistic-Regression-Models ├── 00-Logistic-Regression.ipynb ├── 01-Multi-Class-Logistic-Regression.ipynb ├── 02-Logistic-Regression-Project-Exercise .ipynb ├── 03-Logistic-Regression-Project-Exercise-Solution.ipynb └── Logistic Regression Basics.ipynb ├── 12-K-Nearest-Neighbors ├── 00-KNN-Classification.ipynb ├── 01-KNN-Exercise .ipynb ├── 02-KNN-Exercise-Solutions.ipynb ├── KNN Basics.ipynb ├── mine.jpg └── sonar.jpg ├── 13-Support-Vector-Machines ├── 00-SVM-Classification.ipynb ├── 01-SVM-Regression.ipynb ├── 02-SVM-Project-Exercise.ipynb ├── 03-SVM-Project-Exercise-Solutions.ipynb ├── SVM with regression.ipynb ├── Support Vector Machine Basics.ipynb ├── Types_of_concrete_slump.jpg ├── __pycache__ │ ├── svm_margin_plot.cpython-37.pyc │ └── svm_margin_plot.cpython-38.pyc ├── svm_margin_plot.py └── wine.jpg ├── 14-Decision-Trees ├── 00-Decision-Trees.ipynb ├── Decision Tree Basics.ipynb └── penguin.jpg ├── 15-Random-Forests ├── 00-Random-Forest-Classification.ipynb ├── 01-Random-Forest-Regression.ipynb ├── Random Forest Basics.ipynb ├── Random Forest Regression.ipynb ├── boring.jpg └── penguin.jpg ├── 16-Boosted-Trees ├── 00-AdaBoost.ipynb ├── 01-Gradient-Boosting.ipynb ├── Boosted Trees Basics.ipynb └── mushroom.jpg ├── 17-Supervised-Learning-Capstone-Project ├── 00-Sup-Learning-Capstone-Tree-Methods .ipynb ├── 01-Sup-Learning-Capstone-Tree-Methods-SOLNs.ipynb ├── cplot.png ├── fig1.png ├── fig10.png ├── fig11.png ├── fig2.png ├── fig3.png ├── fig5.png ├── fig6.png ├── fig7.png ├── fig9.png ├── figbar.png └── hugetree.png ├── 18-Naive-Bayes-and-NLP ├── 00-Feature-Extraction-From-Text.ipynb ├── 01-Text-Classification.ipynb ├── 02-Text-Classification-Assessment .ipynb ├── 03-Text-Classification-Assessment-Solution.ipynb ├── Classification of Text.ipynb ├── Naive Bayes and NLP Basics.ipynb ├── One.txt └── Two.txt ├── 19-Unsupervised-Learning-Overview ├── Basics.ipynb └── Blank on Purpose.ipynb ├── 
20-Kmeans-Clustering ├── 00-Kmeans-Clustering.ipynb ├── 01-Kmeans-Color-Quantization.ipynb ├── 02-Kmeans-Clustering-Project-Exercise.ipynb ├── 03-Kmeans-Clustering-Project-Solutions.ipynb ├── Basics.ipynb └── Colour Quantisation.ipynb ├── 21-Hierarchal-Clustering └── 00-Hierarchal-Clustering-Example.ipynb ├── 21-Hierarchical-Clustering ├── 00-Hierarchical-Clustering.ipynb └── Basics.ipynb ├── 22-DBSCAN ├── 00-DBSCAN.ipynb ├── 01-DBSCAN-Hyperparameters.ipynb ├── 02-DBSCAN-Project-Exercise.ipynb ├── 03-DBSCAN-Project-Solutions.ipynb └── Basics.ipynb ├── 23-PCA-Principal-Component-Analysis ├── 00-PCA-Manual-Implementation.ipynb ├── 01-PCA-Scikit-Learn.ipynb ├── 02-PCA-Exercise-Project.ipynb ├── 03-PCA-Exercise-Project-Solutions.ipynb └── Basics.ipynb ├── 24-Model-Deployment ├── 00-Model-Persistence.ipynb ├── 01-Serving-Model-as-API.ipynb ├── Finale.ipynb ├── api.py ├── col_names.pkl ├── column_names.pkl ├── final_model.pkl ├── mynewapi.py └── untitled.txt ├── Pierian_Data_Logo.png ├── README.md ├── project ├── api.py ├── column_names.pkl ├── final_model.pkl └── requirements.txt └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /DATA 2 | 3 | .ipynb_checkpoints 4 | 5 | **.csv 6 | **.jpg 7 | **.png 8 | **.pdf 9 | **.pyc 10 | 11 | Untitled* -------------------------------------------------------------------------------- /01-Python-Crash-Course/02-Python Crash Course Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Python Crash Course Exercises \n", 12 | "\n", 13 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. 
I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp](https://www.udemy.com/complete-python-bootcamp/?couponCode=PY20)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Exercises\n", 21 | "\n", 22 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "** What is 7 to the power of 4?**" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "2401" 41 | ] 42 | }, 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "7 ** 4" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "** Split this string:**\n", 57 | "\n", 58 | " s = \"Hi there Sam!\"\n", 59 | " \n", 60 | "**into a list. 
**" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "['Hi', 'there', 'Sam!']" 72 | ] 73 | }, 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "\"Hi there Sam!\".split(' ')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "['Hi', 'there', 'dad!']" 92 | ] 93 | }, 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "\"Hi there dad!\".split(' ')" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "** Given the variables:**\n", 108 | "\n", 109 | " planet = \"Earth\"\n", 110 | " diameter = 12742\n", 111 | "\n", 112 | "** Use .format() to print the following string: **\n", 113 | "\n", 114 | " The diameter of Earth is 12742 kilometers." 
115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "planet = \"Earth\"\n", 126 | "diameter = 12742" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 9, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "The diameter of Earth is 12742 kilometers\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "print(\"The diameter of Earth is {diam} kilometers\".format(diam=12742))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "** Given this nested list, use indexing to grab the word \"hello\" **" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 7, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 15, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "hello\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]\n", 179 | "print(lst[3][1][2][0])" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "** Given this nested dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 16, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 24, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "hello\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}\n", 213 | "\n", 214 | "print(d['k1'][3]['tricky'][3]['target'][3])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "** What is the main difference between a tuple and a list? **" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 23, 227 | "metadata": { 228 | "collapsed": true 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "# Tuple is immutable" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "** Create a function that grabs the email website domain from a string in the form: **\n", 240 | "\n", 241 | " user@domain.com\n", 242 | " \n", 243 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 46, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "import re\n", 253 | "\n", 254 | "def domainGet(email):\n", 255 | " return re.sub('^.*@', '', email)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 47, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "'domain.com'" 267 | ] 268 | }, 269 | "execution_count": 47, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | 
"domainGet('user@domain.com')" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 96, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "import re\n", 292 | "\n", 293 | "findDog = lambda x: bool(re.match(r\"^.*dog\", x))\n", 294 | "\n", 295 | "# I could have literally just done this though.\n", 296 | "findDogTwo = lambda x: 'dog' in x.lower().split()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 73, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/plain": [ 307 | "False" 308 | ] 309 | }, 310 | "execution_count": 73, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "findDog('Is there a dog here?')" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. 
**" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 79, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "import re\n", 333 | "\n", 334 | "countDog = lambda x: len(re.findall(r\"dog\", x))" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 80, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/plain": [ 345 | "2" 346 | ] 347 | }, 348 | "execution_count": 80, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "countDog('This dog runs faster than the other dog dude!')" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. For example:**\n", 362 | "\n", 363 | " seq = ['soup','dog','salad','cat','great']\n", 364 | "\n", 365 | "**should be filtered down to:**\n", 366 | "\n", 367 | " ['soup','salad']" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 90, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "seq = ['soup','dog','salad','cat','great']" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 91, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "['soup', 'salad']" 388 | ] 389 | }, 390 | "execution_count": 91, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "import re\n", 397 | "\n", 398 | "list(filter(lambda x: x[0] == 's', seq))" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "### Final Problem\n", 406 | "**You are driving a little too fast, and a police officer stops you. Write a function\n", 407 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". 
\n", 408 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 409 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 410 | " cases. **" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 93, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "def caught_speeding(speed, is_birthday):\n", 420 | " realspeed = speed - 5 if is_birthday else speed\n", 421 | " \n", 422 | " if (realspeed < 60): return \"No Ticket\"\n", 423 | " if (realspeed < 81): return \"Small Ticket\"\n", 424 | " return \"Big Ticket\"" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 94, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "'Small Ticket'" 436 | ] 437 | }, 438 | "execution_count": 94, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "caught_speeding(81,True)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 95, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "'Big Ticket'" 456 | ] 457 | }, 458 | "execution_count": 95, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "caught_speeding(81,False)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "# Great job!" 
472 | ] 473 | } 474 | ], 475 | "metadata": { 476 | "kernelspec": { 477 | "display_name": "Python 3", 478 | "language": "python", 479 | "name": "python3" 480 | }, 481 | "language_info": { 482 | "codemirror_mode": { 483 | "name": "ipython", 484 | "version": 3 485 | }, 486 | "file_extension": ".py", 487 | "mimetype": "text/x-python", 488 | "name": "python", 489 | "nbconvert_exporter": "python", 490 | "pygments_lexer": "ipython3", 491 | "version": "3.8.8" 492 | } 493 | }, 494 | "nbformat": 4, 495 | "nbformat_minor": 1 496 | } 497 | -------------------------------------------------------------------------------- /01-Python-Crash-Course/03-Python Crash Course Exercises - Solutions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | " \n", 10 | "___\n", 11 | "# Python Crash Course Exercises - Solutions\n", 12 | "\n", 13 | "This is an optional exercise to test your understanding of Python Basics. If you find this extremely challenging, then you probably are not ready for the rest of this course yet and don't have enough programming experience to continue. I would suggest you take another course more geared towards complete beginners, such as [Complete Python Bootcamp](https://www.udemy.com/complete-python-bootcamp/?couponCode=PY20)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Exercises\n", 21 | "\n", 22 | "Answer the questions or complete the tasks outlined in bold below, use the specific method described if applicable." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "** What is 7 to the power of 4?**" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "2401" 41 | ] 42 | }, 43 | "execution_count": 1, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "7**4" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "** Split this string:**\n", 57 | "\n", 58 | " s = \"Hi there Sam!\"\n", 59 | " \n", 60 | "**into a list. **" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "s = 'Hi there Sam!'" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['Hi', 'there', 'dad!']" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "s.split()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "** Given the variables:**\n", 99 | "\n", 100 | " planet = \"Earth\"\n", 101 | " diameter = 12742\n", 102 | "\n", 103 | "** Use .format() to print the following string: **\n", 104 | "\n", 105 | " The diameter of Earth is 12742 kilometers." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "planet = \"Earth\"\n", 117 | "diameter = 12742" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "The diameter of Earth is 12742 kilometers.\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "print(\"The diameter of {} is {} kilometers.\".format(planet,diameter))" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "** Given this nested list, use indexing to grab the word \"hello\" **" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 7, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "lst = [1,2,[3,4],[5,[100,200,['hello']],23,11],1,7]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 14, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "'hello'" 164 | ] 165 | }, 166 | "execution_count": 14, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "lst[3][1][2][0]" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "** Given this nested dictionary grab the word \"hello\". 
Be prepared, this will be annoying/tricky **" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 16, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "d = {'k1':[1,2,3,{'tricky':['oh','man','inception',{'target':[1,2,3,'hello']}]}]}" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 22, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "'hello'" 200 | ] 201 | }, 202 | "execution_count": 22, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "d['k1'][3]['tricky'][3]['target'][3]" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "** What is the main difference between a tuple and a list? **" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 23, 221 | "metadata": { 222 | "collapsed": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "# Tuple is immutable" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "** Create a function that grabs the email website domain from a string in the form: **\n", 234 | "\n", 235 | " user@domain.com\n", 236 | " \n", 237 | "**So for example, passing \"user@domain.com\" would return: domain.com**" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 24, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def domainGet(email):\n", 249 | " return email.split('@')[-1]" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 26, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/plain": [ 260 | "'domain.com'" 261 | ] 262 | }, 263 | "execution_count": 26, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "domainGet('user@domain.com')" 270 | ] 271 | }, 272 | { 273 | "cell_type": 
"markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "** Create a basic function that returns True if the word 'dog' is contained in the input string. Don't worry about edge cases like a punctuation being attached to the word dog, but do account for capitalization. **" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 27, 282 | "metadata": { 283 | "collapsed": true 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "def findDog(st):\n", 288 | " return 'dog' in st.lower().split()" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 28, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "True" 300 | ] 301 | }, 302 | "execution_count": 28, 303 | "metadata": {}, 304 | "output_type": "execute_result" 305 | } 306 | ], 307 | "source": [ 308 | "findDog('Is there a dog here?')" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "** Create a function that counts the number of times the word \"dog\" occurs in a string. Again ignore edge cases. 
**" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 30, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "def countDog(st):\n", 325 | " count = 0\n", 326 | " for word in st.lower().split():\n", 327 | " if word == 'dog':\n", 328 | " count += 1\n", 329 | " return count" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 31, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": [ 340 | "2" 341 | ] 342 | }, 343 | "execution_count": 31, 344 | "metadata": {}, 345 | "output_type": "execute_result" 346 | } 347 | ], 348 | "source": [ 349 | "countDog('This dog runs faster than the other dog dude!')" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "** Use lambda expressions and the filter() function to filter out words from a list that don't start with the letter 's'. For example:**\n", 357 | "\n", 358 | " seq = ['soup','dog','salad','cat','great']\n", 359 | "\n", 360 | "**should be filtered down to:**\n", 361 | "\n", 362 | " ['soup','salad']" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 34, 368 | "metadata": { 369 | "collapsed": true 370 | }, 371 | "outputs": [], 372 | "source": [ 373 | "seq = ['soup','dog','salad','cat','great']" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 35, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "['soup', 'salad']" 385 | ] 386 | }, 387 | "execution_count": 35, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "list(filter(lambda word: word[0]=='s',seq))" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "### Final Problem\n", 401 | "**You are driving a little too fast, and a police officer stops you. 
Write a function\n", 402 | " to return one of 3 possible results: \"No ticket\", \"Small ticket\", or \"Big Ticket\". \n", 403 | " If your speed is 60 or less, the result is \"No Ticket\". If speed is between 61 \n", 404 | " and 80 inclusive, the result is \"Small Ticket\". If speed is 81 or more, the result is \"Big Ticket\". Unless it is your birthday (encoded as a boolean value in the parameters of the function) -- on your birthday, your speed can be 5 higher in all \n", 405 | " cases. **" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 4, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [ 416 | "def caught_speeding(speed, is_birthday):\n", 417 | " \n", 418 | " if is_birthday:\n", 419 | " speeding = speed - 5\n", 420 | " else:\n", 421 | " speeding = speed\n", 422 | " \n", 423 | " if speeding > 80:\n", 424 | " return 'Big Ticket'\n", 425 | " elif speeding > 60:\n", 426 | " return 'Small Ticket'\n", 427 | " else:\n", 428 | " return 'No Ticket'" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 5, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "'Small Ticket'" 440 | ] 441 | }, 442 | "execution_count": 5, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "caught_speeding(81,True)" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 6, 454 | "metadata": {}, 455 | "outputs": [ 456 | { 457 | "data": { 458 | "text/plain": [ 459 | "'Big Ticket'" 460 | ] 461 | }, 462 | "execution_count": 6, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "caught_speeding(81,False)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": {}, 474 | "source": [ 475 | "# Great job!" 
476 | ] 477 | } 478 | ], 479 | "metadata": { 480 | "kernelspec": { 481 | "display_name": "Python 3", 482 | "language": "python", 483 | "name": "python3" 484 | }, 485 | "language_info": { 486 | "codemirror_mode": { 487 | "name": "ipython", 488 | "version": 3 489 | }, 490 | "file_extension": ".py", 491 | "mimetype": "text/x-python", 492 | "name": "python", 493 | "nbconvert_exporter": "python", 494 | "pygments_lexer": "ipython3", 495 | "version": "3.8.8" 496 | } 497 | }, 498 | "nbformat": 4, 499 | "nbformat_minor": 1 500 | } 501 | -------------------------------------------------------------------------------- /02-Numpy/01-NumPy-Indexing-and-Selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright Pierian Data
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# NumPy Indexing and Selection\n", 20 | "\n", 21 | "In this lecture we will discuss how to select elements or groups of elements from an array." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "#Creating sample array\n", 44 | "arr = np.arange(0,11)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 56 | ] 57 | }, 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "#Show\n", 65 | "arr" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Bracket Indexing and Selection\n", 73 | "The simplest way to pick one or some elements of an array looks very similar to python lists:" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "8" 85 | ] 86 | }, 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "#Get a value at an index\n", 94 | "arr[8]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 5, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1, 2, 3, 4])" 106 | ] 107 | }, 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "#Get values in a range\n", 115 | "arr[1:5]" 116 | ] 117 | 
}, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "array([0, 1, 2, 3, 4])" 127 | ] 128 | }, 129 | "execution_count": 6, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "#Get values in a range\n", 136 | "arr[0:5]" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Broadcasting\n", 144 | "\n", 145 | "NumPy arrays differ from normal Python lists because of their ability to broadcast. With lists, you can only reassign parts of a list with new parts of the same size and shape. That is, if you wanted to replace the first 5 elements in a list with a new value, you would have to pass in a new 5 element list. With NumPy arrays, you can broadcast a single value across a larger set of values:" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "array([100, 100, 100, 100, 100, 5, 6, 7, 8, 9, 10])" 157 | ] 158 | }, 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "#Setting a value with index range (Broadcasting)\n", 166 | "arr[0:5]=100\n", 167 | "\n", 168 | "#Show\n", 169 | "arr" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 181 | ] 182 | }, 183 | "execution_count": 8, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "# Reset array, we'll see why I had to reset in a moment\n", 190 | "arr = np.arange(0,11)\n", 191 | "\n", 192 | "#Show\n", 193 | "arr" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 9, 199 | "metadata": 
{}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([0, 1, 2, 3, 4, 5])" 205 | ] 206 | }, 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "#Important notes on Slices\n", 214 | "slice_of_arr = arr[0:6]\n", 215 | "\n", 216 | "#Show slice\n", 217 | "slice_of_arr" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 10, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "array([99, 99, 99, 99, 99, 99])" 229 | ] 230 | }, 231 | "execution_count": 10, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "#Change Slice\n", 238 | "slice_of_arr[:]=99\n", 239 | "\n", 240 | "#Show Slice again\n", 241 | "slice_of_arr" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "Now note the changes also occur in our original array!" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 11, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 260 | ] 261 | }, 262 | "execution_count": 11, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "arr" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "Data is not copied, it's a view of the original array! This avoids memory problems!" 
276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 12, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "array([99, 99, 99, 99, 99, 99, 6, 7, 8, 9, 10])" 287 | ] 288 | }, 289 | "execution_count": 12, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "#To get a copy, need to be explicit\n", 296 | "arr_copy = arr.copy()\n", 297 | "\n", 298 | "arr_copy" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "## Indexing a 2D array (matrices)\n", 306 | "\n", 307 | "The general format is **arr_2d[row][col]** or **arr_2d[row,col]**. I recommend using the comma notation for clarity." 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 13, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([[ 5, 10, 15],\n", 319 | " [20, 25, 30],\n", 320 | " [35, 40, 45]])" 321 | ] 322 | }, 323 | "execution_count": 13, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))\n", 330 | "\n", 331 | "#Show\n", 332 | "arr_2d" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 14, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "array([20, 25, 30])" 344 | ] 345 | }, 346 | "execution_count": 14, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "#Indexing row\n", 353 | "arr_2d[1]" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 15, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "20" 365 | ] 366 | }, 367 | "execution_count": 15, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "# Format is arr_2d[row][col] or 
arr_2d[row,col]\n", 374 | "\n", 375 | "# Getting individual element value\n", 376 | "arr_2d[1][0]" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 16, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/plain": [ 387 | "20" 388 | ] 389 | }, 390 | "execution_count": 16, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "# Getting individual element value\n", 397 | "arr_2d[1,0]" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 17, 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "data": { 407 | "text/plain": [ 408 | "array([[10, 15],\n", 409 | " [25, 30]])" 410 | ] 411 | }, 412 | "execution_count": 17, 413 | "metadata": {}, 414 | "output_type": "execute_result" 415 | } 416 | ], 417 | "source": [ 418 | "# 2D array slicing\n", 419 | "\n", 420 | "#Shape (2,2) from top right corner\n", 421 | "arr_2d[:2,1:]" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 18, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "array([35, 40, 45])" 433 | ] 434 | }, 435 | "execution_count": 18, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "#Shape bottom row\n", 442 | "arr_2d[2]" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 19, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "data": { 452 | "text/plain": [ 453 | "array([35, 40, 45])" 454 | ] 455 | }, 456 | "execution_count": 19, 457 | "metadata": {}, 458 | "output_type": "execute_result" 459 | } 460 | ], 461 | "source": [ 462 | "#Shape bottom row\n", 463 | "arr_2d[2,:]" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "## More Indexing Help\n", 471 | "Indexing a 2D matrix can be a bit confusing at first, especially when you start to add in step size. 
Try google image searching *NumPy indexing* to find useful images, like this one:\n", 472 | "\n", 473 | " Image source: http://www.scipy-lectures.org/intro/numpy/numpy.html" 474 | ] 475 | }, 476 | { 477 | "cell_type": "markdown", 478 | "metadata": {}, 479 | "source": [ 480 | "## Conditional Selection\n", 481 | "\n", 482 | "This is a very fundamental concept that will directly translate to pandas later on, make sure you understand this part!\n", 483 | "\n", 484 | "Let's briefly go over how to use brackets for selection based off of comparison operators." 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 20, 490 | "metadata": {}, 491 | "outputs": [ 492 | { 493 | "data": { 494 | "text/plain": [ 495 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 496 | ] 497 | }, 498 | "execution_count": 20, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "arr = np.arange(1,11)\n", 505 | "arr" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 21, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "data": { 515 | "text/plain": [ 516 | "array([False, False, False, False, True, True, True, True, True,\n", 517 | " True])" 518 | ] 519 | }, 520 | "execution_count": 21, 521 | "metadata": {}, 522 | "output_type": "execute_result" 523 | } 524 | ], 525 | "source": [ 526 | "arr > 4" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 22, 532 | "metadata": { 533 | "collapsed": true 534 | }, 535 | "outputs": [], 536 | "source": [ 537 | "bool_arr = arr>4" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": 23, 543 | "metadata": {}, 544 | "outputs": [ 545 | { 546 | "data": { 547 | "text/plain": [ 548 | "array([False, False, False, False, True, True, True, True, True,\n", 549 | " True])" 550 | ] 551 | }, 552 | "execution_count": 23, 553 | "metadata": {}, 554 | "output_type": "execute_result" 555 | } 556 | ], 557 | "source": [ 558 | "bool_arr" 559 
| ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 24, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "array([ 5, 6, 7, 8, 9, 10])" 570 | ] 571 | }, 572 | "execution_count": 24, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "arr[bool_arr]" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 25, 584 | "metadata": {}, 585 | "outputs": [ 586 | { 587 | "data": { 588 | "text/plain": [ 589 | "array([ 3, 4, 5, 6, 7, 8, 9, 10])" 590 | ] 591 | }, 592 | "execution_count": 25, 593 | "metadata": {}, 594 | "output_type": "execute_result" 595 | } 596 | ], 597 | "source": [ 598 | "arr[arr>2]" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 26, 604 | "metadata": {}, 605 | "outputs": [ 606 | { 607 | "data": { 608 | "text/plain": [ 609 | "array([ 3, 4, 5, 6, 7, 8, 9, 10])" 610 | ] 611 | }, 612 | "execution_count": 26, 613 | "metadata": {}, 614 | "output_type": "execute_result" 615 | } 616 | ], 617 | "source": [ 618 | "x = 2\n", 619 | "arr[arr>x]" 620 | ] 621 | }, 622 | { 623 | "cell_type": "markdown", 624 | "metadata": {}, 625 | "source": [ 626 | "# Great Job!\n" 627 | ] 628 | } 629 | ], 630 | "metadata": { 631 | "anaconda-cloud": {}, 632 | "kernelspec": { 633 | "display_name": "Python 3", 634 | "language": "python", 635 | "name": "python3" 636 | }, 637 | "language_info": { 638 | "codemirror_mode": { 639 | "name": "ipython", 640 | "version": 3 641 | }, 642 | "file_extension": ".py", 643 | "mimetype": "text/x-python", 644 | "name": "python", 645 | "nbconvert_exporter": "python", 646 | "pygments_lexer": "ipython3", 647 | "version": "3.6.6" 648 | } 649 | }, 650 | "nbformat": 4, 651 | "nbformat_minor": 1 652 | } 653 | -------------------------------------------------------------------------------- /02-Numpy/02-NumPy-Operations.ipynb: -------------------------------------------------------------------------------- 
1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright Pierian Data
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "source": [ 21 | "# NumPy Operations" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Arithmetic\n", 29 | "\n", 30 | "You can easily perform *array with array* arithmetic, or *scalar with array* arithmetic. Let's see some examples:" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 42 | ] 43 | }, 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "import numpy as np\n", 51 | "arr = np.arange(0,10)\n", 52 | "arr" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])" 64 | ] 65 | }, 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "arr + arr" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])" 84 | ] 85 | }, 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "arr * arr" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 104 | ] 105 | }, 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "arr - arr" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "outputs": [ 
120 | { 121 | "name": "stderr", 122 | "output_type": "stream", 123 | "text": [ 124 | "C:\\Anaconda3\\envs\\tsa_course\\lib\\site-packages\\ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in true_divide\n", 125 | " This is separate from the ipykernel package so we can avoid doing imports until\n" 126 | ] 127 | }, 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "array([nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 132 | ] 133 | }, 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "# This will raise a Warning on division by zero, but not an error!\n", 141 | "# It just fills the spot with nan\n", 142 | "arr/arr" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stderr", 152 | "output_type": "stream", 153 | "text": [ 154 | "C:\\Anaconda3\\envs\\tsa_course\\lib\\site-packages\\ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in true_divide\n", 155 | " \n" 156 | ] 157 | }, 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "array([ inf, 1. 
, 0.5 , 0.33333333, 0.25 ,\n", 162 | " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111])" 163 | ] 164 | }, 165 | "execution_count": 6, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "# Also a warning (but not an error) relating to infinity\n", 172 | "1/arr" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729], dtype=int32)" 184 | ] 185 | }, 186 | "execution_count": 7, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "arr**3" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "## Universal Array Functions\n", 200 | "\n", 201 | "NumPy comes with many [universal array functions](http://docs.scipy.org/doc/numpy/reference/ufuncs.html), or ufuncs, which are essentially just mathematical operations that can be applied across the array.
Let's show some common ones:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "array([0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 213 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])" 214 | ] 215 | }, 216 | "execution_count": 8, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "# Taking Square Roots\n", 223 | "np.sqrt(arr)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 9, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,\n", 235 | " 5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,\n", 236 | " 2.98095799e+03, 8.10308393e+03])" 237 | ] 238 | }, 239 | "execution_count": 9, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "# Calculating exponential (e^)\n", 246 | "np.exp(arr)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 10, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "array([ 0. 
, 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", 258 | " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849])" 259 | ] 260 | }, 261 | "execution_count": 10, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "# Trigonometric Functions like sine\n", 268 | "np.sin(arr)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 11, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stderr", 278 | "output_type": "stream", 279 | "text": [ 280 | "C:\\Anaconda3\\envs\\tsa_course\\lib\\site-packages\\ipykernel_launcher.py:2: RuntimeWarning: divide by zero encountered in log\n", 281 | " \n" 282 | ] 283 | }, 284 | { 285 | "data": { 286 | "text/plain": [ 287 | "array([ -inf, 0. , 0.69314718, 1.09861229, 1.38629436,\n", 288 | " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])" 289 | ] 290 | }, 291 | "execution_count": 11, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "# Taking the Natural Logarithm\n", 298 | "np.log(arr)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "## Summary Statistics on Arrays\n", 306 | "\n", 307 | "NumPy also offers common summary statistics like sum, mean and max. You would call these as methods on an array." 
308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 12, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 319 | ] 320 | }, 321 | "execution_count": 12, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "arr = np.arange(0,10)\n", 328 | "arr" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 13, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "data": { 338 | "text/plain": [ 339 | "45" 340 | ] 341 | }, 342 | "execution_count": 13, 343 | "metadata": {}, 344 | "output_type": "execute_result" 345 | } 346 | ], 347 | "source": [ 348 | "arr.sum()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 14, 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "4.5" 360 | ] 361 | }, 362 | "execution_count": 14, 363 | "metadata": {}, 364 | "output_type": "execute_result" 365 | } 366 | ], 367 | "source": [ 368 | "arr.mean()" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 15, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/plain": [ 379 | "9" 380 | ] 381 | }, 382 | "execution_count": 15, 383 | "metadata": {}, 384 | "output_type": "execute_result" 385 | } 386 | ], 387 | "source": [ 388 | "arr.max()" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "Other summary statistics include:\n", 396 | "
\n",
397 |     "arr.min() returns 0                   minimum\n",
398 |     "arr.var() returns 8.25                variance\n",
399 |     "arr.std() returns 2.8722813232690143  standard deviation\n",
400 |     "
" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "## Axis Logic\n", 408 | "When working with 2-dimensional arrays (matrices) we have to consider rows and columns. This becomes very important when we get to the section on pandas. In array terms, axis 0 (zero) is the vertical axis (rows), and axis 1 is the horizonal axis (columns). These values (0,1) correspond to the order in which arr.shape values are returned.\n", 409 | "\n", 410 | "Let's see how this affects our summary statistic calculations from above." 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 16, 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "array([[ 1, 2, 3, 4],\n", 422 | " [ 5, 6, 7, 8],\n", 423 | " [ 9, 10, 11, 12]])" 424 | ] 425 | }, 426 | "execution_count": 16, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "arr_2d = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])\n", 433 | "arr_2d" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 17, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 444 | "array([15, 18, 21, 24])" 445 | ] 446 | }, 447 | "execution_count": 17, 448 | "metadata": {}, 449 | "output_type": "execute_result" 450 | } 451 | ], 452 | "source": [ 453 | "arr_2d.sum(axis=0)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "By passing in axis=0, we're returning an array of sums along the vertical axis, essentially [(1+5+9), (2+6+10), (3+7+11), (4+8+12)]\n", 461 | "\n", 462 | "" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 18, 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": [ 473 | "(3, 4)" 474 | ] 475 | }, 476 | "execution_count": 18, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 
482 | "arr_2d.shape" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "This tells us that arr_2d has 3 rows and 4 columns.\n", 490 | "\n", 491 | "In arr_2d.sum(axis=0) above, the first element in each row was summed, then the second element, and so forth.\n", 492 | "\n", 493 | "So what should arr_2d.sum(axis=1) return?" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "metadata": { 500 | "collapsed": true 501 | }, 502 | "outputs": [], 503 | "source": [ 504 | "# THINK ABOUT WHAT THIS WILL RETURN BEFORE RUNNING THE CELL!\n", 505 | "arr_2d.sum(axis=1)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "# Great Job!\n", 513 | "\n", 514 | "That's all we need to know for now!" 515 | ] 516 | } 517 | ], 518 | "metadata": { 519 | "kernelspec": { 520 | "display_name": "Python 3", 521 | "language": "python", 522 | "name": "python3" 523 | }, 524 | "language_info": { 525 | "codemirror_mode": { 526 | "name": "ipython", 527 | "version": 3 528 | }, 529 | "file_extension": ".py", 530 | "mimetype": "text/x-python", 531 | "name": "python", 532 | "nbconvert_exporter": "python", 533 | "pygments_lexer": "ipython3", 534 | "version": "3.6.6" 535 | } 536 | }, 537 | "nbformat": 4, 538 | "nbformat_minor": 1 539 | } 540 | -------------------------------------------------------------------------------- /02-Numpy/NumPy Indexing and Selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a7bd5048", 6 | "metadata": {}, 7 | "source": [ 8 | "# Indexing and Selection" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "d9d0bb5e", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "53a540c4", 25 | 
"metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "arr = np.arange(0, 11)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "id": "a3cd2e51", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 41 | ] 42 | }, 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "arr" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "id": "89092a60", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "8" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "arr[8]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "id": "c5d2cb13", 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "array([1, 2, 3, 4])" 83 | ] 84 | }, 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "# Goes up to but not including.\n", 92 | "arr[1:5]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "id": "9daad536", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "array([0, 1, 2, 3, 4])" 105 | ] 106 | }, 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "arr[0:5]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "id": "9f7a60de", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "array([0, 1, 2, 3, 4])" 126 | ] 127 | }, 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "arr[:5]" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | 
"execution_count": 9, 140 | "id": "019ee16d", 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "array([ 5, 6, 7, 8, 9, 10])" 147 | ] 148 | }, 149 | "execution_count": 9, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "# Do not need to know how long the array is\n", 156 | "arr[5:]" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 11, 162 | "id": "28422792", 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "# Ability to broadcast. we can broadcast a single value across a large number of values.\n", 167 | "arr[0:5] = 100" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 12, 173 | "id": "bddd6e83", 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([100, 100, 100, 100, 100, 5, 6, 7, 8, 9, 10])" 180 | ] 181 | }, 182 | "execution_count": 12, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "arr" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 13, 194 | "id": "738294c8", 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "arr = np.arange(0, 11)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 14, 204 | "id": "a917418d", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 211 | ] 212 | }, 213 | "execution_count": 14, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "arr" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 18, 225 | "id": "7f5d9207", 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "slice_of_arr = arr[:5]" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 19, 235 | "id": "ded9ab0d", 236 | "metadata": {}, 237 | 
"outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 242 | ] 243 | }, 244 | "execution_count": 19, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "arr" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 21, 256 | "id": "52ca54fe", 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "array([0, 1, 2, 3, 4])" 263 | ] 264 | }, 265 | "execution_count": 21, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "slice_of_arr" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 22, 277 | "id": "6ac71c6b", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "slice_of_arr[:] = 99" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 23, 287 | "id": "2464cee1", 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "array([99, 99, 99, 99, 99, 5, 6, 7, 8, 9, 10])" 294 | ] 295 | }, 296 | "execution_count": 23, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "arr" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 24, 308 | "id": "66f390cc", 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "# A slice is not a copy of the original array\n", 313 | "arr_copy = arr.copy()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 25, 319 | "id": "e1f59b50", 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "array([99, 99, 99, 99, 99, 5, 6, 7, 8, 9, 10])" 326 | ] 327 | }, 328 | "execution_count": 25, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "arr_copy" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 26, 340 | "id": 
"04c5908e", 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "arr_copy[:] = 100" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 27, 350 | "id": "8ded82e7", 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])" 357 | ] 358 | }, 359 | "execution_count": 27, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "arr_copy" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 28, 371 | "id": "50cf4453", 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "array([99, 99, 99, 99, 99, 5, 6, 7, 8, 9, 10])" 378 | ] 379 | }, 380 | "execution_count": 28, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "arr" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 29, 392 | "id": "5461f8c8", 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [ 396 | "arr2d = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 30, 402 | "id": "8c0b33d6", 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "data": { 407 | "text/plain": [ 408 | "array([[ 5, 10, 15],\n", 409 | " [20, 25, 30],\n", 410 | " [35, 40, 45]])" 411 | ] 412 | }, 413 | "execution_count": 30, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "arr2d" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 31, 425 | "id": "2ab2a052", 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "text/plain": [ 431 | "(3, 3)" 432 | ] 433 | }, 434 | "execution_count": 31, 435 | "metadata": {}, 436 | "output_type": "execute_result" 437 | } 438 | ], 439 | "source": [ 440 | "arr2d.shape" 441 | ] 442 | }, 443 | { 444 | "cell_type": 
"code", 445 | "execution_count": 33, 446 | "id": "6c173ddd", 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "array([ 5, 10, 15])" 453 | ] 454 | }, 455 | "execution_count": 33, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "arr2d[0]" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 34, 467 | "id": "bd74f6e6", 468 | "metadata": {}, 469 | "outputs": [ 470 | { 471 | "data": { 472 | "text/plain": [ 473 | "array([20, 25, 30])" 474 | ] 475 | }, 476 | "execution_count": 34, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "arr2d[1]" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 35, 488 | "id": "e861a645", 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/plain": [ 494 | "5" 495 | ] 496 | }, 497 | "execution_count": 35, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "arr2d[0][0]" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 36, 509 | "id": "b67f11c5", 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "data": { 514 | "text/plain": [ 515 | "25" 516 | ] 517 | }, 518 | "execution_count": 36, 519 | "metadata": {}, 520 | "output_type": "execute_result" 521 | } 522 | ], 523 | "source": [ 524 | "arr2d[1][1]" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 37, 530 | "id": "fd7127ab", 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/plain": [ 536 | "25" 537 | ] 538 | }, 539 | "execution_count": 37, 540 | "metadata": {}, 541 | "output_type": "execute_result" 542 | } 543 | ], 544 | "source": [ 545 | "# Equivaluent to seperate definition\n", 546 | "arr2d[1, 1]" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 39, 552 | "id": "252c983d", 553 | "metadata": {}, 554 | "outputs": [ 
555 | { 556 | "data": { 557 | "text/plain": [ 558 | "array([[10, 15],\n", 559 | " [25, 30]])" 560 | ] 561 | }, 562 | "execution_count": 39, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "# Get the first two rows and the last two columns using same syntax\n", 569 | "arr2d[:2, 1:]" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 40, 575 | "id": "94412ce4", 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [ 579 | "# Conditional selection\n", 580 | "arr = np.arange(1, 11)" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 41, 586 | "id": "39b0fe4f", 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" 593 | ] 594 | }, 595 | "execution_count": 41, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "arr" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 42, 607 | "id": "5b42eb48", 608 | "metadata": {}, 609 | "outputs": [ 610 | { 611 | "data": { 612 | "text/plain": [ 613 | "array([False, False, False, False, True, True, True, True, True,\n", 614 | " True])" 615 | ] 616 | }, 617 | "execution_count": 42, 618 | "metadata": {}, 619 | "output_type": "execute_result" 620 | } 621 | ], 622 | "source": [ 623 | "# Returns where index location matches up with the original data\n", 624 | "arr > 4" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 43, 630 | "id": "860902bd", 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "boolarr = arr > 4" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 44, 640 | "id": "e83f014a", 641 | "metadata": {}, 642 | "outputs": [ 643 | { 644 | "data": { 645 | "text/plain": [ 646 | "array([False, False, False, False, True, True, True, True, True,\n", 647 | " True])" 648 | ] 649 | }, 650 | "execution_count": 44, 
651 | "metadata": {}, 652 | "output_type": "execute_result" 653 | } 654 | ], 655 | "source": [ 656 | "boolarr" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": 45, 662 | "id": "18f8039e", 663 | "metadata": {}, 664 | "outputs": [ 665 | { 666 | "data": { 667 | "text/plain": [ 668 | "array([ 5, 6, 7, 8, 9, 10])" 669 | ] 670 | }, 671 | "execution_count": 45, 672 | "metadata": {}, 673 | "output_type": "execute_result" 674 | } 675 | ], 676 | "source": [ 677 | "# Can use bool array to filter! by indexing by the conditional array\n", 678 | "# The finesse!\n", 679 | "arr[boolarr]" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 46, 685 | "id": "1e362a3f", 686 | "metadata": {}, 687 | "outputs": [ 688 | { 689 | "data": { 690 | "text/plain": [ 691 | "array([ 5, 6, 7, 8, 9, 10])" 692 | ] 693 | }, 694 | "execution_count": 46, 695 | "metadata": {}, 696 | "output_type": "execute_result" 697 | } 698 | ], 699 | "source": [ 700 | "# Simplified (simple way to apply a filter)\n", 701 | "arr[arr > 4]" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": 47, 707 | "id": "8c6da409", 708 | "metadata": {}, 709 | "outputs": [ 710 | { 711 | "name": "stdout", 712 | "output_type": "stream", 713 | "text": [ 714 | "19\n" 715 | ] 716 | } 717 | ], 718 | "source": [ 719 | "dice_rolls = np.array([3, 1, 5, 2, 5, 1, 1, 5, 1, 4, 2, 1, 4, 5, 3, 4, 5, 2, 4, 2, 6, 6, 3, 6, 2, 3, 5, 6, 5])\n", 720 | "\n", 721 | "total_rolls_over_two = len(dice_rolls[dice_rolls > 2])\n", 722 | "\n", 723 | "print(total_rolls_over_two)" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "id": "79310111", 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [] 733 | } 734 | ], 735 | "metadata": { 736 | "kernelspec": { 737 | "display_name": "Python 3", 738 | "language": "python", 739 | "name": "python3" 740 | }, 741 | "language_info": { 742 | "codemirror_mode": { 743 | "name": "ipython", 744 | "version": 3 
745 | }, 746 | "file_extension": ".py", 747 | "mimetype": "text/x-python", 748 | "name": "python", 749 | "nbconvert_exporter": "python", 750 | "pygments_lexer": "ipython3", 751 | "version": "3.8.8" 752 | } 753 | }, 754 | "nbformat": 4, 755 | "nbformat_minor": 5 756 | } 757 | -------------------------------------------------------------------------------- /02-Numpy/NumPy Operations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3a68a1fb", 6 | "metadata": {}, 7 | "source": [ 8 | "# NumPy Operations" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "45618576", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "4fbe2e21", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "arr = np.arange(0, 10)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "id": "41650e40", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 41 | ] 42 | }, 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "arr" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "id": "beb616ed", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "array([ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "# Arithmetic between arrays.\n", 71 | "arr + 5 # element by element basis!" 
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 5, 77 | "id": "fec9fdfe", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 84 | ] 85 | }, 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "arr" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "id": "fe7fb6e1", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "array([-2, -1, 0, 1, 2, 3, 4, 5, 6, 7])" 105 | ] 106 | }, 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "arr - 2" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "id": "e99cd714", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])" 126 | ] 127 | }, 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "arr + arr # Must be same shape, adds each index in one to the other" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 8, 140 | "id": "a022ccf1", 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" 147 | ] 148 | }, 149 | "execution_count": 8, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "arr - arr" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 9, 161 | "id": "47da02bc", 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 168 | ] 169 | }, 170 | "execution_count": 9, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "arr" 
177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 10, 182 | "id": "2a3d6a41", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "name": "stderr", 187 | "output_type": "stream", 188 | "text": [ 189 | ":1: RuntimeWarning: invalid value encountered in true_divide\n", 190 | " arr / arr\n" 191 | ] 192 | }, 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "array([nan, 1., 1., 1., 1., 1., 1., 1., 1., 1.])" 197 | ] 198 | }, 199 | "execution_count": 10, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "# Numpy gives nan if 0 / 0\n", 206 | "arr / arr" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 11, 212 | "id": "3d623e9a", 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "ename": "ZeroDivisionError", 217 | "evalue": "division by zero", 218 | "output_type": "error", 219 | "traceback": [ 220 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 221 | "\u001b[1;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", 222 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;36m1\u001b[0m \u001b[1;33m/\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 223 | "\u001b[1;31mZeroDivisionError\u001b[0m: division by zero" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "1 / 0" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 12, 234 | "id": "f2b9e702", 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stderr", 239 | "output_type": "stream", 240 | "text": [ 241 | ":1: RuntimeWarning: divide by zero encountered in true_divide\n", 242 | " 1 / arr\n" 243 | ] 244 | }, 245 | { 246 | "data": { 247 | "text/plain": [ 248 | "array([ inf, 1. 
, 0.5 , 0.33333333, 0.25 ,\n", 249 | " 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111])" 250 | ] 251 | }, 252 | "execution_count": 12, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "1 / arr" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 13, 264 | "id": "11089461", 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# Division by 0 in numpy gives a warning, not an exception, and puts inf (or nan for 0 / 0) in place instead." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 14, 274 | "id": "6c586955", 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "data": { 279 | "text/plain": [ 280 | "array([0. , 1. , 1.41421356, 1.73205081, 2. ,\n", 281 | " 2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])" 282 | ] 283 | }, 284 | "execution_count": 14, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "np.sqrt(arr)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 15, 296 | "id": "05928a3f", 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "array([ 0. , 0.84147098, 0.90929743, 0.14112001, -0.7568025 ,\n", 303 | " -0.95892427, -0.2794155 , 0.6569866 , 0.98935825, 0.41211849])" 304 | ] 305 | }, 306 | "execution_count": 15, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "np.sin(arr)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 17, 318 | "id": "8ffea73a", 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "name": "stderr", 323 | "output_type": "stream", 324 | "text": [ 325 | ":1: RuntimeWarning: divide by zero encountered in log\n", 326 | " np.log(arr)\n" 327 | ] 328 | }, 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "array([ -inf, 0. 
, 0.69314718, 1.09861229, 1.38629436,\n", 333 | " 1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])" 334 | ] 335 | }, 336 | "execution_count": 17, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "np.log(arr)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 18, 348 | "id": "ccd59eea", 349 | "metadata": {}, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "45" 355 | ] 356 | }, 357 | "execution_count": 18, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [ 363 | "# Perform summary stats on dataset\n", 364 | "arr.sum() # Sum of all elements" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 19, 370 | "id": "6ebe0009", 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "4.5" 377 | ] 378 | }, 379 | "execution_count": 19, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "arr.mean()" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 20, 391 | "id": "3ffdf96f", 392 | "metadata": {}, 393 | "outputs": [ 394 | { 395 | "data": { 396 | "text/plain": [ 397 | "9" 398 | ] 399 | }, 400 | "execution_count": 20, 401 | "metadata": {}, 402 | "output_type": "execute_result" 403 | } 404 | ], 405 | "source": [ 406 | "arr.max()" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 21, 412 | "id": "aff1e839", 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "8.25" 419 | ] 420 | }, 421 | "execution_count": 21, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "arr.var() # Variance" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 22, 433 | "id": "5d6f61a5", 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 
| "2.8722813232690143" 440 | ] 441 | }, 442 | "execution_count": 22, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "arr.std() # Standard deviation" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 24, 454 | "id": "b428f62b", 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "arr2d = np.arange(0, 25).reshape(5, 5)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 25, 464 | "id": "d9856cf4", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "array([[ 0, 1, 2, 3, 4],\n", 471 | " [ 5, 6, 7, 8, 9],\n", 472 | " [10, 11, 12, 13, 14],\n", 473 | " [15, 16, 17, 18, 19],\n", 474 | " [20, 21, 22, 23, 24]])" 475 | ] 476 | }, 477 | "execution_count": 25, 478 | "metadata": {}, 479 | "output_type": "execute_result" 480 | } 481 | ], 482 | "source": [ 483 | "arr2d" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 26, 489 | "id": "f938b08b", 490 | "metadata": {}, 491 | "outputs": [ 492 | { 493 | "data": { 494 | "text/plain": [ 495 | "300" 496 | ] 497 | }, 498 | "execution_count": 26, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "arr2d.sum()" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 27, 510 | "id": "e1e84ca2", 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "data": { 515 | "text/plain": [ 516 | "array([50, 55, 60, 65, 70])" 517 | ] 518 | }, 519 | "execution_count": 27, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "# Get sum of all the rows or all the columns?\n", 526 | "arr2d.sum(axis=0) # Row = axis 0 (perform the operation across the rows)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 28, 532 | "id": "6438b3c7", 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/plain": [ 538 | 
"array([ 10, 35, 60, 85, 110])" 539 | ] 540 | }, 541 | "execution_count": 28, 542 | "metadata": {}, 543 | "output_type": "execute_result" 544 | } 545 | ], 546 | "source": [ 547 | "arr2d.sum(axis=1) # Row = axis 0 (perform the operation across the columns)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 29, 553 | "id": "7316a87c", 554 | "metadata": {}, 555 | "outputs": [ 556 | { 557 | "name": "stdout", 558 | "output_type": "stream", 559 | "text": [ 560 | "2720\n" 561 | ] 562 | } 563 | ], 564 | "source": [ 565 | "account_transactions = np.array([100,-200,300,-400,100,100,-230,450,500,2000])\n", 566 | "\n", 567 | "account_total = account_transactions.sum()\n", 568 | "\n", 569 | "print(account_total)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "id": "5b53ddc9", 576 | "metadata": {}, 577 | "outputs": [], 578 | "source": [] 579 | } 580 | ], 581 | "metadata": { 582 | "kernelspec": { 583 | "display_name": "Python 3", 584 | "language": "python", 585 | "name": "python3" 586 | }, 587 | "language_info": { 588 | "codemirror_mode": { 589 | "name": "ipython", 590 | "version": 3 591 | }, 592 | "file_extension": ".py", 593 | "mimetype": "text/x-python", 594 | "name": "python", 595 | "nbconvert_exporter": "python", 596 | "pygments_lexer": "ipython3", 597 | "version": "3.8.8" 598 | } 599 | }, 600 | "nbformat": 4, 601 | "nbformat_minor": 5 602 | } 603 | -------------------------------------------------------------------------------- /02-Numpy/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "61f42306", 6 | "metadata": {}, 7 | "source": [ 8 | "## NumPy arrays" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "45ee754a", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | 
"execution_count": 3, 24 | "id": "81c0caca", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# Create a np array.\n", 29 | "mylist = [1, 2, 3]" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "id": "f40aead5", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "list" 42 | ] 43 | }, 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "type(mylist)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "id": "abbc5d8b", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "array([1, 2, 3])" 63 | ] 64 | }, 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "np.array(mylist)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "id": "734ba713", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "[1, 2, 3]" 84 | ] 85 | }, 86 | "execution_count": 6, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "mylist" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 7, 98 | "id": "661581c1", 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "myarr = np.array(mylist)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 8, 108 | "id": "77c9b421", 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "numpy.ndarray" 115 | ] 116 | }, 117 | "execution_count": 8, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "type(myarr)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 9, 129 | "id": "add6edf5", 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "mymatrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]" 134 | ] 
135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 10, 139 | "id": "daca4ffa", 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]" 146 | ] 147 | }, 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "mymatrix" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 11, 160 | "id": "c93c6538", 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "array([[1, 2, 3],\n", 167 | " [4, 5, 6],\n", 168 | " [7, 8, 9]])" 169 | ] 170 | }, 171 | "execution_count": 11, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "# np new to print as a matrix\n", 178 | "np.array(mymatrix)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 12, 184 | "id": "a79980ef", 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 191 | ] 192 | }, 193 | "execution_count": 12, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "# Bootstrap an array\n", 200 | "np.arange(0, 10)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "f710861a", 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.8.8" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 5 233 | } 234 | 
-------------------------------------------------------------------------------- /02-Numpy/Untitled1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0150c16c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Indexing and Selection" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "e6f7c193", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.8.8" 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 5 41 | } 42 | -------------------------------------------------------------------------------- /02-Numpy/Untitled2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "c23ce166", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [] 10 | } 11 | ], 12 | "metadata": { 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "language": "python", 16 | "name": "python3" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.8.8" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /02-Numpy/axis_logic.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/02-Numpy/axis_logic.png -------------------------------------------------------------------------------- /02-Numpy/numpy_indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/02-Numpy/numpy_indexing.png -------------------------------------------------------------------------------- /03-Pandas/Concatenation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8ae25cb2", 6 | "metadata": {}, 7 | "source": [ 8 | "# Concatenation: How to Combine DataFrames\n", 9 | "\n", 10 | "- Very simple if DFs are in same format.\n", 11 | "- Each row should align with the other.\n", 12 | "- Concatenation = \"pasting\" dataframes together by rows.\n", 13 | "- Pandas will fill null values where necessary." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "8eb0dd5e", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "id": "742f5e42", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "data_one = {'A': ['A0', 'A1', 'A2', 'A3'],'B': ['B0', 'B1', 'B2', 'B3']}" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "id": "fee2970e", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "data_two = {'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "id": "ec113c82", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "one = pd.DataFrame(data_one)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 5, 60 | "id": "db40ea1d", 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "two = pd.DataFrame(data_two)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 7, 70 | "id": "69c047dd", 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/html": [ 76 | "
\n", 77 | "\n", 90 | "\n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | "
ABCD
0A0B0C0D0
1A1B1C1D1
2A2B2C2D2
3A3B3C3D3
\n", 131 | "
" 132 | ], 133 | "text/plain": [ 134 | " A B C D\n", 135 | "0 A0 B0 C0 D0\n", 136 | "1 A1 B1 C1 D1\n", 137 | "2 A2 B2 C2 D2\n", 138 | "3 A3 B3 C3 D3" 139 | ] 140 | }, 141 | "execution_count": 7, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "# If in same format can just:\n", 148 | "\n", 149 | "pd.concat([one, two], axis=1) # Pass in list. axis = 1 = join the columns" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "id": "888ed5b1", 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 175 | "\n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | "
ABCD
0A0B0NaNNaN
1A1B1NaNNaN
2A2B2NaNNaN
3A3B3NaNNaN
0NaNNaNC0D0
1NaNNaNC1D1
2NaNNaNC2D2
3NaNNaNC3D3
\n", 244 | "
" 245 | ], 246 | "text/plain": [ 247 | " A B C D\n", 248 | "0 A0 B0 NaN NaN\n", 249 | "1 A1 B1 NaN NaN\n", 250 | "2 A2 B2 NaN NaN\n", 251 | "3 A3 B3 NaN NaN\n", 252 | "0 NaN NaN C0 D0\n", 253 | "1 NaN NaN C1 D1\n", 254 | "2 NaN NaN C2 D2\n", 255 | "3 NaN NaN C3 D3" 256 | ] 257 | }, 258 | "execution_count": 8, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "# Concat along the rows\n", 265 | "pd.concat([one, two], axis=0)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 11, 271 | "id": "f84e0537", 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/html": [ 277 | "
\n", 278 | "\n", 291 | "\n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | "
AB
0C0D0
1C1D1
2C2D2
3C3D3
\n", 322 | "
" 323 | ], 324 | "text/plain": [ 325 | " A B\n", 326 | "0 C0 D0\n", 327 | "1 C1 D1\n", 328 | "2 C2 D2\n", 329 | "3 C3 D3" 330 | ] 331 | }, 332 | "execution_count": 11, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "# But maybe you want to plug rows from one DF to the rows of another!\n", 339 | "# Will need to rename cols of one to match the other.\n", 340 | "two.columns = one.columns\n", 341 | "two" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 14, 347 | "id": "477780d6", 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/html": [ 353 | "
\n", 354 | "\n", 367 | "\n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | "
AB
0A0B0
1A1B1
2A2B2
3A3B3
4C0D0
5C1D1
6C2D2
7C3D3
\n", 418 | "
" 419 | ], 420 | "text/plain": [ 421 | " A B\n", 422 | "0 A0 B0\n", 423 | "1 A1 B1\n", 424 | "2 A2 B2\n", 425 | "3 A3 B3\n", 426 | "4 C0 D0\n", 427 | "5 C1 D1\n", 428 | "6 C2 D2\n", 429 | "7 C3 D3" 430 | ] 431 | }, 432 | "execution_count": 14, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "mydf = pd.concat([one, two], axis=0)\n", 439 | "# Fix up the repeated index\n", 440 | "mydf.index = range(len(mydf))\n", 441 | "mydf" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "id": "8fd64b3b", 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [] 451 | } 452 | ], 453 | "metadata": { 454 | "kernelspec": { 455 | "display_name": "Python 3", 456 | "language": "python", 457 | "name": "python3" 458 | }, 459 | "language_info": { 460 | "codemirror_mode": { 461 | "name": "ipython", 462 | "version": 3 463 | }, 464 | "file_extension": ".py", 465 | "mimetype": "text/x-python", 466 | "name": "python", 467 | "nbconvert_exporter": "python", 468 | "pygments_lexer": "ipython3", 469 | "version": "3.8.8" 470 | } 471 | }, 472 | "nbformat": 4, 473 | "nbformat_minor": 5 474 | } 475 | -------------------------------------------------------------------------------- /03-Pandas/Pandas Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "id": "bc32b314", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "id": "0f1b148d", 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/plain": [ 23 | "USA 1776\n", 24 | "CANADA 1867\n", 25 | "MEXICO 1821\n", 26 | "dtype: int64" 27 | ] 28 | }, 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "# pd series! 
help(pd.series)\n", 36 | "\n", 37 | "myindex = ['USA', 'CANADA', 'MEXICO']\n", 38 | "mydata = [1776, 1867, 1821]\n", 39 | "\n", 40 | "myser = pd.Series(data=mydata, index=myindex)\n", 41 | "myser" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "id": "f9eac8f7", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "pandas.core.series.Series" 54 | ] 55 | }, 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "type(myser)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "id": "c49a8a2d", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "1776" 75 | ] 76 | }, 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "myser[0]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 5, 89 | "id": "a6873bac", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "1776" 96 | ] 97 | }, 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "myser['USA']" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "id": "c50f9bd4", 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "1821" 117 | ] 118 | }, 119 | "execution_count": 6, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "# Series object maintains normal + labelled index\n", 126 | "myser['MEXICO']" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "id": "9caaa85e", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "myseranom = pd.Series(mydata, myindex)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 8, 142 | "id": "8eec74e8", 143 
| "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "USA 1776\n", 149 | "CANADA 1867\n", 150 | "MEXICO 1821\n", 151 | "dtype: int64" 152 | ] 153 | }, 154 | "execution_count": 8, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "myseranom" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 11, 166 | "id": "4a0d7322", 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "SAM 5\n", 173 | "FRANK 10\n", 174 | "SPIKE 7\n", 175 | "dtype: int64" 176 | ] 177 | }, 178 | "execution_count": 11, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "# We can create a series from a dict\n", 185 | "ages = {\n", 186 | " 'SAM': 5,\n", 187 | " 'FRANK': 10,\n", 188 | " 'SPIKE': 7\n", 189 | "}\n", 190 | "\n", 191 | "# Yes pandas is that smart!\n", 192 | "pd.Series(ages)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 3, 198 | "id": "b42527e8", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "# Imaginary Sales Data for 1st and 2nd Quarters for Global Company\n", 203 | "q1 = {'Japan': 80, 'China': 450, 'India': 200, 'USA': 250}\n", 204 | "q2 = {'Brazil': 100,'China': 500, 'India': 210,'USA': 260}\n", 205 | "\n", 206 | "sales_q1 = pd.Series(q1)\n", 207 | "sales_q2 = pd.Series(q2)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 4, 213 | "id": "4057e02a", 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "Japan 80\n", 220 | "China 450\n", 221 | "India 200\n", 222 | "USA 250\n", 223 | "dtype: int64" 224 | ] 225 | }, 226 | "execution_count": 4, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "sales_q1" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 5, 238 | "id": "e1f78b0c", 239 | "metadata": 
{}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | "80" 245 | ] 246 | }, 247 | "execution_count": 5, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "sales_q1['Japan']" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 6, 259 | "id": "121aa789", 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "80" 266 | ] 267 | }, 268 | "execution_count": 6, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "sales_q1[0]" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 7, 280 | "id": "e032c5a4", 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "Index(['Japan', 'China', 'India', 'USA'], dtype='object')" 287 | ] 288 | }, 289 | "execution_count": 7, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "# See what keys are available.\n", 296 | "sales_q1.keys()" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "id": "e2033971", 302 | "metadata": {}, 303 | "source": [ 304 | "## Operations Between Two Series" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 8, 310 | "id": "d9072359", 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "data": { 315 | "text/plain": [ 316 | "[1, 2, 1, 2]" 317 | ] 318 | }, 319 | "execution_count": 8, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "[1, 2] * 2" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 9, 331 | "id": "2492e484", 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "array([2, 4])" 338 | ] 339 | }, 340 | "execution_count": 9, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "# Multiply each item in the array 
(broadcast)\n", 347 | "np.array([1, 2]) * 2" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 10, 353 | "id": "7ea89481", 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "Japan 160\n", 360 | "China 900\n", 361 | "India 400\n", 362 | "USA 500\n", 363 | "dtype: int64" 364 | ] 365 | }, 366 | "execution_count": 10, 367 | "metadata": {}, 368 | "output_type": "execute_result" 369 | } 370 | ], 371 | "source": [ 372 | "sales_q1 * 2" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 11, 378 | "id": "71b47712", 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "Japan 0.8\n", 385 | "China 4.5\n", 386 | "India 2.0\n", 387 | "USA 2.5\n", 388 | "dtype: float64" 389 | ] 390 | }, 391 | "execution_count": 11, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "sales_q1 / 100" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 12, 403 | "id": "1c0bf71a", 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "Japan 80\n", 410 | "China 450\n", 411 | "India 200\n", 412 | "USA 250\n", 413 | "dtype: int64" 414 | ] 415 | }, 416 | "execution_count": 12, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "# Takes the labeled index into account!!\n", 423 | "sales_q1" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 13, 429 | "id": "20b95dd5", 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "Brazil 100\n", 436 | "China 500\n", 437 | "India 210\n", 438 | "USA 260\n", 439 | "dtype: int64" 440 | ] 441 | }, 442 | "execution_count": 13, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "sales_q2" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 
14, 454 | "id": "5540c95d", 455 | "metadata": {}, 456 | "outputs": [ 457 | { 458 | "data": { 459 | "text/plain": [ 460 | "Brazil NaN\n", 461 | "China 950.0\n", 462 | "India 410.0\n", 463 | "Japan NaN\n", 464 | "USA 510.0\n", 465 | "dtype: float64" 466 | ] 467 | }, 468 | "execution_count": 14, 469 | "metadata": {}, 470 | "output_type": "execute_result" 471 | } 472 | ], 473 | "source": [ 474 | "# Check this out! (does not know what to do if labels not present in both series)\n", 475 | "sales_q1 + sales_q2" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 18, 481 | "id": "d61f5a25", 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "# Fill in the blanks. (using the add method on Series to add two series together.)\n", 486 | "first_half = sales_q1.add(sales_q2, fill_value=0)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "id": "2c478ce3", 492 | "metadata": {}, 493 | "source": [ 494 | "### Notice that when we do operations on Series, pandas converts these values to floats for precision" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 17, 500 | "id": "27b4e7c1", 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "data": { 505 | "text/plain": [ 506 | "dtype('int64')" 507 | ] 508 | }, 509 | "execution_count": 17, 510 | "metadata": {}, 511 | "output_type": "execute_result" 512 | } 513 | ], 514 | "source": [ 515 | "sales_q1.dtype" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 19, 521 | "id": "4ff87249", 522 | "metadata": {}, 523 | "outputs": [ 524 | { 525 | "data": { 526 | "text/plain": [ 527 | "Brazil 100.0\n", 528 | "China 950.0\n", 529 | "India 410.0\n", 530 | "Japan 80.0\n", 531 | "USA 510.0\n", 532 | "dtype: float64" 533 | ] 534 | }, 535 | "execution_count": 19, 536 | "metadata": {}, 537 | "output_type": "execute_result" 538 | } 539 | ], 540 | "source": [ 541 | "first_half" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | 
"execution_count": 20, 547 | "id": "c9afaa3e", 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "data": { 552 | "text/plain": [ 553 | "dtype('float64')" 554 | ] 555 | }, 556 | "execution_count": 20, 557 | "metadata": {}, 558 | "output_type": "execute_result" 559 | } 560 | ], 561 | "source": [ 562 | "first_half.dtype" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": 22, 568 | "id": "90a7d647", 569 | "metadata": {}, 570 | "outputs": [ 571 | { 572 | "data": { 573 | "text/plain": [ 574 | "Series([], dtype: float64)" 575 | ] 576 | }, 577 | "execution_count": 22, 578 | "metadata": {}, 579 | "output_type": "execute_result" 580 | } 581 | ], 582 | "source": [ 583 | "# Can control the datatype.\n", 584 | "pd.Series(dtype=\"float64\")" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": null, 590 | "id": "4a79fae2", 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [] 594 | } 595 | ], 596 | "metadata": { 597 | "kernelspec": { 598 | "display_name": "Python 3", 599 | "language": "python", 600 | "name": "python3" 601 | }, 602 | "language_info": { 603 | "codemirror_mode": { 604 | "name": "ipython", 605 | "version": 3 606 | }, 607 | "file_extension": ".py", 608 | "mimetype": "text/x-python", 609 | "name": "python", 610 | "nbconvert_exporter": "python", 611 | "pygments_lexer": "ipython3", 612 | "version": "3.8.8" 613 | } 614 | }, 615 | "nbformat": 4, 616 | "nbformat_minor": 5 617 | } 618 | -------------------------------------------------------------------------------- /03-Pandas/Text Methods.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a58c9c58", 6 | "metadata": {}, 7 | "source": [ 8 | "# Text Methods for String Data\n", 9 | "\n", 10 | "- pandas comes with special methods for string manipulation in dataframes.\n", 11 | "- we can apply string functions to a whole series" 12 | ] 13 | }, 14 | { 15 | 
"cell_type": "code", 16 | "execution_count": 1, 17 | "id": "611b0af5", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import numpy as np\n", 22 | "import pandas as pd" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "id": "3fba70d1", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "email = 'ale.alberga1@gmail.com'" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "id": "cfc8906a", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "['ale.alberga1', 'gmail.com']" 45 | ] 46 | }, 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "email.split('@')" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "id": "e1f06c85", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "names = pd.Series(['andrew', 'bobo', 'cinnamon', 'david', '5'])" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "id": "212b9920", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "data": { 74 | "text/plain": [ 75 | "0 andrew\n", 76 | "1 bobo\n", 77 | "2 cinnamon\n", 78 | "3 david\n", 79 | "4 5\n", 80 | "dtype: object" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "names" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "id": "160ee112", 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "0 ANDREW\n", 102 | "1 BOBO\n", 103 | "2 CINNAMON\n", 104 | "3 DAVID\n", 105 | "4 5\n", 106 | "dtype: object" 107 | ] 108 | }, 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "names.str.upper()" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "id": "501ab031", 122 | 
"metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "0 andrew\n", 128 | "1 bobo\n", 129 | "2 cinnamon\n", 130 | "3 david\n", 131 | "4 5\n", 132 | "dtype: object" 133 | ] 134 | }, 135 | "execution_count": 7, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "# access to the .str. method library!\n", 142 | "names.str.lower()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 9, 148 | "id": "61aa9f20", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "0 False\n", 155 | "1 False\n", 156 | "2 False\n", 157 | "3 False\n", 158 | "4 True\n", 159 | "dtype: bool" 160 | ] 161 | }, 162 | "execution_count": 9, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "names.str.isdigit() # returns bool list" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 10, 174 | "id": "7b58fc5a", 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "4 5\n", 181 | "dtype: object" 182 | ] 183 | }, 184 | "execution_count": 10, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "names[names.str.isdigit()] # only grabbing where it is a digit!" 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 11, 196 | "id": "45ff4d84", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "tech_finance = ['GOOG, APPL, AMZN', 'JPM, BAC, GS']" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 12, 206 | "id": "e95ad303", 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "2" 213 | ] 214 | }, 215 | "execution_count": 12, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "len(tech_finance)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 13, 227 | "id": "debf8336", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "tickers = pd.Series(tech_finance)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 14, 237 | "id": "72d26196", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "0 GOOG, APPL, AMZN\n", 244 | "1 JPM, BAC, GS\n", 245 | "dtype: object" 246 | ] 247 | }, 248 | "execution_count": 14, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "tickers" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 15, 260 | "id": "bda184a7", 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "0 [GOOG, APPL, AMZN]\n", 267 | "1 [JPM, BAC, GS]\n", 268 | "dtype: object" 269 | ] 270 | }, 271 | "execution_count": 15, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "tickers.str.split(',')" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 16, 283 | "id": "bc2aedda", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "'GOOG'" 290 | ] 291 | }, 292 | "execution_count": 16, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 
296 | ], 297 | "source": [ 298 | "tech = 'GOOG, APPL, AMZN'\n", 299 | "tech.split(',')[0]" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 20, 305 | "id": "6c2ec11d", 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/html": [ 311 | "
\n", 312 | "\n", 325 | "\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | "
012
0GOOGAPPLAMZN
1JPMBACGS
\n", 349 | "
" 350 | ], 351 | "text/plain": [ 352 | " 0 1 2\n", 353 | "0 GOOG APPL AMZN\n", 354 | "1 JPM BAC GS" 355 | ] 356 | }, 357 | "execution_count": 20, 358 | "metadata": {}, 359 | "output_type": "execute_result" 360 | } 361 | ], 362 | "source": [ 363 | "# List will be sxpanded to just be different columns\n", 364 | "tickers.str.split(',', expand=True)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 21, 370 | "id": "630a6e1d", 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "messy_names = pd.Series(['andrew ', 'bob;o', ' claire '])" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 22, 380 | "id": "a7742cdc", 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "0 andrew \n", 387 | "1 bob;o\n", 388 | "2 claire \n", 389 | "dtype: object" 390 | ] 391 | }, 392 | "execution_count": 22, 393 | "metadata": {}, 394 | "output_type": "execute_result" 395 | } 396 | ], 397 | "source": [ 398 | "messy_names" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 23, 404 | "id": "ed499a1a", 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "# Let's clean this up" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 25, 414 | "id": "4c140a33", 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "0 andrew \n", 421 | "1 bobo\n", 422 | "2 claire \n", 423 | "dtype: object" 424 | ] 425 | }, 426 | "execution_count": 25, 427 | "metadata": {}, 428 | "output_type": "execute_result" 429 | } 430 | ], 431 | "source": [ 432 | "# Remove the semi-colons\n", 433 | "messy_names.str.replace(';', '')" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 27, 439 | "id": "e21d8a1a", 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/plain": [ 445 | "0 andrew\n", 446 | "1 bobo\n", 447 | "2 claire\n", 448 | "dtype: object" 449 | ] 450 | }, 451 | 
"execution_count": 27, 452 | "metadata": {}, 453 | "output_type": "execute_result" 454 | } 455 | ], 456 | "source": [ 457 | "# Also remove whitespaces\n", 458 | "messy_names.str.replace(';', '').str.strip()" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 31, 464 | "id": "2b1c6fea", 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "0 Andrew\n", 471 | "1 Bobo\n", 472 | "2 Claire\n", 473 | "dtype: object" 474 | ] 475 | }, 476 | "execution_count": 31, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "# Capitalise first letter, typical syntax to chain these.\n", 483 | "# We could of course use .apply() to take a function\n", 484 | "# If you need if statements use .apply()\n", 485 | "(\n", 486 | " messy_names.str.replace(';', '')\n", 487 | " .str.strip()\n", 488 | " .str.capitalize()\n", 489 | ")" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 32, 495 | "id": "c14e5164", 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "# Which is more efficient, str or apply()?\n", 500 | "# apply() functions are more efficient." 
501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "id": "aff2ec59", 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [] 510 | } 511 | ], 512 | "metadata": { 513 | "kernelspec": { 514 | "display_name": "Python 3", 515 | "language": "python", 516 | "name": "python3" 517 | }, 518 | "language_info": { 519 | "codemirror_mode": { 520 | "name": "ipython", 521 | "version": 3 522 | }, 523 | "file_extension": ".py", 524 | "mimetype": "text/x-python", 525 | "name": "python", 526 | "nbconvert_exporter": "python", 527 | "pygments_lexer": "ipython3", 528 | "version": "3.8.8" 529 | } 530 | }, 531 | "nbformat": 4, 532 | "nbformat_minor": 5 533 | } 534 | -------------------------------------------------------------------------------- /03-Pandas/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /03-Pandas/__pycache__/pydbgen.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/__pycache__/pydbgen.cpython-36.pyc -------------------------------------------------------------------------------- /03-Pandas/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/example.xlsx -------------------------------------------------------------------------------- /03-Pandas/my_excel_file.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/my_excel_file.xlsx -------------------------------------------------------------------------------- /03-Pandas/new_workbook.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/new_workbook.xlsx -------------------------------------------------------------------------------- /03-Pandas/newexcelfile.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/newexcelfile.xlsx -------------------------------------------------------------------------------- /03-Pandas/reshaping_pivot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/03-Pandas/reshaping_pivot.png -------------------------------------------------------------------------------- /03-Pandas/sample_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 |
country200020152030
China[B]127013761416
India105313111528
United States283322356
Indonesia212258295
Pakistan136208245
Brazil176206228
Nigeria123182263
Bangladesh131161186
Russia146146149
Mexico103127148
World total612773498501
-------------------------------------------------------------------------------- /03-Pandas/simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
abcd
0123
4567
891011
12131415
-------------------------------------------------------------------------------- /04-Matplotlib/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "
" 21 | ] 22 | }, 23 | "execution_count": 5, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | }, 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "
" 31 | ] 32 | }, 33 | "metadata": {}, 34 | "output_type": "display_data" 35 | } 36 | ], 37 | "source": [ 38 | "plt.figure()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 6, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "[]" 50 | ] 51 | }, 52 | "execution_count": 6, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | }, 56 | { 57 | "data": { 58 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAiEUlEQVR4nO3deXhU9b3H8fcvEEjYwpKwE8IOIWEzCC5VBFTEFdG61H3B2ttbr20FFFQEF9C2tve6FbUurV1sEhBR3FHcQEFlEsIW9p2wE7JnvvePTPvQCDLAJCcz83k9T54nmfll5nNM+Hhy5pzvODNDRETCX4zXAUREJDRU6CIiEUKFLiISIVToIiIRQoUuIhIh6nv1xImJiZaSkuLV04uIhKUlS5bsMrOkI93nWaGnpKSwePFir55eRCQsOec2HO0+HXIREYkQKnQRkQihQhcRiRAqdBGRCKFCFxGJEEEXunOunnPuW+fc3CPc19A59w/nXL5zbpFzLiWkKUVE5JiOZw/9LmD5Ue67FdhrZt2BJ4EZJxtMRESOT1CF7pzrCFwIvHCUJZcCrwQ+zwRGOOfcyccTEYkcxWWVPDZvOZv3FtXI4we7h/57YDzgP8r9HYBNAGZWAewHWlVf5Jwb55xb7JxbXFBQcPxpRUTC1BdrdnH+7xfwx0/WMn9lzfTfMQvdOXcRsNPMlpzsk5nZTDPLMLOMpKQjXrkqIhJRDpSUc2+2j2ufX0SMg7+PG8r1QzvXyHMFc+n/GcAlzrnRQBzQzDn3FzO77rA1W4BOwGbnXH0gAdgd8rQiImHk/bwdTJ6dQ8HBUu44uyt3j+xJXGy9Gnu+Yxa6md0L3AvgnBsG/LpamQPMAW4EvgSuAD4yvbediESpXYWlTJmzjLm+bfRu25Tnb8igX8fmNf68Jzycyzk3FVhsZnOAF4E/O+fygT3A1SHKJyISNsyMN77bykNvLuNQaSW/Orcnd5zdjQb1a+eSn+MqdDP7GPg48PkDh91eAlwZymAiIuFk675iJs/O5aMVOxmY3JzHx/ajR5umtZrBs/G5IiKRwO83/vrVRqbPW0Gl33jgolRuPD2FejG1f+a2Cl1E5ASt23WICVk+vlq3hzO7J/LY5el0atnIszwqdBGR41RR6eeFz9bx5PuraFA/hsfH9uPKjI54fT2lCl1E5DjkbT3AhCwfOVv2c15qG6ZdlkabZnFexwJU6CIiQSmtqOSpj/J59uM1NG8Uy9PXDmJ0elvP98oPp0IXETmGJRv2MiHLR/7OQi4f1IH7L0ylReMGXsf6HhW6iMhRFJVV8MS7K3n5i/W0T4jn5ZsHM6xXa69jHZUKXUTkCD5bvYuJ2T427y3mhtM6M35Ub5o0rNuVWbfTiYjUsv1F5Tzydh6vL95M18TGvH7HaZzapaXXsYKiQhcRCXgndzv3v5HLnkNl3DmsG3eN6FGjw7RCTYUuIlGv4GDVMK23craR2q4ZL900mLQOCV7HOm4qdBGJWmZG9jdbmDo3j+KySu45vxfjzupKbL3aGaYVaip0EYlKW/YVc192Dp+sKuCUzi2
YMbYf3Vs38TrWSVGhi0hU8fuNvyzawIx5KzDgoUv6cv3QzsR4MEwr1FToIhI11hQUMjHLx9fr9/KjHok8OsbbYVqhpkIXkYhXXunn+U/X8vsPVhMfW4/fXNmfsYM61KnL9kNBhS4iES13y34mZPlYtvUAF6S15aFL+9K6ad0YphVqKnQRiUgl5ZX830eree6TtbRo1IBnfzKIC9LbeR2rRqnQRSTiLF6/h/FZPtYWHOLKUzoy6cI+NG9U94ZphZoKXUQiRmFpBU+8s4JXF26gfUI8r95yKmf1TPI6Vq1RoYtIRPhkVQH3ZeewdX8xN56Wwj3n96JxHR+mFWrRtbUiEnH2FZUxbe5ysr7ZTLekxvzzjtPISAmPYVqhpkIXkbA1L2cb97+xjL1FZfz8nO78fHj3sBqmFWoqdBEJOzsPlPDAG8t4Z9l20jo045VbBtO3ffgN0wo1FbqIhA0zI3PJZqbNzaOkws+EUb25/UddqB+mw7RCTYUuImFh054i7puVw6erd3FqSkumj02na1J4D9MKNRW6iNRplX7j1S/X88S7K3HAtEv78pMhkTFMK9RU6CJSZ+XvPMiErByWbNjL2T2TePTydDo0j/c6Vp2lQheROqe80s8fP1nD/36YT6OG9fjdj/szZmDkDdMKNRW6iNQpOZv3Mz7Lx/JtB7iwXzumXNyXpKYNvY4VFlToIlInlJRX8vsPVvP8p2tp1bgBf7z+FM7v29brWGHlmIXunIsDFgANA+szzezBamuSgVeA5kA9YKKZvR3ytCISkRat3c3E7BzW7TrEVRmduO/CPiTEx3odK+wEs4deCgw3s0LnXCzwmXNunpktPGzNZOB1M3vWOZcKvA2khD6uiESSgyXlPP7OSv68cAOdWsbz2m1DOKN7otexwtYxC93MDCgMfBkb+LDqy4Bmgc8TgK2hCigikWn+yp1Mys5h24ESbjmjC78+vyeNGugo8MkI6r+ec64esAToDjxtZouqLZkCvOec+2+gMTDyKI8zDhgHkJycfIKRRSSc7T1UxrS5eWR/u4UerZuQdefpDEpu4XWsiBDU9bJmVmlmA4COwKnOubRqS64BXjazjsBo4M/Oue89tpnNNLMMM8tISoqeGcUiUnXZ/lzfVkb+7hPmLN3KL0b0YO4vzlSZh9Bx/X1jZvucc/OBUUDuYXfdGrgNM/sy8EJqIrAzVEFFJHztOFDC5Nm5vJ+3g34dE/jLbUPo067Zsb9RjkswZ7kkAeWBMo8HzgVmVFu2ERgBvOyc6wPEAQWhDisi4cXMeH3xJh5+azllFX7uG92bW87QMK2aEsweejvglcBx9BiqzmaZ65ybCiw2sznAr4DnnXN3U/UC6U2BF1NFJEpt3F3ExGwfX6zZzZAuLZkxth8piY29jhXRgjnLxQcMPMLtDxz2eR5wRmijiUg4qvQbL3+xnt+8u5J6MY5HxqRxzeBkDdOqBTpHSERCZtWOg4zP9PHdpn0M792aR8ak0S5Bw7RqiwpdRE5aWYWfZz9ew1PzV9OkYX3+cPUALunfXsO0apkKXUROytJN+5iQ5WPF9oNc0r89D16cSqsmGqblBRW6iJyQ4rJKnvxgFS98upbWTeN44YYMRqa28TpWVFOhi8hx+3LNbu7N9rF+dxHXnJrMvaN70yxOw7S8pkIXkaAdKCln+rwV/HXRRjq3asRfbx/C6d00TKuuUKGLSFA+XL6DSbNy2XmwhNt/1IVfntuL+Ab1vI4lh1Ghi8gP2l1YykNv5jFn6VZ6tWnKc9efwoBOzb2OJUegQheRIzIz5izdykNv5nGwpJy7R/bkzmHdaFBfl+3XVSp0EfmebfuLmTwrlw9X7KR/p+Y8PrYfvdo29TqWHIMKXUT+ze83/v71Jh57eznlfj+TL+zDzWd0oZ4u2w8LKnQRAWD9rkNMzPaxcO0eTuvaiulj0+ncSsO0wokKXSTKVVT6eenz9fz2/ZXExsQw/fJ0rhrcSZfthyEVukgUW7H
9ABMyfSzdvJ+Rfdrw8GVptE2I8zqWnCAVukgUKq2o5On5a3hmfj4J8bH83zUDuahfO+2VhzkVukiU+XbjXiZk+Vi1o5AxAztw/0WptGzcwOtYEgIqdJEoUVRWwW/fW8WfPl9H22Zx/OmmDIb31jCtSKJCF4kCX+TvYmJ2Dhv3FHHd0GQmjOpNUw3TijgqdJEItr+4nMfeXs7fv95El8TG/H3cUIZ2beV1LKkhKnSRCPXesu1Mnp3LrsJS7ji7K3eP7ElcrIZpRTIVukiE2VVYypQ5y5jr20bvtk154cYM+nVs7nUsqQUqdJEIYWbM/m4LD72ZR1FpJb86tyc/HdaN2HoaphUtVOgiEWDrvmImzcph/soCBiZXDdPq0UbDtKKNCl0kjPn9xmtfbWTGvBVU+o0HLkrlxtNTNEwrSqnQRcLU2oJCJmbl8NX6PZzZPZHHLk+nU8tGXscSD6nQRcJMRaWfFz5bx5Pvr6Jh/Rgev6IfV57SUZftiwpdJJzkbT3A+Kyl5G45wPl92zDt0jRaN9MwLamiQhcJA6UVlTz1UT7PfryG5o1ieeYng7ggra32yuU/qNBF6rglG/YwISuH/J2FXD6oA/dfmEoLDdOSI1Chi9RRh0oreOLdlbzy5XraJ8Tz8s2DGdartdexpA47ZqE75+KABUDDwPpMM3vwCOt+DEwBDFhqZteGNqpI9Ph0dQH3ZueweW8xN57WmXtG9aZJQ+1/yQ8L5jekFBhuZoXOuVjgM+fcPDNb+K8FzrkewL3AGWa21zmn3QiRE7C/qJyH38rjn0s20zWpMf/86WkMTmnpdSwJE8csdDMzoDDwZWzgw6otux142sz2Br5nZyhDikSDd3K3c/8buew5VMbPhnXjFyN6aJiWHJeg/oZzztUDlgDdqSruRdWW9Ays+xyoB0wxs3eO8DjjgHEAycnJJxFbJHLsPFjClDnLeDtnO6ntmvHSTYNJ65DgdSwJQ0EVuplVAgOcc82BWc65NDPLrfY4PYBhQEdggXMu3cz2VXucmcBMgIyMjOp7+SJRxczI+mYL0+bmUVxeyT3n92LcWV01TEtO2HG9ymJm+5xz84FRwOGFvhlYZGblwDrn3CqqCv7rkCUViSCb9xZx36xcFqwqIKNzC6aP7Uf31k28jiVhLpizXJKA8kCZxwPnAjOqLZsNXAO85JxLpOoQzNoQZxUJe36/8eeFG5jxzgoAHrqkL9cP7UyMhmlJCASzh94OeCVwHD0GeN3M5jrnpgKLzWwO8C5wnnMuD6gE7jGz3TWWWiQMrSkoZEKmj8Ub9nJWzyQeHZNGxxYapiWh46pOYql9GRkZtnjxYk+eW6Q2lVf6mblgLX/4cDXxsfW4/6JUxg7qoMv25YQ455aYWcaR7tOVCiI1KHfLfsZn+sjbdoDR6W2ZcklfWjfVMC2pGSp0kRpQUl7JHz5czcwFa2nRqAHPXTeIUWntvI4lEU6FLhJiX6/fw4RMH2t3HeLKUzoy+cJUEhrFeh1LooAKXSRECksrePydFbz65QY6tojnz7eeyo96JHkdS6KICl0kBD5ZVcB92Tls3V/MTaencM/5vWisYVpSy/QbJ3IS9hWVMXVuHtnfbKFbUmMyf3oap3TWMC3xhgpd5ASYGfNyt/PAG7nsKyrn5+d05+fDu2uYlnhKhS5ynHYeKOH+N3J5d9kO0jo045VbTqVvew3TEu+p0EWCZGb8c8lmHp6bR2mFn4kX9Oa2M7tQX8O0pI5QoYsEYdOeIu7NzuGz/F2cmtKS6WPT6ZqkYVpSt6jQRX5Apd949cv1PP7OSmIcTLssjZ+cmqxhWlInqdBFjiJ/50HGZ/r4ZuM+hvVK4pEx6XRoHu91LJGjUqGLVFNe6ee5j9fwfx/l06hhPZ68qj+XDdAwLan7VOgih8nZvJ97MpeyYvtBLurXjimX9CWxSUOvY4kERYUuQtUwrSc/WMXzC9aS2KQhM68/hfP6tvU6lshxUaFL1Fu0djcTs3N
Yt+sQVw/uxL2j+5AQr2FaEn5U6BK1DpaUM+OdFfxl4UY6tYzntduGcEb3RK9jiZwwFbpEpfkrdnLfrBy2Hyjh1jO78KvzetKogf45SHjTb7BElT2Hypj65jJmf7eVHq2bkHXn6QxKbuF1LJGQUKFLVDAz5vq2MWXOMvYXl/OLET34r3O60bC+hmlJ5FChS8TbcaCESbNy+WD5Dvp1TOC124fQu20zr2OJhJwKXSKWmfGPrzfxyNvLKavwM2l0H24+I0XDtCRiqdAlIm3cXcTEbB9frNnNkC4tmTG2HymJjb2OJVKjVOgSUSr9xkufr+M3762kfkwMj45J5+rBnTRMS6KCCl0ixsrtBxmf5WPppn0M792aR8ak0S5Bw7QkeqjQJeyVVfh55uN8np6fT9O4WP5w9QAu6d9ew7Qk6qjQJawt3bSP8Zk+Vu44yKUD2vPARam00jAtiVIqdAlLxWWV/O79lbz42TpaN43jhRsyGJnaxutYIp5SoUvY+WLNLu7NzmHD7iKuHZLMxAt60yxOw7REVOgSNg6UlPPY2yv421cb6dyqEX+9fQind9MwLZF/UaFLWPggbweTZudQcLCUcWd15e6RPYlvoMv2RQ53zEJ3zsUBC4CGgfWZZvbgUdaOBTKBwWa2OJRBJTrtLizloTfzmLN0K73bNmXm9Rn079Tc61gidVIwe+ilwHAzK3TOxQKfOefmmdnCwxc555oCdwGLaiCnRBkzY87SrUyZs4zC0gruHtmTO4d1o0F9XbYvcjTHLHQzM6Aw8GVs4MOOsHQaMAO4J2TpJCpt21/M5Fm5fLhiJwM6NefxK/rRs01Tr2OJ1HlBHUN3ztUDlgDdgafNbFG1+wcBnczsLefcUQvdOTcOGAeQnJx8wqElMvn9xt++3shjb6+gwu9n8oV9uPmMLtTTZfsiQQmq0M2sEhjgnGsOzHLOpZlZLoBzLgb4HXBTEI8zE5gJkJGRcaS9fIlS63YdYmKWj0Xr9nB6t1ZMv7wfya0aeR1LJKwc11kuZrbPOTcfGAXkBm5uCqQBHwcutW4LzHHOXaIXRuVYKir9/Onzdfz2vVU0qB/DjLHp/Dijky7bFzkBwZzlkgSUB8o8HjiXqmPlAJjZfiDxsPUfA79WmcuxLN92gAlZPnyb93NuahseviyNNs3ivI4lEraC2UNvB7wSOI4eA7xuZnOdc1OBxWY2p0YTSsQprajk6flreGZ+PgnxsTx17UAuTG+nvXKRkxTMWS4+YOARbn/gKOuHnXwsiVTfbNzLhEwfq3cWMmZgBx64KJUWjRt4HUskIuhKUakVRWUV/ObdVbz0xTraNovjpZsGc07v1l7HEokoKnSpcZ/n72Jito9Ne4q5bmgyE0b1pqmGaYmEnApdasz+4nIefWs5/1i8iS6JjfnHuKEM6drK61giEUuFLjXivWXbmTw7l92Hyvjp2d34n5E9iIvVMC2RmqRCl5AqOFjKlDeX8ZZvG33aNePFGweT3jHB61giUUGFLiFhZsz6dgtT5+ZRVFrJr8/ryR1ndyO2noZpidQWFbqctC37ipk0K4ePVxYwKLlqmFb31hqmJVLbVOhywvx+47VFG5g+bwV+gwcvTuWG01I0TEvEIyp0OSFrCwqZmJXDV+v38KMeiTw6Jp1OLTVMS8RLKnQ5LhWVfp7/dB1PfrCKuPoxPHFFP644paMu2xepA1ToErS8rQcYn7WU3C0HOL9vG6ZdmkZrDdMSqTNU6HJMJeWVPPVRPs99sobmjRrw7E8GcUF6O69jiUg1KnT5QUs27GF8po81BYcYO6gj91/Uh+aNNExLpC5SocsRHSqt4Il3V/LKl+tpnxDPK7ecytk9k7yOJSI/QIUu37NgVQH3ZuewdX8xNwztzD2jetOkoX5VROo6/SuVf9tfVM60t/LIXLKZrkmNef2O0xic0tLrWCISJBW6APBO7jbuf2MZew6V8bNh3fjFCA3TEgk3KvQot/NgCQ++sYx5udtJbde
Ml24aTFoHDdMSCUcq9ChlZmQu2czDby2nuLySe87vxbizumqYlkgYU6FHoU17irhvVg6frt5FRucWTB/bj+6tm3gdS0ROkgo9ivj9xqtfrufxd1figKmX9uW6IZ2J0TAtkYigQo8S+TsLmZjlY/GGvZzVM4lHx6TRsYWGaYlEEhV6hCuv9DNzwVr+8MFq4hvU47dX9ufyQR00TEskAqnQI1julv2Mz/SRt+0Ao9Pb8tAlaSQ1beh1LBGpISr0CFRSXskfPlzNzAVradm4Ac9dN4hRaRqmJRLpVOgR5uv1e5iQ6WPtrkP8OKMjk0anktAo1utYIlILVOgRorC0gsffWcGrX26gY4t4/nLrEM7skeh1LBGpRSr0CDB/5U4mZeew7UAJN5+Rwq/P60VjDdMSiTr6Vx/G9h4qY9rcPLK/3UL31k3I/OnpnNK5hdexRMQjKvQwZGa8nbOdB+fksq+onP8e3p2fD+9Ow/oapiUSzY5Z6M65OGAB0DCwPtPMHqy25pfAbUAFUADcYmYbQh9Xdh4oYfLsXN7L20F6hwRevWUIqe2beR1LROqAYPbQS4HhZlbonIsFPnPOzTOzhYet+RbIMLMi59ydwOPAVTWQN2qZGf9cvJlpb+VRVuHn3gt6c+uZXaivYVoiEnDMQjczAwoDX8YGPqzamvmHfbkQuC5UAaVqmNa92Tl8lr+LU7u0ZPrl6XRN0jAtEflPQR1Dd87VA5YA3YGnzWzRDyy/FZh3lMcZB4wDSE5OPr6kUajSb7zyxXqeeHcl9WIcD1+WxrWnJmuYlogcUVCFbmaVwADnXHNglnMuzcxyq69zzl0HZABnH+VxZgIzATIyMuxIa6TK6h0HGZ/l49uN+xjWK4lHx6TTvnm817FEpA47rrNczGyfc24+MAr4j0J3zo0EJgFnm1lp6CJGl7IKP899soanPsqnccN6/P6qAVw6oL2GaYnIMQVzlksSUB4o83jgXGBGtTUDgT8Co8xsZ40kjQK+zfsYn+ljxfaDXNy/PQ9enEpiEw3TEpHgBLOH3g54JXAcPQZ43czmOuemAovNbA7wBNAE+GdgT3KjmV1SU6EjTUl5JU++v4rnP11LUtOGPH9DBuemtvE6loiEmWDOcvEBA49w+wOHfT4yxLmixsK1u5mY5WP97iKuObUTEy/oQ0K8hmmJyPHTlaIeOVhSzvR5K3ht0UaSWzbir7cN4fTuGqYlIidOhe6Bj1bsYNKsXHYcKOG2M7vwy/N60qiBfhQicnLUIrVoz6Eypr65jNnfbaVH6yY8c+fpDEzWMC0RCQ0Vei0wM970bWPKnGUcLCnnrhE9+Nk53TRMS0RCSoVew7bvrxqm9cHyHfTvmMCMK4bQu62GaYlI6KnQa4iZ8fevN/HoW8sp9/uZNLoPt5zZhXq6bF9EaogKvQZs2H2IiVk5fLl2N0O7tmT65f1ISWzsdSwRiXAq9BCq9Bsvfb6O37y3ktiYGB4dk87VgztpmJaI1AoVeois3F41TGvppn2M6N2ah8ek0S5Bw7REpPao0E9SWYWfZz7O5+n5+TSNi+V/rxnIxf3aaZiWiNQ6FfpJ+G7TPiZk+li54yCXDmjPgxf3pWXjBl7HEpEopUI/AcVllfz2vZX86fN1tG4ax4s3ZjCij4ZpiYi3VOjH6Ys1u5iYlcPGPUVcOySZiRf0plmchmmJiPdU6EE6UFLOY28v529fbaJzq0b87fahnNatldexRET+TYUehA/ydjBpdg4FB0sZd1ZX7h7Zk/gGumxfROoWFfoP2F1YypQ383hz6VZ6t23KzOsz6N+pudexRESOSIV+BGbGG99t5aE3l1FYWsEvz+3JT8/uRoP6MV5HExE5KhV6NVv3FTN5di4frdjJgE7NefyKfvRs09TrWCIix6RCD/D7jb9+tZHp81ZQ6TfuvyiVm05P0TAtEQkbKnRg3a5DTMzysWjdHs7o3orHxvQjuVUjr2OJiByXqC70iko/L362jt+9v4o
G9WOYMTadH2d00mX7IhKWorbQl287wIQsH77N+zk3tQ0PX5ZGm2ZxXscSETlhUVfopRWVPP1RPs98vIbmjWJ5+tpBjE5vq71yEQl7UVXoSzbsZUKWj/ydhVw+sAP3X5RKCw3TEpEIERWFXlRWwRPvruTlL9bTrlkcL908mHN6tfY6lohISEV8oX+2ehcTs31s3lvM9UM7M35UL5pqmJaIRKCILfT9xeU88lYery/eTJfExvxj3FCGdNUwLRGJXBFZ6O8u2879s3PZfaiMO4d1464RPYiL1TAtEYlsEVXoBQdLmTJnGW/lbKNPu2a8eONg0jsmeB1LRKRWREShmxnZ32xh6tw8issquef8Xow7qyux9TRMS0SiR9gX+pZ9xdyXncMnqwoYlFw1TKt7aw3TEpHoc8xCd87FAQuAhoH1mWb2YLU1DYFXgVOA3cBVZrY+5GkP4/cbf1m0gRnzVmDAlItTuf40DdMSkegVzB56KTDczAqdc7HAZ865eWa28LA1twJ7zay7c+5qYAZwVQ3kBWBNQSETs3x8vX4vP+qRyKNj0unUUsO0RCS6HbPQzcyAwsCXsYEPq7bsUmBK4PNM4CnnnAt8b0i9/vUmJr+RS1z9GJ64oh9XnNJRl+2LiBDkMXTnXD1gCdAdeNrMFlVb0gHYBGBmFc65/UArYFe1xxkHjANITk4+ocBdkhozondrHrq0L62bapiWiMi/BFXoZlYJDHDONQdmOefSzCz3eJ/MzGYCMwEyMjJOaO99cEpLBqe0PJFvFRGJaMd1Xp+Z7QPmA6Oq3bUF6ATgnKsPJFD14qiIiNSSYxa6cy4psGeOcy4eOBdYUW3ZHODGwOdXAB/VxPFzERE5umAOubQDXgkcR48BXjezuc65qcBiM5sDvAj82TmXD+wBrq6xxCIickTBnOXiAwYe4fYHDvu8BLgytNFEROR46Np4EZEIoUIXEYkQKnQRkQihQhcRiRDOq7MLnXMFwIYT/PZEql2FGgW0zdFB2xwdTmabO5tZ0pHu8KzQT4ZzbrGZZXidozZpm6ODtjk61NQ265CLiEiEUKGLiESIcC30mV4H8IC2OTpom6NDjWxzWB5DFxGR7wvXPXQREalGhS4iEiHqbKE75/7knNvpnDviG2m4Kv/rnMt3zvmcc4NqO2OoBbHNPwlsa45z7gvnXP/azhhqx9rmw9YNds5VOOeuqK1sNSWYbXbODXPOfeecW+ac+6Q289WEIH63E5xzbzrnlga2+ebazhhKzrlOzrn5zrm8wPbcdYQ1Ie+wOlvowMt8/400DncB0CPwMQ54thYy1bSX+eFtXgecbWbpwDQi48Wkl/nhbf7XWyDOAN6rjUC14GV+YJsD7z/wDHCJmfUlMiaZvswP/5z/C8gzs/7AMOC3zrkGtZCrplQAvzKzVGAo8F/OudRqa0LeYXW20M1sAVWz1Y/mUuBVq7IQaO6ca1c76WrGsbbZzL4ws72BLxcCHWslWA0K4ucM8N9AFrCz5hPVvCC2+Vog28w2BtaH/XYHsc0GNHVV7/jeJLC2ojay1QQz22Zm3wQ+Pwgsp+q9lw8X8g6rs4UehH+/MXXAZr7/HyyS3QrM8zpETXPOdQDGEBl/gQWrJ9DCOfexc26Jc+4GrwPVgqeAPsBWIAe4y8z83kYKDedcClXvKbGo2l0h77Cg3iRa6hbn3DlUFfqZXmepBb8HJpiZv2rnLSrUB04BRgDxwJfOuYVmtsrbWDXqfOA7YDjQDXjfOfepmR3wNNVJcs41oeqvy/+pjW0J50L/9xtTB3QM3BbRnHP9gBeAC8wsGt6IOwP4e6DME4HRzrkKM5vtaaqatRnYbWaHgEPOuQVAfyCSC/1mYHrgvYjznXPrgN7AV97GOnHOuViqyvw1M8s+wpKQd1g4H3KZA9wQeKV4KLDfzLZ5HaomOeeSgWzg+gjfW/s3M+tiZilmlgJkAj+L8DIHeAM40zlX3znXCBhC1THYSLaRqr9IcM61AXo
Baz1NdBICrwW8CCw3s98dZVnIO6zO7qE75/5G1avdic65zcCDQCyAmT0HvA2MBvKBIqr+Dx/WgtjmB4BWwDOBPdaKcJ9SF8Q2R5xjbbOZLXfOvQP4AD/wgpn94GmddV0QP+dpwMvOuRzAUXWYLZxH6p4BXA/kOOe+C9x2H5AMNddhuvRfRCRChPMhFxEROYwKXUQkQqjQRUQihApdRCRCqNBFRCKECl1EJEKo0EVEIsT/A24cZPwRA9rTAAAAAElFTkSuQmCC\n", 59 | "text/plain": [ 60 | "
" 61 | ] 62 | }, 63 | "metadata": { 64 | "needs_background": "light" 65 | }, 66 | "output_type": "display_data" 67 | } 68 | ], 69 | "source": [ 70 | "plt.plot([1,2],[3,4])" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 7, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd0AAAFDCAYAAAB/UdRdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPFUlEQVR4nO3cX6jkd3nH8c9jYirVqKVZQbKJSelaXbSgPYQUoaZoS5KLzYWtJCBWCS7YRkoVIcUSJV5ZqQUhrW6pWAWN0QtZcEsubCQgRrJiDSYhso3WbBSy/stN0Jj26cUZy3HdzZls5jzrnLxecGB+v/memYcvh/PemTP7q+4OALDznnW2BwCAZwrRBYAhogsAQ0QXAIaILgAMEV0AGLJtdKvqY1X1SFV98zT3V1V9uKqOVdU9VfXq1Y8JAOtvmVe6H09y5ZPcf1WSfYuvg0n++emPBQC7z7bR7e47k/zoSZZck+QTvemuJC+sqhevakAA2C1W8TfdC5M8tOX4+OIcALDFuZNPVlUHs/kWdJ773Of+wcte9rLJpweAp+1rX/vaD7p7z5l87yqi+3CSi7Yc712c+xXdfSjJoSTZ2Njoo0ePruDpAWBOVf33mX7vKt5ePpzkzYtPMV+e5NHu/v4KHhcAdpVtX+lW1aeTXJHkgqo6nuS9SZ6dJN39kSRHklyd5FiSx5K8daeGBYB1tm10u/u6be7vJH+1sokAYJdyRSoAGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGLJUdKvqyqp6oKqOVdWNp7j/4qq6o6q+XlX3VNXVqx8VANbbttGtqnOS3JLkqiT7k1xXVftPWvZ3SW7r7lcluTbJP616UABYd8u80r0sybHufrC7H09ya5JrTlrTSZ6/uP2CJN9b3YgAsDssE90Lkzy05fj44txW70vypqo6nuRIknec6oGq6mBVHa2qoydOnDiDcQFgfa3qg1TXJfl4d+9NcnWST1bVrzx2dx/q7o3u3tizZ8+KnhoA1sMy0X04yUVbjvcuzm11fZLbkqS7v5LkOUkuWMWAALBbLBPdu5Psq6pLq+q8bH5Q6vBJa76b5HVJUlUvz2Z0vX8MAFtsG93ufiLJDUluT3J/Nj+lfG9V3VxVBxbL3pXkbVX1jSSfTvKW7u6dGhoA1tG5yyzq7iPZ/IDU1nM3bbl9X5LXrHY0ANhdXJEKAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFg
iOgCwBDRBYAhogsAQ5aKblVdWVUPVNWxqrrxNGveWFX3VdW9VfWp1Y4JAOvv3O0WVNU5SW5J8idJjie5u6oOd/d9W9bsS/K3SV7T3T+uqhft1MAAsK6WeaV7WZJj3f1gdz+e5NYk15y05m1JbunuHydJdz+y2jEBYP0tE90Lkzy05fj44txWL03y0qr6clXdVVVXrmpAANgttn17+Sk8zr4kVyTZm+TOqnpld/9k66KqOpjkYJJcfPHFK3pqAFgPy7zSfTjJRVuO9y7ObXU8yeHu/nl3fzvJt7IZ4V/S3Ye6e6O7N/bs2XOmMwPAWlomuncn2VdVl1bVeUmuTXL4pDWfz+ar3FTVBdl8u/nB1Y0JAOtv2+h29xNJbkhye5L7k9zW3fdW1c1VdWCx7PYkP6yq+5LckeTd3f3DnRoaANZRdfdZeeKNjY0+evToWXluADhTVfW17t44k+91RSoAGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGCK6ADBEdAFgiOgCwBDRBYAhogsAQ0QXAIaILgAMEV0AGLJUdKvqyqp6oKqOVdWNT7LuDVXVVbWxuhEBYHfYNrpVdU6SW5JclWR/kuuqav8p1p2f5K+TfHXVQwLAbrDMK93Lkhzr7ge7+/Ektya55hTr3p/kA0l+usL5AGDXWCa6FyZ5aMvx8cW5/1dVr05yUXd/YYWzAcCu8rQ/SFVVz0ryoSTvWmLtwao6WlVHT5w48XSfGgDWyjLRfTjJRVuO9y7O/cL5SV6R5EtV9Z0klyc5fKoPU3X3oe7e6O6NPXv2nPnUALCGlonu3Un2VdWlVXVekmuTHP7Fnd39aHdf0N2XdPclSe5KcqC7j+7IxACwpraNbnc/keSGJLcnuT/Jbd19b1XdXFUHdnpAANgtzl1mUXcfSXLkpHM3nWbtFU9/LADYfVyRCgCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGLBXdqrqyqh6oqmNVdeMp7n9nVd1XVfdU1Rer6iWrHxUA1tu20a2qc5LckuSqJPuTXFdV+09a9vUkG939+0k+l+TvVz0oAKy7ZV7pXpbkWHc/2N2PJ7k1yTVbF3T3Hd392OLwriR7VzsmAKy/ZaJ7YZKHthwfX5w7neuT/Pup7qiqg1V1tKqOnjhxYvkpAWAXWOkHqarqTUk2knzwVPd396Hu3ujujT179qzyqQHg1965S6x5OMlFW473Ls79kqp6fZL3JHltd/9sNeMBwO6xzCvdu5Psq6pLq+q8JNcmObx1QVW9KslHkxzo7kdWPyYArL9to9vdTyS5IcntSe5Pclt331tVN1fVgcWyDyZ5XpLPVtV/VtXh0zwcADxjLfP2crr7SJIjJ527acvt1694LgDYdVyRCgCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAw
RHQBYIjoAsAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYIjoAsAQ0QWAIaILAEOWim5VXVlVD1TVsaq68RT3/0ZVfWZx/1er6pKVTwoAa27b6FbVOUluSXJVkv1Jrquq/Sctuz7Jj7v7d5P8Y5IPrHpQAFh3y7zSvSzJse5+sLsfT3JrkmtOWnNNkn9b3P5cktdVVa1uTABYf8tE98IkD205Pr44d8o13f1EkkeT/PYqBgSA3eLcySerqoNJDi4Of1ZV35x8/meIC5L84GwPsUvZ251hX3eOvd0Zv3em37hMdB9OctGW472Lc6dac7yqzk3ygiQ/PPmBuvtQkkNJUlVHu3vjTIbm9OzrzrG3O8O+7hx7uzOq6uiZfu8yby/fnWRfVV1aVecluTbJ4ZPWHE7yF4vbf5bkP7q7z3QoANiNtn2l291PVNUNSW5Pck6Sj3X3vVV1c5Kj3X04yb8m+WRVHUvyo2yGGQDYYqm/6Xb3kSRHTjp305bbP03y50/xuQ89xfUsx77uHHu7M+zrzrG3O+OM97W8CwwAM1wGEgCG7Hh0XUJyZyyxr++sqvuq6p6q+mJVveRszLmOttvbLeveUFVdVT4duoRl9rWq3rj4ub23qj41PeM6WuJ3wcVVdUdVfX3x++DqszHnuqmqj1XVI6f7r6216cOLfb+nql691AN39459ZfODV/+V5HeSnJfkG0n2n7TmL5N8ZHH72iSf2cmZdsPXkvv6x0l+c3H77fZ1dXu7WHd+kjuT3JVk42zP/ev+teTP7L4kX0/yW4vjF53tuX/dv5bc10NJ3r64vT/Jd8723OvwleSPkrw6yTdPc//VSf49SSW5PMlXl3ncnX6l6xKSO2Pbfe3uO7r7scXhXdn8/9Vsb5mf2SR5fzavMf7TyeHW2DL7+rYkt3T3j5Okux8ZnnEdLbOvneT5i9svSPK9wfnWVnffmc3/jXM61yT5RG+6K8kLq+rF2z3uTkfXJSR3xjL7utX12fwXGdvbdm8XbyNd1N1fmBxszS3zM/vSJC+tqi9X1V1VdeXYdOtrmX19X5I3VdXxbP4vlHfMjLbrPdXfw0mGLwPJvKp6U5KNJK8927PsBlX1rCQfSvKWszzKbnRuNt9iviKb78zcWVWv7O6fnM2hdoHrkny8u/+hqv4wm9dUeEV3/+/ZHuyZaKdf6T6VS0jmyS4hyS9ZZl9TVa9P8p4kB7r7Z0Ozrbvt9vb8JK9I8qWq+k42/5Zz2IeptrXMz+zxJIe7++fd/e0k38pmhDm9Zfb1+iS3JUl3fyXJc7J5TWaenqV+D59sp6PrEpI7Y9t9rapXJfloNoPrb2PLe9K97e5Hu/uC7r6kuy/J5t/LD3T3GV+L9Rlimd8Fn8/mq9xU1QXZfLv5wcEZ19Ey+/rdJK9Lkqp6eTaje2J0yt3pcJI3Lz7FfHmSR7v7+9t9046+vdwuIbkjltzXDyZ5XpLPLj6X9t3uPnDWhl4TS+4tT9GS+3p7kj+tqvuS/E+Sd3e3d72exJL7+q4k/1JVf5PND1W9xQub7VXVp7P5j8ALFn8Pf2+SZydJd38km38fvzrJsSSPJXnrUo9r7wFghitSAcAQ0QWAIaILAENEFwCGiC4ADBFdABgiugAwRHQBYMj/AZjNiq1BTsFgAAAAAElFTkSuQmCC\n", 81 | "text/plain": [ 82 | "
" 83 | ] 84 | }, 85 | "metadata": { 86 | "needs_background": "light" 87 | }, 88 | "output_type": "display_data" 89 | } 90 | ], 91 | "source": [ 92 | "# Create Figure (empty canvas)\n", 93 | "fig = plt.figure()\n", 94 | "\n", 95 | "# Add set of axes to figure\n", 96 | "axes = fig.add_axes([0, 0, 1, 1])" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 3", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.7.6" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 4 128 | } 129 | -------------------------------------------------------------------------------- /04-Matplotlib/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/example.png -------------------------------------------------------------------------------- /04-Matplotlib/figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/figure.png -------------------------------------------------------------------------------- /04-Matplotlib/myfirstplot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/myfirstplot.png -------------------------------------------------------------------------------- /04-Matplotlib/new_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/new_figure.png -------------------------------------------------------------------------------- /04-Matplotlib/new_subplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/new_subplots.png -------------------------------------------------------------------------------- /04-Matplotlib/subplots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/subplots.png -------------------------------------------------------------------------------- /04-Matplotlib/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/04-Matplotlib/test.png -------------------------------------------------------------------------------- /05-Seaborn/DistPlot_solution.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/DistPlot_solution.png -------------------------------------------------------------------------------- /05-Seaborn/catplot_solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/catplot_solution.png -------------------------------------------------------------------------------- /05-Seaborn/example_scatter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/example_scatter.jpg -------------------------------------------------------------------------------- /05-Seaborn/heatmap_solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/heatmap_solution.png -------------------------------------------------------------------------------- /05-Seaborn/my_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/my_plot.jpg -------------------------------------------------------------------------------- /05-Seaborn/task_one.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/05-Seaborn/task_one.jpg -------------------------------------------------------------------------------- /07-Overview-of-Machine-Learning/ML Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "dfa9854d", 6 | "metadata": {}, 7 | "source": [ 8 | "# Why Machine Learning\n", 9 | "\n", 10 | "- In general is the study of statistical computer algorithms that improve automatically through data.\n", 11 | "- is a subset for AI\n", 12 | "- What kind of problems can it solve: Credit Scoring, Insurance Risk, Price Forecasting, Spam Filtering, Customer Segmentation, Entity Classification\n", 13 | "\n", 14 | "## Structure of ML problem framing\n", 15 | "\n", 16 | "- Given features from a data set obtain a desired label.\n", 17 | "- ML algorithms are often called \"estimators\" as they estimate the desired label or output.\n", 18 | "\n", 19 | "## So why do we use it?\n", 20 | "\n", 21 | "- Some problems are only possible using ML.\n", 22 | "- major caveat is that we NEED good existing data.\n", 23 | "- majority of time is spent on preparaing and massagind the data into the form you want.\n", 24 | "\n", 25 | "# Different Types of ML\n", 26 | "\n", 27 | "Two main types are:\n", 28 | "\n", 29 | "## Supervised Learning\n", 30 | "\n", 31 | "- Uses historical and labeled data, the ML model PREDICTS a value\n", 32 | "- Known results and data from the past.\n", 33 | "- labeled data means that we KNOW the features of the data and past prediciton labels.\n", 34 | "\n", 35 | "### Two main label types\n", 36 | "\n", 37 | "- Categorical value to predict (CLASSIFICATION task) (E.g predict the category, handwriting detection)\n", 38 | "- Continuos value to predict (REGRESSION task) (E.g. 
predict the price, electricity loads, test scores)\n", 39 | "\n", 40 | "## Unsupervised Learning\n", 41 | "\n", 42 | "- Applied to unlabeled data, the ML model discovers possible patterns in the data.\n", 43 | "- Desired output is unknown.\n", 44 | "- Trying to group and label data without a label.\n", 45 | "- Downside is that there is no historical correct label, it's hard to evaluate performance." 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "79447e3a", 51 | "metadata": {}, 52 | "source": [ 53 | "# Supervised ML\n", 54 | "\n", 55 | "PREDICTING AN OUTCOME\n", 56 | "\n", 57 | "scikit-learn used for ML models.\n", 58 | "\n", 59 | "- Starts with collecting and organising a data set on history.\n", 60 | "- Create a data product that can help us with predicting.\n", 61 | "- x: features, y: label\n", 62 | "- label is a term for what we are trying to predict\n", 63 | "- features are known components in the data we are predicting the label from.\n", 64 | "\n", 65 | "- Once we have identified features and label, we split the data into a training set and a test set.\n", 66 | "- We split by train and test so that we can verify that our model is high performant.\n", 67 | "\n", 68 | "## Training Set\n", 69 | "\n", 70 | "- The data we use to train the model.\n", 71 | "\n", 72 | "\n", 73 | "## Testing Set\n", 74 | "\n", 75 | "- The data we use to check against the trained model to test that predicted values are accurate.\n", 76 | "- We provide only the features not the labels when testing the algorithm.\n", 77 | "- The algorithm could not have memorized data it has never seen which is why we do this, we can see that predictions are accurate or not.\n", 78 | "\n", 79 | "- Common split is 70% training, 30% test.\n", 80 | "\n", 81 | "\n", 82 | "What happend if performance is not great?\n", 83 | "- We can adjust the models paramaters and try again.\n", 84 | "\n", 85 | "## Beggining to End, the Process\n", 86 | "\n", 87 | "1. 
We get the x and y data (features + label)\n", 88 | "2. split the data into test and training set.\n", 89 | "3. Fit ML model on training data set.\n", 90 | "4. Evaluate model performance given test set.\n", 91 | "5. adjust model params if performance is poor.\n", 92 | "6. once satisified, deploy model to the real world.\n" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.8.8" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 5 117 | } 118 | -------------------------------------------------------------------------------- /07-Overview-of-Machine-Learning/Template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright by Pierian Data Inc.
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Template" 20 | ] 21 | } 22 | ], 23 | "metadata": { 24 | "anaconda-cloud": {}, 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.7.4" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 1 45 | } 46 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/04-Linear-Regression-Project-DataSet .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright by Pierian Data Inc.
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# NOTE: BEFORE WE CAN CONTINUE WITH THIS PROJECT WE NEED TO PERFORM FEATURE ENGINEERING. THE NEXT SECTION OF THE COURSE WILL FOCUS ON TEACHING YOU FEATURE ENGINEERING ON THIS DATASET.\n", 20 | "\n", 21 | "---\n", 22 | "----\n", 23 | "# JUMP FORWARD TO THE FEATURE ENGINEERING SECTION\n", 24 | "\n", 25 | "The dataset we will be working with later on in the next section:\n", 26 | "\n", 27 | "* Ames Iowa Data Set: http://jse.amstat.org/v19n3/decock.pdf" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pandas as pd" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 14, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "df = pd.read_csv(\"..\\DATA\\Ames_Housing_Data.csv\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 15, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/html": [ 56 | "
\n", 57 | "\n", 70 | "\n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | "
PIDMS SubClassMS ZoningLot FrontageLot AreaStreetAlleyLot ShapeLand ContourUtilities...Pool AreaPool QCFenceMisc FeatureMisc ValMo SoldYr SoldSale TypeSale ConditionSalePrice
052630110020RL141.031770PaveNaNIR1LvlAllPub...0NaNNaNNaN052010WDNormal215000
152635004020RH80.011622PaveNaNRegLvlAllPub...0NaNMnPrvNaN062010WDNormal105000
252635101020RL81.014267PaveNaNIR1LvlAllPub...0NaNNaNGar21250062010WDNormal172000
352635303020RL93.011160PaveNaNRegLvlAllPub...0NaNNaNNaN042010WDNormal244000
452710501060RL74.013830PaveNaNIR1LvlAllPub...0NaNMnPrvNaN032010WDNormal189900
\n", 220 | "

5 rows × 81 columns

\n", 221 | "
" 222 | ], 223 | "text/plain": [ 224 | " PID MS SubClass MS Zoning Lot Frontage Lot Area Street Alley \\\n", 225 | "0 526301100 20 RL 141.0 31770 Pave NaN \n", 226 | "1 526350040 20 RH 80.0 11622 Pave NaN \n", 227 | "2 526351010 20 RL 81.0 14267 Pave NaN \n", 228 | "3 526353030 20 RL 93.0 11160 Pave NaN \n", 229 | "4 527105010 60 RL 74.0 13830 Pave NaN \n", 230 | "\n", 231 | " Lot Shape Land Contour Utilities ... Pool Area Pool QC Fence Misc Feature \\\n", 232 | "0 IR1 Lvl AllPub ... 0 NaN NaN NaN \n", 233 | "1 Reg Lvl AllPub ... 0 NaN MnPrv NaN \n", 234 | "2 IR1 Lvl AllPub ... 0 NaN NaN Gar2 \n", 235 | "3 Reg Lvl AllPub ... 0 NaN NaN NaN \n", 236 | "4 IR1 Lvl AllPub ... 0 NaN MnPrv NaN \n", 237 | "\n", 238 | " Misc Val Mo Sold Yr Sold Sale Type Sale Condition SalePrice \n", 239 | "0 0 5 2010 WD Normal 215000 \n", 240 | "1 0 6 2010 WD Normal 105000 \n", 241 | "2 12500 6 2010 WD Normal 172000 \n", 242 | "3 0 4 2010 WD Normal 244000 \n", 243 | "4 0 3 2010 WD Normal 189900 \n", 244 | "\n", 245 | "[5 rows x 81 columns]" 246 | ] 247 | }, 248 | "execution_count": 15, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "df.head()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 16, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "\n", 267 | "RangeIndex: 2930 entries, 0 to 2929\n", 268 | "Data columns (total 81 columns):\n", 269 | " # Column Non-Null Count Dtype \n", 270 | "--- ------ -------------- ----- \n", 271 | " 0 PID 2930 non-null int64 \n", 272 | " 1 MS SubClass 2930 non-null int64 \n", 273 | " 2 MS Zoning 2930 non-null object \n", 274 | " 3 Lot Frontage 2440 non-null float64\n", 275 | " 4 Lot Area 2930 non-null int64 \n", 276 | " 5 Street 2930 non-null object \n", 277 | " 6 Alley 198 non-null object \n", 278 | " 7 Lot Shape 2930 non-null object \n", 279 | " 8 Land Contour 2930 non-null object \n", 280 | " 9 
Utilities 2930 non-null object \n", 281 | " 10 Lot Config 2930 non-null object \n", 282 | " 11 Land Slope 2930 non-null object \n", 283 | " 12 Neighborhood 2930 non-null object \n", 284 | " 13 Condition 1 2930 non-null object \n", 285 | " 14 Condition 2 2930 non-null object \n", 286 | " 15 Bldg Type 2930 non-null object \n", 287 | " 16 House Style 2930 non-null object \n", 288 | " 17 Overall Qual 2930 non-null int64 \n", 289 | " 18 Overall Cond 2930 non-null int64 \n", 290 | " 19 Year Built 2930 non-null int64 \n", 291 | " 20 Year Remod/Add 2930 non-null int64 \n", 292 | " 21 Roof Style 2930 non-null object \n", 293 | " 22 Roof Matl 2930 non-null object \n", 294 | " 23 Exterior 1st 2930 non-null object \n", 295 | " 24 Exterior 2nd 2930 non-null object \n", 296 | " 25 Mas Vnr Type 2907 non-null object \n", 297 | " 26 Mas Vnr Area 2907 non-null float64\n", 298 | " 27 Exter Qual 2930 non-null object \n", 299 | " 28 Exter Cond 2930 non-null object \n", 300 | " 29 Foundation 2930 non-null object \n", 301 | " 30 Bsmt Qual 2850 non-null object \n", 302 | " 31 Bsmt Cond 2850 non-null object \n", 303 | " 32 Bsmt Exposure 2847 non-null object \n", 304 | " 33 BsmtFin Type 1 2850 non-null object \n", 305 | " 34 BsmtFin SF 1 2929 non-null float64\n", 306 | " 35 BsmtFin Type 2 2849 non-null object \n", 307 | " 36 BsmtFin SF 2 2929 non-null float64\n", 308 | " 37 Bsmt Unf SF 2929 non-null float64\n", 309 | " 38 Total Bsmt SF 2929 non-null float64\n", 310 | " 39 Heating 2930 non-null object \n", 311 | " 40 Heating QC 2930 non-null object \n", 312 | " 41 Central Air 2930 non-null object \n", 313 | " 42 Electrical 2929 non-null object \n", 314 | " 43 1st Flr SF 2930 non-null int64 \n", 315 | " 44 2nd Flr SF 2930 non-null int64 \n", 316 | " 45 Low Qual Fin SF 2930 non-null int64 \n", 317 | " 46 Gr Liv Area 2930 non-null int64 \n", 318 | " 47 Bsmt Full Bath 2928 non-null float64\n", 319 | " 48 Bsmt Half Bath 2928 non-null float64\n", 320 | " 49 Full Bath 2930 non-null int64 \n", 321 | 
" 50 Half Bath 2930 non-null int64 \n", 322 | " 51 Bedroom AbvGr 2930 non-null int64 \n", 323 | " 52 Kitchen AbvGr 2930 non-null int64 \n", 324 | " 53 Kitchen Qual 2930 non-null object \n", 325 | " 54 TotRms AbvGrd 2930 non-null int64 \n", 326 | " 55 Functional 2930 non-null object \n", 327 | " 56 Fireplaces 2930 non-null int64 \n", 328 | " 57 Fireplace Qu 1508 non-null object \n", 329 | " 58 Garage Type 2773 non-null object \n", 330 | " 59 Garage Yr Blt 2771 non-null float64\n", 331 | " 60 Garage Finish 2771 non-null object \n", 332 | " 61 Garage Cars 2929 non-null float64\n", 333 | " 62 Garage Area 2929 non-null float64\n", 334 | " 63 Garage Qual 2771 non-null object \n", 335 | " 64 Garage Cond 2771 non-null object \n", 336 | " 65 Paved Drive 2930 non-null object \n", 337 | " 66 Wood Deck SF 2930 non-null int64 \n", 338 | " 67 Open Porch SF 2930 non-null int64 \n", 339 | " 68 Enclosed Porch 2930 non-null int64 \n", 340 | " 69 3Ssn Porch 2930 non-null int64 \n", 341 | " 70 Screen Porch 2930 non-null int64 \n", 342 | " 71 Pool Area 2930 non-null int64 \n", 343 | " 72 Pool QC 13 non-null object \n", 344 | " 73 Fence 572 non-null object \n", 345 | " 74 Misc Feature 106 non-null object \n", 346 | " 75 Misc Val 2930 non-null int64 \n", 347 | " 76 Mo Sold 2930 non-null int64 \n", 348 | " 77 Yr Sold 2930 non-null int64 \n", 349 | " 78 Sale Type 2930 non-null object \n", 350 | " 79 Sale Condition 2930 non-null object \n", 351 | " 80 SalePrice 2930 non-null int64 \n", 352 | "dtypes: float64(11), int64(27), object(43)\n", 353 | "memory usage: 1.8+ MB\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "df.info()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "----\n", 366 | "----" 367 | ] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 
| "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.8.8" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 2 391 | } 392 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/Bias-Variance Trade Off.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "98fbb84f", 6 | "metadata": {}, 7 | "source": [ 8 | "Also known as overfitting vs underfiiting\n", 9 | "\n", 10 | "- very important for supervised ML\n", 11 | "- in general increasing model complexity (using higher order of numbers) in search for better performance leads to a bias-variance trad-off\n", 12 | "- this means we want to have a model that can generalise well to new unseen data but also account for variance and patterns in the known data\n", 13 | "\n", 14 | "Overfitting (could be hard to catch)\n", 15 | "- The model fits too much to the noise from the data,\n", 16 | "- results in low error on training sets but high error on test sets\n", 17 | "- This is why it is CRUCIAL to test trained data so the algorithm is pitted against data it has not seen before.\n", 18 | "\n", 19 | "Underfitting\n", 20 | "- Too much bias, does not capture the underlying trend of the data well enough.\n", 21 | "- Usually the result of an excessively simple model. Is generalising too much.\n", 22 | "\n", 23 | "We can plot out level of error vs model complexity\n", 24 | "- A good model would reduce in error as complexity increases.\n", 25 | "- In case of polynomial regression the complexity would = the polynomial degree.\n", 26 | "\n", 27 | "- A BAD model would have an increase in error as we increase model complexity." 
28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "b455cd84", 33 | "metadata": {}, 34 | "source": [ 35 | "# Choosing the Correct Degree of Polynomial (Complexity)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "220392a0", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | "codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.8.8" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 5 68 | } 69 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/Cross-Validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6e0e12b5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Cross-Validation\n", 9 | "\n", 10 | "More advanced set of methods of splitting data into training and test sets.\n", 11 | "\n", 12 | "Can we train on all the data and evaluate on all the data?\n", 13 | "\n", 14 | "We test all portions of our data! We have to repeat computations K number of times.\n", 15 | "\n", 16 | "Also known as K-fold cross-validation (typically K = 10)\n", 17 | "You average the errors across all the tests done."
18 | ] 19 | } 20 | ], 21 | "metadata": { 22 | "kernelspec": { 23 | "display_name": "Python 3", 24 | "language": "python", 25 | "name": "python3" 26 | }, 27 | "language_info": { 28 | "codemirror_mode": { 29 | "name": "ipython", 30 | "version": 3 31 | }, 32 | "file_extension": ".py", 33 | "mimetype": "text/x-python", 34 | "name": "python", 35 | "nbconvert_exporter": "python", 36 | "pygments_lexer": "ipython3", 37 | "version": "3.8.8" 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 5 42 | } 43 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/Elastic Net.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "08b0fa86", 6 | "metadata": {}, 7 | "source": [ 8 | "# Elastic Net\n", 9 | "\n", 10 | "L1 and L2 Regularisation\n", 11 | "\n", 12 | "- Elastic net seeks to improve on both L1 and L2 regularisation by combining them." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "69c4477d", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.8.8" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/Linear Regression Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "eb776cd8", 6 | "metadata": {}, 7 | "source": [ 
8 | "# Linear Regression\n", 9 | "\n", 10 | "## Algorithm History\n", 11 | "\n", 12 | "- Based on \"least squares\" grew out of a need for improving navigation methods based on astronomy during the age of exploration in the 1700s\n", 13 | "- Put simply a linear relationship implies some constant straight line relationship. (e.g. x = y)\n", 14 | "- This implies for a new x value i could predict the y value.\n", 15 | "\n", 16 | "BUT\n", 17 | "\n", 18 | "- Real data will not draw a straight line (think of a scatterplot with a million points)\n", 19 | "- We can instead draw a line that would be the best fit for all those data points.\n", 20 | "- Fundamentally we want to minimize the overall distance from the points to the line. (distance from lines to points is called \"Residual Error\" and it must be minimised as much as possible.\n", 21 | "\n", 22 | "### Ordinary Least Squares (OLS)\n", 23 | "Works by minimizing the sum of the squares of the differences between the observed dependent variable in the given dataset and those predicted by the linear function.\n", 24 | "\n", 25 | "- Essentially we SQUARE the distance to the line drawn for all points and sum them together to get the least squared error." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "8a1d6268", 31 | "metadata": {}, 32 | "source": [ 33 | "# Understanding Ordinary Least Squares\n", 34 | "\n", 35 | "Linear regression OLS theory; the equation of a simple straight line is always\n", 36 | "**y = mx + b**\n", 37 | "\n", 38 | "Where m is the slope and b is the intercept with the y-axis\n", 39 | "\n", 40 | "- There is only room for one possible feature X.\n", 41 | "- OLS will allow us to directly solve for slope M and intercept B given a set of real data points for X and Y.\n", 42 | "- The issue is that if we have multiple features we need to use GRADIENT DESCENT to scale it.\n", 43 | "\n", 44 | "\n", 45 | "## Independent And Dependent Variable (X and Y)\n", 46 | "In the case of predicting how many hours of operating a factory result in production volume,\n", 47 | "- x, the independent variable is how many hours\n", 48 | "- y, the dependent variable is production volume (what we want to predict)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "390e4b46", 54 | "metadata": {}, 55 | "source": [ 56 | "# Cost Functions\n", 57 | "\n", 58 | "If we try and add multiple features OLS will not be scalable.\n", 59 | "\n", 60 | "## Gradient Descent\n", 61 | "\n", 62 | "Here we can leverage computational power to help resolve problems for us.\n", 63 | "Taking the cost function derivative and then solving for zero to get the set of Beta coefficients will be too difficult to solve directly through an analytical solution (math mumbo jumbo)\n", 64 | "\n", 65 | "- We can describe the cost function through vectorised matrix notation and use **gradient descent** to have a computer figure out the beta coefficient values to minimise the COST/LOSS function."
66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "5cfb38e3", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.8.8" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 5 98 | } 99 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/Regularisation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "85e061f4", 6 | "metadata": {}, 7 | "source": [ 8 | "# Regularisation\n", 9 | "\n", 10 | "In general we can think of regularisation as a way to reduce model overfitting and variance.\n", 11 | "- Requires additional bias and a search for optimal penalty hyperparameter (or just parameter!)\n", 12 | "\n", 13 | "3 Main types\n", 14 | "\n", 15 | "## L1 Regularisation (LASSO)\n", 16 | "\n", 17 | "- adds a penalty equal to the absolute value of the magnitude of coefficients.\n", 18 | "\n", 19 | "## L2 Regularisation (Ridge)\n", 20 | "\n", 21 | "- adds a penalty term equal to the square of the magnitude of the coefficients.\n", 22 | "\n", 23 | "## L1 + L2 (Elastic Net)\n", 24 | "\n", 25 | "- adds both but also an alpha term defining the ratio between the two."
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "id": "a12dbbf6", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 3", 40 | "language": "python", 41 | "name": "python3" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 3 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython3", 53 | "version": "3.8.8" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 5 58 | } 59 | -------------------------------------------------------------------------------- /08-Linear-Regression-Models/final_converter.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/final_converter.joblib -------------------------------------------------------------------------------- /08-Linear-Regression-Models/final_poly_converter.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/final_poly_converter.joblib -------------------------------------------------------------------------------- /08-Linear-Regression-Models/final_poly_model.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/final_poly_model.joblib -------------------------------------------------------------------------------- 
/08-Linear-Regression-Models/final_sales_model.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/final_sales_model.joblib -------------------------------------------------------------------------------- /08-Linear-Regression-Models/poly_converter.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/poly_converter.joblib -------------------------------------------------------------------------------- /08-Linear-Regression-Models/sales_model.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/sales_model.joblib -------------------------------------------------------------------------------- /08-Linear-Regression-Models/sales_poly_model.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/08-Linear-Regression-Models/sales_poly_model.joblib -------------------------------------------------------------------------------- /10-Cross-Val-and-LinReg-Project/grid_search_cross_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/10-Cross-Val-and-LinReg-Project/grid_search_cross_validation.png 
-------------------------------------------------------------------------------- /12-K-Nearest-Neighbors/mine.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/12-K-Nearest-Neighbors/mine.jpg -------------------------------------------------------------------------------- /12-K-Nearest-Neighbors/sonar.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/12-K-Nearest-Neighbors/sonar.jpg -------------------------------------------------------------------------------- /13-Support-Vector-Machines/Types_of_concrete_slump.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/13-Support-Vector-Machines/Types_of_concrete_slump.jpg -------------------------------------------------------------------------------- /13-Support-Vector-Machines/__pycache__/svm_margin_plot.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/13-Support-Vector-Machines/__pycache__/svm_margin_plot.cpython-37.pyc -------------------------------------------------------------------------------- /13-Support-Vector-Machines/__pycache__/svm_margin_plot.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/13-Support-Vector-Machines/__pycache__/svm_margin_plot.cpython-38.pyc -------------------------------------------------------------------------------- /13-Support-Vector-Machines/svm_margin_plot.py: -------------------------------------------------------------------------------- 1 | # CODE SOURCE IS DIRECTLY FROM DOCUMENTATION 2 | # https://scikit-learn.org/stable/auto_examples/svm/plot_separating_hyperplane.html 3 | import numpy as np 4 | import seaborn as sns 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | def plot_svm_boundary(model,X,y): 9 | 10 | X = X.values 11 | y = y.values 12 | 13 | # Scatter Plot 14 | plt.scatter(X[:, 0], X[:, 1], c=y, s=30,cmap='seismic') 15 | 16 | 17 | # plot the decision function 18 | ax = plt.gca() 19 | xlim = ax.get_xlim() 20 | ylim = ax.get_ylim() 21 | 22 | # create grid to evaluate model 23 | xx = np.linspace(xlim[0], xlim[1], 30) 24 | yy = np.linspace(ylim[0], ylim[1], 30) 25 | YY, XX = np.meshgrid(yy, xx) 26 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 27 | Z = model.decision_function(xy).reshape(XX.shape) 28 | 29 | # plot decision boundary and margins 30 | ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, 31 | linestyles=['--', '-', '--']) 32 | # plot support vectors 33 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100, 34 | linewidth=1, facecolors='none', edgecolors='k') 35 | plt.show() -------------------------------------------------------------------------------- /13-Support-Vector-Machines/wine.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/13-Support-Vector-Machines/wine.jpg -------------------------------------------------------------------------------- 
/14-Decision-Trees/penguin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/14-Decision-Trees/penguin.jpg -------------------------------------------------------------------------------- /15-Random-Forests/boring.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/15-Random-Forests/boring.jpg -------------------------------------------------------------------------------- /15-Random-Forests/penguin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/15-Random-Forests/penguin.jpg -------------------------------------------------------------------------------- /16-Boosted-Trees/mushroom.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/16-Boosted-Trees/mushroom.jpg -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/cplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/cplot.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig1.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig10.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig11.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig2.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig3.png -------------------------------------------------------------------------------- 
/17-Supervised-Learning-Capstone-Project/fig5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig5.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig6.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig7.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/fig9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/fig9.png -------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/figbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/figbar.png 
-------------------------------------------------------------------------------- /17-Supervised-Learning-Capstone-Project/hugetree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/17-Supervised-Learning-Capstone-Project/hugetree.png -------------------------------------------------------------------------------- /18-Naive-Bayes-and-NLP/One.txt: -------------------------------------------------------------------------------- 1 | This is a story about dogs 2 | our canine pets 3 | Dogs are furry animals 4 | -------------------------------------------------------------------------------- /18-Naive-Bayes-and-NLP/Two.txt: -------------------------------------------------------------------------------- 1 | This story is about surfing 2 | Catching waves is fun 3 | Surfing is a popular water sport 4 | -------------------------------------------------------------------------------- /19-Unsupervised-Learning-Overview/Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7039fd3c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Unsupervised learning\n", 9 | "- Supervised: using historical labeled data to predict on a label for new data.\n", 10 | "- Unsupervised: Using unlabeled data, discover patterns, clusters or significant components\n", 11 | "\n", 12 | "## We split unsupervised learning as such:\n", 13 | "- clustering: using features we group together data rows into distinct clusters\n", 14 | "- dimension reduction: using features, discover how to combine and reduce into fewer components" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "id": "7374e2ea", 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [] 24 | } 25 | ], 26 | "metadata": { 27 | 
"kernelspec": { 28 | "display_name": "Python 3", 29 | "language": "python", 30 | "name": "python3" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 3 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython3", 42 | "version": "3.8.8" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 5 47 | } 48 | -------------------------------------------------------------------------------- /19-Unsupervised-Learning-Overview/Blank on Purpose.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright by Pierian Data Inc.
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# THIS NOTEBOOK IS BLANK ON PURPOSE\n", 20 | "\n", 21 | "# THERE ARE NO NOTEBOOKS FOR THIS SECTION OF THE COURSE" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | } 31 | ], 32 | "metadata": { 33 | "anaconda-cloud": {}, 34 | "kernelspec": { 35 | "display_name": "Python 3", 36 | "language": "python", 37 | "name": "python3" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 3 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython3", 49 | "version": "3.8.5" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 1 54 | } 55 | -------------------------------------------------------------------------------- /21-Hierarchal-Clustering/00-Hierarchal-Clustering-Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright by Pierian Data Inc.
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Hiearchal Clustering" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 8, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "df = pd.read_excel('C://Users/Marcial/Downloads/Data_User_Modeling_Dataset_Hamdi Tolga KAHRAMAN.xls')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 9, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "
\n", 49 | "\n", 62 | "\n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | "
Data Set Characteristics: MultivariateUnnamed: 1Attribute Information:
0Number of Instances: 403NaNSTG (The degree of study time for goal object ...
1Area: EducationNaNSCG (The degree of repetition number of user f...
2Attribute Characteristics: RealNaNSTR (The degree of study time of user for rela...
3Number of Attributes: 5NaNLPR (The exam performance of user for related ...
4Associated Tasks: ClassificationNaNPEG (The exam performance of user for goal obj...
5NaNNaNUNS (The knowledge level of user)
6Class DistributionNaNNaN
7Very Low: 50NaNNaN
8Low:129NaNNaN
9Middle: 122NaNNaN
10high 130NaNNaN
11Total:403NaNNaN
\n", 146 | "
" 147 | ], 148 | "text/plain": [ 149 | " Data Set Characteristics: Multivariate Unnamed: 1 \\\n", 150 | "0 Number of Instances: 403 NaN \n", 151 | "1 Area: Education NaN \n", 152 | "2 Attribute Characteristics: Real NaN \n", 153 | "3 Number of Attributes: 5 NaN \n", 154 | "4 Associated Tasks: Classification NaN \n", 155 | "5 NaN NaN \n", 156 | "6 Class Distribution NaN \n", 157 | "7 Very Low: 50 NaN \n", 158 | "8 Low:129 NaN \n", 159 | "9 Middle: 122 NaN \n", 160 | "10 high 130 NaN \n", 161 | "11 Total:403 NaN \n", 162 | "\n", 163 | " Attribute Information: \n", 164 | "0 STG (The degree of study time for goal object ... \n", 165 | "1 SCG (The degree of repetition number of user f... \n", 166 | "2 STR (The degree of study time of user for rela... \n", 167 | "3 LPR (The exam performance of user for related ... \n", 168 | "4 PEG (The exam performance of user for goal obj... \n", 169 | "5 UNS (The knowledge level of user) \n", 170 | "6 NaN \n", 171 | "7 NaN \n", 172 | "8 NaN \n", 173 | "9 NaN \n", 174 | "10 NaN \n", 175 | "11 NaN " 176 | ] 177 | }, 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "df" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [] 193 | } 194 | ], 195 | "metadata": { 196 | "anaconda-cloud": {}, 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.7.4" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 1 217 | } 218 | -------------------------------------------------------------------------------- 
/24-Model-Deployment/01-Serving-Model-as-API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "___\n", 8 | "\n", 9 | "\n", 10 | "___\n", 11 | "
Copyright by Pierian Data Inc.
\n", 12 | "
For more information, visit us at www.pieriandata.com
" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "# Serving a Model as an API\n", 20 | "\n", 21 | "**NOTE: While we show this inside a Jupyter Notebook, you would probably never deploy something as a notebook in a real-world setting. Everything here is in one cell to reflect that this should be a .py file. We also included a duplicate .py file in this folder.**\n", 22 | "\n", 23 | "\n", 24 | "---\n", 25 | "\n", 26 | "**NOTE: You will need to install Flask to serve the API: https://flask.palletsprojects.com/en/2.0.x/installation/**\n", 27 | "\n", 28 | " pip install flask\n", 29 | " \n", 30 | " or\n", 31 | " \n", 32 | " conda install flask\n", 33 | "\n", 34 | "---\n", 35 | "\n", 36 | "\n", 37 | "## api.py (Run this as a script as shown in the video, NOT from within a Jupyter Cell)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "############################\n", 47 | "######## IMPORTS ##########\n", 48 | "##########################\n", 49 | "from flask import Flask, request, jsonify\n", 50 | "import joblib\n", 51 | "import pandas as pd\n", 52 | "\n", 53 | "# Create Flask App\n", 54 | "app = Flask(__name__)\n", 55 | "\n", 56 | "\n", 57 | "# Create API routing call\n", 58 | "@app.route('/predict', methods=['POST'])\n", 59 | "def predict():\n", 60 | " \n", 61 | " # Get JSON Request\n", 62 | " feat_data = request.json\n", 63 | " # Convert JSON request to Pandas DataFrame\n", 64 | " df = pd.DataFrame(feat_data)\n", 65 | " # Match Column Na,es\n", 66 | " df = df.reindex(columns=col_names)\n", 67 | " # Get prediction\n", 68 | " prediction = list(model.predict(df))\n", 69 | " # Return JSON version of Prediction\n", 70 | " return jsonify({'prediction': str(prediction)})\n", 71 | "\n", 72 | " \n", 73 | "\n", 74 | "if __name__ == '__main__':\n", 75 | "\n", 76 | " # LOADS MODEL AND FEATURE COLUMNS\n", 77 | " model = 
joblib.load(\"final_model.pkl\") \n", 78 | " col_names = joblib.load(\"column_names.pkl\") \n", 79 | "\n", 80 | " app.run(debug=True)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# JSON Post Request\n", 88 | "\n", 89 | "1. POST to: http://127.0.0.1:5000/predict\n", 90 | "2. Select Body\n", 91 | "3. Select Raw\n", 92 | "4. Select JSON(application/json)\n", 93 | "5. Supply JSON for Features:\n", 94 | " [{\"TV\":230.1,\"radio\":37.8,\"newspaper\":69.2}]\n" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.8.5" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 4 119 | } 120 | -------------------------------------------------------------------------------- /24-Model-Deployment/api.py: -------------------------------------------------------------------------------- 1 | ############################ 2 | ######## IMPORTS ########## 3 | ########################## 4 | from flask import Flask, request, jsonify 5 | import joblib 6 | import pandas as pd 7 | 8 | # Create Flask App 9 | app = Flask(__name__) 10 | 11 | 12 | # Create API routing call 13 | @app.route('/predict', methods=['POST']) 14 | def predict(): 15 | 16 | # Get JSON Request 17 | feat_data = request.json 18 | # Convert JSON request to Pandas DataFrame 19 | df = pd.DataFrame(feat_data) 20 | # Match Column Na,es 21 | df = df.reindex(columns=col_names) 22 | # Get prediction 23 | prediction = list(model.predict(df)) 24 | # Return JSON version of Prediction 25 | return jsonify({'prediction': str(prediction)}) 26 | 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | # 
LOADS MODEL AND FEATURE COLUMNS 32 | model = joblib.load("final_model.pkl") 33 | col_names = joblib.load("column_names.pkl") 34 | 35 | app.run(debug=True) -------------------------------------------------------------------------------- /24-Model-Deployment/col_names.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/24-Model-Deployment/col_names.pkl -------------------------------------------------------------------------------- /24-Model-Deployment/column_names.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/24-Model-Deployment/column_names.pkl -------------------------------------------------------------------------------- /24-Model-Deployment/final_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/24-Model-Deployment/final_model.pkl -------------------------------------------------------------------------------- /24-Model-Deployment/mynewapi.py: -------------------------------------------------------------------------------- 1 | from flask import Flask,request,jsonify 2 | import joblib 3 | import pandas as pd 4 | 5 | # CREATE FLASK APP 6 | app = Flask(__name__) 7 | 8 | 9 | # CONNECT POST API CALL ---> predict() Function #http://localhost:5000/predict 10 | @app.route('/predict',methods=['POST']) 11 | def predict(): 12 | 13 | # GET JSON REQUEST 14 | feat_data = request.json 15 | # CONVERT JSON to PANDAS DF (col names) 16 | df = pd.DataFrame(feat_data) 17 | df = df.reindex(columns=col_names) 18 | # PREDICT 19 | prediction = 
list(model.predict(df)) 20 | 21 | 22 | return jsonify({'prediction':str(prediction)}) 23 | 24 | 25 | # LOAD MY MODEL and LOAD COLUMN NAMES 26 | if __name__ == '__main__': 27 | 28 | model = joblib.load("final_model.pkl") 29 | col_names = joblib.load('column_names.pkl') 30 | 31 | app.run(debug=True) -------------------------------------------------------------------------------- /24-Model-Deployment/untitled.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/24-Model-Deployment/untitled.txt -------------------------------------------------------------------------------- /Pierian_Data_Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/Pierian_Data_Logo.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 2021 Python for Machine Learning & Data Science Masterclass 2 | A project following along with the "2021 Python for Machine Learning & Data Science Masterclass" class by Jose Portilla on Udemy. 3 | 4 | Note that the folder 'project' contains an actual flask server implementation that loads a 5 | random forest regressor model and does predictions over an API. 
6 | 7 | ### Progress 8 | - 11/235 9 | - 23/235 10 | - 32/235 11 | - 43/235 12 | - 50/235 13 | - 53/235 14 | - 63/235 15 | - 80/235 16 | - 94/235 17 | - 101/235 18 | - 107/235 19 | - 113/235 20 | - 122/235 21 | - 129/235 22 | - 136/235 23 | - 141/235 24 | - 146/235 (62.1) 25 | - 152/235 (64.6) 26 | - 159/235 (67.6) 27 | - 173/235 (73.6) 28 | - 187/235 (79.5) 29 | - 198/235 (84.2) 30 | - 203/235 (86.3) 31 | - 210/235 (89.3) 32 | - 215/235 (91.4) 33 | - 221/235 (94) 34 | - 235/235 (100) 35 | 36 | ## Notes and learnings from the tutorial 37 | 38 | ## ML Pathway Overview 39 | - Supervised learning = trying to predict an outcome 40 | - Unsupervised learning = dicover patterns in data 41 | 42 | ## NumPy 43 | Many many Data Science libraries are built powered by NumPy. 44 | It is a library for creating N-dimensional arrays. 45 | - NumPy structures look similar to python lists but they are much more efficient. 46 | 47 | ## Pandas 48 | Is a library for data analysis, uses a dataframe system built off NumPy. 49 | - Has fantastic docs: https://pandas.pydata.org/docs/ 50 | 51 | ## What can we do with it? 52 | Comes built in with tools for reading and writing data (or files) 53 | - Can read/write directly to external data sources (databases + html tables) 54 | - Can intelligently retreive data (to handle missing data and adjustment) 55 | - the "Excel for python" but so much more than that. 56 | - Only limited by how much RAM you have, no limit to size of opening files. 57 | 58 | Series = one-dimensional ndarray with axis labels 59 | 60 | - Allows arrays to have row labels. -------------------------------------------------------------------------------- /project/api.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | import joblib 3 | import pandas as pd 4 | 5 | # Just convention I like these declared at top even though it is not necessary. 
6 | model = None 7 | col_names = None 8 | 9 | # Create flask app. 10 | app = Flask(__name__) 11 | 12 | # Connect post api call to predict func. 13 | @app.route('/predict', methods=['POST']) 14 | def predict(): 15 | # Get json request. 16 | feat_data = request.json 17 | 18 | # convert into pandas df and ensure matches with col names. 19 | df = pd.DataFrame(feat_data) 20 | df = df.reindex(columns=col_names) 21 | 22 | # predict and return 23 | prediction = list(model.predict(df)) 24 | return jsonify({ 'prediction': str(prediction) }); 25 | 26 | # load model and setup column names. 27 | if __name__ == '__main__': 28 | # Load model and feature models. 29 | model = joblib.load('final_model.pkl') 30 | col_names = joblib.load('column_names.pkl') 31 | 32 | app.run(debug=True) 33 | 34 | # Woo and that's it folks!! -------------------------------------------------------------------------------- /project/column_names.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/project/column_names.pkl -------------------------------------------------------------------------------- /project/final_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpha-nero1/2021-Python-for-Machine-Learning-Data-Science-Masterclass/5017dac02c03ff765987c54246f12e33627d63c5/project/final_model.pkl -------------------------------------------------------------------------------- /project/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | numpy 3 | pandas 4 | sklearn 5 | matplotlib -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter==1.0.0 2 | lxml==4.5.1 3 | 
MarkupSafe==1.1.1 4 | matplotlib==3.3.2 5 | notebook==6.0.3 6 | numpy==1.18.1 7 | openpyxl==3.0.4 8 | pandas==1.1.2 9 | Pillow==7.2.0 10 | scikit-learn==0.23.2 11 | scipy==1.4.1 12 | seaborn==0.11.0 13 | SQLAlchemy==1.3.18 14 | 15 | --------------------------------------------------------------------------------