├── .gitignore ├── Adv Regression.ipynb ├── Auto_EDA.ipynb ├── Bite Sized Learning_Data Structures_Python.pdf ├── Bite Sized Learning_Loops_Python.pdf ├── Data Visualisation_Matplotlib.ipynb ├── Data Visualization_Seaborn.ipynb ├── Data Wrangling_Python_Pyspark.ipynb ├── Evaluation Metrics - Regression.ipynb ├── Excel Automation.ipynb ├── Gif Plots.ipynb ├── KNN Classifier.ipynb ├── List Comprehensions_Python.pdf ├── Pandas Basics.ipynb ├── Pandas Crosstab vs Pivot table.ipynb ├── Pandas Tricks.ipynb ├── Pandas_Cheat_Sheet.pdf ├── Pandas_Data Wrangling_CheatSheet.pdf ├── Polynomial Regression.ipynb ├── Python Cheat Sheet.pdf ├── Python Functions.ipynb ├── Python Loops.ipynb ├── Python-Patterns.ipynb ├── README.md ├── Regular Expressions.ipynb ├── Seaborn_Cheatsheet.png ├── Text to Speech.ipynb └── User Defined Functions EDA.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Bite Sized Learning_Data Structures_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Bite Sized Learning_Data Structures_Python.pdf -------------------------------------------------------------------------------- /Bite Sized Learning_Loops_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Bite Sized Learning_Loops_Python.pdf -------------------------------------------------------------------------------- /Data Wrangling_Python_Pyspark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Wrangling" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Data wrangling:\n", 15 | "The task that lies between the data acquisition and exploratory data analysis is what most call as data wrangling. It is the process of formatting, merging, grouping, concatenating etc. 
so that the data is ready for analysis or modelling" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Merging" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "import numpy as np\n", 33 | "#Create data frame\n", 34 | "left = pd.DataFrame({\n", 35 | " 'id':[1,2,3,4,5],\n", 36 | " 'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],\n", 37 | " 'subject_id':['sub1','sub2','sub4','sub6','sub5']})\n", 38 | "right = pd.DataFrame(\n", 39 | " {'id':[1,2,3,4,5],\n", 40 | " 'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],\n", 41 | " 'subject_id':['sub2','sub4','sub3','sub6','sub5']})" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/html": [ 52 | "
\n", 53 | "\n", 66 | "\n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | "
idNamesubject_id
01Alexsub1
12Amysub2
23Allensub4
34Alicesub6
45Ayoungsub5
\n", 108 | "
" 109 | ], 110 | "text/plain": [ 111 | " id Name subject_id\n", 112 | "0 1 Alex sub1\n", 113 | "1 2 Amy sub2\n", 114 | "2 3 Allen sub4\n", 115 | "3 4 Alice sub6\n", 116 | "4 5 Ayoung sub5" 117 | ] 118 | }, 119 | "execution_count": 2, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "left.head()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 3, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/html": [ 136 | "
\n", 137 | "\n", 150 | "\n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | "
idNamesubject_id
01Billysub2
12Briansub4
23Bransub3
34Brycesub6
45Bettysub5
\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " id Name subject_id\n", 196 | "0 1 Billy sub2\n", 197 | "1 2 Brian sub4\n", 198 | "2 3 Bran sub3\n", 199 | "3 4 Bryce sub6\n", 200 | "4 5 Betty sub5" 201 | ] 202 | }, 203 | "execution_count": 3, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "right.head()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 4, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "data": { 219 | "text/html": [ 220 | "
\n", 221 | "\n", 234 | "\n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | "
idName_xsubject_id_xName_ysubject_id_y
01Alexsub1Billysub2
12Amysub2Briansub4
23Allensub4Bransub3
34Alicesub6Brycesub6
45Ayoungsub5Bettysub5
\n", 288 | "
" 289 | ], 290 | "text/plain": [ 291 | " id Name_x subject_id_x Name_y subject_id_y\n", 292 | "0 1 Alex sub1 Billy sub2\n", 293 | "1 2 Amy sub2 Brian sub4\n", 294 | "2 3 Allen sub4 Bran sub3\n", 295 | "3 4 Alice sub6 Bryce sub6\n", 296 | "4 5 Ayoung sub5 Betty sub5" 297 | ] 298 | }, 299 | "execution_count": 4, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "# merge based on one key\n", 306 | "merged_id = pd.merge(left, right, how='inner', on='id', sort=True)\n", 307 | "merged_id.head()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 5, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/html": [ 318 | "
\n", 319 | "\n", 332 | "\n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | "
idName_xsubject_idName_y
04Alicesub6Bryce
15Ayoungsub5Betty
\n", 359 | "
" 360 | ], 361 | "text/plain": [ 362 | " id Name_x subject_id Name_y\n", 363 | "0 4 Alice sub6 Bryce\n", 364 | "1 5 Ayoung sub5 Betty" 365 | ] 366 | }, 367 | "execution_count": 5, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "# merge based on multiple keys\n", 374 | "merged_multiple = pd.merge(left,right,on=['id','subject_id'])\n", 375 | "merged_multiple.head()" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 6, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/html": [ 386 | "
\n", 387 | "\n", 400 | "\n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | "
id_xName_xsubject_idid_yName_y
01Alexsub1NaNNaN
12Amysub21.0Billy
23Allensub42.0Brian
34Alicesub64.0Bryce
45Ayoungsub55.0Betty
\n", 454 | "
" 455 | ], 456 | "text/plain": [ 457 | " id_x Name_x subject_id id_y Name_y\n", 458 | "0 1 Alex sub1 NaN NaN\n", 459 | "1 2 Amy sub2 1.0 Billy\n", 460 | "2 3 Allen sub4 2.0 Brian\n", 461 | "3 4 Alice sub6 4.0 Bryce\n", 462 | "4 5 Ayoung sub5 5.0 Betty" 463 | ] 464 | }, 465 | "execution_count": 6, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "# left join\n", 472 | "merge_left = pd.merge(left, right, on='subject_id', how='left')\n", 473 | "merge_left.head()" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 7, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/html": [ 484 | "
\n", 485 | "\n", 498 | "\n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | "
id_xName_xsubject_idid_yName_y
02.0Amysub21Billy
13.0Allensub42Brian
24.0Alicesub64Bryce
35.0Ayoungsub55Betty
4NaNNaNsub33Bran
\n", 552 | "
" 553 | ], 554 | "text/plain": [ 555 | " id_x Name_x subject_id id_y Name_y\n", 556 | "0 2.0 Amy sub2 1 Billy\n", 557 | "1 3.0 Allen sub4 2 Brian\n", 558 | "2 4.0 Alice sub6 4 Bryce\n", 559 | "3 5.0 Ayoung sub5 5 Betty\n", 560 | "4 NaN NaN sub3 3 Bran" 561 | ] 562 | }, 563 | "execution_count": 7, 564 | "metadata": {}, 565 | "output_type": "execute_result" 566 | } 567 | ], 568 | "source": [ 569 | "# right join\n", 570 | "merge_right = pd.merge(left, right, on='subject_id', how='right')\n", 571 | "merge_right.head()" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 8, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "data": { 581 | "text/html": [ 582 | "
\n", 583 | "\n", 596 | "\n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | "
id_xName_xsubject_idid_yName_y
01.0Alexsub1NaNNaN
12.0Amysub21.0Billy
23.0Allensub42.0Brian
34.0Alicesub64.0Bryce
45.0Ayoungsub55.0Betty
\n", 650 | "
" 651 | ], 652 | "text/plain": [ 653 | " id_x Name_x subject_id id_y Name_y\n", 654 | "0 1.0 Alex sub1 NaN NaN\n", 655 | "1 2.0 Amy sub2 1.0 Billy\n", 656 | "2 3.0 Allen sub4 2.0 Brian\n", 657 | "3 4.0 Alice sub6 4.0 Bryce\n", 658 | "4 5.0 Ayoung sub5 5.0 Betty" 659 | ] 660 | }, 661 | "execution_count": 8, 662 | "metadata": {}, 663 | "output_type": "execute_result" 664 | } 665 | ], 666 | "source": [ 667 | "# outer join\n", 668 | "merge_outer = pd.merge(left, right, how='outer', on='subject_id')\n", 669 | "merge_outer.head()" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": 9, 675 | "metadata": {}, 676 | "outputs": [ 677 | { 678 | "data": { 679 | "text/html": [ 680 | "
\n", 681 | "\n", 694 | "\n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | "
id_xName_xsubject_idid_yName_y
02Amysub21Billy
13Allensub42Brian
24Alicesub64Bryce
35Ayoungsub55Betty
\n", 740 | "
" 741 | ], 742 | "text/plain": [ 743 | " id_x Name_x subject_id id_y Name_y\n", 744 | "0 2 Amy sub2 1 Billy\n", 745 | "1 3 Allen sub4 2 Brian\n", 746 | "2 4 Alice sub6 4 Bryce\n", 747 | "3 5 Ayoung sub5 5 Betty" 748 | ] 749 | }, 750 | "execution_count": 9, 751 | "metadata": {}, 752 | "output_type": "execute_result" 753 | } 754 | ], 755 | "source": [ 756 | "# inner join\n", 757 | "merge_inner = pd.merge(left, right, how='inner', on='subject_id')\n", 758 | "merge_inner.head()" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": 10, 764 | "metadata": { 765 | "scrolled": false 766 | }, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/html": [ 771 | "" 772 | ], 773 | "text/plain": [ 774 | "" 775 | ] 776 | }, 777 | "execution_count": 10, 778 | "metadata": {}, 779 | "output_type": "execute_result" 780 | } 781 | ], 782 | "source": [ 783 | "from IPython.display import Image\n", 784 | "from IPython.core.display import HTML \n", 785 | "Image(url= \"https://data36.com/wp-content/uploads/2018/08/4-pandas-merge-inner-outer-left-right-1024x771.png\", width=600, height=400)" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "## Grouping Data" 793 | ] 794 | }, 795 | { 796 | "cell_type": "markdown", 797 | "metadata": {}, 798 | "source": [ 799 | "Grouping data sets is a frequent need in data analysis where we need the result in terms of various groups present in the data set" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": 11, 805 | "metadata": {}, 806 | "outputs": [ 807 | { 808 | "data": { 809 | "text/html": [ 810 | "
\n", 811 | "\n", 824 | "\n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | "
TeamRankYearPoints
0Riders12014876
1Riders22015789
2Devils22014863
3Devils32015673
4Kings32014741
5kings42015812
6Kings12016756
7Kings12017788
8Riders22016694
9Royals42014701
10Royals12015804
11Riders22017690
\n", 921 | "
" 922 | ], 923 | "text/plain": [ 924 | " Team Rank Year Points\n", 925 | "0 Riders 1 2014 876\n", 926 | "1 Riders 2 2015 789\n", 927 | "2 Devils 2 2014 863\n", 928 | "3 Devils 3 2015 673\n", 929 | "4 Kings 3 2014 741\n", 930 | "5 kings 4 2015 812\n", 931 | "6 Kings 1 2016 756\n", 932 | "7 Kings 1 2017 788\n", 933 | "8 Riders 2 2016 694\n", 934 | "9 Royals 4 2014 701\n", 935 | "10 Royals 1 2015 804\n", 936 | "11 Riders 2 2017 690" 937 | ] 938 | }, 939 | "execution_count": 11, 940 | "metadata": {}, 941 | "output_type": "execute_result" 942 | } 943 | ], 944 | "source": [ 945 | "ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',\n", 946 | " 'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],\n", 947 | " 'Rank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2],\n", 948 | " 'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],\n", 949 | " 'Points':[876,789,863,673,741,812,756,788,694,701,804,690]}\n", 950 | "df = pd.DataFrame(ipl_data)\n", 951 | "df" 952 | ] 953 | }, 954 | { 955 | "cell_type": "code", 956 | "execution_count": 12, 957 | "metadata": {}, 958 | "outputs": [ 959 | { 960 | "name": "stdout", 961 | "output_type": "stream", 962 | "text": [ 963 | "2014\n", 964 | " Team Rank Year Points\n", 965 | "0 Riders 1 2014 876\n", 966 | "2 Devils 2 2014 863\n", 967 | "4 Kings 3 2014 741\n", 968 | "9 Royals 4 2014 701\n", 969 | "2015\n", 970 | " Team Rank Year Points\n", 971 | "1 Riders 2 2015 789\n", 972 | "3 Devils 3 2015 673\n", 973 | "5 kings 4 2015 812\n", 974 | "10 Royals 1 2015 804\n", 975 | "2016\n", 976 | " Team Rank Year Points\n", 977 | "6 Kings 1 2016 756\n", 978 | "8 Riders 2 2016 694\n", 979 | "2017\n", 980 | " Team Rank Year Points\n", 981 | "7 Kings 1 2017 788\n", 982 | "11 Riders 2 2017 690\n" 983 | ] 984 | } 985 | ], 986 | "source": [ 987 | "# group by one column\n", 988 | "grouped_1 = df.groupby('Year')\n", 989 | "for name,group in grouped_1:\n", 990 | " print(name)\n", 991 | " print(group)" 992 | ] 993 | }, 994 | { 995 
| "cell_type": "code", 996 | "execution_count": 13, 997 | "metadata": {}, 998 | "outputs": [ 999 | { 1000 | "data": { 1001 | "text/html": [ 1002 | "
\n", 1003 | "\n", 1016 | "\n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | "
TeamRankYearPoints
TeamYear
Devils20142Devils22014863
20153Devils32015673
Kings20144Kings32014741
20166Kings12016756
20177Kings12017788
Riders20140Riders12014876
20151Riders22015789
20168Riders22016694
201711Riders22017690
Royals20149Royals42014701
201510Royals12015804
kings20155kings42015812
\n", 1141 | "
" 1142 | ], 1143 | "text/plain": [ 1144 | " Team Rank Year Points\n", 1145 | "Team Year \n", 1146 | "Devils 2014 2 Devils 2 2014 863\n", 1147 | " 2015 3 Devils 3 2015 673\n", 1148 | "Kings 2014 4 Kings 3 2014 741\n", 1149 | " 2016 6 Kings 1 2016 756\n", 1150 | " 2017 7 Kings 1 2017 788\n", 1151 | "Riders 2014 0 Riders 1 2014 876\n", 1152 | " 2015 1 Riders 2 2015 789\n", 1153 | " 2016 8 Riders 2 2016 694\n", 1154 | " 2017 11 Riders 2 2017 690\n", 1155 | "Royals 2014 9 Royals 4 2014 701\n", 1156 | " 2015 10 Royals 1 2015 804\n", 1157 | "kings 2015 5 kings 4 2015 812" 1158 | ] 1159 | }, 1160 | "execution_count": 13, 1161 | "metadata": {}, 1162 | "output_type": "execute_result" 1163 | } 1164 | ], 1165 | "source": [ 1166 | "# group by multiple columns (Team and Year)\n", 1167 | "grouped_2 = df.groupby(['Team','Year'])\n", 1168 | "grouped_2.apply(lambda a: a[:])" 1169 | ] 1170 | }, 1171 | { 1172 | "cell_type": "code", 1173 | "execution_count": 14, 1174 | "metadata": {}, 1175 | "outputs": [ 1176 | { 1177 | "name": "stdout", 1178 | "output_type": "stream", 1179 | "text": [ 1180 | "Team\n", 1181 | "Devils 768.000000\n", 1182 | "Kings 761.666667\n", 1183 | "Riders 762.250000\n", 1184 | "Royals 752.500000\n", 1185 | "kings 812.000000\n", 1186 | "Name: Points, dtype: float64\n" 1187 | ] 1188 | } 1189 | ], 1190 | "source": [ 1191 | "# Aggregations\n", 1192 | "grouped_a = df.groupby('Team')\n", 1193 | "print(grouped_a['Points'].agg(np.mean))" 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": 15, 1199 | "metadata": {}, 1200 | "outputs": [ 1201 | { 1202 | "name": "stdout", 1203 | "output_type": "stream", 1204 | "text": [ 1205 | " sum mean std\n", 1206 | "Team \n", 1207 | "Devils 1536 768.000000 134.350288\n", 1208 | "Kings 2285 761.666667 24.006943\n", 1209 | "Riders 3049 762.250000 88.567771\n", 1210 | "Royals 1505 752.500000 72.831998\n", 1211 | "kings 812 812.000000 NaN\n" 1212 | ] 1213 | } 1214 | ], 1215 | "source": [ 1216 | "# multiple Aggregations\n", 1217 | "\n", 
1218 | "print(grouped_a['Points'].agg([np.sum, np.mean, np.std]))" 1219 | ] 1220 | }, 1221 | { 1222 | "cell_type": "code", 1223 | "execution_count": 16, 1224 | "metadata": {}, 1225 | "outputs": [ 1226 | { 1227 | "name": "stdout", 1228 | "output_type": "stream", 1229 | "text": [ 1230 | " Rank Year Points\n", 1231 | "0 -4.500000 -3.485685 3.852982\n", 1232 | "1 1.500000 -1.161895 0.906086\n", 1233 | "2 -2.121320 -2.121320 2.121320\n", 1234 | "3 2.121320 2.121320 -2.121320\n", 1235 | "4 3.464102 -3.273268 -2.582586\n", 1236 | "5 NaN NaN NaN\n", 1237 | "6 -1.732051 0.654654 -0.708128\n", 1238 | "7 -1.732051 2.618615 3.290715\n", 1239 | "8 1.500000 1.161895 -2.311789\n", 1240 | "9 2.121320 -2.121320 -2.121320\n", 1241 | "10 -2.121320 2.121320 2.121320\n", 1242 | "11 1.500000 3.485685 -2.447278\n" 1243 | ] 1244 | } 1245 | ], 1246 | "source": [ 1247 | "# Transformation\n", 1248 | "score = lambda x: (x - x.mean()) / x.std()*3\n", 1249 | "print(grouped_a.transform(score))" 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "code", 1254 | "execution_count": 17, 1255 | "metadata": {}, 1256 | "outputs": [ 1257 | { 1258 | "name": "stdout", 1259 | "output_type": "stream", 1260 | "text": [ 1261 | " Team Rank Year Points\n", 1262 | "0 Riders 1 2014 876\n", 1263 | "1 Riders 2 2015 789\n", 1264 | "8 Riders 2 2016 694\n", 1265 | "11 Riders 2 2017 690\n" 1266 | ] 1267 | } 1268 | ], 1269 | "source": [ 1270 | "# Filtration\n", 1271 | "print(df.groupby('Team').filter(lambda x: len(x) >= 4))" 1272 | ] 1273 | }, 1274 | { 1275 | "cell_type": "markdown", 1276 | "metadata": {}, 1277 | "source": [ 1278 | "## Concatenating Data" 1279 | ] 1280 | }, 1281 | { 1282 | "cell_type": "code", 1283 | "execution_count": 18, 1284 | "metadata": {}, 1285 | "outputs": [], 1286 | "source": [ 1287 | "one = pd.DataFrame({\n", 1288 | " 'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],\n", 1289 | " 'subject_id':['sub1','sub2','sub4','sub6','sub5'],\n", 1290 | " 'Marks_scored':[98,90,87,69,78]},\n", 1291 | " 
index=[1,2,3,4,5])\n", 1292 | "two = pd.DataFrame({\n", 1293 | " 'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],\n", 1294 | " 'subject_id':['sub2','sub4','sub3','sub6','sub5'],\n", 1295 | " 'Marks_scored':[89,80,79,97,88]},\n", 1296 | " index=[1,2,3,4,5])" 1297 | ] 1298 | }, 1299 | { 1300 | "cell_type": "code", 1301 | "execution_count": 19, 1302 | "metadata": {}, 1303 | "outputs": [ 1304 | { 1305 | "data": { 1306 | "text/html": [ 1307 | "
\n", 1308 | "\n", 1321 | "\n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | "
Namesubject_idMarks_scored
1Alexsub198
2Amysub290
3Allensub487
4Alicesub669
5Ayoungsub578
\n", 1363 | "
" 1364 | ], 1365 | "text/plain": [ 1366 | " Name subject_id Marks_scored\n", 1367 | "1 Alex sub1 98\n", 1368 | "2 Amy sub2 90\n", 1369 | "3 Allen sub4 87\n", 1370 | "4 Alice sub6 69\n", 1371 | "5 Ayoung sub5 78" 1372 | ] 1373 | }, 1374 | "execution_count": 19, 1375 | "metadata": {}, 1376 | "output_type": "execute_result" 1377 | } 1378 | ], 1379 | "source": [ 1380 | "one" 1381 | ] 1382 | }, 1383 | { 1384 | "cell_type": "code", 1385 | "execution_count": 20, 1386 | "metadata": {}, 1387 | "outputs": [ 1388 | { 1389 | "data": { 1390 | "text/html": [ 1391 | "
\n", 1392 | "\n", 1405 | "\n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | "
Namesubject_idMarks_scored
1Billysub289
2Briansub480
3Bransub379
4Brycesub697
5Bettysub588
\n", 1447 | "
" 1448 | ], 1449 | "text/plain": [ 1450 | " Name subject_id Marks_scored\n", 1451 | "1 Billy sub2 89\n", 1452 | "2 Brian sub4 80\n", 1453 | "3 Bran sub3 79\n", 1454 | "4 Bryce sub6 97\n", 1455 | "5 Betty sub5 88" 1456 | ] 1457 | }, 1458 | "execution_count": 20, 1459 | "metadata": {}, 1460 | "output_type": "execute_result" 1461 | } 1462 | ], 1463 | "source": [ 1464 | "two" 1465 | ] 1466 | }, 1467 | { 1468 | "cell_type": "code", 1469 | "execution_count": 21, 1470 | "metadata": {}, 1471 | "outputs": [ 1472 | { 1473 | "name": "stdout", 1474 | "output_type": "stream", 1475 | "text": [ 1476 | " Name subject_id Marks_scored\n", 1477 | "1 Alex sub1 98\n", 1478 | "2 Amy sub2 90\n", 1479 | "3 Allen sub4 87\n", 1480 | "4 Alice sub6 69\n", 1481 | "5 Ayoung sub5 78\n", 1482 | "1 Billy sub2 89\n", 1483 | "2 Brian sub4 80\n", 1484 | "3 Bran sub3 79\n", 1485 | "4 Bryce sub6 97\n", 1486 | "5 Betty sub5 88\n" 1487 | ] 1488 | } 1489 | ], 1490 | "source": [ 1491 | "print(pd.concat([one,two]))" 1492 | ] 1493 | }, 1494 | { 1495 | "cell_type": "code", 1496 | "execution_count": 22, 1497 | "metadata": {}, 1498 | "outputs": [ 1499 | { 1500 | "name": "stdout", 1501 | "output_type": "stream", 1502 | "text": [ 1503 | " Name subject_id Marks_scored Name subject_id Marks_scored\n", 1504 | "1 Alex sub1 98 Billy sub2 89\n", 1505 | "2 Amy sub2 90 Brian sub4 80\n", 1506 | "3 Allen sub4 87 Bran sub3 79\n", 1507 | "4 Alice sub6 69 Bryce sub6 97\n", 1508 | "5 Ayoung sub5 78 Betty sub5 88\n" 1509 | ] 1510 | } 1511 | ], 1512 | "source": [ 1513 | "#concat along axis=1, columns\n", 1514 | "print(pd.concat([one,two],axis=1))" 1515 | ] 1516 | }, 1517 | { 1518 | "cell_type": "code", 1519 | "execution_count": 23, 1520 | "metadata": {}, 1521 | "outputs": [ 1522 | { 1523 | "name": "stdout", 1524 | "output_type": "stream", 1525 | "text": [ 1526 | " Name subject_id Marks_scored\n", 1527 | "1 Alex sub1 98\n", 1528 | "2 Amy sub2 90\n", 1529 | "3 Allen sub4 87\n", 1530 | "4 Alice sub6 69\n", 1531 | "5 Ayoung sub5 
78\n", 1532 | "1 Billy sub2 89\n", 1533 | "2 Brian sub4 80\n", 1534 | "3 Bran sub3 79\n", 1535 | "4 Bryce sub6 97\n", 1536 | "5 Betty sub5 88\n" 1537 | ] 1538 | } 1539 | ], 1540 | "source": [ 1541 | "# append\n", 1542 | "print(one.append(two))" 1543 | ] 1544 | }, 1545 | { 1546 | "cell_type": "markdown", 1547 | "metadata": {}, 1548 | "source": [ 1549 | "Reference:-\n", 1550 | "- [Tutorial Point - Merging](https://www.tutorialspoint.com/python_pandas/python_pandas_merging_joining.htm)\n", 1551 | "- [Tutorial Point - Grouping](https://www.tutorialspoint.com/python_pandas/python_pandas_groupby.htm)\n", 1552 | "- [Tutorial Point - concatenation](https://www.tutorialspoint.com/python_pandas/python_pandas_concatenation.htm)" 1553 | ] 1554 | }, 1555 | { 1556 | "cell_type": "markdown", 1557 | "metadata": {}, 1558 | "source": [ 1559 | "# PySpark" 1560 | ] 1561 | }, 1562 | { 1563 | "cell_type": "markdown", 1564 | "metadata": {}, 1565 | "source": [ 1566 | "The data sets used for this demonstration are access logs from the NASA Kennedy Space Center web server in Florida\n", 1567 | "- [Part-1](ftp://ita.ee.lbl.gov/traces/NASA_access_log_Jul95.gz)\n", 1568 | "- [Part-2](ftp://ita.ee.lbl.gov/traces/NASA_access_log_Aug95.gz)\n", 1569 | "\n", 1570 | "Make sure both the files are in the same directory as this notebook." 1571 | ] 1572 | }, 1573 | { 1574 | "cell_type": "code", 1575 | "execution_count": 24, 1576 | "metadata": {}, 1577 | "outputs": [], 1578 | "source": [ 1579 | "from pyspark.context import SparkContext\n", 1580 | "from pyspark.sql.context import SQLContext\n", 1581 | "from pyspark.sql.session import SparkSession\n", 1582 | " \n", 1583 | "sc = SparkContext()\n", 1584 | "sqlContext = SQLContext(sc)\n", 1585 | "spark = SparkSession(sc)" 1586 | ] 1587 | }, 1588 | { 1589 | "cell_type": "code", 1590 | "execution_count": 25, 1591 | "metadata": {}, 1592 | "outputs": [ 1593 | { 1594 | "data": { 1595 | "text/html": [ 1596 | "\n", 1597 | "
\n", 1598 | "

SparkSession - in-memory

\n", 1599 | " \n", 1600 | "
\n", 1601 | "

SparkContext

\n", 1602 | "\n", 1603 | "

Spark UI

\n", 1604 | "\n", 1605 | "
\n", 1606 | "
Version
\n", 1607 | "
v2.4.5
\n", 1608 | "
Master
\n", 1609 | "
local[*]
\n", 1610 | "
AppName
\n", 1611 | "
pyspark-shell
\n", 1612 | "
\n", 1613 | "
\n", 1614 | " \n", 1615 | "
\n", 1616 | " " 1617 | ], 1618 | "text/plain": [ 1619 | "" 1620 | ] 1621 | }, 1622 | "execution_count": 25, 1623 | "metadata": {}, 1624 | "output_type": "execute_result" 1625 | } 1626 | ], 1627 | "source": [ 1628 | "spark" 1629 | ] 1630 | }, 1631 | { 1632 | "cell_type": "code", 1633 | "execution_count": 26, 1634 | "metadata": {}, 1635 | "outputs": [ 1636 | { 1637 | "data": { 1638 | "text/plain": [ 1639 | "" 1640 | ] 1641 | }, 1642 | "execution_count": 26, 1643 | "metadata": {}, 1644 | "output_type": "execute_result" 1645 | } 1646 | ], 1647 | "source": [ 1648 | "sqlContext" 1649 | ] 1650 | }, 1651 | { 1652 | "cell_type": "code", 1653 | "execution_count": 27, 1654 | "metadata": {}, 1655 | "outputs": [], 1656 | "source": [ 1657 | "import re" 1658 | ] 1659 | }, 1660 | { 1661 | "cell_type": "markdown", 1662 | "metadata": {}, 1663 | "source": [ 1664 | "- [Regex](https://www.w3schools.com/python/python_regex.asp)\n", 1665 | "- [FindIter](https://www.tutorialspoint.com/How-do-we-use-re-finditer-method-in-Python-regular-expression)" 1666 | ] 1667 | }, 1668 | { 1669 | "cell_type": "code", 1670 | "execution_count": 28, 1671 | "metadata": {}, 1672 | "outputs": [ 1673 | { 1674 | "name": "stdout", 1675 | "output_type": "stream", 1676 | "text": [ 1677 | " String match \"spark\" at 19:24\n", 1678 | " String match \"spark\" at 30:35\n" 1679 | ] 1680 | } 1681 | ], 1682 | "source": [ 1683 | "# python example of how regular expressions can be used\n", 1684 | "s1 = 'Im searching for a spark in Pyspark'\n", 1685 | "pattern = 'spark'\n", 1686 | "for match in re.finditer(pattern, s1):\n", 1687 | " s = match.start()\n", 1688 | " e = match.end()\n", 1689 | " print(match, 'String match \"%s\" at %d:%d' % (s1[s:e], s, e))" 1690 | ] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "execution_count": 29, 1695 | "metadata": {}, 1696 | "outputs": [ 1697 | { 1698 | "data": { 1699 | "text/plain": [ 1700 | "['NASA_access_log_Aug95.gz', 'NASA_access_log_Jul95.gz']" 1701 | ] 1702 | }, 1703 | 
"execution_count": 29, 1704 | "metadata": {}, 1705 | "output_type": "execute_result" 1706 | } 1707 | ], 1708 | "source": [ 1709 | "# Load and View the Dataset using `sqlContext.read.text()` or `spark.read.text()`\n", 1710 | "import glob\n", 1711 | "\n", 1712 | "raw_data_files = glob.glob('*.gz')\n", 1713 | "raw_data_files" 1714 | ] 1715 | }, 1716 | { 1717 | "cell_type": "code", 1718 | "execution_count": 30, 1719 | "metadata": {}, 1720 | "outputs": [ 1721 | { 1722 | "name": "stdout", 1723 | "output_type": "stream", 1724 | "text": [ 1725 | "root\n", 1726 | " |-- value: string (nullable = true)\n", 1727 | "\n" 1728 | ] 1729 | } 1730 | ], 1731 | "source": [ 1732 | "base_df = spark.read.text(raw_data_files)\n", 1733 | "base_df.printSchema()" 1734 | ] 1735 | }, 1736 | { 1737 | "cell_type": "code", 1738 | "execution_count": 31, 1739 | "metadata": {}, 1740 | "outputs": [ 1741 | { 1742 | "data": { 1743 | "text/plain": [ 1744 | "pyspark.sql.dataframe.DataFrame" 1745 | ] 1746 | }, 1747 | "execution_count": 31, 1748 | "metadata": {}, 1749 | "output_type": "execute_result" 1750 | } 1751 | ], 1752 | "source": [ 1753 | "type(base_df)" 1754 | ] 1755 | }, 1756 | { 1757 | "cell_type": "markdown", 1758 | "metadata": {}, 1759 | "source": [ 1760 | "Converting data frame to RDD - Just to show the original data structure of spark\n", 1761 | "\n", 1762 | "[What is RDD?](https://databricks.com/glossary/what-is-rdd#:~:text=RDD%20was%20the%20primary%20user,that%20offers%20transformations%20and%20actions.)" 1763 | ] 1764 | }, 1765 | { 1766 | "cell_type": "code", 1767 | "execution_count": 32, 1768 | "metadata": {}, 1769 | "outputs": [ 1770 | { 1771 | "data": { 1772 | "text/plain": [ 1773 | "pyspark.rdd.RDD" 1774 | ] 1775 | }, 1776 | "execution_count": 32, 1777 | "metadata": {}, 1778 | "output_type": "execute_result" 1779 | } 1780 | ], 1781 | "source": [ 1782 | "base_df_rdd = base_df.rdd\n", 1783 | "type(base_df_rdd)" 1784 | ] 1785 | }, 1786 | { 1787 | "cell_type": "code", 1788 | 
"execution_count": 33, 1789 | "metadata": {}, 1790 | "outputs": [ 1791 | { 1792 | "name": "stdout", 1793 | "output_type": "stream", 1794 | "text": [ 1795 | "+-----------------------------------------------------------------------------------------------------------------------+\n", 1796 | "|value |\n", 1797 | "+-----------------------------------------------------------------------------------------------------------------------+\n", 1798 | "|199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245 |\n", 1799 | "|unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985 |\n", 1800 | "|199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085 |\n", 1801 | "|burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0 |\n", 1802 | "|199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179|\n", 1803 | "|burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0 |\n", 1804 | "|burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0 |\n", 1805 | "|205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985 |\n", 1806 | "|d104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985 |\n", 1807 | "|129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074 |\n", 1808 | "+-----------------------------------------------------------------------------------------------------------------------+\n", 1809 | "only showing top 10 rows\n", 1810 | "\n" 1811 | ] 1812 | } 1813 | ], 1814 | "source": [ 1815 | "#view data\n", 1816 | "base_df.show(10, truncate=False)" 1817 | ] 1818 | }, 1819 | { 1820 | "cell_type": "code", 1821 | "execution_count": 34, 
1822 | "metadata": {}, 1823 | "outputs": [ 1824 | { 1825 | "data": { 1826 | "text/plain": [ 1827 | "[Row(value='199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245'),\n", 1828 | " Row(value='unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985'),\n", 1829 | " Row(value='199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085'),\n", 1830 | " Row(value='burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0'),\n", 1831 | " Row(value='199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179'),\n", 1832 | " Row(value='burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0'),\n", 1833 | " Row(value='burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0'),\n", 1834 | " Row(value='205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985'),\n", 1835 | " Row(value='d104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985'),\n", 1836 | " Row(value='129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074')]" 1837 | ] 1838 | }, 1839 | "execution_count": 34, 1840 | "metadata": {}, 1841 | "output_type": "execute_result" 1842 | } 1843 | ], 1844 | "source": [ 1845 | "base_df_rdd.take(10)" 1846 | ] 1847 | }, 1848 | { 1849 | "cell_type": "markdown", 1850 | "metadata": {}, 1851 | "source": [ 1852 | "Above log data [which is similar to web server logs](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format) is clearly semi structured and needs some processing and wrangling to make it useful for modelling purpose\n", 1853 | "\n", 1854 | "Let's clean and parse our log dataset to extract structured 
attributes with meaningful information from each log message." 1855 | ] 1856 | }, 1857 | { 1858 | "cell_type": "markdown", 1859 | "metadata": {}, 1860 | "source": [ 1861 | "Data has to be parsed into individual columns. Special built-in [regexp\\_extract()](http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.functions.regexp_extract)\n", 1862 | "function can do the parsing. This function matches a column against a regular expression with one or more [capture groups](http://regexone.com/lesson/capturing_groups) and allows extraction of the matched groups. One regular expression is used for each field to extract" 1863 | ] 1864 | }, 1865 | { 1866 | "cell_type": "code", 1867 | "execution_count": 35, 1868 | "metadata": {}, 1869 | "outputs": [ 1870 | { 1871 | "name": "stdout", 1872 | "output_type": "stream", 1873 | "text": [ 1874 | "(3461613, 1)\n" 1875 | ] 1876 | } 1877 | ], 1878 | "source": [ 1879 | "# look at our dataset dimensions\n", 1880 | "#print((base_df.count(), len(base_df.columns)))\n", 1881 | "print(\"(3461613, 1)\")" 1882 | ] 1883 | }, 1884 | { 1885 | "cell_type": "code", 1886 | "execution_count": 36, 1887 | "metadata": {}, 1888 | "outputs": [ 1889 | { 1890 | "data": { 1891 | "text/plain": [ 1892 | "['199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245',\n", 1893 | " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985',\n", 1894 | " '199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085',\n", 1895 | " 'burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0',\n", 1896 | " '199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179',\n", 1897 | " 'burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0',\n", 1898 | " 
'burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0',\n", 1899 | " '205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985',\n", 1900 | " 'd104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985',\n", 1901 | " '129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074',\n", 1902 | " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /shuttle/countdown/count.gif HTTP/1.0\" 200 40310',\n", 1903 | " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 200 786',\n", 1904 | " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /images/KSC-logosmall.gif HTTP/1.0\" 200 1204',\n", 1905 | " 'd104.aa.net - - [01/Jul/1995:00:00:15 -0400] \"GET /shuttle/countdown/count.gif HTTP/1.0\" 200 40310',\n", 1906 | " 'd104.aa.net - - [01/Jul/1995:00:00:15 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 200 786']" 1907 | ] 1908 | }, 1909 | "execution_count": 36, 1910 | "metadata": {}, 1911 | "output_type": "execute_result" 1912 | } 1913 | ], 1914 | "source": [ 1915 | "sample_logs = [item['value'] for item in base_df.take(15)]\n", 1916 | "sample_logs" 1917 | ] 1918 | }, 1919 | { 1920 | "cell_type": "code", 1921 | "execution_count": 37, 1922 | "metadata": {}, 1923 | "outputs": [ 1924 | { 1925 | "data": { 1926 | "text/plain": [ 1927 | "['199.72.81.55',\n", 1928 | " 'unicomp6.unicomp.net',\n", 1929 | " '199.120.110.21',\n", 1930 | " 'burger.letters.com',\n", 1931 | " '199.120.110.21',\n", 1932 | " 'burger.letters.com',\n", 1933 | " 'burger.letters.com',\n", 1934 | " '205.212.115.106',\n", 1935 | " 'd104.aa.net',\n", 1936 | " '129.94.144.152',\n", 1937 | " 'unicomp6.unicomp.net',\n", 1938 | " 'unicomp6.unicomp.net',\n", 1939 | " 'unicomp6.unicomp.net',\n", 1940 | " 'd104.aa.net',\n", 1941 | " 'd104.aa.net']" 1942 | ] 1943 | }, 1944 | "execution_count": 37, 
1945 | "metadata": {}, 1946 | "output_type": "execute_result" 1947 | } 1948 | ], 1949 | "source": [ 1950 | "# Extracting hostnames\n", 1951 | "# Regular expressions to extract the hostname from the logs:\n", 1952 | "host_pattern = r'(^\\S+\\.[\\S+\\.]+\\S+)\\s'\n", 1953 | "hosts = [re.search(host_pattern, item).group(1)\n", 1954 | " if re.search(host_pattern, item)\n", 1955 | " else 'no match'\n", 1956 | " for item in sample_logs]\n", 1957 | "hosts" 1958 | ] 1959 | }, 1960 | { 1961 | "cell_type": "code", 1962 | "execution_count": 38, 1963 | "metadata": {}, 1964 | "outputs": [ 1965 | { 1966 | "data": { 1967 | "text/plain": [ 1968 | "['01/Jul/1995:00:00:01 -0400',\n", 1969 | " '01/Jul/1995:00:00:06 -0400',\n", 1970 | " '01/Jul/1995:00:00:09 -0400',\n", 1971 | " '01/Jul/1995:00:00:11 -0400',\n", 1972 | " '01/Jul/1995:00:00:11 -0400',\n", 1973 | " '01/Jul/1995:00:00:12 -0400',\n", 1974 | " '01/Jul/1995:00:00:12 -0400',\n", 1975 | " '01/Jul/1995:00:00:12 -0400',\n", 1976 | " '01/Jul/1995:00:00:13 -0400',\n", 1977 | " '01/Jul/1995:00:00:13 -0400',\n", 1978 | " '01/Jul/1995:00:00:14 -0400',\n", 1979 | " '01/Jul/1995:00:00:14 -0400',\n", 1980 | " '01/Jul/1995:00:00:14 -0400',\n", 1981 | " '01/Jul/1995:00:00:15 -0400',\n", 1982 | " '01/Jul/1995:00:00:15 -0400']" 1983 | ] 1984 | }, 1985 | "execution_count": 38, 1986 | "metadata": {}, 1987 | "output_type": "execute_result" 1988 | } 1989 | ], 1990 | "source": [ 1991 | "# Extracting timestamps\n", 1992 | "ts_pattern = r'\\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} -\\d{4})]'\n", 1993 | "timestamps = [re.search(ts_pattern, item).group(1) for item in sample_logs]\n", 1994 | "timestamps" 1995 | ] 1996 | }, 1997 | { 1998 | "cell_type": "code", 1999 | "execution_count": 39, 2000 | "metadata": {}, 2001 | "outputs": [ 2002 | { 2003 | "data": { 2004 | "text/plain": [ 2005 | "[('GET', '/history/apollo/', 'HTTP/1.0'),\n", 2006 | " ('GET', '/shuttle/countdown/', 'HTTP/1.0'),\n", 2007 | " ('GET', 
'/shuttle/missions/sts-73/mission-sts-73.html', 'HTTP/1.0'),\n", 2008 | " ('GET', '/shuttle/countdown/liftoff.html', 'HTTP/1.0'),\n", 2009 | " ('GET', '/shuttle/missions/sts-73/sts-73-patch-small.gif', 'HTTP/1.0'),\n", 2010 | " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0'),\n", 2011 | " ('GET', '/shuttle/countdown/video/livevideo.gif', 'HTTP/1.0'),\n", 2012 | " ('GET', '/shuttle/countdown/countdown.html', 'HTTP/1.0'),\n", 2013 | " ('GET', '/shuttle/countdown/', 'HTTP/1.0'),\n", 2014 | " ('GET', '/', 'HTTP/1.0'),\n", 2015 | " ('GET', '/shuttle/countdown/count.gif', 'HTTP/1.0'),\n", 2016 | " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0'),\n", 2017 | " ('GET', '/images/KSC-logosmall.gif', 'HTTP/1.0'),\n", 2018 | " ('GET', '/shuttle/countdown/count.gif', 'HTTP/1.0'),\n", 2019 | " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0')]" 2020 | ] 2021 | }, 2022 | "execution_count": 39, 2023 | "metadata": {}, 2024 | "output_type": "execute_result" 2025 | } 2026 | ], 2027 | "source": [ 2028 | "# Extracting HTTP request method, URIs, and protocol\n", 2029 | "\n", 2030 | "method_uri_protocol_pattern = r'\\\"(\\S+)\\s(\\S+)\\s*(\\S*)\\\"'\n", 2031 | "method_uri_protocol = [re.search(method_uri_protocol_pattern, item).groups()\n", 2032 | " if re.search(method_uri_protocol_pattern, item)\n", 2033 | " else 'no match'\n", 2034 | " for item in sample_logs]\n", 2035 | "method_uri_protocol" 2036 | ] 2037 | }, 2038 | { 2039 | "cell_type": "code", 2040 | "execution_count": 40, 2041 | "metadata": {}, 2042 | "outputs": [ 2043 | { 2044 | "name": "stdout", 2045 | "output_type": "stream", 2046 | "text": [ 2047 | "['200', '200', '200', '304', '200', '304', '200', '200', '200', '200', '200', '200', '200', '200', '200']\n" 2048 | ] 2049 | } 2050 | ], 2051 | "source": [ 2052 | "# Extracting HTTP status codes\n", 2053 | "\n", 2054 | "status_pattern = r'\\s(\\d{3})\\s'\n", 2055 | "status = [re.search(status_pattern, item).group(1) for item in sample_logs]\n", 2056 | "print(status)" 2057 | ] 
2058 | }, 2059 | { 2060 | "cell_type": "code", 2061 | "execution_count": 41, 2062 | "metadata": {}, 2063 | "outputs": [ 2064 | { 2065 | "name": "stdout", 2066 | "output_type": "stream", 2067 | "text": [ 2068 | "['6245', '3985', '4085', '0', '4179', '0', '0', '3985', '3985', '7074', '40310', '786', '1204', '40310', '786']\n" 2069 | ] 2070 | } 2071 | ], 2072 | "source": [ 2073 | "# Extracting HTTP response content size\n", 2074 | "\n", 2075 | "content_size_pattern = r'\\s(\\d+)$'\n", 2076 | "content_size = [re.search(content_size_pattern, item).group(1) for item in sample_logs]\n", 2077 | "print(content_size)" 2078 | ] 2079 | }, 2080 | { 2081 | "cell_type": "code", 2082 | "execution_count": 42, 2083 | "metadata": { 2084 | "scrolled": true 2085 | }, 2086 | "outputs": [ 2087 | { 2088 | "name": "stdout", 2089 | "output_type": "stream", 2090 | "text": [ 2091 | "+--------------------+--------------------+------+--------------------+--------+------+------------+\n", 2092 | "| host| timestamp|method| endpoint|protocol|status|content_size|\n", 2093 | "+--------------------+--------------------+------+--------------------+--------+------+------------+\n", 2094 | "| 199.72.81.55|01/Jul/1995:00:00...| GET| /history/apollo/|HTTP/1.0| 200| 6245|\n", 2095 | "|unicomp6.unicomp.net|01/Jul/1995:00:00...| GET| /shuttle/countdown/|HTTP/1.0| 200| 3985|\n", 2096 | "| 199.120.110.21|01/Jul/1995:00:00...| GET|/shuttle/missions...|HTTP/1.0| 200| 4085|\n", 2097 | "| burger.letters.com|01/Jul/1995:00:00...| GET|/shuttle/countdow...|HTTP/1.0| 304| 0|\n", 2098 | "| 199.120.110.21|01/Jul/1995:00:00...| GET|/shuttle/missions...|HTTP/1.0| 200| 4179|\n", 2099 | "| burger.letters.com|01/Jul/1995:00:00...| GET|/images/NASA-logo...|HTTP/1.0| 304| 0|\n", 2100 | "| burger.letters.com|01/Jul/1995:00:00...| GET|/shuttle/countdow...|HTTP/1.0| 200| 0|\n", 2101 | "| 205.212.115.106|01/Jul/1995:00:00...| GET|/shuttle/countdow...|HTTP/1.0| 200| 3985|\n", 2102 | "| d104.aa.net|01/Jul/1995:00:00...| GET| 
/shuttle/countdown/|HTTP/1.0| 200| 3985|\n", 2103 | "| 129.94.144.152|01/Jul/1995:00:00...| GET| /|HTTP/1.0| 200| 7074|\n", 2104 | "+--------------------+--------------------+------+--------------------+--------+------+------------+\n", 2105 | "only showing top 10 rows\n", 2106 | "\n" 2107 | ] 2108 | } 2109 | ], 2110 | "source": [ 2111 | "# Putting it all together\n", 2112 | "\n", 2113 | "# regexp_extract(...) method to build our DataFrame with all of the log attributes neatly extracted in their own separate columns.\n", 2114 | "\n", 2115 | "from pyspark.sql.functions import regexp_extract\n", 2116 | "\n", 2117 | "logs_df = base_df.select(regexp_extract('value', host_pattern, 1).alias('host'),\n", 2118 | " regexp_extract('value', ts_pattern, 1).alias('timestamp'),\n", 2119 | " regexp_extract('value', method_uri_protocol_pattern, 1).alias('method'),\n", 2120 | " regexp_extract('value', method_uri_protocol_pattern, 2).alias('endpoint'),\n", 2121 | " regexp_extract('value', method_uri_protocol_pattern, 3).alias('protocol'),\n", 2122 | " regexp_extract('value', status_pattern, 1).cast('integer').alias('status'),\n", 2123 | " regexp_extract('value', content_size_pattern, 1).cast('integer').alias('content_size'))\n", 2124 | "logs_df.show(10, truncate=True)\n" 2125 | ] 2126 | }, 2127 | { 2128 | "cell_type": "markdown", 2129 | "metadata": {}, 2130 | "source": [ 2131 | "## Reference:-\n", 2132 | "\n", 2133 | " - To know more and wrangle data via Pyspark refer this [link](https://opensource.com/article/19/5/log-data-apache-spark)" 2134 | ] 2135 | }, 2136 | { 2137 | "cell_type": "code", 2138 | "execution_count": null, 2139 | "metadata": {}, 2140 | "outputs": [], 2141 | "source": [] 2142 | } 2143 | ], 2144 | "metadata": { 2145 | "kernelspec": { 2146 | "display_name": "Python 3", 2147 | "language": "python", 2148 | "name": "python3" 2149 | }, 2150 | "language_info": { 2151 | "codemirror_mode": { 2152 | "name": "ipython", 2153 | "version": 3 2154 | }, 2155 | "file_extension": 
".py", 2156 | "mimetype": "text/x-python", 2157 | "name": "python", 2158 | "nbconvert_exporter": "python", 2159 | "pygments_lexer": "ipython3", 2160 | "version": "3.7.4" 2161 | } 2162 | }, 2163 | "nbformat": 4, 2164 | "nbformat_minor": 2 2165 | } 2166 | -------------------------------------------------------------------------------- /List Comprehensions_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/List Comprehensions_Python.pdf -------------------------------------------------------------------------------- /Pandas Crosstab vs Pivot table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "af387817", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# import libraries\n", 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "f8a9e062", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#Import dataset\n", 23 | "df = pd.read_csv('sample data.csv')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "ef83769a", 29 | "metadata": {}, 30 | "source": [ 31 | "# Crosstab\n", 32 | "\n", 33 | "`pd.crosstab()` - It is used to get an initial view of the data. It is a tabular structure showing relationship between various variables\n", 34 | "\n", 35 | "Here you can quickly see the percentage of loan defaulters w.r.t loan grade" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "id": "1b57fd9b", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | "
Loan Status01
Grade
A0.9034970.096503
B0.9049520.095048
C0.9053550.094645
D0.9071360.092864
E0.9022240.097776
F0.9149660.085034
G0.8849210.115079
All0.9049910.095009
\n", 117 | "
" 118 | ], 119 | "text/plain": [ 120 | "Loan Status 0 1\n", 121 | "Grade \n", 122 | "A 0.903497 0.096503\n", 123 | "B 0.904952 0.095048\n", 124 | "C 0.905355 0.094645\n", 125 | "D 0.907136 0.092864\n", 126 | "E 0.902224 0.097776\n", 127 | "F 0.914966 0.085034\n", 128 | "G 0.884921 0.115079\n", 129 | "All 0.904991 0.095009" 130 | ] 131 | }, 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "pd.crosstab(index=df['Grade'], columns=df['Loan Status'], margins=True, normalize = 'index')" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "id": "eb6dd7da", 144 | "metadata": {}, 145 | "source": [ 146 | "# Pivot Table\n", 147 | "\n", 148 | "Similar to excel spreadsheet pivot functionality. Data can be visualized at various hierarchy of rows and columns. It lets you calculate, summarize and aggregate your data\n", 149 | "\n", 150 | "Here you can see the aggregation of loan amount (mean) based on loan status and grade" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 32, 156 | "id": "eb14e4ce", 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\n", 176 | "\n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
Loan Amount
Loan StatusGrade
0A16252.193081
B16615.844804
C16059.379174
D16594.997490
E16908.963026
F16920.323420
G16745.901345
1A15789.390688
B16442.690789
C15737.199478
D16458.419619
E16335.898039
F17092.786667
G18612.172414
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " Loan Amount\n", 254 | "Loan Status Grade \n", 255 | "0 A 16252.193081\n", 256 | " B 16615.844804\n", 257 | " C 16059.379174\n", 258 | " D 16594.997490\n", 259 | " E 16908.963026\n", 260 | " F 16920.323420\n", 261 | " G 16745.901345\n", 262 | "1 A 15789.390688\n", 263 | " B 16442.690789\n", 264 | " C 15737.199478\n", 265 | " D 16458.419619\n", 266 | " E 16335.898039\n", 267 | " F 17092.786667\n", 268 | " G 18612.172414" 269 | ] 270 | }, 271 | "execution_count": 32, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "pd.pivot_table(df, values = ['Loan Amount'], index = ['Loan Status', 'Grade'], aggfunc = np.mean)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 33, 283 | "id": "af45ee5e", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/html": [ 289 | "
\n", 290 | "\n", 307 | "\n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
Loan Amount
Employment DurationMORTGAGEOWNRENT
Loan Status010101
Grade
A16448.63191616081.43214315922.34369315881.75806516011.86206915213.743421
B17086.94026017003.88657416320.04275115233.76595715870.43361815782.131673
C16683.95420116461.77358515899.36363614337.85915515327.37786615215.772308
D17193.63938217084.95604416732.00781215137.25000015753.38612416148.000000
E17509.31188917437.47500016268.36492916632.73913016250.25164115094.678571
F17342.45814018054.82051316262.32978712378.88888916497.47349817274.481481
G16058.17142918264.28571416221.81081116140.60000017876.80246920335.000000
\n", 404 | "
" 405 | ], 406 | "text/plain": [ 407 | " Loan Amount \\\n", 408 | "Employment Duration MORTGAGE OWN \n", 409 | "Loan Status 0 1 0 1 \n", 410 | "Grade \n", 411 | "A 16448.631916 16081.432143 15922.343693 15881.758065 \n", 412 | "B 17086.940260 17003.886574 16320.042751 15233.765957 \n", 413 | "C 16683.954201 16461.773585 15899.363636 14337.859155 \n", 414 | "D 17193.639382 17084.956044 16732.007812 15137.250000 \n", 415 | "E 17509.311889 17437.475000 16268.364929 16632.739130 \n", 416 | "F 17342.458140 18054.820513 16262.329787 12378.888889 \n", 417 | "G 16058.171429 18264.285714 16221.810811 16140.600000 \n", 418 | "\n", 419 | " \n", 420 | "Employment Duration RENT \n", 421 | "Loan Status 0 1 \n", 422 | "Grade \n", 423 | "A 16011.862069 15213.743421 \n", 424 | "B 15870.433618 15782.131673 \n", 425 | "C 15327.377866 15215.772308 \n", 426 | "D 15753.386124 16148.000000 \n", 427 | "E 16250.251641 15094.678571 \n", 428 | "F 16497.473498 17274.481481 \n", 429 | "G 17876.802469 20335.000000 " 430 | ] 431 | }, 432 | "execution_count": 33, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "pd.pivot_table(df, values = ['Loan Amount'], index = ['Grade'], columns = ['Employment Duration','Loan Status'], aggfunc = np.mean)" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "id": "e65c286d", 444 | "metadata": {}, 445 | "source": [ 446 | "- `Crosstab` works with series or list of variables whereas `Pivot table` works only with dataframe\n", 447 | "- `Pivot table` does not have the normalize argument. 
In `crosstab`, the `normalize` argument converts the counts to proportions by dividing each cell by a total, as described below:\n", 448 | "\n", 449 | " - normalize = `'index'` divides each cell by the sum of its row\n", 450 | " - normalize = `'columns'` divides each cell by the sum of its column\n", 451 | " - normalize = `True` (or `'all'`) divides each cell by the total of all cells in the table" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "id": "eac6d495", 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [] 461 | } 462 | ], 463 | "metadata": { 464 | "kernelspec": { 465 | "display_name": "Python 3 (ipykernel)", 466 | "language": "python", 467 | "name": "python3" 468 | }, 469 | "language_info": { 470 | "codemirror_mode": { 471 | "name": "ipython", 472 | "version": 3 473 | }, 474 | "file_extension": ".py", 475 | "mimetype": "text/x-python", 476 | "name": "python", 477 | "nbconvert_exporter": "python", 478 | "pygments_lexer": "ipython3", 479 | "version": "3.9.7" 480 | } 481 | }, 482 | "nbformat": 4, 483 | "nbformat_minor": 5 484 | } 485 | -------------------------------------------------------------------------------- /Pandas_Cheat_Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Pandas_Cheat_Sheet.pdf -------------------------------------------------------------------------------- /Pandas_Data Wrangling_CheatSheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Pandas_Data Wrangling_CheatSheet.pdf -------------------------------------------------------------------------------- /Polynomial Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type":
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Polynomial Regression in Python\n", 8 | "- Regression analysis is a form of predictive modelling technique which investigates the relationship between a dependent and independent variable\n", 9 | "- Comparison with Linear Regression\n", 10 | "- [Dataset](https://media.geeksforgeeks.org/wp-content/uploads/data.csv)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "### Import libraries and dataset\n", 18 | "- Import the important libraries and the dataset we are using to perform Polynomial Regression" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Importing the libraries \n", 28 | "import numpy as np \n", 29 | "import matplotlib.pyplot as plt \n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | "
snoTemperaturePressure
0100.0002
12200.0012
23400.0060
34600.0300
45800.0900
561000.2700
\n", 103 | "
" 104 | ], 105 | "text/plain": [ 106 | " sno Temperature Pressure\n", 107 | "0 1 0 0.0002\n", 108 | "1 2 20 0.0012\n", 109 | "2 3 40 0.0060\n", 110 | "3 4 60 0.0300\n", 111 | "4 5 80 0.0900\n", 112 | "5 6 100 0.2700" 113 | ] 114 | }, 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "# Importing the dataset \n", 122 | "datas = pd.read_csv('C:/Users/arock.000/Downloads/data.csv') \n", 123 | "datas" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Dividing the dataset into 2 components\n", 131 | "- Divide the dataset into two components, X and y. X holds the column at position 1 (Temperature), selected with the slice 1:2 so that it stays two-dimensional; y holds the column at position 2 (Pressure)." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "X = datas.iloc[:, 1:2].values \n", 141 | "y = datas.iloc[:, 2].values" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Fitting Linear Regression to the dataset\n", 149 | "- Fit the Linear Regression model on the two components, X and y." 
150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 161 | ] 162 | }, 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "# Fitting Linear Regression to the dataset \n", 170 | "from sklearn.linear_model import LinearRegression \n", 171 | "lin = LinearRegression() \n", 172 | " \n", 173 | "lin.fit(X, y)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "### Fitting Polynomial Regression to the dataset\n", 181 | "- Fitting the Polynomial Regression model on two components X and y." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 7, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 193 | ] 194 | }, 195 | "execution_count": 7, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "# Fitting Polynomial Regression to the dataset \n", 202 | "from sklearn.preprocessing import PolynomialFeatures \n", 203 | " \n", 204 | "poly = PolynomialFeatures(degree = 4) \n", 205 | "X_poly = poly.fit_transform(X) \n", 206 | " \n", 207 | "poly.fit(X_poly, y) \n", 208 | "lin2 = LinearRegression() \n", 209 | "lin2.fit(X_poly, y)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Visualization:\n", 217 | "- Visualizing the Linear Regression results using scatter plot\n", 218 | "- Visualising the Polynomial Regression results using scatter plot" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 8, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZAAAAEWCAYAAABIVsEJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZyVdfn/8dcbEBXDpcQlVi3U0NRyIs3MJTU0Sy3Nhcwt0cpc0hS1+tmCueGO4qgoFea+kBuSlfJ1ZXBhUUlEEAQFFUUlWa/fH5978oQzcubMnLlnznk/H4/zmHOv5/qMcq65P6siAjMzs6bqkHcAZmbWPjmBmJlZSZxAzMysJE4gZmZWEicQMzMriROImZmVxAnEKoqknSRNzTuOSiBpiqRd8o7D2i4nEGuXJM2QtPvK+yNiXERsnkdMK5N0tqSlkt6X9I6kxyTtkHdcxYqILSPiX3nHYW2XE4hZC5DUqZFDN0fEp4D1gX8Ct5bhsyXJ/5at1fl/OqsoknaRNLtge4akUyVNlPSupJslrVFwfB9JzxY8IWxdcGywpJclvSfpeUn7Fxw7QtKjki6W9DZw9ifFFRHLgFFAd0ndsnusI+k6SXMlvSbpD5I6Zsc6Shoq6U1Jr0g6XlLUJypJ/5I0RNKjwCJgU0lbSBor6W1JUyX9oCDevbMyvJd91qnZ/vUl3ZOV/21J4+qTUeFTnqTVJV0iaU72ukTS6oW/c0mnSJqXlefI0v4LWnviBGLV4AfAAGATYGvgCABJXwZGAMcCnwGuBkbXfzECLwM7AesAvwX+Imnjgvt+FZgObAAM+aQAJHUGfgS8BSzIdo8ElgGfB74E7An8ODt2DLAXsC3wZWC/Bm57GDAI6ArMB8YCN2bxHAJcKWnL7NzrgGMjoiuwFfCPbP8pwGygG7AhcCbQ0PxGZwHbZ/FsA/QHflVwfCPS76k7cDQwTNJ6n/Q7sfbPCcSqwWURMSci3gb+RvoShPQlfXVEPBkRyyNiJLCY9EVJRNyaXbciIm4GXiJ9cdabExGXR8SyiPhPI5/9A0nvAP/JPu+AiFgmaUNSgjgpIj6IiHnAxcDB9dcBl0bE7IhYAJzbwL1viIgp2dPNAGBGRFyfxfM0cDtwQHbuUqCfpLUjYkF2vH7/xkDviFiatSE1lEAGAr+LiHkRMZ+UUA8rOL40O740Iu4D3gfaRFuUlY8TiFWD1wveLwI+lb3vDZySVd+8k33R9wQ+CyDpRwXVW++Q/nJfv+Bes4r47FsiYl3SX/eTge0KPns1YG7B/a8mPT2QxVB4/4Y+q3Bfb+CrK5VlIOnJAOD7wN7ATEkPFzTmXwBMAx6UNF3S4EbK8VlgZsH2zGxfvbeyRFav8PdsFaqxhj+zajALGBIRH6t+ktQbuAb4JvB4RCyX9CyggtOKnso6It6UdCwwXtKN2WcvBtZf6Yu33lygR8F2z4Zuu1JZHo6IPRr5/PHAvpJWA44HbgF6RsR7pGqsU7Lqrn9KGh8RD610izmkJDUl2+6V7bMq5icQa89Wk7RGwaupfxBdAxwn6atZT6a1JH1bUldgLdIX9HyArFF4q+YEGxEvAmOA0yJiLvAgMFTS2pI6SPqcpJ2z028BTpTUXdK6wOmruP09wGaSDpO0Wvb6iqQvSOosaaCkdSJiKbAQWJ6Vax9Jn5ekgv3LG7j/X4FfSeomaX3gN8BfmvP7sPbPCcTas/tIbQv1r7ObcnFE1JHaJa4gNWxPI2tgj4jngaHA48AbwBeBR1sg5guAQZI2IDWqdwaezz7/NlJ7BKTk9iAwEXiGVNZlNPzlTvYksSepDWUOqdruPKC+Q8BhwAxJC4HjgB9m+/sCfye1WTwOXNnI2I8/AHVZPJOAp7N9VsXkBaXM2j5JewHDI6J33rGY1fMTiFkbJGnNbOxGJ0ndgf8H3Jl3XGaF/ARi1gZJ6gI8DGxBqp67FzgxIhbmGphZAScQMzMriauwzMy
sJFU1DmT99dePPn365B2GmVm7MmHChDcjotvK+6sqgfTp04e6urq8wzAza1ckzWxov6uwzMysJE4gZmZWEicQMzMriROImZmVxAnEzMxK4gRiZlbBRo2CPn2gQ4f0c9Solrt3VXXjNTOrJqNGwaBBsGhR2p45M20DDBzY/Pv7CcTMrEKdddZHyaPeokVpf0twAjEzq1Cvvtq0/U3lBGJmVqF69Wra/qZyAjEzq1BDhkCXLv+7r0uXtL8lOIGYmVWogQOhthZ69wYp/aytbZkGdHAvLDOzijZwYMsljJX5CcTMzEriBGJmZiVxAjEzs5I4gZiZWUlyTSCSBkiaKmmapMENHB8oaWL2ekzSNgXHZkiaJOlZSV5m0MysleXWC0tSR2AYsAcwGxgvaXREPF9w2ivAzhGxQNJeQC3w1YLju0bEm60WtJmZ/VeeTyD9gWkRMT0ilgA3AfsWnhARj0XEgmzzCaBHK8doZmaNyDOBdAdmFWzPzvY15mjg/oLtAB6UNEHSoDLEZ2ZmnyDPgYRqYF80eKK0KymBfL1g944RMUfSBsBYSS9GxCMNXDsIGATQq6UmgDEzs1yfQGYDPQu2ewBzVj5J0tbAtcC+EfFW/f6ImJP9nAfcSaoS+5iIqI2Imoio6datWwuGb2ZW3fJMIOOBvpI2kdQZOBgYXXiCpF7AHcBhEfHvgv1rSepa/x7YE5jcapGbmVl+VVgRsUzS8cAYoCMwIiKmSDouOz4c+A3wGeBKSQDLIqIG2BC4M9vXCbgxIh7IoRhmZlVLEQ02O1SkmpqaqKvzkBEzs6aQNCH74/1/eCS6mZmVxAnEzMxK4gRiZmYlcQIxM7OSOIGYmVlJnEDMzKwkTiBmZlYSJxAzMyuJE4iZmZXECcTMzEriBGJmZiVxAjEzs5I4gZiZWUmcQMzMrCROIGZmVhInEDMzK4kTiJmZlcQJxMzMSuIEYmZmJXECMTOzkjiBmJlZSZxAzMysJLkmEEkDJE2VNE3S4AaOD5Q0MXs9JmmbYq81M7Pyyi2BSOoIDAP2AvoBh0jqt9JprwA7R8TWwO+B2iZca2ZmZZTnE0h/YFpETI+IJcBNwL6FJ0TEYxGxINt8AuhR7LVmZlZeeSaQ7sCsgu3Z2b7GHA3c39RrJQ2SVCepbv78+c0I18zMCuWZQNTAvmjwRGlXUgI5vanXRkRtRNRERE23bt1KCtTMzD6uU46fPRvoWbDdA5iz8kmStgauBfaKiLeacq2ZmZVPnk8g44G+kjaR1Bk4GBhdeIKkXsAdwGER8e+mXGtmZuWV2xNIRCyTdDwwBugIjIiIKZKOy44PB34DfAa4UhLAsqw6qsFrcymImVmVUkSDTQcVqaamJurq6vIOw8ysXZE0ISJqVt7vkehmZlYSJxAzMyuJE4iZmZXECcTMrNJNmQJlaO92AjEzq1TjxsHee8NWW8G997b47Z1AzMwqSQTcdx/stBN84xswfjz84Q/w9a+3+EflORLdzMxayvLlcNtt8Mc/wnPPQc+ecNllcPTR0KVLWT7SCcTMrD1bvBj+/Gc47zyYNg023xyuvx4OPRQ6dy7rRzuBmJm1R++/D7W1MHQozJkD222XnkD22w86dmyVEJxAzMzak7ffhssvT9VTb78Nu+ySnjj22APU0ETl5eMEYmbWHsyZAxddBMOHwwcfwHe+A2ecATvskFtITiBmZm3Zyy/D+efDDTfAsmVwyCFw+unwxS/mHZkTiJlZmzRxYupRdcstsNpqcNRR8Mtfwqab5h3ZfzmBmJm1JY8+mhLHvffCpz4Fp5wCJ58MG2+cd2Qf4wRiZpa3CBgzJiWORx6Bz3wGfvc7OP54WG+9vKNrlBOImVleli+H22+Hc8+FZ56BHj3gkkvgxz+GtdbKO7pVcgIxM2ttS5Z8NPjvpZdgs81gxAgYOLDsg/9akhOImVlr+eADuOYauPBCeO01+NKX4NZbYf/9W23wX0tyAjEzK7cFC+CKK+DSS+Gtt2D
nneG662DPPVt98F9LcgIxMyuXuXM/Gvz3/vuwzz5p8N/XvpZ3ZC3CCcTMrKVNn54G/11/fRr8d9BBMHgwbL113pG1KCcQM7OWMmlS6lF1003QqRMceWQa/Pe5z+UdWVnkuqCUpAGSpkqaJmlwA8e3kPS4pMWSTl3p2AxJkyQ9K6mu9aI2M1vJ44+nuam23hruvht+8Qt45ZVUdVWhyQNyfAKR1BEYBuwBzAbGSxodEc8XnPY2cAKwXyO32TUi3ixvpGZmDYiABx9Mg/8efhg+/Wn47W/T4L9Pfzrv6FpFnk8g/YFpETE9IpYANwH7Fp4QEfMiYjywNI8Azcw+pn7lv5oaGDAgLeJ00UUwcyb85jdVkzwg3wTSHZhVsD0721esAB6UNEHSoMZOkjRIUp2kuvnz55cYqplVvSVLUqP4llvCgQfCwoVw7bVpttyTT07zVlWZPBvRG+r8HE24fseImCNpA2CspBcj4pGP3TCiFqgFqKmpacr9zcxg0aKUKC68EGbNgm23hZtvhu9/v10O/mtJeSaQ2UDPgu0ewJxiL46IOdnPeZLuJFWJfSyBmJmVZMECGDYsDf57803YaSe4+upUbdWOB/+1pDyrsMYDfSVtIqkzcDAwupgLJa0lqWv9e2BPYHLZIjWz6vH662nBpt694de/hv79Ydy4NEvuXns5eRTI7QkkIpZJOh4YA3QERkTEFEnHZceHS9oIqAPWBlZIOgnoB6wP3Kn0H7ITcGNEPJBHOcysQrzyClxwQZrUcOnS1M4xeHCqsrIG5TqQMCLuA+5bad/wgvevk6q2VrYQ2Ka80ZlZVZg8+aPBfx07wuGHw2mnwec/n3dkbV7RCUTSmkCviJhaxnjMzFrHE0+kMRyjR6e1N048MQ0A7N6UzqDVrag2EEnfAZ4FHsi2t5VUVHuFmVmbEQFjx8Juu8EOO6S2jbPPTmM4hg518miiYp9Azib1cvoXQEQ8K6lPWSIyM2tpK1bAXXfBOefAhAnw2c+mhDFoUFWO32gpxSaQZRHxrtz7wMzak6VLYdSotPLfiy+mealqa+FHP4LVV887unav2AQyWdKhQEdJfUnzUz1WvrDMzJph0aK0YNOFF8Krr6ZJDv/6VzjggDRLrrWIYseB/BzYElgM3Ai8C5xUrqDMzEryzjupmqpPHzjhBOjZE+69F559Fg4+2Mmjha3yt5nNmjs6InYHzip/SGZmTfTGG3DJJXDllWmOqr32Siv/7bRT3pFVtFUmkIhYLmmRpHUi4t3WCMrMrCgzZnw0+G/x4o8G/33pS3lHVhWKfZ77EJgkaSzwQf3OiDihLFGZmX2S559Pg/9uvBE6dEiN4qedBpttlndkVaXYBHJv9jIzy89TT6XBf3fdBV26wM9/DqecAj0amrDCyq2oBBIRI8sdiJlZgyLgH/9IieOhh2DdddPCTT//Oay/ft7RVbWiEoikV2hgrY6I2LTFIzIzgzT4b/TolDieego22ii1dxx7LHTtmnd0RvFVWDUF79cADgSqZ91GM2s9S5emiQ3PPTe1dWyyCQwfniY5XGONvKOzAkWNA4mItwper0XEJcBuZY7NzKrJf/6TFnDq2zc1infokEaR//vf6anDyaPNKbYK68sFmx1ITyR+hjSz5nv3XbjqKrj4Ypg3L01yePnl8O1vpyRibVaxVVhDC94vA2YAP2jxaMysesybl5aLHTYsJZE994Qzz4RvfMOr/rUTxfbC2rXcgZhZlXj11TRH1bXXwocfwve/nwb/bbdd3pFZExW7HsiJktZWcq2kpyXtWe7gzKyCvPACHHFEmhH3qqvS3FTPPw+33urk0U4VW8F4VEQsBPYENgCOBM4tW1RmVjnq6tJTxpZbwi23wE9/Ci+/nKYf2WKLvKOzZii2DaS+QnJv4PqIeE5eHMTMGhMB//pXGsMxdiyss05q3zjxROjWLe/orIUUm0AmSHoQ2AQ4Q1JXYEX5wjKzdmnFCrjnnpQ4nngCNtwwLeZ
03HGw9tp5R2ctrNgEcjSwLTA9IhZJ+jSpGsvMDJYtg5tvToljypS0HseVV6Y2jzXXzDs6K5Ni20B2AKZGxDuSfgj8irSoVLNIGiBpqqRpkgY3cHwLSY9LWizp1KZca2at4MMPU4P4ZpvBD3+Y9v35z/DSS/CTnzh5VLhiE8hVwCJJ2wCnATOBPzXng7OFqoYBewH9gEMk9VvptLdJy+deWMK1ZlYuCxfC+eenaUZ++lPYYIM0Q+7EiSmReOW/qlBsAlkWEQHsC1waEZfS/JHo/YFpETE9IpYAN2X3/6+ImBcR44GlTb3WzMpg/nz49a+hd284/XTYaqs0U+7jj8O++3rkeJUp9s+E9ySdARwG7JQ9AazWzM/uDswq2J4NfLWlr5U0CBgE0KtXr6ZHaWYwaxYMHQq1tWnOqu99Lw3++8pX8o7MclTsnwsHAYtJ40FeJ32BX9DMz26oG/DHpoxv7rURURsRNRFR083dB82aZupUOOqoNPhv2DD4wQ/S4L/bb3fysKKnMnld0u1A32zXm8Cdzfzs2UDPgu0ewJxWuNbMVuXpp1OPqttvh9VXT7PhnnpqqroyyxQ7lckxwG3A1dmu7sBdzfzs8UBfSZtI6gwcDIxuhWvNrCER8PDDMGBAmlrkwQdTNdXMmWl23ApIHqNGpR7GHTqkn6NG5R1R+1ZsG8jPSA3XTwJExEuSNmjOB0fEMknHA2OAjsCIiJgi6bjs+HBJGwF1wNrACkknAf0iYmFD1zYnHrOqFQH33gvnnJMawzfYID19/OQnaQR5hRg1CgYNgkWL0vbMmWkbYODA/OJqz5Q6V63iJOnJiPiqpGci4kuSOgFPR8TW5Q+x5dTU1ERdXV3eYZi1DcuWpYkM//hHmDQpPWH88pepzaMCx2/06ZOSxsp694YZM1o7mvZF0oSIqFl5f7GN6A9LOhNYU9IewK3A31oyQDNrJR9+CFdfDZtvDocemhLJyJFp8N/PflaRyQPSLPJN2W+rVmwCOR2YD0wCjgXuI41GN7P24r330jocm26a5qb6zGfgjjtg8uS0hOxqze2Z37Y11ovfvftLt8o2EEkdgIkRsRVwTflDMrMW9dZbcNllqSF8wQLYbbc03chuu1XVyn9DhvxvGwhAly5pv5VmlU8gEbECeE6S87RZezJ7Npx8cvoT+3e/g513hiefhIcegm9+s6qSB6SG8tra1OYhpZ+1tW5Ab45ie2FtDEyR9BTwQf3OiPhuWaIys9K99FKaQv1Pf0rTqx96aJp2ZMst844sdwMHOmG0pGITyG/LGoWZNd+zz6YeVbfdltozjjkm9arq0yfvyKxCfWICkbQGcBzweVID+nURsaw1AjOzIo0blxLH/fdD165w2mlw0klpMSezMlrVE8hI0ky44/ho6vQTyx2Uma1CREoY55wDjz6alokdMiRNrb7uunlHZ1ViVQmkX0R8EUDSdcBT5Q/JzBq1fHka/HfuufDcc9CzZ+phdfTRqUuRWStaVQL57zoc2dQjZQ7HzBq0eHFqFD//fJg2DbbYAq6/PjWQd+6cd3RWpVaVQLaRtDB7L9JI9IXZ+4iItcsanVm1e//91Nd06FCYMydNcnj77bDffl68yXL3iQkkIjq2ViBmVuDtt9PAv8suS+933RVuuAF2373qxm9Y2+WFi83aktdeg4suSnNVffABfPe7cMYZsP32eUdm9jFOIGZtwbRpqX1j5MjUUH7IIR+tOW7WRjmBmOXpuedSj6pbbkmD/44+Og3+22STvCMzWyUnELM8PPpoGvx3771p8N+pp6Z5qzbaKO/IzIrmBGLWWiJgzJg0+G/cOFh/ffj979MaHOutl3d0Zk3mBGJWbsuXp663554LzzwDPXrApZem6qq11so7OrOSOYGYlcuSJWndjfPOSzPkbr45jBiRpoP14D+rAE4gZi3tgw/gmmvS6n+vvQZf/nKafmT//aGjh1ZZ5XACMWspCxbAFVek6qm33koLOI0YAXvs4cF
/VpGcQMyaa+7cNPhv+PA09cg++6TBf1/7Wt6RmZVVrpPpSBogaaqkaZIGN3Bcki7Ljk+U9OWCYzMkTZL0rKS61o3cDHj5ZTjuuLRg00UXpVHjEyfC3/7m5GFVIbcnEEkdgWHAHsBsYLyk0RHxfMFpewF9s9dXgauyn/V2jYg3Wylks2TixNSj6uaboVMnOPLINPjvc5/LOzKzVpXnE0h/YFpETI+IJcBNwL4rnbMv8KdIngDWlbRxawdqBsBjj8F3vgPbbJOeMn7xC3jllVR15eRhVSjPBNIdmFWwPTvbV+w5ATwoaYKkQY19iKRBkuok1c2fP78FwraqUj/4b5ddYMcd4fHH4Xe/g5kz4YIL4LOfzTtCs9zk2YjeULeUaMI5O0bEHEkbAGMlvRgRj3zs5IhaoBagpqZm5fubNWz5crjzzjTdyNNPQ/fucPHFcMwxHvxnlsnzCWQ20LNguwcwp9hzIqL+5zzgTlKVmFnzLFmSut726wcHHgjvvQfXXQfTp8NJJzl5mBXIM4GMB/pK2kRSZ+BgYPRK54wGfpT1xtoeeDci5kpaS1JXAElrAXsCk1szeKswH3yQxm987nMfrS9+yy3wwgtw1FEeOW7WgNyqsLI11o8HxgAdgRERMUXScdnx4cB9wN7ANGARcGR2+YbAndka7Z2AGyPigVYuglWCBQtg2LCUPN58E3baKY0i/9a3PPjPbBUUUT3NAjU1NVFX5yEjBrz+emrTuOqqVE317W+nwX877ph3ZGZtjqQJEVGz8n6PRLfqMn166j11/fWwdCn84AcweHDqmmtmTeIEYtVh8uQ0+O+mm9KEhkcckQb/ff7zeUdm1m45gVhle+KJ1BV39OjUg+qkk9IAQI/fMGu2XOfCMiuLCBg7FnbbDXbYAf7v/+Dss9PgvwsvdPLIjBqVpvHq0CH9HDUq74isvfETiFWOFSvgrrvSkrETJqREMXQoDBoEn/pU3tG1KaNGpV/LokVpe+bMtA1pvSuzYvgJxNq/pUvhhhtgyy3h+9+Hd99NXXGnT0/VVU4eH3PWWR8lj3qLFqX9ZsXyE4i1X4sWpVHiF14Ir76aelLddBMccIBX/luFV19t2n6zhvgJxNqfd95J1VR9+sAJJ0CvXnDvvfDMM3DQQU4eRejVq2n7zRriBGLtxxtvpDEbvXunupaaGnjkERg3Dvbe2yPHm2DIkDRbS6EuXdJ+s2I5gVjbN2MG/OxnKXGcfz4MGJBmyL3vvjT1iDXZwIFQW5t+pVL6WVvrBnRrGreBWNs1ZQqcdx7ceGPqa3r44XDaadC3b96RVYSBA50wrHmcQKztefLJNPjv7rtTvcoJJ6TeVD165B2ZmRVwArG2IQIeeigljn/8A9ZbD37zG/j5z2H99fOOzswa4ARi+VqxIj1p/PGPMH48bLxx6pY7aBB07Zp3dGb2CZxALB9Ll8Jf/5omOHzhBdh0U7j6avjRj2CNNfKOzsyK4ARires//0lLxl5wQZo/44tfTI3kBx4Infy/o1l74n+x1jrefReuvBIuuQTmzYOvfQ2uuCIt5OTxG2btkhOIlde8eSlpDBsGCxempWLPPDON33DiMGvXPJDQymPmzNSDqnfv1M7xrW+lGXIfeAC+8Y02lzw8tblZ0/kJxFrWCy+khHHjjSlJHHZYGvy3+eZ5R9YoT21uVho/gVjLGD8evve9NKX6bbfB8cfDyy+n2XLbcPIAT21uVqpcE4ikAZKmSpomaXADxyXpsuz4RElfLvballKNVRtFlzkiDfrbYw/o3x/++U/41a/Sn/AXXww9e7Zi1KXz1OZmJYqIXF5AR+BlYFOgM/Ac0G+lc/YG7gcEbA88Wey1Db222267aIq//CWiS5eI9E2ZXl26pP2VqqgyL18ecdddEf37pxM22iji/PMj3n03t7ibo3fv/y1v/at377wjM2sbgLpo4Ds1zyeQ/sC0iJgeEUuAm4B9VzpnX+BPWRmeANaVtHGR1zZbNVZtfGK
Zly6FP/85jd3Ybz+YPx+uugpeeQV++UtYe+1cYm4uT21uVpo8E0h3YFbB9uxsXzHnFHNts1Vj1UZDZVuD//DtmVfCZpulkeIdOqR6rX//G447rt2PHPfU5malybMXVkP9OKPIc4q5Nt1AGgQMAujVxOXWevVK1fkN7a9UhWXuykJ+wlWczMVsxBuw0fZw2WVp8F+Hyup/4anNzZouz2+B2UBhK2sPYE6R5xRzLQARURsRNRFR061btyYFWI1VG0OGQK815/N7fsWr9OI8BjO5wzaMPfOf8Nhj8J3vVFzyMLPS5PlNMB7oK2kTSZ2Bg4HRK50zGvhR1htre+DdiJhb5LXNVnVVG6++ysAnT+Dl5b05k3P4O7uzz0Z1vPGnMewxZJc2N/jPzPKVWxVWRCyTdDwwhtSrakRETJF0XHZ8OHAfqSfWNGARcOQnXVuOOKuiauPFF9PKf3/5CwCdfvhDOP10DthiCw7IOTQza7uUemhVh5qamqirq8s7jLZjwoS0Dscdd6SG8GOOgVNOqexGHjNrMkkTIqJm5f2eyqTaRMDDD8M558DYsbDOOmlywxNPhCa2EZlZdXMCqRYrVsA996QnjieegA03THNW/eQn7Xb8hpnlywmk0i1bBjffnJLF5MlpbpJhw+DII2HNNfOOzszaMSeQSvXhh3DDDXD++WmkeL9+aRT5QQfBaqvlHZ2ZVQAnkErz3nswfDhcdBG8/nqa5PDiiz1+w8xanBNIpXjzTbj00rRM7DvvwO67p+lGdt3V4zfMrCycQNq7WbNg6FC45po06+H++8MZZ8BXvpJ3ZGZW4ZxA2qupUz8a/BeRRjuefjp84Qt5R2ZmVcIJpL15+unUFff222H11eHYY+HUU9M8K2ZmrcgJpD2IgEceSYljzJg0bmPwYDjpJNhgg7yjM7Mq5QTSlkXAvfemUeOPP56SxR//mAb/rbNO3tGZWZVzAmmLli2DW29NyWLSpFQ9dcUVcNRRHvxnZm2GE0hb8uGHMHJkGvw3fXpqEB85Eg45xIP/zKzNcQJpC957D66+Og3+mzs3dcEdOhS++10P/jOzNssJJE9vvZWWiL38cliwAHbbLU03sttuHvxnZm2eE0geZg/oo2AAAAkCSURBVM9OTxi1tWnw3377pcF//fvnHZmZWdGcQFrTSy+lwX9/+lOaXv3QQ9Pgvy23zDsyM7MmcwJpDc8+m3pU3XZbagw/5hj45S/T1OpmZu2UE0g5jRuXEsf990PXrnDaaWnw34Yb5h2ZmVmzOYG0tIiUMM45Bx59NC0TO2QI/PSnsO66eUdnZtZinEBayvLlafDfuefCc89Bz56ph9XRR0OXLnlHZ2bW4pxAmmvx4tQoft558PLLsMUWcP31qYG8c+e8ozMzK5tcRqlJ+rSksZJeyn6u18h5AyRNlTRN0uCC/WdLek3Ss9lr79aLPvP++6kr7qabwqBBsN56aYbcKVPgiCOcPMys4uU1zHkw8FBE9AUeyrb/h6SOwDBgL6AfcIikfgWnXBwR22av+1ojaCAN/jv77DQ/1amnwuabw4MPwlNPwfe+55HjZlY18vq22xcYmb0fCezXwDn9gWkRMT0ilgA3Zdfl47XX4JRTUuL47W/h619PM+T+4x+wxx4eOW5mVSevBLJhRMwFyH42tKhFd2BWwfbsbF+94yVNlDSisSowAEmDJNVJqps/f35p0f7+96mq6tJL05KxkybB3XfD9tuXdj8zswpQtgQi6e+SJjfwKvYpoqE/6SP7eRXwOWBbYC4wtLGbRERtRNRERE23bt2aVIb/6tMn9aZ66aU0V9VWW5V2HzOzClK2XlgRsXtjxyS9IWnjiJgraWNgXgOnzQZ6Fmz3AOZk936j4F7XAPe0TNSNOOyw9DIzs//KqwprNHB49v5w4O4GzhkP9JW0iaTOwMHZdWRJp97+wOQyxmpmZg3IaxzIucAtko4GXgUOBJD0WeDaiNg7IpZJOh4YA3QERkTElOz68yV
tS6rSmgEc29oFMDOrdoqIVZ9VIWpqaqKuri7vMMzM2hVJEyKiZuX9HrRgZmYlcQIxM7OSOIGYmVlJnEDMzKwkTiBmZlaSquqFJWk+MLPEy9cH3mzBcNoDl7k6uMzVoTll7h0RH5vKo6oSSHNIqmuoG1slc5mrg8tcHcpRZldhmZlZSZxAzMysJE4gxavNO4AcuMzVwWWuDi1eZreBmJlZSfwEYmZmJXECMTOzkjiBFEHSAElTJU2TNDjveFqapJ6S/inpBUlTJJ2Y7f+0pLGSXsp+Nrp0cHslqaOkZyTdk21XdJklrSvpNkkvZv+9d6iCMp+c/X89WdJfJa1RaWXOlvaeJ2lywb5GyyjpjOz7bKqkb5X6uU4gqyCpIzAM2AvoBxwiqV++UbW4ZcApEfEFYHvgZ1kZBwMPRURf4KFsu9KcCLxQsF3pZb4UeCAitgC2IZW9YsssqTtwAlATEVuR1hY6mMor8w3AgJX2NVjG7N/2wcCW2TVXZt9zTeYEsmr9gWkRMT0ilgA3AcWu694uRMTciHg6e/8e6UulO6mcI7PTRgL75RNheUjqAXwbuLZgd8WWWdLawDeA6wAiYklEvEMFlznTCVhTUiegC2lp7Ioqc0Q8Ary90u7GyrgvcFNELI6IV4BppO+5JnMCWbXuwKyC7dnZvookqQ/wJeBJYMOImAspyQAb5BdZWVwCnAasKNhXyWXeFJgPXJ9V210raS0quMwR8RpwIWnl07nAuxHxIBVc5gKNlbHFvtOcQFZNDeyryL7Pkj4F3A6cFBEL846nnCTtA8yLiAl5x9KKOgFfBq6KiC8BH9D+q24+UVbvvy+wCfBZYC1JP8w3qty12HeaE8iqzQZ6Fmz3ID0CVxRJq5GSx6iIuCPb/YakjbPjGwPz8oqvDHYEvitpBqlacjdJf6GyyzwbmB0RT2bbt5ESSiWXeXfglYiYHxFLgTuAr1HZZa7XWBlb7DvNCWTVxgN9JW0iqTOp8Wl0zjG1KEki1Yu/EBEXFRwaDRyevT8cuLu1YyuXiDgjInpERB/Sf9N/RMQPqewyvw7MkrR5tuubwPNUcJlJVVfbS+qS/X/+TVIbXyWXuV5jZRwNHCxpdUmbAH2Bp0r5AI9EL4KkvUn15R2BERExJOeQWpSkrwPjgEl81B5wJqkd5BagF+kf4oERsXJDXbsnaRfg1IjYR9JnqOAyS9qW1GmgMzAdOJL0h2Qll/m3wEGk3obPAD8GPkUFlVnSX4FdSFO2vwH8P+AuGimjpLOAo0i/k5Mi4v6SPtcJxMzMSuEqLDMzK4kTiJmZlcQJxMzMSuIEYmZmJXECMTOzknTKOwCztiLrwvtQtrkRsJw09QdA/2wutDYh63q8JCIeyzsWq15OIGaZiHgL2BZA0tnA+xFxYV7xSOoUEcsaObwL8D5QdAKR1DEilrdEbGbgKiyzTyRpO0kPS5ogaUzB1BD/knSxpEeydTW+IumObO2FP2Tn9MnW3RgpaWK2DkeXIu57jqSHgRMlfUfSk9nkh3+XtGE24eVxwMmSnpW0k6QbJB1QEPf72c9dlNZ6uRGYpLT+yQWSxmcxHduav0+rLE4gZo0TcDlwQERsB4wACmchWBIR3wCGk6aJ+BmwFXBEVh0GsDlQGxFbAwuBn2bzjn3SfdeNiJ0jYijwf8D22eSHNwGnRcSM7DMvjohtI2LcKsrRHzgrIvoBR5NmpP0K8BXgmGw6C7MmcxWWWeNWJyWEsWkaJTqSpgSvVz8n2iRgSv3U2ZKmkyareweYFRGPZuf9hbS40QOruO/NBe97ADdnTyidgVdKKMdT2boPAHsCWxc8raxDmguplPtalXMCMWucSIlhh0aOL85+rih4X79d/29r5bmCooj7flDw/nLgoogYnTWcn93INcvIahSySQM7N3I/AT+PiDGN3MesaK7CMmvcYqCbpB0gTXkvacsm3qNX/fXAIaQqqalNuO86wGvZ+8ML9r8HdC3YngFsl73fF1itkfu
NAX6SVaMhabNsUSmzJnMCMWvcCuAA4DxJzwHPktaSaIoXgMMlTQQ+TVrMaUkT7ns2cKukccCbBfv/Buxf34gOXAPsLOkp4Kv871NHoWtJU7g/LWkycDWuibASeTZeszLJekvdExFb5RyKWVn4CcTMzEriJxAzMyuJn0DMzKwkTiBmZlYSJxAzMyuJE4iZmZXECcTMzEry/wFcSPPp2HMABwAAAABJRU5ErkJggg==\n", 229 | "text/plain": [ 230 | "
" 231 | ] 232 | }, 233 | "metadata": { 234 | "needs_background": "light" 235 | }, 236 | "output_type": "display_data" 237 | } 238 | ], 239 | "source": [ 240 | "# Visualising the Linear Regression results \n", 241 | "plt.scatter(X, y, color = 'blue') \n", 242 | " \n", 243 | "plt.plot(X, lin.predict(X), color = 'red') \n", 244 | "plt.title('Linear Regression') \n", 245 | "plt.xlabel('Temperature') \n", 246 | "plt.ylabel('Pressure') \n", 247 | " \n", 248 | "plt.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 9, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5xU5dn/8c+XxQZ2RUWQYqJJ1FhXsIuPGks0xsQYDRo7mmgsKUZjfnnMk2jEXmJDLCRir0QT0Rg1KhYWY8NIQhAFsYAdERD2+v1xnw0jzi6zZfbMzH7fr9e8Zk6/7lk419z3Oee+FRGYmZktrlveAZiZWWVygjAzs6KcIMzMrCgnCDMzK8oJwszMinKCMDOzopwgrOJImippl7zjKCTpL5IOKXHdiou/o0iaLWndvOOwzuEEYWWTnSg/yU4qb0m6VtLyecfVFhGxR0SMau9+JF0naX72nbwr6QFJX+6IGDtDRCwfEVPyjsM6hxOEldveEbE8sDmwJfDLnOOpBGdn30kf4HXg6o4+gKTuHb1P63qcIKxTRMTrwF+AjQAkfUPSREnvS3pY0lcW30bSWpLmSFqtYN4WkmZKWkrSoZIek3SupPckvSJpj4J115Y0JvulPlnSUQXLTpd0q6TrJX0k6QVJ60s6VdLbkqZJ+lrB+g9LOjL7/AVJf5P0jqRZkkZLWrkN38knwC3ApovFfHtWxlckHV+wbDlJo7Ky/lPSyZKmFyyfKunnkp4HPpbUXdJWksZl3/NzkoYUrH+opClZ+V+RNDSb/0VJj0j6ICvfzQXbhKQvZp9XkvSHLNZXJf1SUreCfTf7t7Hq4ARhnULSOsCewD8krQ/cCJwI9AL+DPxJ0tKF20TEm8DDwP4Fsw8CboqIT7PpwcAkYHXgbOBqScqW3QhMB9YG9gPOlLRzwb72Bv4IrAL8AxhL+j/RB/g/4MrmigP8LtvvV4B1gNNL+yYKdiL1BA4EJmfT3YA/Ac9lMewMnChpt2yT/wUGAOsCu2bfxeIOBL4OrAysCdwL/BZYFfgpcLukXtmxLwb2iIgVgG2AZ7N9/Aa4n/S99AUuaaYIlwArZfHsCHwfOKxgeUt/G6sGEeGXX2V5AVOB2cD7wKvAZcBywP8DbilYrxupqWVIwXa7ZJ+/Czyefa4D3gQGZdOHApML9tMDCGAt0kl7IbBCwfLfAddln08HHihYtncWa102vUK2r5Wz6YeBI5sp5zeBfyxW7l2aWfc6YG72nTQCrwAbZ8sGA68ttv6pwLXZ5ynAbgXLjgSmL3bcwwumfw78cbH9jQUOAXpmMXwbWG6xdf4AjAD6Fok/gC9mf4t5wAYFy44G
Hl7S3ybvf5d+lf5yDcLK7ZsRsXJE9I+IH0ZqVlmblDAAiIhGYBrpV/Pi7gY2yO6c2RX4ICKeLlj+ZsF+5mQfl8+O8W5EfFSw7quLHeOtgs+fALMiYmHBdNO+PkPSGpJukvS6pA+B60m/kkt1bkSsTKoNfAJ8KZvfH1g7aw56X9L7wC9INQGyMk0r2E/h52Lz+gPfWWx/2wG9I+JjUvI9BnhD0r0FF8tPJtWSns6aAQ8vcpzVgaUp+Dvy+e+3ub+NVQknCMvDDNLJC4Cs2WEdUi3iMyJiLqmdfihwMKlJqNRjrCpphYJ5/Yodow1+R/o1vHFErEhq6ml100lEvAacAFwkaTnSyf2VLKE2vVaIiD2zTd4gNfk0WafYbgs+TyPVIAr31zMizsqOPzYidgV6Ay8DV2Xz34yIoyJibVKt4LKm6w4FZgGfUvB3pOO+X6sQThCWh1uAr0vaWdJSwE9IzRXjmln/D6Qmi2+Qfq0vUURMy/b3O0nLStoYOAIY3c7YITU/zQbel9QH+FlbdxQRD5CS2TDgaeDD7ELzcpLqJG0kacts9VuAUyWtkh33uCXs/npgb0m7ZftaVtIQSX0lral0o0BP0nc/m9Qkh6TvSGpKRO+Rks7Cwh1nNa1bgDMkrSCpP/BjSvz7WHVwgrBOFxGTSL+6LyH9Et2bdDvs/GbWf5zUXv9MRExtxaEOJDXjzADuBP43OyG3169Jt+1+QLoIfEc793cOqVmnO+m72JR0bWIWMJJ0IRjShfPp2bK/AreRTu5FZUlyH1Iz1UxSjeJnpP/33UiJeQbwLuki8w+zTbcEnpI0GxgDnBARrxQ5xI+Aj0nXRh4DbgCuaW3hrXIpwgMGWeWT9DfghogYmXcslULSD4ADImLHvGOx2uQahFW8rIllc+DmJa1byyT1lrStpG6SvkSqAdyZd1xWu/y0pVU0SaNIt5GesNgdSV3R0qRnMwaSblG9iXTrsFlZuInJzMyKchOTmZkVVVNNTKuvvnoMGDAg7zDMzKrGhAkTZkVEr2LLaipBDBgwgIaGhrzDMDOrGpJebW6Zm5jMzKwoJwgzMyvKCcLMzIpygjAzs6KcIMzMrCgnCDOzKjV6NAwYAN26pffRHdFXcYGaus3VzKyrGD0ahg2DOdlQTK++mqYBhg7tmGO4BmFmVoVOOy0lh415jm15DEjTp53WccdwgjAzq0KvvZbez+QX3Mm+LMPcz8zvCE4QZmZVqF8/2IgX+Dp/5iJOYB7L/nd+R/E1CDOzKnTGGdDtkLOZvbAnl2WDAfbokeZ3FCcIM7MqNHTbqTTGjVyzwvG8P3tV+vdLyaGjLlCDE4SZWXU6/3y61XXjyJd+zJF9y3MIX4MwM6s2s2bByJGputC3TNkBJwgzs+pzySXwySdw8sllPYwThJlZNfn4Y/j972GffeArXynroZwgzMyqyciR8O678POfl/1QThBmZtXi00/hvPNg++1h663LfjjfxWRmVi1uvBGmTYMrruiUw7kGYWZWDRob4eyz4atfhT326JRDljVBSNpd0iRJkyWdUmT5UEnPZ69xkjYpWDZV0guSnpXUUM44zcwq3r33wsSJ6c4lqVMOWbYmJkl1wKXArsB0YLykMRHxUsFqrwA7RsR7kvYARgCDC5bvFBGzyhWjmVnVGD4c+veH73630w5ZzhrEIGByREyJiPnATcA+hStExLiIeC+bfBIo3xMfZmbV6rHH4PHH4Sc/gaWW6rTDljNB9AGmFUxPz+Y15wjgLwXTAdwvaYKkYc1tJGmYpAZJDTNnzmxXwGZmFWn4cFhtNTj88E49bDnvYirWSBZFV5R2IiWI7QpmbxsRMyStATwg6eWI+PvndhgxgtQ0RX19fdH9m5lVrRdfhHvugV//Gnr27NRDl7MGMR1Yp2C6LzBj8ZUkbQyMBPaJiHea5kfEjOz9beBOUpOVmVnXcs45qR/vY4/t9EOXM0GMB9aTNFDS0sABwJjCFST1A+4ADo6IfxXM
7ylphabPwNeAF8sYq5lZ5XntNbjhBjjqqNTE1MnK1sQUEQskHQeMBeqAayJioqRjsuVXAL8CVgMuU7pta0FE1ANrAndm87oDN0TEfeWK1cysIp1/fnr/8Y9zObwiaqfZvr6+Phoa/MiEmdWAd95J44futx+MGlW2w0iakP0w/xw/SW1mVol+/3uYM6fsXXq3xAnCzKzSfPxxGvNh771hww1zC8MJwsys0lxzTWpi6oQuvVviBGFmVkk+/RTOPRe23Ta9cuTuvs3MKsnNN6fbWy+9NO9IXIMwM6sYEalbjQ03hD33zDsa1yDMzCrGn/+cutYYNQq65f/7Pf8IzMwsGT4c1lkHDjww70gA1yDMzCrDuHHw6KNw4YWd2qV3S1yDMDOrBMOHw6qrwpFH5h3JfzlBmJnl7aWXYMwY+NGPOr1L75Y4QZiZ5e2cc2C55eC44/KO5DOcIMzM8jRtGlx/fWpaWn31vKP5DCcIM7M8XXBBev7hJz/JO5LPcYIwM8vLu+/CiBHpttb+/fOO5nOcIMzM8nLppann1hy79G6JE4SZWR7mzIGLL4avfx2++tW8oynKCcLMLA/XXguzZuXepXdLnCDMzDrbggWpS++tt4bttss7mma5qw0zs852yy0wdSpcdBFIeUfTLNcgzMw6U1OX3htsAHvtlXc0LXINwsysM913Hzz/fLoGUQFdereksqMzM6s1w4dD377wve/lHckSOUGYmXWWJ5+ERx6BH/8Yll4672iWyAnCzKyzDB8Oq6wCRx2VdyQlcYIwM+sML78Md9+demxdfvm8oymJE4SZWWc45xxYdtk05kOVKGuCkLS7pEmSJks6pcjyoZKez17jJG1S6rZmZlVj+nT44x/h8MOhV6+8oylZ2RKEpDrgUmAPYAPgQEkbLLbaK8COEbEx8BtgRCu2NTOrDhdeCI2NFdmld0vKWYMYBEyOiCkRMR+4CdincIWIGBcR72WTTwJ9S93WzKwqvPceXHklfPe7MHBg3tG0SjkTRB9gWsH09Gxec44A/tLabSUNk9QgqWHmzJntCNfMrAwuvxxmz67YLr1bUs4EUayDkSi6orQTKUE0dWtY8rYRMSIi6iOivlcVte2ZWRfwySepeWmPPWCTTZa8foUpZ1cb04F1Cqb7AjMWX0nSxsBIYI+IeKc125qZVbTrroOZMyu6S++WlLMGMR5YT9JASUsDBwBjCleQ1A+4Azg4Iv7Vmm3NzCpaU5fegwfDDjvkHU2blK0GERELJB0HjAXqgGsiYqKkY7LlVwC/AlYDLlPq8nZB1lxUdNtyxWpm1uFuuw2mTIHzzqvoLr1booiiTftVqb6+PhoaGvIOw8y6ugjYfHOYOxcmTqzoXlslTYiI+mLL3N23mVlHe+ABePZZuPrqik4OS1K9kZuZVaqzzoI+fWDo0LwjaRcnCDOzjjR+PDz0EJx0EiyzTN7RtIsThJlZRxo+HFZeGYYNyzuSdnOCMDPrKJMmwR13wLHHwgor5B1NuzlBmJl1lHPPTc1Kxx+fdyQdwgnCzKwjzJgBf/gDHHYYrLFG3tF0CCcIM7OOcOGF6enpn/4070g6jBOEmVl7vf8+XHEF7L8/rLtu3tF0GCcIM7P2uuIK+OijquzSuyVOEGZm7TF3bmpe2m032GyzvKPpUE4QZmbtMWoUvPVW1Xbp3RInCDOztlq4EM45B7bcEoYMyTuaDufO+szM2ur22+E//4Gzz67aLr1b4hqEmVlbRKRuNdZfH/bZJ+9oysI1CDOztnjwQXjmGbjqKqiryzuasnANwsysLc46C3r3hoMPzjuSsnGCMDNrrQkTUg2iBrr0bokThJlZaw0fDiutBEcfnXckZeUEYWbWGv/+N9x2G/zwh7DiinlHU1ZOEGZmrXHuubD00nDCCXlHUnZOEGZmpXrjDbjuOjj0UFhzzbyjKTsnCDOzUl10Uc116d2SkhOEpOUkfamcwZiZVawPPoDLL4f99oMvfjHvaDpFSQlC
0t7As8B92fSmksaUMzAzs4py5ZXw4Yc12Slfc0qtQZwODALeB4iIZ4EB5QnJzKzCzJ0LF1wAu+4Km2+edzSdptSuNhZExAeqwc6ozMyW6I9/hDffhOuvzzuSTlVqgnhR0veAOknrAccD48oXlplZhWjq0nuLLeB//ifvaDpVqU1MPwI2BOYBNwAfACcuaSNJu0uaJGmypFOKLP+ypCckzZP008WWTZX0gqRnJTWUGKeZWce68870cNwpp9Rkl94tWWINQlIdMCYidgFOK3XH2XaXArsC04HxksZExEsFq71Lqo18s5nd7BQRs0o9pplZh2rq0nu99WDfffOOptMtsQYREQuBOZJWauW+BwGTI2JKRMwHbgI+02l6RLwdEeOBT1u5bzOz8nvoIWhoSM891GiX3i0p9RrEXOAFSQ8AHzfNjIjjW9imDzCtYHo6MLgVsQVwv6QAroyIEcVWkjQMGAbQr1+/VuzezGwJzjoL1loLvv/9vCPJRakJ4t7s1RrFGuuiFdtvGxEzJK0BPCDp5Yj4++d2mBLHCID6+vrW7N/MrHnPPAMPPJCSxLLL5h1NLkpKEBExqg37ng6sUzDdF5hR6sYRMSN7f1vSnaQmq88lCDOzsjj77NRb6zHH5B1JbkpKEJJeociv/4hYt4XNxgPrSRoIvA4cAHyvxOP1BLpFxEfZ568B/1fKtmZm7faf/8Ctt8LPfpbGfeiiSm1iqi/4vCzwHWDVljaIiAWSjgPGAnXANRExUdIx2fIrJK0FNAArAo2STgQ2AFYH7swezOsO3BAR95VeLDOzdjj3XOjevUt06d0SRbSt2V7SYxGxXQfH0y719fXR0OBHJsysHd56C/r3TxemRxS9N6amSJoQEfXFlpXaxFTY+Ug3Uo1ihQ6Izcysslx0Ecyfn5qXurhSm5jOK/i8AJgK7N/h0ZiZ5enDD+Gyy+Db304Px3Vxpd7FtFO5AzEzy92IEWnchy7UpXdLSh0P4gRJKyoZKekZSV8rd3BmZp1m3jw4/3zYeWeoL9ok3+WU2lnf4RHxIel20zWAw4CzyhaVmVlnu/76NOa0aw//VWqCaHoqek/g2oh4juJPSpuZVZ+mLr032wx22SXvaCpGqRepJ0i6HxgInCppBaCxfGGZmXWiu++GSZPgppu6XJfeLSk1QRwBbApMiYg5klYlNTOZmVW3pi69v/CFdPeS/VepTUxbA5Mi4n1JBwG/JA0aZGZW3R55BJ5+OnXp3b3U38xdQ6kJ4nLSmBCbACcDrwJ/KFtUZmad5ayzYI014JBD8o6k4pSaIBZE6pNjH+CiiLgIP0ltZtXu2Wdh7Fg48URYbrm8o6k4pdanPpJ0KnAwsH02nOhS5QvLzKwTnH02rLAC/OAHeUdSkUqtQXwXmEd6HuJN0mhx55QtKjOzcpsyBW6+GY4+GlZeOe9oKlJJCSJLCrcDy2SzZgF3lisoM7OyO++8dFH6pJPyjqRildrVxlHAbcCV2aw+wF3lCsrMrKzefhuuuQYOPhjWXjvvaCpWqU1MxwLbAh8CRMS/SV1umJlVn4svTn0vuUvvFpWaIOZFxPymCUndKTIEqZlZxfvoI7j0Uth3X/jSl/KOpqKVmiAekfQLYDlJuwK3An8qX1hmZmVy1VXw/vvulK8EpSaInwMzgReAo4E/k56mNjOrHvPnpy69hwyBQYPyjqbiLfE5CEndgOcjYiPgqvKHZGZWJqNHw+uvw9VX5x1JVVhiDSIiGoHnJPXrhHjMzMqjsTE9GLfppvA1j3dWilKfpO4NTJT0NPBx08yI+EZZojIz62hjxsDLL8MNN7hL7xKVmiB+XdYozMzKqalL74ED4TvfyTuaqtFigpC0LHAM8EXSBeqrI2JBZwRmZtZhHn0Unnwy3d7qLr1LtqRrEKOAelJy2AM4r+wRmZl1tLPOgl694DCPc9YaS0qlG0TEVwEkXQ08Xf6QzMw60PPPw1/+Ar/9rbv0bqUl1SA+bfrgpiUzq0pn
nw3LLw8//GHekVSdJSWITSR9mL0+AjZu+izpwyXtXNLukiZJmizplCLLvyzpCUnzJP20NduamS3R1Klw000wbBisskre0VSdFpuYIqKurTvOBhW6FNgVmA6MlzQmIl4qWO1d4Hjgm23Y1sysZeedB926uUvvNiq1q422GARMjogpWUd/N5GGLP2viHg7IsZT0JRV6rZmZi2aOTM9MX3QQdC3b97RVKVyJog+wLSC6enZvHJva2YGl1wCn3ziLr3boZwJotijiqV2EV7ytpKGSWqQ1DBz5sySgzOzGjZ7Nvz+9/DNb8JXvpJ3NFWrnAliOrBOwXRfYEZHbxsRIyKiPiLqe/Xq1aZAzazGjBwJ773nLr3bqZwJYjywnqSBkpYGDgDGdMK2ZtaVzZ+fLk7vsANstVXe0VS1sj1zHhELJB0HjAXqgGsiYqKkY7LlV0haC2gAVgQaJZ1Iejjvw2LblitWM6shN94I06fDiBF5R1L1FFE7I4fW19dHQ0ND3mGYWQ5Gj4Zf/qKRe177Kt2W6s4z1zzL0IPca+uSSJoQEfXFlrnXKjOreqNHp2fhvjfnajbkJYZ+ej13HS0QDB2ad3TVq5zXIMzMOsX//uJTfjvnJK5iGI+yHTfzXebMgdNOyzuy6uYEYWbVbfp0Rr02hJO4kIs4np15kIVZ48hrr+UbWrVzE5OZVa+//hUOPJBNNJf942ZuZf/PLO7ngZLbxTUIM6s+jY3wm9+ksaXXXJOHho/n3h6fTQ49esAZZ+QUX41wgjCz6vLOO7DXXvCrX8H3vgdPPcXeP/syI0ZA//5puOn+/dNdrr5A3T5uYjKz6jF+POy3H7z5Jlx+ORx9dMoIpGTghNCxXIMws8oXkRLCdtulhPDYY3DMMf9NDlYeThBmVtk+/hgOPjiNCLfzzjBhAmy5Zd5RdQlOEGZWuV5+GQYNSt1n/Pa3cM89sNpqeUfVZfgahJlVpptvhiOOSLcj3X9/qj1Yp3INwswqy/z5cPzxcMABsMkm8I9/ODnkxAnCzCrHtGmpm+5LLknjSD/8MPTxYJJ5cROTmVWG++9PzzXMnw+33ppuZ7VcuQZhZvlqbIRf/xp23x1694aGBieHCuEahJnlZ9YsOOggGDs23cp6+eXQs2feUVnGCcLM8vHUU/Cd78Bbb8GVV8JRR/nBtwrjJiYz61wR6SL09ttDXR2MG5dG+3FyqDhOEGbWeWbPTheijz8edtsNnnkGttgi76isGU4QZtY5XnopdZFxyy1w5plw992wyip5R2Ut8DUIMyu/G29M1xh69kyD/Oy0U94RWQlcgzCz8pk3D449NjUrbbZZeirayaFqOEGYWXm8+mp6Kvqyy+CnP4W//Q3WXjvvqKwV3MRkZh3vvvvS6D0LFsAdd8C+++YdkbWBaxBm1nEWLkxDge65J/Ttm56KdnKoWq5BmFnHmDkz1RoeeAAOOwwuvRSWWy7vqKwdnCDMrP2eeCI9FT1rFowcmcZxsKrnJiYza7sIuOiidDF6mWVSonByqBllTRCSdpc0SdJkSacUWS5JF2fLn5e0ecGyqZJekPSspIZyxmlmbfDRR2lQnxNPTNccJkxIt7JazShbE5OkOuBSYFdgOjBe0piIeKlgtT2A9bLXYODy7L3JThExq1wxmlkbTZwI3/42TJ4Mw4fDz37mvpRqUDlrEIOAyRExJSLmAzcB+yy2zj7AHyJ5ElhZUu8yxmRm7XX99TBoEHzwATz4IJx8spNDjSpngugDTCuYnp7NK3WdAO6XNEHSsOYOImmYpAZJDTNnzuyAsM2sqLlz4Qc/SOM21NenjvZ23DHvqKyMypkgiv2kiFass21EbE5qhjpW0g7FDhIRIyKiPiLqe/Xq1fZozax5U6fCdtvBFVekGsODD6bR36ymlfM21+nAOgXTfYEZpa4TEU3vb0u6k9Rk9feyRWtmxd17b6o1NDbCXXfBPou3FFutKmcNYjywnqSBkpYGDgDGLLbOGOD72d1M
WwEfRMQbknpKWgFAUk/ga8CLZYzVzBa3cCH88pew117Qv3+6S8nJoUspWw0iIhZIOg4YC9QB10TEREnHZMuvAP4M7AlMBuYAh2WbrwncqXThqztwQ0TcV65YzWwxb78NBx6YOtg78ki4+GI/Fd0FKWLxywLVq76+Phoa/MiEWbs8/jjsvz+8+y5cfjkcemjeEVkZSZoQEfXFlvlJajNLIuCCC2DIEOjRA5580smhi3NfTGYGH34Ihx8Ot9+eel+99lpYaaW8o7KcuQZh1tW98EJ6ruGuu+Dcc1OScHIwXIMw69pGjUoPv628Mjz0EGy/fd4RWQVxDcKsK5o7F4YNS9cYBg9OT0U7OdhinCDMuppXXoFtt4WrroJTT00D/Ky1Vt5RWQVygjCrQaNHw4AB0K1beh89Olvwpz/B5pvDlCnp85lnQne3NFtxThBmNWb06NR69Oqr6c7VV1+FHxy1gInfOBW+8Q1Yd93UpLTXXnmHahXOCcKsxpx2GsyZs2h6Td7k7k92ZcM/nZUyx+OPw8CB+QVoVcMJwqzGvPbaos/b8SjPsDmDeYpDGAVXXgnLLptfcFZV3PhoVkvefJOjVn+CL84cxzaMYzBPMYV12Y2xfNT/q3lHZ1XGCcKsWi1YAC++COPGLXq98gpXAvNYmgbqOZuTOYtTWNhjRUackXfAVm2cIMyqxXvvpf6RmpLB00/D7NlpWe/esM02cNxxsM023PHyZpx6+jK89hr06wdnnAFDh+YbvlUfJwizStTYCP/616Jk8MQT8NJLaVldHWyySXrIbZtt0qtfv8+MC33gVnDgoblEbjXECcKsEsyeDePHL0oITz6ZutsGWHVV2HrrVAXYZhvYckvo2TPfeK1LcIIw62xNDycUXjt4/vk0ghvABhvAt76VksHWW8P666cn3sw6mROEWbnNm5ceTCtsLnrjjbRs+eVTX0i/+EVKBlttBauskm+8ZhknCLOO9sYbKQk0JYOGBpg/Py1bd13YeedFtYONNnJXF1ax/C/TrD0WLEjjKTQlg+xWUwCWWSaNs3DCCSkZbL21O8WzquIEYdYa77676FbTJ56Ap56Cjz9Oy3r3Tr2kZreastlmKUmYVSknCLPmNDbCpEmfrR38859pWV0dbLopHHZYs7eamlU7JwizJrNnp4fPCq8fvPdeWtZ0q+lBB/lWU+synCCsa4qAqVMXJYNx4+C551KtAWDDDeHb315UO1h/fdcOrMtxgrDat2ABd1z2JjecOZUBbz3Fzj3GMWSpcSz3wZtpedOtpqedlpLB4MG+1dQMJwirZhHwwQfw+uuLXjNmfHb69deJt97iWxF8K9vsP3PW5a66XfjCodsw6MRt0q2mdXW5FsWsEjlBWGWaPz89T1DkhP+ZRFA4Mk6T1VaDPn3Sa9NNufi2Pkz8oA/T6csEtuBt1oSF0P8hmHpt5xfNrFo4QVjnikgXfps74Te93n7789suswysvXY68W++Oey996JE0DR/7bU/NyDOSddAFAmlcGAdM/s8J4guaPTo1Nze4V1Bz5u36ETf0i//uXM/v22vXotO9vX1iz4XvlZdtU0Xivv1S10fFZtvZs0ra4KQtDtwEVAHjIyIsxZbrmz5nsAc4NCIeKaUbTtK2U6WFappQPumlplXX03T0EK5I2DWrOIn/cJ5s2Z9fttll110gh88+LMn/KZf/b17l/WBsjPO+GyZAXr0SPPNrHllSxCS6oBLgV2B6cB4SWMi4qWC1fYA1n6bquYAAAhESURBVMteg4HLgcElbttuTSfLLeb8nf4Eja9249ojxOr/6sZue3RLPWhK6b1bG6fbuo+mVwdbfED7ZfmEtebM4O6fvM7Qbs388p8xY1FfQk0kWGONdILv1y91MlfsV//KK+d+e2hT4utKPwTMOoIiirXOdsCOpa2B0yNit2z6VICI+F3BOlcCD0fEjdn0JGAIMGBJ2xZTX18fDQ0NJcc4YED6Bf0xPejBJyVv12maEkd7
k1TB9KTJ3QjSCbsXM1mNdz9/3B49ip/sF//Vv9RSnfyFmFlHkzQhIuqLLStnE1MfYFrB9HRSLWFJ6/QpcVsAJA0DhgH0a2WjctNFyt25jzoW0o3G7BWM/UtjemiqsTE1sTR24HQ59lni9KQZjXwypxERzGJ1XqcPr9OHBWv04fqHsiSw4oq5/+o3s/yVM0EUO8MsXl1pbp1Stk0zI0YAIyDVIFoTYNPFy0fZ4TPz+/cHdm/NnqrHR6OLt8ePOB/YILewzKwClXOYqunAOgXTfYEZJa5TyrbtdsYZ6eRYqNYvXg4dCiNGpCQopfcRI9web2afV84EMR5YT9JASUsDBwBjFltnDPB9JVsBH0TEGyVu225d9WQ5dGjqhqixMb3XennNrG3K1sQUEQskHQeMJd2qek1ETJR0TLb8CuDPpFtcJ5Nucz2spW3LEefQoT5BmpkVU7a7mPLQ2ruYzMy6upbuYipnE5OZmVUxJwgzMyvKCcLMzIpygjAzs6Jq6iK1pJlAkX47S7I6UKS3uZrmMte+rlZecJlbq39E9Cq2oKYSRHtIamjuSn6tcplrX1crL7jMHclNTGZmVpQThJmZFeUEsciIvAPIgctc+7paecFl7jC+BmFmZkW5BmFmZkU5QZiZWVFdPkFI2l3SJEmTJZ2SdzzlIGkdSQ9J+qekiZJOyOavKukBSf/O3lfJO9aOJqlO0j8k3ZNN13SZJa0s6TZJL2d/7627QJlPyv5dvyjpRknL1lqZJV0j6W1JLxbMa7aMkk7NzmmTJO3W1uN26QQhqQ64FNiDNJ7agZJqcVy1BcBPIuIrwFbAsVk5TwEejIj1gAez6VpzAvDPgulaL/NFwH0R8WVgE1LZa7bMkvoAxwP1EbERaXiAA6i9Ml/H58e5LFrG7P/2AcCG2TaXZee6VuvSCQIYBEyOiCkRMR+4Cdgn55g6XES8ERHPZJ8/Ip00+pDKOipbbRTwzXwiLA9JfYGvAyMLZtdsmSWtCOwAXA0QEfMj4n1quMyZ7sBykroDPUijT9ZUmSPi78C7i81uroz7ADdFxLyIeIU03s6gthy3qyeIPsC0gunp2byaJWkAsBnwFLBmNoIf2fsa+UVWFhcCJwONBfNquczrAjOBa7NmtZGSelLDZY6I14FzgdeAN0ijUt5PDZe5QHNl7LDzWldPECoyr2bv+5W0PHA7cGJEfJh3POUkaS/g7YiYkHcsnag7sDlweURsBnxM9TettChrd98HGAisDfSUdFC+UeWuw85rXT1BTAfWKZjuS6qe1hxJS5GSw+iIuCOb/Zak3tny3sDbecVXBtsC35A0ldR0+D+Srqe2yzwdmB4RT2XTt5ESRi2XeRfglYiYGRGfAncA21DbZW7SXBk77LzW1RPEeGA9SQMlLU26sDMm55g6nCSR2qX/GRHnFywaAxySfT4EuLuzYyuXiDg1IvpGxADS3/VvEXEQtV3mN4Fpkr6UzdoZeIkaLjOpaWkrST2yf+c7k66x1XKZmzRXxjHAAZKWkTQQWA94uk1HiIgu/QL2BP4F/Ac4Le94ylTG7UhVzOeBZ7PXnsBqpLsf/p29r5p3rGUq/xDgnuxzTZcZ2BRoyP7WdwGrdIEy/xp4GXgR+COwTK2VGbiRdI3lU1IN4YiWygiclp3TJgF7tPW47mrDzMyK6upNTGZm1gwnCDMzK8oJwszMinKCMDOzopwgzMysqO55B2DWGSQ13RIIsBawkNQtBcCgSH1xVQRJQ4D5ETEu71isa3OCsC4hIt4hPSOApNOB2RFxbl7xSOoeEQuaWTwEmA2UnCAk1UXEwo6IzayJm5isy5K0haRHJE2QNLag24KHJV0g6e/ZmApbSroj63f/t9k6A7IxF0ZJej4bg6FHCfs9U9IjwAmS9pb0VNax3l8lrZl1pngMcJKkZyVtL+k6SfsVxD07ex+iNM7HDcALSmNfnCNpfBbT0Z35fVrtcYKwrkrAJcB+EbEFcA1wRsHy+RGxA3AF
qQuDY4GNgEOz5iqALwEjImJj4EPgh1mfVy3td+WI2DEizgMeA7aK1LHeTcDJETE1O+YFEbFpRDy6hHIMIvUAsAHp6doPImJLYEvgqKyrBbM2cROTdVXLkE74D6QufKgjdWXQpKlPrheAiZF1qyxpCqkjtPeBaRHxeLbe9aSBa+5bwn5vLvjcF7g5q2EsDbzShnI8HanPf4CvARsX1DZWIvXD05b9mjlBWJcl0ol/62aWz8veGws+N003/b9ZvJ+aKGG/Hxd8vgQ4PyLGZBemT29mmwVktf2sQ7qlm9mfgB9FxNhm9mPWKm5isq5qHtBL0taQukOXtGEr99GvaXvgQFKT0aRW7Hcl4PXs8yEF8z8CViiYngpskX3eB1iqmf2NBX6QNXMhaf1swCCzNnGCsK6qEdgPGC7pOVIPt9u0ch//BA6R9DywKmmgnvmt2O/pwK2SHgVmFcz/E7Bv00Vq4CpgR0lPA4P5bK2h0EhS997PKA1ufyVuJbB2cG+uZm2Q3W10T0RslHMoZmXjGoSZmRXlGoSZmRXlGoSZmRXlBGFmZkU5QZiZWVFOEGZmVpQThJmZFfX/AUBPu4zpZN8GAAAAAElFTkSuQmCC\n", 259 | "text/plain": [ 260 | "
" 261 | ] 262 | }, 263 | "metadata": { 264 | "needs_background": "light" 265 | }, 266 | "output_type": "display_data" 267 | } 268 | ], 269 | "source": [ 270 | "# Visualising the Polynomial Regression results \n", 271 | "plt.scatter(X, y, color = 'blue') \n", 272 | " \n", 273 | "plt.plot(X, lin2.predict(poly.fit_transform(X)), color = 'red') \n", 274 | "plt.title('Polynomial Regression') \n", 275 | "plt.xlabel('Temperature') \n", 276 | "plt.ylabel('Pressure') \n", 277 | " \n", 278 | "plt.show() " 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "### Prediction\n", 286 | "- Predicting new result with both Linear and Polynomial Regression" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "array([0.20675333])" 298 | ] 299 | }, 300 | "execution_count": 13, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "# Predicting a new result with Linear Regression \n", 307 | "lin.predict([[110.0]])" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 14, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "array([0.43295877])" 319 | ] 320 | }, 321 | "execution_count": 14, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "# Predicting a new result with Polynomial Regression \n", 328 | "lin2.predict(poly.fit_transform([[110.0]]))" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "- [Reference](https://www.geeksforgeeks.org/python-implementation-of-polynomial-regression/)\n", 336 | "\n", 337 | "### More examples\n", 338 | "\n", 339 | "- [link1](https://www.geeksforgeeks.org/polynomial-regression-for-non-linear-data-ml/?ref=rp)\n", 340 | "- 
[link2](https://towardsdatascience.com/machine-learning-polynomial-regression-with-python-5328e4e8a386)\n", 341 | "- [link3](https://medium.com/kharpann/performing-polynomial-regression-using-python-840eb666bfd8)\n", 342 | "- [link4](https://towardsdatascience.com/introduction-to-linear-regression-and-polynomial-regression-f8adc96f31cb)\n", 343 | "- [link5](https://github.com/rickwierenga/MLFundamentals/blob/master/1_Polynomial_Regression.ipynb)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.7.4" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /Python Cheat Sheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Python Cheat Sheet.pdf -------------------------------------------------------------------------------- /Python Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Functions\n", 8 | "\n", 9 | "- A function is a block of reusable code that is used to perform a specific action\n", 10 | "- It helps in eliminating the need to write the same code over and over again\n", 11 | "- Built-in functions | User defined functions\n", 12 | "#### 
General syntax:\n", 13 | " - def function_name(parameter list/argument):\n", 14 | " - statements, i.e. the function body\n", 15 | " \n", 16 | "- Refer to this link for details of built-in functions: https://www.w3schools.com/python/python_ref_functions.asp" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Welcome to Small Group Coaching Session\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "def my_function():\n", 34 | " print(\"Welcome to Small Group Coaching Session\")\n", 35 | "\n", 36 | "my_function()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "Hello Learners\n", 49 | "Hello Everyone\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "# Passing Arguments\n", 55 | "def my_function(name):\n", 56 | " print(\"Hello \" + name)\n", 57 | "\n", 58 | "my_function(\"Learners\")\n", 59 | "my_function(\"Everyone\")\n", 60 | "\n", 61 | "# Always remember to pass all the arguments that were defined in the function while calling it" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "15" 73 | ] 74 | }, 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "# Return values\n", 82 | "\n", 83 | "def add_10(x):\n", 84 | " return 10 + x\n", 85 | "\n", 86 | "add_10(5)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "Welcome Harry\n", 99 | "Welcome Rachel\n", 100 | "Welcome Linda\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "# Arbitrary arguments\n", 106 | "# when you do 
not know how many arguments are to be passed into a function\n", 107 | "\n", 108 | "def greet(*names):\n", 109 | " for name in names:\n", 110 | " print(\"Welcome\",name)\n", 111 | "\n", 112 | "greet(\"Harry\",\"Rachel\",\"Linda\") #argument is a tuple" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "Dog is a pet\n", 125 | "Cat is a pet\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "# Default Arguments\n", 131 | "\n", 132 | "def default_arg(disease = \"Dog\"):\n", 133 | " print(disease + \" is a pet\")\n", 134 | "\n", 135 | "default_arg()\n", 136 | "default_arg(\"Cat\")" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "#### Anonymous functions - Lambda\n", 144 | "\n", 145 | "- Functions that are defined without a name, i.e. without the def keyword, are called anonymous functions\n", 146 | "- The keyword for an anonymous function is lambda.\n", 147 | "- A lambda can take any number of arguments and returns the value of a single expression.\n", 148 | "- It can be directly printed\n", 149 | "\n", 150 | "##### Syntax of lambda Functions\n", 151 | "- lambda [argument1, argument2, argument3....]:expression" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 6, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "9\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "#Anonymous functions - lambda\n", 169 | "\n", 170 | "square = lambda x: x*x\n", 171 | "print(square(3))" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 7, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]" 183 | ] 184 | }, 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 
189 | ], 190 | "source": [ 191 | "squares = list(map(lambda x: x**2, range(10)))\n", 192 | "squares" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "[16, 4, 4, 9]" 204 | ] 205 | }, 206 | "execution_count": 8, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "vec = [4, 2, -2, 3]\n", 213 | "squares_1 = list(map(lambda x: x**2, vec))\n", 214 | "squares_1 " 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "#### Recursive functions\n", 222 | "\n", 223 | "- A Python function can call other functions. It can even call itself. A function that calls itself is called a recursive function\n", 224 | "- Example-1: Finding the factorial of an integer\n", 225 | "- Example-2: Finding the fibonacci series\n", 226 | " - Factorial of a number is the product of all the integers from 1 to that number. For example, the factorial of 4 is `4! = 1*2*3*4 = 24`\n", 227 | " - The Fibonacci series is the series of numbers starting with 0 and 1. Each subsequent number is found by adding up the two numbers before it: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, ..." 
228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 9, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "Factorial of 4 is 24\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "# Factorial\n", 245 | "def find_factorial(x):\n", 246 | " if x == 1:\n", 247 | " return 1\n", 248 | " else:\n", 249 | " return (x * find_factorial(x-1))\n", 250 | "\n", 251 | "\n", 252 | "a = 4\n", 253 | "print(\"Factorial of \", a, \"is\", find_factorial(a))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 10, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "name": "stdout", 263 | "output_type": "stream", 264 | "text": [ 265 | "Recursion Results\n", 266 | "1\n", 267 | "3\n", 268 | "6\n", 269 | "10\n", 270 | "15\n" 271 | ] 272 | }, 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "15" 277 | ] 278 | }, 279 | "execution_count": 10, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "# Recursive running sum: recur(k) = k + (k-1) + ... + 1 (triangular numbers, not the Fibonacci series)\n", 286 | "def recur(k):\n", 287 | " if(k > 0):\n", 288 | " result = k + recur(k-1)\n", 289 | " print(result)\n", 290 | " else:\n", 291 | " result = 0\n", 292 | " return result\n", 293 | "\n", 294 | "print(\"Recursion Results\")\n", 295 | "recur(5)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "##### Note:\n", 303 | "- If the base case of recursion is not defined properly, the recursion never terminates on its own (Python eventually raises RecursionError)\n", 304 | "- Debugging is difficult because the function calls itself repeatedly\n", 305 | "- Reference: https://beginnersbook.com/2018/02/python-recursion/" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 11, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "please enter value for key no 0 :1234567890\n", 318 | "please enter value for key no 1 
:23\n", 319 | "please enter value for key no 2 :2\n", 320 | "please enter value for key no 3 :234\n", 321 | "please enter value for key no 4 :1234\n", 322 | "please enter value for key no 5 :1236\n", 323 | "please enter value for key no 6 :677\n", 324 | "please enter value for key no 7 :78\n", 325 | "please enter value for key no 8 :89\n", 326 | "please enter value for key no 9 :67\n", 327 | "{'key no 0': '1234567890', 'key no 1': '23', 'key no 2': '2', 'key no 3': '234', 'key no 4': '1234', 'key no 5': '1236', 'key no 6': '677', 'key no 7': '78', 'key no 8': '89', 'key no 9': '67'}\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "# Extras: Restricting the value entry in a dictionary\n", 333 | "# Challenge: Find the bug with this code and post in comments\n", 334 | "\n", 335 | "dict1={}\n", 336 | "key='key no '\n", 337 | "i=0\n", 338 | "while i<10:\n", 339 | " try:\n", 340 | " value=input('please enter value for '+key+str(i)+' :')\n", 341 | " if len(value)<=10 and value.isnumeric()==True:\n", 342 | " dict1[key+str(i)]=value\n", 343 | " i+=1\n", 344 | " except:\n", 345 | " print('wrong value try again')\n", 346 | " i-=1\n", 347 | "print(dict1)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 12, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "13\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "# Extras: function to print the nth value of fibonacci series\n", 365 | "def Fibonacci(n):\n", 366 | " if n<0:\n", 367 | " print(\"Incorrect input\")\n", 368 | " elif n==1:\n", 369 | " return 0\n", 370 | " elif n==2:\n", 371 | " return 1\n", 372 | " else:\n", 373 | " return Fibonacci(n-1)+Fibonacci(n-2) \n", 374 | "\n", 375 | "print(Fibonacci(8))" 376 | ] 377 | } 378 | ], 379 | "metadata": { 380 | "kernelspec": { 381 | "display_name": "Python 3", 382 | "language": "python", 383 | "name": "python3" 384 | }, 385 | "language_info": { 386 | "codemirror_mode": { 387 | 
"name": "ipython", 388 | "version": 3 389 | }, 390 | "file_extension": ".py", 391 | "mimetype": "text/x-python", 392 | "name": "python", 393 | "nbconvert_exporter": "python", 394 | "pygments_lexer": "ipython3", 395 | "version": "3.7.4" 396 | } 397 | }, 398 | "nbformat": 4, 399 | "nbformat_minor": 2 400 | } 401 | -------------------------------------------------------------------------------- /Python Loops.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python Loops\n", 8 | "\n", 9 | "### While Loop\n", 10 | " - While loop is used to execute a set of statements repeatedly until a given a condition is satisfied. When the condition becomes false, the code immediately after the loop is executed" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Simple program to illustrate while loop\n", 20 | "\n", 21 | "i = 0\n", 22 | "while (i < 4): \n", 23 | " i = i + 1\n", 24 | " print(\"Good Luck Learning Python Loops!\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Print i as long as i is less than 5:\n", 34 | "\n", 35 | "i = 1\n", 36 | "while i < 5:\n", 37 | " print(i)\n", 38 | " i += 1" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### While + Break\n", 46 | " - Including break statement in while loop can stop the loop even if the while condition is true" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Exit the loop when i is 4:\n", 56 | "\n", 57 | "i = 1\n", 58 | "while i < 5:\n", 59 | " print(i)\n", 60 | " if i == 4:\n", 61 | " break\n", 62 | " #i += 1, try this code by removing the # to learn the importance of 
break statement\n", 63 | " # and falling into a trap of never ending loop \n", 64 | " i += 1" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### While + Continue\n", 72 | "- Including continue statement with while loop can stop the current iteration and continue with the next" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "2\n", 85 | "3\n", 86 | "5\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "# Continue to the next iteration if i is 4:\n", 92 | "i = 1\n", 93 | "while i < 5:\n", 94 | " i += 1\n", 95 | " if i == 4:\n", 96 | " continue\n", 97 | " print(i)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "### While + Else\n", 105 | "- The else clause is executed only when your while condition becomes false." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Good Luck Learning Python Loops!\n", 118 | "Good Luck Learning Python Loops!\n", 119 | "Good Luck Learning Python Loops!\n", 120 | "Good Luck Learning Python Loops!\n", 121 | "Good Luck Learning Python Loops!\n", 122 | "Keep it up!\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "i = 0\n", 128 | "while (i < 5): \n", 129 | " i = i + 1\n", 130 | " print(\"Good Luck Learning Python Loops!\") \n", 131 | "else: \n", 132 | " print(\"Keep it up!\")" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 6, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "1\n", 145 | "2\n", 146 | "3\n", 147 | "4\n", 148 | "i is not less than 5\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# Print a message once the condition is 
false:\n", 154 | "i = 1\n", 155 | "while i < 5:\n", 156 | " print(i)\n", 157 | " i += 1\n", 158 | "else:\n", 159 | " print(\"i is not less than 5\")" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### For\n", 167 | "- For loop is used for iterating over a sequence (i.e. either a list, a tuple, a dictionary, a set, or a string).With for loop we can execute a set of statements, once for each item in a list, tuple, set etc.\n", 168 | "- For loop does not require an indexing variable to set beforehand" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 7, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "List Iteration:\n", 181 | "1. Good\n", 182 | "2. Better\n", 183 | "3. Best\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "# List Iteration\n", 189 | "print(\"List Iteration:\") \n", 190 | "a = [\"1. Good\", \"2. Better\", \"3. Best\"] \n", 191 | "for i in a: \n", 192 | " print(i)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "Tuple Iteration:\n", 205 | "1. Good\n", 206 | "2. Better\n", 207 | "3. Best\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "# Tuple Iteration\n", 213 | "print(\"Tuple Iteration:\") \n", 214 | "a = (\"1. Good\", \"2. Better\", \"3. 
Best\")\n", 215 | "for i in a:\n", 216 | " print(i)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 9, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "String Iteration:\n", 229 | "G\n", 230 | "o\n", 231 | "o\n", 232 | "d\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "# String Iteration\n", 238 | "print(\"String Iteration:\") \n", 239 | "a = \"Good\"\n", 240 | "for i in a:\n", 241 | " print(i)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 10, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "Dictionary Iteration:\n", 254 | "Good 1\n", 255 | "Better 2\n", 256 | "Best 3\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "# Dictionary Iteration\n", 262 | "print(\"Dictionary Iteration:\") \n", 263 | "a = dict() \n", 264 | "a['Good'] = 1\n", 265 | "a['Better'] = 2\n", 266 | "a['Best'] = 3\n", 267 | "for i in a : \n", 268 | " print(\"%s %d\" %(i, a[i]))\n", 269 | "# Refer think link to know more about %s & %d\n", 270 | "# https://stackoverflow.com/questions/4288973/whats-the-difference-between-s-and-d-in-python-string-formatting" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "### For + Break\n", 278 | "- Including break statement in for loop can stop the loop before it has looped through all the items" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 11, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "yellow\n", 291 | "orange\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "# Exit the loop when x is \"orange\"\n", 297 | "color = [\"yellow\", \"orange\", \"red\"]\n", 298 | "for x in color:\n", 299 | " print(x)\n", 300 | " if x == \"orange\":\n", 301 | " break" 302 | ] 303 | }, 304 | { 305 
| "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "### For + Continue\n", 309 | "- Including continue statement in for loop can stop the current iteration of the loop, and continue with the next" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 12, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "name": "stdout", 319 | "output_type": "stream", 320 | "text": [ 321 | "yellow\n", 322 | "red\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "# Do not print orange\n", 328 | "color = [\"yellow\", \"orange\", \"red\"]\n", 329 | "for x in color:\n", 330 | " if x == \"orange\":\n", 331 | " continue\n", 332 | " print(x)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "### For + Range\n", 340 | "- range() function can be used in for loop to loop through a set of code a specified number of times\n", 341 | "- range() function defaults to 0 as a starting value\n", 342 | "- However it is possible to specify the starting value by adding a parameter\n", 343 | "- range(1, 5) which means values from 1 to 5 but not including 5\n", 344 | "- range() function defaults to increment the sequence by 1\n", 345 | "- However it is possible to specify the increment value by adding a third parameter - Ex: range(2, 20, 2)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 13, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stdout", 355 | "output_type": "stream", 356 | "text": [ 357 | "0\n", 358 | "1\n", 359 | "2\n", 360 | "3\n", 361 | "4\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "# print numbers within specified range\n", 367 | "for x in range(5):\n", 368 | " print(x)" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 14, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "1\n", 381 | "2\n", 382 | "3\n", 383 | "4\n" 384 | ] 385 | } 
386 | ], 387 | "source": [ 388 | "# specify the starting value\n", 389 | "for x in range(1, 5):\n", 390 | " print(x)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 15, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "2\n", 403 | "4\n", 404 | "6\n", 405 | "8\n", 406 | "10\n", 407 | "12\n", 408 | "14\n", 409 | "16\n", 410 | "18\n" 411 | ] 412 | } 413 | ], 414 | "source": [ 415 | "# specify the increment value\n", 416 | "for x in range(2, 20, 2):\n", 417 | " print(x)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "markdown", 422 | "metadata": {}, 423 | "source": [ 424 | "### For + Else\n", 425 | "- Including else in a for loop specifies a set of code to be executed when the loop is finished" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 16, 431 | "metadata": {}, 432 | "outputs": [ 433 | { 434 | "name": "stdout", 435 | "output_type": "stream", 436 | "text": [ 437 | "0\n", 438 | "1\n", 439 | "2\n", 440 | "3\n", 441 | "4\n", 442 | "Going good so far\n" 443 | ] 444 | } 445 | ], 446 | "source": [ 447 | "# Print all numbers from 0 to 4 and print a message when the loop has ended:\n", 448 | "for x in range(5):\n", 449 | " print(x)\n", 450 | "else:\n", 451 | " print(\"Going good so far\")" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "### Nested Loop\n", 459 | "- A loop inside a loop is called nested loop\n", 460 | "- The inner loop will be executed one time for each iteration of the outer loop" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 17, 466 | "metadata": {}, 467 | "outputs": [ 468 | { 469 | "name": "stdout", 470 | "output_type": "stream", 471 | "text": [ 472 | "red sedan\n", 473 | "red hatchback\n", 474 | "red wagon\n", 475 | "blue sedan\n", 476 | "blue hatchback\n", 477 | "blue wagon\n", 478 | "white sedan\n", 479 | "white hatchback\n", 480 | 
"white wagon\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "color = [\"red\", \"blue\", \"white\"]\n", 486 | "car = [\"sedan\", \"hatchback\", \"wagon\"]\n", 487 | "\n", 488 | "for x in color:\n", 489 | " for y in car:\n", 490 | " print(x, y)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 18, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "name": "stdout", 500 | "output_type": "stream", 501 | "text": [ 502 | "1 \n", 503 | "2 2 \n", 504 | "3 3 3 \n", 505 | "4 4 4 4 \n" 506 | ] 507 | } 508 | ], 509 | "source": [ 510 | "# Nested For loop - Printing pattern using a range of numbers\n", 511 | "for i in range(1, 5): \n", 512 | " for j in range(i): \n", 513 | " print(i, end=' ') \n", 514 | " print()" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 19, 520 | "metadata": {}, 521 | "outputs": [ 522 | { 523 | "name": "stdout", 524 | "output_type": "stream", 525 | "text": [ 526 | "* \n", 527 | "* * \n", 528 | "* * * \n", 529 | "* * * * \n", 530 | "* * * * * \n", 531 | "* * * * \n", 532 | "* * * \n", 533 | "* * \n", 534 | "* \n" 535 | ] 536 | } 537 | ], 538 | "source": [ 539 | "# Nested For loop - Printing pattern\n", 540 | "str1=''\n", 541 | "for i in range(0,9):\n", 542 | " if i<5:\n", 543 | " str1 += '* '\n", 544 | " print(str1)\n", 545 | " elif i>4:\n", 546 | " str1 = str1[:-2]\n", 547 | " print(str1)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 20, 553 | "metadata": {}, 554 | "outputs": [ 555 | { 556 | "name": "stdout", 557 | "output_type": "stream", 558 | "text": [ 559 | "1 , 5\n", 560 | "2 , 6\n", 561 | "3 , 7\n" 562 | ] 563 | } 564 | ], 565 | "source": [ 566 | "# Nested while loop\n", 567 | "i = 1\n", 568 | "j = 5\n", 569 | "while i < 4:\n", 570 | " while j < 8:\n", 571 | " print(i, \",\", j)\n", 572 | " j = j + 1\n", 573 | " i = i + 1" 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "#### Extras\n", 581 | "1. 
Iterating using index\n", 582 | " - Index of elements can be used to iterate. Find the length of the list and then iterate within the range of length\n", 583 | "2. Pass statement to skip empty for loop \n", 584 | " - A for loop cannot have an empty body. However, a pass statement can be used as a do-nothing placeholder body. Pass is also used for empty control statements, functions, etc." 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 21, 590 | "metadata": {}, 591 | "outputs": [ 592 | { 593 | "name": "stdout", 594 | "output_type": "stream", 595 | "text": [ 596 | "Have\n", 597 | "Fun\n", 598 | "Learning\n" 599 | ] 600 | } 601 | ], 602 | "source": [ 603 | "# 1. Iterating using index\n", 604 | " \n", 605 | "mylist = [\"Have\", \"Fun\", \"Learning\"] \n", 606 | "for index in range(len(mylist)): \n", 607 | " print(mylist[index])" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": 22, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "# 2. 
Pass statement in for loop\n", 617 | "for x in [0, 2, 1]:\n", 618 | " pass" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": 23, 624 | "metadata": {}, 625 | "outputs": [ 626 | { 627 | "name": "stdout", 628 | "output_type": "stream", 629 | "text": [ 630 | "Last Letter : y\n" 631 | ] 632 | } 633 | ], 634 | "source": [ 635 | "for letter in 'have a great day': \n", 636 | " pass\n", 637 | "print('Last Letter :', letter)" 638 | ] 639 | } 640 | ], 641 | "metadata": { 642 | "kernelspec": { 643 | "display_name": "Python 3", 644 | "language": "python", 645 | "name": "python3" 646 | }, 647 | "language_info": { 648 | "codemirror_mode": { 649 | "name": "ipython", 650 | "version": 3 651 | }, 652 | "file_extension": ".py", 653 | "mimetype": "text/x-python", 654 | "name": "python", 655 | "nbconvert_exporter": "python", 656 | "pygments_lexer": "ipython3", 657 | "version": "3.7.4" 658 | } 659 | }, 660 | "nbformat": 4, 661 | "nbformat_minor": 2 662 | } 663 | -------------------------------------------------------------------------------- /Python-Patterns.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pattern programs in Python" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Triangle Pattern" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 18, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | " * \r\n", 27 | " * * \r\n", 28 | " * * * \r\n", 29 | " * * * * \r\n", 30 | " * * * * * \r\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "def trianglepattern(n): \n", 36 | " \n", 37 | " # number of spaces \n", 38 | " k = 2*n - 2\n", 39 | " \n", 40 | " # loop to handle number of rows \n", 41 | " for i in range(0, n): \n", 42 | " \n", 43 | " # loop to handle number of spaces \n", 44 | " # 
change the value as per need\n", 45 | " for j in range(0, k): \n", 46 | " print(end=\" \") \n", 47 | " \n", 48 | " # decreasing k after each loop \n", 49 | " k = k - 1\n", 50 | " \n", 51 | " # loop to handle number of columns \n", 52 | " # change the value as per need\n", 53 | " for j in range(0, i+1): \n", 54 | " \n", 55 | " # printing stars \n", 56 | " print(\"* \", end=\"\") \n", 57 | " \n", 58 | " # ending line after each row \n", 59 | " print(\"\\r\") \n", 60 | "\n", 61 | " # Driver Code \n", 62 | "n = 5\n", 63 | "trianglepattern(n)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Inverted Triangle Pattern" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 22, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | " * * * * * * \r\n", 83 | " * * * * * \r\n", 84 | " * * * * \r\n", 85 | " * * * \r\n", 86 | " * * \r\n", 87 | " * \r\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "def pattern(n):\n", 93 | " k = 2*n -2\n", 94 | " for i in range(n,-1,-1):\n", 95 | " for j in range(k,0,-1):\n", 96 | " print(end=\" \")\n", 97 | " k = k +1\n", 98 | " for j in range(0, i+1):\n", 99 | " print(\"*\", end=\" \")\n", 100 | " print(\"\\r\")\n", 101 | "\n", 102 | "pattern(5)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "### Right Angled Triangle" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 57, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "* \r\n", 122 | "* * \r\n", 123 | "* * * \r\n", 124 | "* * * * \r\n", 125 | "* * * * * \r\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "def patternRT(n):\n", 131 | " for i in range(0,n):\n", 132 | " for j in range(0, i+1):\n", 133 | " print(\"* \" , end=\"\")\n", 134 | " print(\"\\r\")\n", 135 | "patternRT(5)" 136 | ] 137 | }, 138 | 
{ 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### Reverse Right Angled Triangle" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 30, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | " * \r\n", 155 | " * * \r\n", 156 | " * * * \r\n", 157 | " * * * * \r\n", 158 | "* * * * * \r\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "def traingleL(n):\n", 164 | " k = 2*n - 2\n", 165 | " for i in range(0, n):\n", 166 | " for j in range(0, k):\n", 167 | " print(end=\" \")\n", 168 | " k = k - 2\n", 169 | " for j in range(0, i+1):\n", 170 | " print(\"* \", end=\"\")\n", 171 | " print(\"\\r\") \n", 172 | "traingleL(5)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### 90 degree rotated triangle - Right" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 47, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "* \r\n", 192 | "* * \r\n", 193 | "* * * \r\n", 194 | "* * * * \r\n", 195 | "* * * * * \r\n", 196 | "* * * * \r\n", 197 | "* * * \r\n", 198 | "* * \r\n", 199 | "* \r\n", 200 | "\r\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "def triangle90R(n):\n", 206 | " for i in range (0, n):\n", 207 | " for j in range(0, i + 1):\n", 208 | " print(\"* \", end='')\n", 209 | " print(\"\\r\")\n", 210 | " for i in range (n, 0, -1):\n", 211 | " for j in range(0, i -1):\n", 212 | " print(\"* \", end='')\n", 213 | " print(\"\\r\")\n", 214 | "triangle90R(5)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "### 90 degree rotated triangle - Left" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 49, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 
233 | " * \r\n", 234 | " * * \r\n", 235 | " * * * \r\n", 236 | " * * * * \r\n", 237 | "* * * * * \r\n", 238 | " * * * * \r\n", 239 | " * * * \r\n", 240 | " * * \r\n", 241 | " * \r\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "def triangle90L(n):\n", 247 | " k = 2 * n - 2\n", 248 | " for i in range(0, n-1):\n", 249 | " for j in range(0, k):\n", 250 | " print(end=\" \")\n", 251 | " k = k - 2\n", 252 | " for j in range(0, i + 1):\n", 253 | " print(\"* \", end=\"\")\n", 254 | " print(\"\\r\")\n", 255 | " k = -1\n", 256 | " for i in range(n-1,-1,-1):\n", 257 | " for j in range(k,-1,-1):\n", 258 | " print(end=\" \")\n", 259 | " k = k + 2\n", 260 | " for j in range(0, i + 1):\n", 261 | " print(\"* \", end=\"\")\n", 262 | " print(\"\\r\")\n", 263 | " \n", 264 | " \n", 265 | "triangle90L(5)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "### Hour Glass Pattern" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 55, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "* * * * * * * * * * \r\n", 285 | " * * * * * * * * * \r\n", 286 | " * * * * * * * * \r\n", 287 | " * * * * * * * \r\n", 288 | " * * * * * * \r\n", 289 | " * * * * * \r\n", 290 | " * * * * \r\n", 291 | " * * * \r\n", 292 | " * * \r\n", 293 | " * \r\n", 294 | " * * \r\n", 295 | " * * * \r\n", 296 | " * * * * \r\n", 297 | " * * * * * \r\n", 298 | " * * * * * * \r\n", 299 | " * * * * * * * \r\n", 300 | " * * * * * * * * \r\n", 301 | " * * * * * * * * * \r\n", 302 | "* * * * * * * * * * \r\n" 303 | ] 304 | } 305 | ], 306 | "source": [ 307 | "def hourglass(n):\n", 308 | " for i in range (0,n):\n", 309 | " for j in range (0,i):\n", 310 | " print(\" \", end=\"\")\n", 311 | " for z in range (0,n-i):\n", 312 | " print(\"* \", end=\"\")\n", 313 | " print(\"\\r\")\n", 314 | " for l in range (0,n-1):\n", 315 | " for m in range (0,n-2-l):\n", 316 | " print(\" \", 
end=\"\")\n", 317 | " for a in range (0,l+2):\n", 318 | " print(\"* \", end=\"\")\n", 319 | " print(\"\\r\")\n", 320 | "hourglass(10)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "### Downward Right Angled Triangle" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 58, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "name": "stdout", 337 | "output_type": "stream", 338 | "text": [ 339 | "* * * * * * \r\n", 340 | "* * * * * \r\n", 341 | "* * * * \r\n", 342 | "* * * \r\n", 343 | "* * \r\n", 344 | "* \r\n" 345 | ] 346 | } 347 | ], 348 | "source": [ 349 | "def triangleD(n):\n", 350 | " for i in range(n, -1, -1):\n", 351 | " for j in range(0, i + 1):\n", 352 | " print(\"* \", end=\"\")\n", 353 | " print(\"\\r\")\n", 354 | "\n", 355 | "triangleD(5)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "### Diamond Pattern" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 60, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "name": "stdout", 372 | "output_type": "stream", 373 | "text": [ 374 | " * \r\n", 375 | " * * \r\n", 376 | " * * * \r\n", 377 | " * * * * \r\n", 378 | " * * * * * \r\n", 379 | " * * * * * * \r\n", 380 | " * * * * * \r\n", 381 | " * * * * \r\n", 382 | " * * * \r\n", 383 | " * * \r\n", 384 | " * \r\n" 385 | ] 386 | } 387 | ], 388 | "source": [ 389 | "def diamond(n):\n", 390 | " k = 2 * n - 2\n", 391 | " for i in range(0, n):\n", 392 | " for j in range(0 , k):\n", 393 | " print(end=\" \")\n", 394 | " k = k - 1\n", 395 | " for j in range(0 , i + 1 ):\n", 396 | " print(\"* \", end=\"\")\n", 397 | " print(\"\\r\")\n", 398 | " k = n - 2\n", 399 | " for i in range(n , -1, -1):\n", 400 | " for j in range(k , 0 , -1):\n", 401 | " print(end=\" \")\n", 402 | " k = k + 1\n", 403 | " for j in range(0 , i + 1):\n", 404 | " print(\"* \", end=\"\")\n", 405 | " print(\"\\r\")\n", 406 | "\n", 407 | 
"diamond(5)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "### Pattern - Pant" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 84, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "**************\n", 427 | "******__******\n", 428 | "*****____*****\n", 429 | "****______****\n", 430 | "***________***\n", 431 | "**__________**\n", 432 | "*____________*\n" 433 | ] 434 | } 435 | ], 436 | "source": [ 437 | "rows = 14\n", 438 | "print(\"*\" * rows, end=\"\\n\")\n", 439 | "i = (rows // 2) - 1\n", 440 | "j = 2\n", 441 | "while i != 0:\n", 442 | " while j <= (rows - 2):\n", 443 | " print(\"*\" * i, end=\"\")\n", 444 | " print(\"_\" * j, end=\"\")\n", 445 | " print(\"*\" * i, end=\"\\n\")\n", 446 | " i = i - 1\n", 447 | " j = j + 2\n" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": {}, 453 | "source": [ 454 | "### Pascal’s Triangle\n", 455 | "[link](https://www.mathsisfun.com/pascals-triangle.html)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 69, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "1 \n", 468 | "1 1 \n", 469 | "1 2 1 \n", 470 | "1 3 3 1 \n", 471 | "1 4 6 4 1 \n", 472 | "1 5 10 10 5 1 \n", 473 | "1 6 15 20 15 6 1 \n", 474 | "1 7 21 35 35 21 7 1 \n" 475 | ] 476 | } 477 | ], 478 | "source": [ 479 | "def pascal(n):\n", 480 | " for i in range(0, n):\n", 481 | " for j in range(0, i + 1):\n", 482 | " print(function(i, j),\" \", end=\"\")\n", 483 | " print()\n", 484 | " \n", 485 | "def function(n, k):\n", 486 | " res = 1\n", 487 | " if (k > n - k):\n", 488 | " k = n - k\n", 489 | " for i in range(0, k):\n", 490 | " res = res * (n - i)\n", 491 | " res = res // (i + 1)\n", 492 | " \n", 493 | " return res\n", 494 | "\n", 495 | "pascal(8)" 496 | ] 497 | }, 498 | { 499 | 
"cell_type": "code", 500 | "execution_count": 76, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": [ 507 | "1 \r\n", 508 | "1 2 \r\n", 509 | "1 2 3 \r\n", 510 | "1 2 3 4 \r\n" 511 | ] 512 | } 513 | ], 514 | "source": [ 515 | "def patternNum(n):\n", 516 | " for i in range(1,n):\n", 517 | " for j in range(1, i+1):\n", 518 | " print(j , end=\" \")\n", 519 | " print(\"\\r\")\n", 520 | "patternNum(5)" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 75, 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "name": "stdout", 530 | "output_type": "stream", 531 | "text": [ 532 | "1 \r\n", 533 | "2 2 \r\n", 534 | "3 3 3 \r\n", 535 | "4 4 4 4 \r\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "def patternNum2(n):\n", 541 | " for i in range(1,n):\n", 542 | " for j in range(1, i+1):\n", 543 | " print(i , end=\" \")\n", 544 | " print(\"\\r\")\n", 545 | "patternNum2(5)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 77, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "name": "stdout", 555 | "output_type": "stream", 556 | "text": [ 557 | "A \r\n", 558 | "B B \r\n", 559 | "C C C \r\n", 560 | "D D D D \r\n", 561 | "E E E E E \r\n" 562 | ] 563 | } 564 | ], 565 | "source": [ 566 | "def alphatriangle(n):\n", 567 | " x = 65\n", 568 | " for i in range(0, n):\n", 569 | " ch = chr(x)\n", 570 | " x += 1\n", 571 | " for j in range(0, i + 1):\n", 572 | " print(ch, end=\" \")\n", 573 | " print(\"\\r\")\n", 574 | "\n", 575 | "alphatriangle(5)" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 79, 581 | "metadata": {}, 582 | "outputs": [ 583 | { 584 | "name": "stdout", 585 | "output_type": "stream", 586 | "text": [ 587 | "1 1 1 1 1 \r\n", 588 | "2 2 2 2 \r\n", 589 | "3 3 3 \r\n", 590 | "4 4 \r\n", 591 | "5 \r\n" 592 | ] 593 | } 594 | ], 595 | "source": [ 596 | "rows = 5\n", 597 | "b = 0\n", 598 | "for i in range(rows, 0, -1):\n", 
599 | " b += 1\n", 600 | " for j in range(1, i + 1):\n", 601 | " print(b, end=' ')\n", 602 | " print('\\r')" 603 | ] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "### Inverted Triangle with same number" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 80, 615 | "metadata": {}, 616 | "outputs": [ 617 | { 618 | "name": "stdout", 619 | "output_type": "stream", 620 | "text": [ 621 | "5 5 5 5 5 \r\n", 622 | "5 5 5 5 \r\n", 623 | "5 5 5 \r\n", 624 | "5 5 \r\n", 625 | "5 \r\n" 626 | ] 627 | } 628 | ], 629 | "source": [ 630 | "rows = 5\n", 631 | "num = rows\n", 632 | "for i in range(rows, 0, -1):\n", 633 | " for j in range(0, i):\n", 634 | " print(num, end=' ')\n", 635 | " print(\"\\r\")" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "### Inverted Triangle with descending order of numbers" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": 81, 648 | "metadata": {}, 649 | "outputs": [ 650 | { 651 | "name": "stdout", 652 | "output_type": "stream", 653 | "text": [ 654 | "5 5 5 5 5 \r\n", 655 | "4 4 4 4 \r\n", 656 | "3 3 3 \r\n", 657 | "2 2 \r\n", 658 | "1 \r\n" 659 | ] 660 | } 661 | ], 662 | "source": [ 663 | "rows = 5\n", 664 | "for i in range(rows, 0, -1):\n", 665 | " num = i\n", 666 | " for j in range(0, i):\n", 667 | " print(num, end=' ')\n", 668 | " print(\"\\r\")" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "### Triangle with numbers in reverse order" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": 82, 681 | "metadata": {}, 682 | "outputs": [ 683 | { 684 | "name": "stdout", 685 | "output_type": "stream", 686 | "text": [ 687 | "1 \n", 688 | "2 1 \n", 689 | "3 2 1 \n", 690 | "4 3 2 1 \n", 691 | "5 4 3 2 1 \n" 692 | ] 693 | } 694 | ], 695 | "source": [ 696 | "rows = 6\n", 697 | "for row in range(1, rows):\n", 698 | " for column in 
range(row, 0, -1):\n", 699 | " print(column, end=' ')\n", 700 | " print(\"\")\n" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "### Display 1 to 10 number in triangle pattern" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 83, 713 | "metadata": {}, 714 | "outputs": [ 715 | { 716 | "name": "stdout", 717 | "output_type": "stream", 718 | "text": [ 719 | "1 \n", 720 | "2 3 \n", 721 | "4 5 6 \n", 722 | "7 8 9 10 \n" 723 | ] 724 | } 725 | ], 726 | "source": [ 727 | "currentNumber = 1\n", 728 | "rows = 4 # Rows you want in your pattern\n", 729 | "stop = 2\n", 730 | "for i in range(rows):\n", 731 | " for column in range(1, stop):\n", 732 | " print(currentNumber, end=' ')\n", 733 | " currentNumber += 1\n", 734 | " print(\"\")\n", 735 | " stop += 1" 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": {}, 741 | "source": [ 742 | "### Pattern with alternate numbers" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 85, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "name": "stdout", 752 | "output_type": "stream", 753 | "text": [ 754 | "1 \n", 755 | "3 3 \n", 756 | "5 5 5 \n", 757 | "7 7 7 7 \n", 758 | "9 9 9 9 9 \n" 759 | ] 760 | } 761 | ], 762 | "source": [ 763 | "rows = 5\n", 764 | "i = 1\n", 765 | "while i <= rows:\n", 766 | " j = 1\n", 767 | " while j <= i:\n", 768 | " print((i * 2 - 1), end=\" \")\n", 769 | " j = j + 1\n", 770 | " i = i + 1\n", 771 | " print()" 772 | ] 773 | }, 774 | { 775 | "cell_type": "markdown", 776 | "metadata": {}, 777 | "source": [ 778 | "### Pattern with Even numbers" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 86, 784 | "metadata": {}, 785 | "outputs": [ 786 | { 787 | "name": "stdout", 788 | "output_type": "stream", 789 | "text": [ 790 | "10 \r\n", 791 | "10 8 \r\n", 792 | "10 8 6 \r\n", 793 | "10 8 6 4 \r\n", 794 | "10 8 6 4 2 \r\n" 795 | ] 796 | } 797 | ], 798 | "source": 
[ 799 | "rows = 5\n", 800 | "LastEvenNumber = 2 * rows\n", 801 | "evenNumber = LastEvenNumber\n", 802 | "for i in range(1, rows+1):\n", 803 | " evenNumber = LastEvenNumber\n", 804 | " for j in range(i):\n", 805 | " print(evenNumber, end=' ')\n", 806 | " evenNumber -= 2\n", 807 | " print(\"\\r\")" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [] 816 | } 817 | ], 818 | "metadata": { 819 | "kernelspec": { 820 | "display_name": "Python 3", 821 | "language": "python", 822 | "name": "python3" 823 | }, 824 | "language_info": { 825 | "codemirror_mode": { 826 | "name": "ipython", 827 | "version": 3 828 | }, 829 | "file_extension": ".py", 830 | "mimetype": "text/x-python", 831 | "name": "python", 832 | "nbconvert_exporter": "python", 833 | "pygments_lexer": "ipython3", 834 | "version": "3.7.4" 835 | } 836 | }, 837 | "nbformat": 4, 838 | "nbformat_minor": 2 839 | } 840 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bite-Sized-Learning-Python 2 | This contains key Python & ML topics in a condensed format to quickly revise the basics 3 | -------------------------------------------------------------------------------- /Regular Expressions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RegEx:\n", 8 | "- RegEx (Regular Expression) is a sequence of characters that forms a search pattern\n", 9 | "- RegEx can be used to check if a string contains the specified search pattern\n", 10 | "\n", 11 | "## How to install?\n", 12 | "- `!pip install regex` - Run this command in jupyter notebook" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 
| "import re" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## RegEx Functions\n", 29 | "\n", 30 | "The re module offers a set of functions that allows us to search a string for a match:\n", 31 | "\n", 32 | "- findall - Returns a list containing all matches\n", 33 | "- search - Returns a Match object if there is a match anywhere in the string\n", 34 | "- split\t- Returns a list where the string has been split at each match\n", 35 | "- sub\t- Replaces one or many matches with a string" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Metacharacters\n", 43 | "|Character | Description | Example |\n", 44 | "| ------------- |:-------------:| ------:|\n", 45 | "`[]`|\tA set of characters\t|\"[a-m]\"\t\n", 46 | "`\\`\t|Signals a special sequence (can also be used to escape special characters)\t|\"\\d\"\t\n", 47 | "`.`\t|Any character (except newline character)\t|\"he..o\"\t\n", 48 | "`^`\t|Starts with\t|\"^hello\"\t\n", 49 | "`$`\t|Ends with\t|\"world$\"\t\n", 50 | "`*`\t|Zero or more occurrences\t|\"aix*\"\t\n", 51 | "`+`\t|One or more occurrences\t|\"aix+\"\t\n", 52 | "`{}` |Exactly the specified number of occurrences\t|\"al{2}\"\t\n", 53 | "`|`\t|Either or\t|\"falls|stays\"\t\n", 54 | "`()`| Capture and group\t " 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### Special Sequences\n", 62 | "|Character | Description | Example |\n", 63 | "| ------------- |:-------------:| ------:|\n", 64 | "`\\A`\t| Returns a match if the specified characters are at the beginning of the string\t| \"\\AThe\"\t\n", 65 | "`\\b`\t| Returns a match where the specified characters are at the beginning or at the end of a word\t| r\"\\bain\", r\"ain\\b\"\n", 66 | "`\\B`\t| Returns a match where the specified characters are present, but NOT at the beginning (or at the end) of a word| r\"\\Bain\" , r\"ain\\B\"\t\n", 67 | "`\\d`\t| Returns a match 
where the string contains digits (numbers from 0-9)\t| \"\\d\"\t\n", 68 | "`\\D`\t| Returns a match where the string DOES NOT contain digits\t| \"\\D\"\t\n", 69 | "`\\s`\t| Returns a match where the string contains a white space character\t| \"\\s\"\t\n", 70 | "`\\S`\t| Returns a match where the string DOES NOT contain a white space character\t| \"\\S\"\t\n", 71 | "`\\w`\t| Returns a match where the string contains any word characters (characters from a to Z, digits from 0-9, and the underscore _ character)\t| \"\\w\"\t\n", 72 | "`\\W`\t| Returns a match where the string DOES NOT contain any word characters\t| \"\\W\"\t\n", 73 | "`\\Z`\t| Returns a match if the specified characters are at the end of the string\t| \"India\\Z\"" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Sets\n", 81 | "|Set | Description |\n", 82 | "| ------------- |:-------------:|\n", 83 | "`[arn]`\t|Returns a match where one of the specified characters (a, r, or n) are present\t\n", 84 | "`[a-n]`\t|Returns a match for any lower case character, alphabetically between a and n\t\n", 85 | "`[^arn]`\t|Returns a match for any character EXCEPT a, r, and n\t\n", 86 | "`[0123]`\t|Returns a match where any of the specified digits (0, 1, 2, or 3) are present\t\n", 87 | "`[0-9]`\t|Returns a match for any digit between 0 and 9\t\n", 88 | "`[0-5][0-9]`\t|Returns a match for any two-digit numbers from 00 and 59\t\n", 89 | "`[a-zA-Z]`\t|Returns a match for any character alphabetically between a and z, lower case OR upper case\t\n", 90 | "`[+]`\t|In sets, +, *, ., |, (), $,{} has no special meaning, so [+] means: return a match for any + character in the string" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### Supported Regular Expression Flags\n", 98 | "|Short Name | Long Name | Effect |\n", 99 | "| ------------- |:-------------:| ------:|\n", 100 | "re.I\t|re.IGNORECASE\t|Makes matching of alphabetic 
characters case-insensitive\n", 101 | "re.M\t|re.MULTILINE\t|Causes start-of-string and end-of-string anchors to match embedded newlines\n", 102 | "re.S\t|re.DOTALL\t|Causes the dot metacharacter to match a newline\n", 103 | "re.X\t|re.VERBOSE\t|Allows inclusion of whitespace and comments within a regular expression\n", 104 | "----\t|re.DEBUG\t|Causes the regex parser to display debugging information to the console\n", 105 | "re.A\t|re.ASCII\t|Specifies ASCII encoding for character classification\n", 106 | "re.U\t|re.UNICODE\t|Specifies Unicode encoding for character classification\n", 107 | "re.L |re.LOCALE\t|Specifies encoding for character classification based on the current locale" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### search() function\n", 115 | "Search the string to see if it starts with \"The\" and ends with \"good\":" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 2, 121 | "metadata": { 122 | "scrolled": true 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "Found a match\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "text = \"The food was good\"\n", 135 | "x = re.search(\"^The.*good$\", text)\n", 136 | "\n", 137 | "if x:\n", 138 | " print(\"Found a match\")\n", 139 | "else:\n", 140 | " print(\"No match\")" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### findall() function\n", 148 | "The findall() function returns a list containing all matches" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 3, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "['oo', 'oo']\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "text = \"The food was good\"\n", 166 | "x = re.findall(\"oo\", text)\n", 167 | "print(x)" 168 | ] 169 | }, 170 | { 171 | 
"cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "The first white-space character is located in position: 6\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "# Search for the first white-space character in the string:\n", 185 | "text = \"Indian food was good\"\n", 186 | "x = re.search(\"\\s\", text)\n", 187 | "\n", 188 | "print(\"The first white-space character is located in position:\", x.start())" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 17, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "['1729', '44']\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "# Program to extract numbers from a string\n", 206 | "\n", 207 | "import re\n", 208 | "\n", 209 | "string = 'hello 1729. How is 44'\n", 210 | "pattern = '\\d+'\n", 211 | "\n", 212 | "result = re.findall(pattern, string) \n", 213 | "print(result)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 6, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "a#12@gmail.com\n", 226 | "invalid\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "# Valid email ID\n", 232 | "# Given an email ID, you have to determine if it is valid or not\n", 233 | "import re\n", 234 | "def checkmail(email):\n", 235 | " check = bool(re.search(r\"^[\\w\\.\\+\\-]+\\@[A-Za-z]+\\.[a-z]{2,3}$\", email))\n", 236 | " if check:\n", 237 | " return 'valid'\n", 238 | " else:\n", 239 | " return 'invalid'\n", 240 | "email=input()\n", 241 | "print(checkmail(email))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "### split() function\n", 249 | "Split at each white-space character" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 7, 
255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "['Indian', 'food', 'was', 'good']\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "text = \"Indian food was good\"\n", 267 | "x = re.split(\"\\s\", text)\n", 268 | "print(x)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 8, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "['Indian', 'food was good']\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "# You can control the number of occurrences by specifying the maxsplit parameter\n", 286 | "text = \"Indian food was good\"\n", 287 | "x = re.split(\"\\s\", text, 1)\n", 288 | "print(x)" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "### sub() Function\n", 296 | "The sub() function replaces the matches with the text of your choice" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 9, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "Indian$food$was$good\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "# Replace every white-space character with the number $\n", 314 | "text = \"Indian food was good\"\n", 315 | "x = re.sub(\"\\s\", \"$\", text)\n", 316 | "print(x)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 10, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "name": "stdout", 326 | "output_type": "stream", 327 | "text": [ 328 | "Indian$food was good\n" 329 | ] 330 | } 331 | ], 332 | "source": [ 333 | "# You can control the number of replacements by specifying the count parameter\n", 334 | "text = \"Indian food was good\"\n", 335 | "x = re.sub(\"\\s\", \"$\", text, 1)\n", 336 | "print(x)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 11, 342 | "metadata": 
{}, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | "abc12de23f456\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "# Program to remove all whitespaces\n", 354 | "import re\n", 355 | "\n", 356 | "# multiline string\n", 357 | "string = 'abc 12\\\n", 358 | "de 23 \\n f45 6'\n", 359 | "\n", 360 | "# matches all whitespace characters\n", 361 | "pattern = '\\s+'\n", 362 | "\n", 363 | "# empty string\n", 364 | "replace = ''\n", 365 | "\n", 366 | "new_string = re.sub(pattern, replace, string) \n", 367 | "print(new_string)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 12, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "Invalid\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "'''\n", 385 | "# Password Validation\n", 386 | "The characteristics of a strong password include:\n", 387 | "1. it should be at least 8 characters long\n", 388 | "2. it should have at least one lowercase alphabet.\n", 389 | "3.it should have at least one uppercase alphabet\n", 390 | "4. it should have at least one number(0-9)\n", 391 | "5. 
it should have at least one special character( a special character is considered among the following: [@%$*])\n", 392 | "\n", 393 | "'''\n", 394 | "pwd= 'DataScience123'\n", 395 | "#write your code here\n", 396 | "import re \n", 397 | "flag = 0\n", 398 | "while True: \n", 399 | " if (len(pwd)<8): \n", 400 | " flag = -1\n", 401 | " break\n", 402 | " elif not re.search(\"[a-z]\", pwd): \n", 403 | " flag = -1\n", 404 | " break\n", 405 | " elif not re.search(\"[A-Z]\", pwd): \n", 406 | " flag = -1\n", 407 | " break\n", 408 | " elif not re.search(\"[0-9]\", pwd): \n", 409 | " flag = -1\n", 410 | " break\n", 411 | " elif not re.search(\"[@%$*]\", pwd): \n", 412 | " flag = -1\n", 413 | " break\n", 414 | " elif re.search(\"\\s\", pwd): \n", 415 | " flag = -1\n", 416 | " break\n", 417 | " else: \n", 418 | " flag = 0\n", 419 | " print(\"Valid\") \n", 420 | " break\n", 421 | "\n", 422 | "if flag ==-1:\n", 423 | " print(\"Invalid\")" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "### Match Object\n", 431 | "A Match Object is an object containing information about the search and the result" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 13, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "\n" 444 | ] 445 | } 446 | ], 447 | "source": [ 448 | "text = \"Indian food was good\"\n", 449 | "x = re.search(\"In\", text)\n", 450 | "print(x)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "For more details refer to the links below\n", 458 | "- [RegEx_1](https://www.w3schools.com/python/python_regex.asp)\n", 459 | "- [RegEx_2](https://realpython.com/regex-python/)" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 14, 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "name": "stdout", 469 | "output_type": "stream", 470 | "text": [ 471 | "[1, 2, 3, 4, 
5, 6, 7, 8, 9]\n" 472 | ] 473 | } 474 | ], 475 | "source": [ 476 | "# Extras\n", 477 | "# Some common interveiw questions\n", 478 | "\n", 479 | "input_list = [[1,2,3],[4,5],[6,7,8,9]]\n", 480 | "flat = []\n", 481 | "for sublist in input_list:\n", 482 | " for item in sublist:\n", 483 | " flat.append(item)\n", 484 | "print(flat)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": 15, 490 | "metadata": {}, 491 | "outputs": [ 492 | { 493 | "name": "stdout", 494 | "output_type": "stream", 495 | "text": [ 496 | "['a', 'c', 'd']\n" 497 | ] 498 | } 499 | ], 500 | "source": [ 501 | "# Given a string, you have to find the first n most frequent characters in it.\n", 502 | "# You have to print the three letters in alphabetically sorted order.\n", 503 | "from collections import Counter\n", 504 | "string= 'ddddaacccb'\n", 505 | "n=3\n", 506 | "\n", 507 | "a = Counter(string).most_common(n)\n", 508 | "b = [i[0] for i in a]\n", 509 | "b.sort()\n", 510 | "print(b)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [] 519 | } 520 | ], 521 | "metadata": { 522 | "kernelspec": { 523 | "display_name": "Python 3", 524 | "language": "python", 525 | "name": "python3" 526 | }, 527 | "language_info": { 528 | "codemirror_mode": { 529 | "name": "ipython", 530 | "version": 3 531 | }, 532 | "file_extension": ".py", 533 | "mimetype": "text/x-python", 534 | "name": "python", 535 | "nbconvert_exporter": "python", 536 | "pygments_lexer": "ipython3", 537 | "version": "3.7.4" 538 | } 539 | }, 540 | "nbformat": 4, 541 | "nbformat_minor": 2 542 | } 543 | -------------------------------------------------------------------------------- /Seaborn_Cheatsheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Seaborn_Cheatsheet.png 
-------------------------------------------------------------------------------- /Text to Speech.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Text To Speech\n", 8 | "\n", 9 | "## Text-to-Speech Technology\n", 10 | "\n", 11 | "- Text-to-speech (TTS) technology reads aloud digital text—the words on computers, smartphones and tablets\n", 12 | "- TTS can help kids who struggle with reading\n", 13 | "- There are TTS tools available for nearly every digital device\n", 14 | "\n", 15 | "#### Reference:-\n", 16 | " - [TTS](https://www.understood.org/en/school-learning/assistive-technology/assistive-technologies-basics/text-to-speech-technology-what-it-is-and-how-it-works)\n", 17 | " - [Speech Synthesis](https://en.wikipedia.org/wiki/Speech_synthesis)\n", 18 | "\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "#### Libraries to install\n", 26 | "\n", 27 | " - `!pip install gTTS`" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### Speech Synthesis - Process\n", 35 | "\n", 36 | "" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# Loading libraries\n", 46 | "\n", 47 | "from gtts import gTTS \n", 48 | "import os" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "- gTTS module needs internet and depends on google to get the audio data/conversion\n", 56 | "- [gTTS Documentation](https://gtts.readthedocs.io/en/latest/module.html)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Text to audio" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 2, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "text = \"Text-to-speech or TTS 
is a type of assistive technology that reads digital text aloud. It’s sometimes called “read aloud” technology.With a click of a button or the touch of a finger, TTS can take words on a computer or other digital device and convert them into audio\"" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "##### Selecting language" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "language = 'en'" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "##### Creating a variable and passing the text & language" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "speech = gTTS(text = text, lang = language, slow = False) \n", 105 | "#\" slow = False\" tells the module that after conversion the audio should have a high/normal speed" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "##### Saving the converted audio as a .mp3 file" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "speech.save(\"text.mp3\")" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "##### Playback the mp3 file" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "0" 140 | ] 141 | }, 142 | "execution_count": 6, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "os.system(\"start text.mp3\") # start command plays the audio in your local windows media player" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "## 
Text file to audio\n", 156 | "\n", 157 | "##### Reading the text file and storing it in a variable" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "file = open(\"Test-1.txt\", \"r\").read().replace(\"\\n\", \" \")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "##### Language selection" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 8, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "language = 'en'" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "##### Passing the text file to the module" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 9, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "speech = gTTS(text = str(file), lang = language, slow = False)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "##### Saving the converted audio as a .mp3 file" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 10, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "speech.save(\"Text1.mp3\")" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "##### Playback the mp3 file" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 11, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "0" 233 | ] 234 | }, 235 | "execution_count": 11, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "os.system(\"start Text1.mp3\")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "For more details, refer to [KDnuggets](https://www.kdnuggets.com/2020/05/easy-text-speech-python.html)" 249 | ] 250 
| }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "## Text from an online source to audio" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 12, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "#Loading libraries\n", 265 | "from newspaper import Article\n", 266 | "import nltk" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "##### Input online article link" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 13, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "article = Article('https://bernardmarr.com/default.asp?contentID=2066')" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "##### Download & Parse the article" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 14, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "article.download()\n", 299 | "article.parse()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "##### Download the ‘punkt’ package" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 15, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stderr", 316 | "output_type": "stream", 317 | "text": [ 318 | "[nltk_data] Downloading package punkt to\n", 319 | "[nltk_data] C:\\Users\\arock.000\\AppData\\Roaming\\nltk_data...\n", 320 | "[nltk_data] Package punkt is already up-to-date!\n" 321 | ] 322 | }, 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "True" 327 | ] 328 | }, 329 | "execution_count": 15, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "nltk.download('punkt')" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "##### Implement Natural Language Processing (NLP)" 343 | ] 344 | 
}, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 16, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "article.nlp()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "##### Creating a variable and storing the article's text in it" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 17, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "art_text = article.text" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "##### Language selection" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 18, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "language = 'en' #English" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "##### Passing the article text to the module" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 19, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "aiarticle = gTTS(text=art_text, lang=language, slow=False)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 20, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "aiarticle .save(\"ai_article.mp3\")" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 21, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "0" 420 | ] 421 | }, 422 | "execution_count": 21, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "# Playing the converted file\n", 429 | "os.system(\"start ai_article.mp3\")" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "[Reference](https://medium.com/@randerson112358/build-a-text-to-speech-program-using-python-b70de7105383)" 437 | ] 438 | }, 439 | { 440 | "cell_type": 
"markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "# Speech to Text" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "[Speech Recognition](https://en.wikipedia.org/wiki/Speech_recognition)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "#### Libraries to install\n", 458 | "\n", 459 | " - `!pip install speechrecognition`\n", 460 | " - `!pip install pyttsx3`\n", 461 | " - `!pip install pipwin`\n", 462 | " - `!pipwin install pyaudio`\n", 463 | "\n", 464 | "- [Documentation for SpeechRecognition Library](https://pypi.org/project/SpeechRecognition)\n", 465 | " \n", 466 | "##### Errors\n", 467 | "- [Pyaudio Installation Errors](https://stackoverflow.com/questions/53866104/pyaudio-failed-to-install-windows-10)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 22, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "Talk\n", 480 | "Time over, thanks\n", 481 | "Text: speech recognition system basically translate spoken languages into text one of the classic example is Apple Siri thank you\n" 482 | ] 483 | } 484 | ], 485 | "source": [ 486 | "#import library\n", 487 | "\n", 488 | "import speech_recognition as sr\n", 489 | "\n", 490 | "# Initialize recognizer to recognize the speech\n", 491 | "\n", 492 | "r = sr.Recognizer()\n", 493 | "\n", 494 | "# Reading the microphone, listening and storing the audio in a variable\n", 495 | "\n", 496 | "with sr.Microphone() as source:\n", 497 | " print(\"Talk\")\n", 498 | " audio_text = r.listen(source)\n", 499 | " print(\"Time over, thanks\")\n", 500 | "# recognize_google() will throw a request error if the API is unreachable, hence using exception handling\n", 501 | " \n", 502 | " try:\n", 503 | " # using google speech recognition\n", 504 | " print(\"Text: \"+r.recognize_google(audio_text))\n", 505 | " except:\n", 506 | " 
print(\"Sorry, I did not get that\")" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "#### Reference:\n", 514 | "- [SpeechtoText](https://www.geeksforgeeks.org/python-convert-speech-to-text-and-text-to-speech/)\n", 515 | "- [Google Language Support](https://cloud.google.com/speech-to-text/docs/languages)\n", 516 | "- [More](https://stackabuse.com/introduction-to-speech-recognition-with-python/)" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | } 526 | ], 527 | "metadata": { 528 | "kernelspec": { 529 | "display_name": "Python 3", 530 | "language": "python", 531 | "name": "python3" 532 | }, 533 | "language_info": { 534 | "codemirror_mode": { 535 | "name": "ipython", 536 | "version": 3 537 | }, 538 | "file_extension": ".py", 539 | "mimetype": "text/x-python", 540 | "name": "python", 541 | "nbconvert_exporter": "python", 542 | "pygments_lexer": "ipython3", 543 | "version": "3.7.4" 544 | } 545 | }, 546 | "nbformat": 4, 547 | "nbformat_minor": 2 548 | } 549 | --------------------------------------------------------------------------------