├── .gitignore
├── Adv Regression.ipynb
├── Auto_EDA.ipynb
├── Bite Sized Learning_Data Structures_Python.pdf
├── Bite Sized Learning_Loops_Python.pdf
├── Data Visualisation_Matplotlib.ipynb
├── Data Visualization_Seaborn.ipynb
├── Data Wrangling_Python_Pyspark.ipynb
├── Evaluation Metrics - Regression.ipynb
├── Excel Automation.ipynb
├── Gif Plots.ipynb
├── KNN Classifier.ipynb
├── List Comprehensions_Python.pdf
├── Pandas Basics.ipynb
├── Pandas Crosstab vs Pivot table.ipynb
├── Pandas Tricks.ipynb
├── Pandas_Cheat_Sheet.pdf
├── Pandas_Data Wrangling_CheatSheet.pdf
├── Polynomial Regression.ipynb
├── Python Cheat Sheet.pdf
├── Python Functions.ipynb
├── Python Loops.ipynb
├── Python-Patterns.ipynb
├── README.md
├── Regular Expressions.ipynb
├── Seaborn_Cheatsheet.png
├── Text to Speech.ipynb
└── User Defined Functions EDA.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/Bite Sized Learning_Data Structures_Python.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Bite Sized Learning_Data Structures_Python.pdf


--------------------------------------------------------------------------------
/Bite Sized Learning_Loops_Python.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Bite Sized Learning_Loops_Python.pdf


--------------------------------------------------------------------------------
/Data Wrangling_Python_Pyspark.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# Data Wrangling"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "markdown",
  12 |    "metadata": {},
  13 |    "source": [
  14 |     "Data wrangling:\n",
  15 |     "The task that lies between data acquisition and exploratory data analysis is commonly called data wrangling. It is the process of formatting, merging, grouping, concatenating, etc., in order to analyse the data or make it ready for modelling."
  16 |    ]
  17 |   },
  18 |   {
  19 |    "cell_type": "markdown",
  20 |    "metadata": {},
  21 |    "source": [
  22 |     "## Merging"
  23 |    ]
  24 |   },
  25 |   {
  26 |    "cell_type": "code",
  27 |    "execution_count": 1,
  28 |    "metadata": {},
  29 |    "outputs": [],
  30 |    "source": [
  31 |     "import pandas as pd\n",
  32 |     "import numpy as np\n",
  33 |     "# Create the two sample data frames used in the merge examples\n",
  34 |     "left = pd.DataFrame({\n",
  35 |     "         'id':[1,2,3,4,5],\n",
  36 |     "         'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],\n",
  37 |     "         'subject_id':['sub1','sub2','sub4','sub6','sub5']})\n",
  38 |     "right = pd.DataFrame(\n",
  39 |     "         {'id':[1,2,3,4,5],\n",
  40 |     "         'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],\n",
  41 |     "         'subject_id':['sub2','sub4','sub3','sub6','sub5']})"
  42 |    ]
  43 |   },
  44 |   {
  45 |    "cell_type": "code",
  46 |    "execution_count": 2,
  47 |    "metadata": {},
  48 |    "outputs": [
  49 |     {
  50 |      "data": {
  51 |       "text/html": [
  52 |        "<div>\n",
  53 |        "<style scoped>\n",
  54 |        "    .dataframe tbody tr th:only-of-type {\n",
  55 |        "        vertical-align: middle;\n",
  56 |        "    }\n",
  57 |        "\n",
  58 |        "    .dataframe tbody tr th {\n",
  59 |        "        vertical-align: top;\n",
  60 |        "    }\n",
  61 |        "\n",
  62 |        "    .dataframe thead th {\n",
  63 |        "        text-align: right;\n",
  64 |        "    }\n",
  65 |        "</style>\n",
  66 |        "<table border=\"1\" class=\"dataframe\">\n",
  67 |        "  <thead>\n",
  68 |        "    <tr style=\"text-align: right;\">\n",
  69 |        "      <th></th>\n",
  70 |        "      <th>id</th>\n",
  71 |        "      <th>Name</th>\n",
  72 |        "      <th>subject_id</th>\n",
  73 |        "    </tr>\n",
  74 |        "  </thead>\n",
  75 |        "  <tbody>\n",
  76 |        "    <tr>\n",
  77 |        "      <th>0</th>\n",
  78 |        "      <td>1</td>\n",
  79 |        "      <td>Alex</td>\n",
  80 |        "      <td>sub1</td>\n",
  81 |        "    </tr>\n",
  82 |        "    <tr>\n",
  83 |        "      <th>1</th>\n",
  84 |        "      <td>2</td>\n",
  85 |        "      <td>Amy</td>\n",
  86 |        "      <td>sub2</td>\n",
  87 |        "    </tr>\n",
  88 |        "    <tr>\n",
  89 |        "      <th>2</th>\n",
  90 |        "      <td>3</td>\n",
  91 |        "      <td>Allen</td>\n",
  92 |        "      <td>sub4</td>\n",
  93 |        "    </tr>\n",
  94 |        "    <tr>\n",
  95 |        "      <th>3</th>\n",
  96 |        "      <td>4</td>\n",
  97 |        "      <td>Alice</td>\n",
  98 |        "      <td>sub6</td>\n",
  99 |        "    </tr>\n",
 100 |        "    <tr>\n",
 101 |        "      <th>4</th>\n",
 102 |        "      <td>5</td>\n",
 103 |        "      <td>Ayoung</td>\n",
 104 |        "      <td>sub5</td>\n",
 105 |        "    </tr>\n",
 106 |        "  </tbody>\n",
 107 |        "</table>\n",
 108 |        "</div>"
 109 |       ],
 110 |       "text/plain": [
 111 |        "   id    Name subject_id\n",
 112 |        "0   1    Alex       sub1\n",
 113 |        "1   2     Amy       sub2\n",
 114 |        "2   3   Allen       sub4\n",
 115 |        "3   4   Alice       sub6\n",
 116 |        "4   5  Ayoung       sub5"
 117 |       ]
 118 |      },
 119 |      "execution_count": 2,
 120 |      "metadata": {},
 121 |      "output_type": "execute_result"
 122 |     }
 123 |    ],
 124 |    "source": [
 125 |     "left.head()"
 126 |    ]
 127 |   },
 128 |   {
 129 |    "cell_type": "code",
 130 |    "execution_count": 3,
 131 |    "metadata": {},
 132 |    "outputs": [
 133 |     {
 134 |      "data": {
 135 |       "text/html": [
 136 |        "<div>\n",
 137 |        "<style scoped>\n",
 138 |        "    .dataframe tbody tr th:only-of-type {\n",
 139 |        "        vertical-align: middle;\n",
 140 |        "    }\n",
 141 |        "\n",
 142 |        "    .dataframe tbody tr th {\n",
 143 |        "        vertical-align: top;\n",
 144 |        "    }\n",
 145 |        "\n",
 146 |        "    .dataframe thead th {\n",
 147 |        "        text-align: right;\n",
 148 |        "    }\n",
 149 |        "</style>\n",
 150 |        "<table border=\"1\" class=\"dataframe\">\n",
 151 |        "  <thead>\n",
 152 |        "    <tr style=\"text-align: right;\">\n",
 153 |        "      <th></th>\n",
 154 |        "      <th>id</th>\n",
 155 |        "      <th>Name</th>\n",
 156 |        "      <th>subject_id</th>\n",
 157 |        "    </tr>\n",
 158 |        "  </thead>\n",
 159 |        "  <tbody>\n",
 160 |        "    <tr>\n",
 161 |        "      <th>0</th>\n",
 162 |        "      <td>1</td>\n",
 163 |        "      <td>Billy</td>\n",
 164 |        "      <td>sub2</td>\n",
 165 |        "    </tr>\n",
 166 |        "    <tr>\n",
 167 |        "      <th>1</th>\n",
 168 |        "      <td>2</td>\n",
 169 |        "      <td>Brian</td>\n",
 170 |        "      <td>sub4</td>\n",
 171 |        "    </tr>\n",
 172 |        "    <tr>\n",
 173 |        "      <th>2</th>\n",
 174 |        "      <td>3</td>\n",
 175 |        "      <td>Bran</td>\n",
 176 |        "      <td>sub3</td>\n",
 177 |        "    </tr>\n",
 178 |        "    <tr>\n",
 179 |        "      <th>3</th>\n",
 180 |        "      <td>4</td>\n",
 181 |        "      <td>Bryce</td>\n",
 182 |        "      <td>sub6</td>\n",
 183 |        "    </tr>\n",
 184 |        "    <tr>\n",
 185 |        "      <th>4</th>\n",
 186 |        "      <td>5</td>\n",
 187 |        "      <td>Betty</td>\n",
 188 |        "      <td>sub5</td>\n",
 189 |        "    </tr>\n",
 190 |        "  </tbody>\n",
 191 |        "</table>\n",
 192 |        "</div>"
 193 |       ],
 194 |       "text/plain": [
 195 |        "   id   Name subject_id\n",
 196 |        "0   1  Billy       sub2\n",
 197 |        "1   2  Brian       sub4\n",
 198 |        "2   3   Bran       sub3\n",
 199 |        "3   4  Bryce       sub6\n",
 200 |        "4   5  Betty       sub5"
 201 |       ]
 202 |      },
 203 |      "execution_count": 3,
 204 |      "metadata": {},
 205 |      "output_type": "execute_result"
 206 |     }
 207 |    ],
 208 |    "source": [
 209 |     "right.head()"
 210 |    ]
 211 |   },
 212 |   {
 213 |    "cell_type": "code",
 214 |    "execution_count": 4,
 215 |    "metadata": {},
 216 |    "outputs": [
 217 |     {
 218 |      "data": {
 219 |       "text/html": [
 220 |        "<div>\n",
 221 |        "<style scoped>\n",
 222 |        "    .dataframe tbody tr th:only-of-type {\n",
 223 |        "        vertical-align: middle;\n",
 224 |        "    }\n",
 225 |        "\n",
 226 |        "    .dataframe tbody tr th {\n",
 227 |        "        vertical-align: top;\n",
 228 |        "    }\n",
 229 |        "\n",
 230 |        "    .dataframe thead th {\n",
 231 |        "        text-align: right;\n",
 232 |        "    }\n",
 233 |        "</style>\n",
 234 |        "<table border=\"1\" class=\"dataframe\">\n",
 235 |        "  <thead>\n",
 236 |        "    <tr style=\"text-align: right;\">\n",
 237 |        "      <th></th>\n",
 238 |        "      <th>id</th>\n",
 239 |        "      <th>Name_x</th>\n",
 240 |        "      <th>subject_id_x</th>\n",
 241 |        "      <th>Name_y</th>\n",
 242 |        "      <th>subject_id_y</th>\n",
 243 |        "    </tr>\n",
 244 |        "  </thead>\n",
 245 |        "  <tbody>\n",
 246 |        "    <tr>\n",
 247 |        "      <th>0</th>\n",
 248 |        "      <td>1</td>\n",
 249 |        "      <td>Alex</td>\n",
 250 |        "      <td>sub1</td>\n",
 251 |        "      <td>Billy</td>\n",
 252 |        "      <td>sub2</td>\n",
 253 |        "    </tr>\n",
 254 |        "    <tr>\n",
 255 |        "      <th>1</th>\n",
 256 |        "      <td>2</td>\n",
 257 |        "      <td>Amy</td>\n",
 258 |        "      <td>sub2</td>\n",
 259 |        "      <td>Brian</td>\n",
 260 |        "      <td>sub4</td>\n",
 261 |        "    </tr>\n",
 262 |        "    <tr>\n",
 263 |        "      <th>2</th>\n",
 264 |        "      <td>3</td>\n",
 265 |        "      <td>Allen</td>\n",
 266 |        "      <td>sub4</td>\n",
 267 |        "      <td>Bran</td>\n",
 268 |        "      <td>sub3</td>\n",
 269 |        "    </tr>\n",
 270 |        "    <tr>\n",
 271 |        "      <th>3</th>\n",
 272 |        "      <td>4</td>\n",
 273 |        "      <td>Alice</td>\n",
 274 |        "      <td>sub6</td>\n",
 275 |        "      <td>Bryce</td>\n",
 276 |        "      <td>sub6</td>\n",
 277 |        "    </tr>\n",
 278 |        "    <tr>\n",
 279 |        "      <th>4</th>\n",
 280 |        "      <td>5</td>\n",
 281 |        "      <td>Ayoung</td>\n",
 282 |        "      <td>sub5</td>\n",
 283 |        "      <td>Betty</td>\n",
 284 |        "      <td>sub5</td>\n",
 285 |        "    </tr>\n",
 286 |        "  </tbody>\n",
 287 |        "</table>\n",
 288 |        "</div>"
 289 |       ],
 290 |       "text/plain": [
 291 |        "   id  Name_x subject_id_x Name_y subject_id_y\n",
 292 |        "0   1    Alex         sub1  Billy         sub2\n",
 293 |        "1   2     Amy         sub2  Brian         sub4\n",
 294 |        "2   3   Allen         sub4   Bran         sub3\n",
 295 |        "3   4   Alice         sub6  Bryce         sub6\n",
 296 |        "4   5  Ayoung         sub5  Betty         sub5"
 297 |       ]
 298 |      },
 299 |      "execution_count": 4,
 300 |      "metadata": {},
 301 |      "output_type": "execute_result"
 302 |     }
 303 |    ],
 304 |    "source": [
 305 |     "# merge based on one key\n",
 306 |     "merged_id = pd.merge(left, right, how='inner', on='id', sort=True)\n",
 307 |     "merged_id.head()"
 308 |    ]
 309 |   },
 310 |   {
 311 |    "cell_type": "code",
 312 |    "execution_count": 5,
 313 |    "metadata": {},
 314 |    "outputs": [
 315 |     {
 316 |      "data": {
 317 |       "text/html": [
 318 |        "<div>\n",
 319 |        "<style scoped>\n",
 320 |        "    .dataframe tbody tr th:only-of-type {\n",
 321 |        "        vertical-align: middle;\n",
 322 |        "    }\n",
 323 |        "\n",
 324 |        "    .dataframe tbody tr th {\n",
 325 |        "        vertical-align: top;\n",
 326 |        "    }\n",
 327 |        "\n",
 328 |        "    .dataframe thead th {\n",
 329 |        "        text-align: right;\n",
 330 |        "    }\n",
 331 |        "</style>\n",
 332 |        "<table border=\"1\" class=\"dataframe\">\n",
 333 |        "  <thead>\n",
 334 |        "    <tr style=\"text-align: right;\">\n",
 335 |        "      <th></th>\n",
 336 |        "      <th>id</th>\n",
 337 |        "      <th>Name_x</th>\n",
 338 |        "      <th>subject_id</th>\n",
 339 |        "      <th>Name_y</th>\n",
 340 |        "    </tr>\n",
 341 |        "  </thead>\n",
 342 |        "  <tbody>\n",
 343 |        "    <tr>\n",
 344 |        "      <th>0</th>\n",
 345 |        "      <td>4</td>\n",
 346 |        "      <td>Alice</td>\n",
 347 |        "      <td>sub6</td>\n",
 348 |        "      <td>Bryce</td>\n",
 349 |        "    </tr>\n",
 350 |        "    <tr>\n",
 351 |        "      <th>1</th>\n",
 352 |        "      <td>5</td>\n",
 353 |        "      <td>Ayoung</td>\n",
 354 |        "      <td>sub5</td>\n",
 355 |        "      <td>Betty</td>\n",
 356 |        "    </tr>\n",
 357 |        "  </tbody>\n",
 358 |        "</table>\n",
 359 |        "</div>"
 360 |       ],
 361 |       "text/plain": [
 362 |        "   id  Name_x subject_id Name_y\n",
 363 |        "0   4   Alice       sub6  Bryce\n",
 364 |        "1   5  Ayoung       sub5  Betty"
 365 |       ]
 366 |      },
 367 |      "execution_count": 5,
 368 |      "metadata": {},
 369 |      "output_type": "execute_result"
 370 |     }
 371 |    ],
 372 |    "source": [
 373 |     "# merge based on multiple keys\n",
 374 |     "merged_multiple = pd.merge(left,right,on=['id','subject_id'])\n",
 375 |     "merged_multiple.head()"
 376 |    ]
 377 |   },
 378 |   {
 379 |    "cell_type": "code",
 380 |    "execution_count": 6,
 381 |    "metadata": {},
 382 |    "outputs": [
 383 |     {
 384 |      "data": {
 385 |       "text/html": [
 386 |        "<div>\n",
 387 |        "<style scoped>\n",
 388 |        "    .dataframe tbody tr th:only-of-type {\n",
 389 |        "        vertical-align: middle;\n",
 390 |        "    }\n",
 391 |        "\n",
 392 |        "    .dataframe tbody tr th {\n",
 393 |        "        vertical-align: top;\n",
 394 |        "    }\n",
 395 |        "\n",
 396 |        "    .dataframe thead th {\n",
 397 |        "        text-align: right;\n",
 398 |        "    }\n",
 399 |        "</style>\n",
 400 |        "<table border=\"1\" class=\"dataframe\">\n",
 401 |        "  <thead>\n",
 402 |        "    <tr style=\"text-align: right;\">\n",
 403 |        "      <th></th>\n",
 404 |        "      <th>id_x</th>\n",
 405 |        "      <th>Name_x</th>\n",
 406 |        "      <th>subject_id</th>\n",
 407 |        "      <th>id_y</th>\n",
 408 |        "      <th>Name_y</th>\n",
 409 |        "    </tr>\n",
 410 |        "  </thead>\n",
 411 |        "  <tbody>\n",
 412 |        "    <tr>\n",
 413 |        "      <th>0</th>\n",
 414 |        "      <td>1</td>\n",
 415 |        "      <td>Alex</td>\n",
 416 |        "      <td>sub1</td>\n",
 417 |        "      <td>NaN</td>\n",
 418 |        "      <td>NaN</td>\n",
 419 |        "    </tr>\n",
 420 |        "    <tr>\n",
 421 |        "      <th>1</th>\n",
 422 |        "      <td>2</td>\n",
 423 |        "      <td>Amy</td>\n",
 424 |        "      <td>sub2</td>\n",
 425 |        "      <td>1.0</td>\n",
 426 |        "      <td>Billy</td>\n",
 427 |        "    </tr>\n",
 428 |        "    <tr>\n",
 429 |        "      <th>2</th>\n",
 430 |        "      <td>3</td>\n",
 431 |        "      <td>Allen</td>\n",
 432 |        "      <td>sub4</td>\n",
 433 |        "      <td>2.0</td>\n",
 434 |        "      <td>Brian</td>\n",
 435 |        "    </tr>\n",
 436 |        "    <tr>\n",
 437 |        "      <th>3</th>\n",
 438 |        "      <td>4</td>\n",
 439 |        "      <td>Alice</td>\n",
 440 |        "      <td>sub6</td>\n",
 441 |        "      <td>4.0</td>\n",
 442 |        "      <td>Bryce</td>\n",
 443 |        "    </tr>\n",
 444 |        "    <tr>\n",
 445 |        "      <th>4</th>\n",
 446 |        "      <td>5</td>\n",
 447 |        "      <td>Ayoung</td>\n",
 448 |        "      <td>sub5</td>\n",
 449 |        "      <td>5.0</td>\n",
 450 |        "      <td>Betty</td>\n",
 451 |        "    </tr>\n",
 452 |        "  </tbody>\n",
 453 |        "</table>\n",
 454 |        "</div>"
 455 |       ],
 456 |       "text/plain": [
 457 |        "   id_x  Name_x subject_id  id_y Name_y\n",
 458 |        "0     1    Alex       sub1   NaN    NaN\n",
 459 |        "1     2     Amy       sub2   1.0  Billy\n",
 460 |        "2     3   Allen       sub4   2.0  Brian\n",
 461 |        "3     4   Alice       sub6   4.0  Bryce\n",
 462 |        "4     5  Ayoung       sub5   5.0  Betty"
 463 |       ]
 464 |      },
 465 |      "execution_count": 6,
 466 |      "metadata": {},
 467 |      "output_type": "execute_result"
 468 |     }
 469 |    ],
 470 |    "source": [
 471 |     "# left join\n",
 472 |     "merge_left = pd.merge(left, right, on='subject_id', how='left')\n",
 473 |     "merge_left.head()"
 474 |    ]
 475 |   },
 476 |   {
 477 |    "cell_type": "code",
 478 |    "execution_count": 7,
 479 |    "metadata": {},
 480 |    "outputs": [
 481 |     {
 482 |      "data": {
 483 |       "text/html": [
 484 |        "<div>\n",
 485 |        "<style scoped>\n",
 486 |        "    .dataframe tbody tr th:only-of-type {\n",
 487 |        "        vertical-align: middle;\n",
 488 |        "    }\n",
 489 |        "\n",
 490 |        "    .dataframe tbody tr th {\n",
 491 |        "        vertical-align: top;\n",
 492 |        "    }\n",
 493 |        "\n",
 494 |        "    .dataframe thead th {\n",
 495 |        "        text-align: right;\n",
 496 |        "    }\n",
 497 |        "</style>\n",
 498 |        "<table border=\"1\" class=\"dataframe\">\n",
 499 |        "  <thead>\n",
 500 |        "    <tr style=\"text-align: right;\">\n",
 501 |        "      <th></th>\n",
 502 |        "      <th>id_x</th>\n",
 503 |        "      <th>Name_x</th>\n",
 504 |        "      <th>subject_id</th>\n",
 505 |        "      <th>id_y</th>\n",
 506 |        "      <th>Name_y</th>\n",
 507 |        "    </tr>\n",
 508 |        "  </thead>\n",
 509 |        "  <tbody>\n",
 510 |        "    <tr>\n",
 511 |        "      <th>0</th>\n",
 512 |        "      <td>2.0</td>\n",
 513 |        "      <td>Amy</td>\n",
 514 |        "      <td>sub2</td>\n",
 515 |        "      <td>1</td>\n",
 516 |        "      <td>Billy</td>\n",
 517 |        "    </tr>\n",
 518 |        "    <tr>\n",
 519 |        "      <th>1</th>\n",
 520 |        "      <td>3.0</td>\n",
 521 |        "      <td>Allen</td>\n",
 522 |        "      <td>sub4</td>\n",
 523 |        "      <td>2</td>\n",
 524 |        "      <td>Brian</td>\n",
 525 |        "    </tr>\n",
 526 |        "    <tr>\n",
 527 |        "      <th>2</th>\n",
 528 |        "      <td>4.0</td>\n",
 529 |        "      <td>Alice</td>\n",
 530 |        "      <td>sub6</td>\n",
 531 |        "      <td>4</td>\n",
 532 |        "      <td>Bryce</td>\n",
 533 |        "    </tr>\n",
 534 |        "    <tr>\n",
 535 |        "      <th>3</th>\n",
 536 |        "      <td>5.0</td>\n",
 537 |        "      <td>Ayoung</td>\n",
 538 |        "      <td>sub5</td>\n",
 539 |        "      <td>5</td>\n",
 540 |        "      <td>Betty</td>\n",
 541 |        "    </tr>\n",
 542 |        "    <tr>\n",
 543 |        "      <th>4</th>\n",
 544 |        "      <td>NaN</td>\n",
 545 |        "      <td>NaN</td>\n",
 546 |        "      <td>sub3</td>\n",
 547 |        "      <td>3</td>\n",
 548 |        "      <td>Bran</td>\n",
 549 |        "    </tr>\n",
 550 |        "  </tbody>\n",
 551 |        "</table>\n",
 552 |        "</div>"
 553 |       ],
 554 |       "text/plain": [
 555 |        "   id_x  Name_x subject_id  id_y Name_y\n",
 556 |        "0   2.0     Amy       sub2     1  Billy\n",
 557 |        "1   3.0   Allen       sub4     2  Brian\n",
 558 |        "2   4.0   Alice       sub6     4  Bryce\n",
 559 |        "3   5.0  Ayoung       sub5     5  Betty\n",
 560 |        "4   NaN     NaN       sub3     3   Bran"
 561 |       ]
 562 |      },
 563 |      "execution_count": 7,
 564 |      "metadata": {},
 565 |      "output_type": "execute_result"
 566 |     }
 567 |    ],
 568 |    "source": [
 569 |     "# right join\n",
 570 |     "merge_right = pd.merge(left, right, on='subject_id', how='right')\n",
 571 |     "merge_right.head()"
 572 |    ]
 573 |   },
 574 |   {
 575 |    "cell_type": "code",
 576 |    "execution_count": 8,
 577 |    "metadata": {},
 578 |    "outputs": [
 579 |     {
 580 |      "data": {
 581 |       "text/html": [
 582 |        "<div>\n",
 583 |        "<style scoped>\n",
 584 |        "    .dataframe tbody tr th:only-of-type {\n",
 585 |        "        vertical-align: middle;\n",
 586 |        "    }\n",
 587 |        "\n",
 588 |        "    .dataframe tbody tr th {\n",
 589 |        "        vertical-align: top;\n",
 590 |        "    }\n",
 591 |        "\n",
 592 |        "    .dataframe thead th {\n",
 593 |        "        text-align: right;\n",
 594 |        "    }\n",
 595 |        "</style>\n",
 596 |        "<table border=\"1\" class=\"dataframe\">\n",
 597 |        "  <thead>\n",
 598 |        "    <tr style=\"text-align: right;\">\n",
 599 |        "      <th></th>\n",
 600 |        "      <th>id_x</th>\n",
 601 |        "      <th>Name_x</th>\n",
 602 |        "      <th>subject_id</th>\n",
 603 |        "      <th>id_y</th>\n",
 604 |        "      <th>Name_y</th>\n",
 605 |        "    </tr>\n",
 606 |        "  </thead>\n",
 607 |        "  <tbody>\n",
 608 |        "    <tr>\n",
 609 |        "      <th>0</th>\n",
 610 |        "      <td>1.0</td>\n",
 611 |        "      <td>Alex</td>\n",
 612 |        "      <td>sub1</td>\n",
 613 |        "      <td>NaN</td>\n",
 614 |        "      <td>NaN</td>\n",
 615 |        "    </tr>\n",
 616 |        "    <tr>\n",
 617 |        "      <th>1</th>\n",
 618 |        "      <td>2.0</td>\n",
 619 |        "      <td>Amy</td>\n",
 620 |        "      <td>sub2</td>\n",
 621 |        "      <td>1.0</td>\n",
 622 |        "      <td>Billy</td>\n",
 623 |        "    </tr>\n",
 624 |        "    <tr>\n",
 625 |        "      <th>2</th>\n",
 626 |        "      <td>3.0</td>\n",
 627 |        "      <td>Allen</td>\n",
 628 |        "      <td>sub4</td>\n",
 629 |        "      <td>2.0</td>\n",
 630 |        "      <td>Brian</td>\n",
 631 |        "    </tr>\n",
 632 |        "    <tr>\n",
 633 |        "      <th>3</th>\n",
 634 |        "      <td>4.0</td>\n",
 635 |        "      <td>Alice</td>\n",
 636 |        "      <td>sub6</td>\n",
 637 |        "      <td>4.0</td>\n",
 638 |        "      <td>Bryce</td>\n",
 639 |        "    </tr>\n",
 640 |        "    <tr>\n",
 641 |        "      <th>4</th>\n",
 642 |        "      <td>5.0</td>\n",
 643 |        "      <td>Ayoung</td>\n",
 644 |        "      <td>sub5</td>\n",
 645 |        "      <td>5.0</td>\n",
 646 |        "      <td>Betty</td>\n",
 647 |        "    </tr>\n",
 648 |        "  </tbody>\n",
 649 |        "</table>\n",
 650 |        "</div>"
 651 |       ],
 652 |       "text/plain": [
 653 |        "   id_x  Name_x subject_id  id_y Name_y\n",
 654 |        "0   1.0    Alex       sub1   NaN    NaN\n",
 655 |        "1   2.0     Amy       sub2   1.0  Billy\n",
 656 |        "2   3.0   Allen       sub4   2.0  Brian\n",
 657 |        "3   4.0   Alice       sub6   4.0  Bryce\n",
 658 |        "4   5.0  Ayoung       sub5   5.0  Betty"
 659 |       ]
 660 |      },
 661 |      "execution_count": 8,
 662 |      "metadata": {},
 663 |      "output_type": "execute_result"
 664 |     }
 665 |    ],
 666 |    "source": [
 667 |     "# outer join\n",
 668 |     "merge_outer = pd.merge(left, right, how='outer', on='subject_id')\n",
 669 |     "merge_outer.head()"
 670 |    ]
 671 |   },
 672 |   {
 673 |    "cell_type": "code",
 674 |    "execution_count": 9,
 675 |    "metadata": {},
 676 |    "outputs": [
 677 |     {
 678 |      "data": {
 679 |       "text/html": [
 680 |        "<div>\n",
 681 |        "<style scoped>\n",
 682 |        "    .dataframe tbody tr th:only-of-type {\n",
 683 |        "        vertical-align: middle;\n",
 684 |        "    }\n",
 685 |        "\n",
 686 |        "    .dataframe tbody tr th {\n",
 687 |        "        vertical-align: top;\n",
 688 |        "    }\n",
 689 |        "\n",
 690 |        "    .dataframe thead th {\n",
 691 |        "        text-align: right;\n",
 692 |        "    }\n",
 693 |        "</style>\n",
 694 |        "<table border=\"1\" class=\"dataframe\">\n",
 695 |        "  <thead>\n",
 696 |        "    <tr style=\"text-align: right;\">\n",
 697 |        "      <th></th>\n",
 698 |        "      <th>id_x</th>\n",
 699 |        "      <th>Name_x</th>\n",
 700 |        "      <th>subject_id</th>\n",
 701 |        "      <th>id_y</th>\n",
 702 |        "      <th>Name_y</th>\n",
 703 |        "    </tr>\n",
 704 |        "  </thead>\n",
 705 |        "  <tbody>\n",
 706 |        "    <tr>\n",
 707 |        "      <th>0</th>\n",
 708 |        "      <td>2</td>\n",
 709 |        "      <td>Amy</td>\n",
 710 |        "      <td>sub2</td>\n",
 711 |        "      <td>1</td>\n",
 712 |        "      <td>Billy</td>\n",
 713 |        "    </tr>\n",
 714 |        "    <tr>\n",
 715 |        "      <th>1</th>\n",
 716 |        "      <td>3</td>\n",
 717 |        "      <td>Allen</td>\n",
 718 |        "      <td>sub4</td>\n",
 719 |        "      <td>2</td>\n",
 720 |        "      <td>Brian</td>\n",
 721 |        "    </tr>\n",
 722 |        "    <tr>\n",
 723 |        "      <th>2</th>\n",
 724 |        "      <td>4</td>\n",
 725 |        "      <td>Alice</td>\n",
 726 |        "      <td>sub6</td>\n",
 727 |        "      <td>4</td>\n",
 728 |        "      <td>Bryce</td>\n",
 729 |        "    </tr>\n",
 730 |        "    <tr>\n",
 731 |        "      <th>3</th>\n",
 732 |        "      <td>5</td>\n",
 733 |        "      <td>Ayoung</td>\n",
 734 |        "      <td>sub5</td>\n",
 735 |        "      <td>5</td>\n",
 736 |        "      <td>Betty</td>\n",
 737 |        "    </tr>\n",
 738 |        "  </tbody>\n",
 739 |        "</table>\n",
 740 |        "</div>"
 741 |       ],
 742 |       "text/plain": [
 743 |        "   id_x  Name_x subject_id  id_y Name_y\n",
 744 |        "0     2     Amy       sub2     1  Billy\n",
 745 |        "1     3   Allen       sub4     2  Brian\n",
 746 |        "2     4   Alice       sub6     4  Bryce\n",
 747 |        "3     5  Ayoung       sub5     5  Betty"
 748 |       ]
 749 |      },
 750 |      "execution_count": 9,
 751 |      "metadata": {},
 752 |      "output_type": "execute_result"
 753 |     }
 754 |    ],
 755 |    "source": [
 756 |     "# inner join\n",
 757 |     "merge_inner = pd.merge(left, right, how='inner', on='subject_id')\n",
 758 |     "merge_inner.head()"
 759 |    ]
 760 |   },
 761 |   {
 762 |    "cell_type": "code",
 763 |    "execution_count": 10,
 764 |    "metadata": {
 765 |     "scrolled": false
 766 |    },
 767 |    "outputs": [
 768 |     {
 769 |      "data": {
 770 |       "text/html": [
 771 |        "<img src=\"https://data36.com/wp-content/uploads/2018/08/4-pandas-merge-inner-outer-left-right-1024x771.png\" width=\"600\" height=\"400\"/>"
 772 |       ],
 773 |       "text/plain": [
 774 |        "<IPython.core.display.Image object>"
 775 |       ]
 776 |      },
 777 |      "execution_count": 10,
 778 |      "metadata": {},
 779 |      "output_type": "execute_result"
 780 |     }
 781 |    ],
 782 |    "source": [
 783 |     "from IPython.display import Image\n",
 784 |     "from IPython.core.display import HTML \n",
 785 |     "Image(url= \"https://data36.com/wp-content/uploads/2018/08/4-pandas-merge-inner-outer-left-right-1024x771.png\", width=600, height=400)"
 786 |    ]
 787 |   },
 788 |   {
 789 |    "cell_type": "markdown",
 790 |    "metadata": {},
 791 |    "source": [
 792 |     "## Grouping Data"
 793 |    ]
 794 |   },
 795 |   {
 796 |    "cell_type": "markdown",
 797 |    "metadata": {},
 798 |    "source": [
 799 |     "Grouping is a frequent need in data analysis: we often want results computed separately for each group present in the data set."
 800 |    ]
 801 |   },
 802 |   {
 803 |    "cell_type": "code",
 804 |    "execution_count": 11,
 805 |    "metadata": {},
 806 |    "outputs": [
 807 |     {
 808 |      "data": {
 809 |       "text/html": [
 810 |        "<div>\n",
 811 |        "<style scoped>\n",
 812 |        "    .dataframe tbody tr th:only-of-type {\n",
 813 |        "        vertical-align: middle;\n",
 814 |        "    }\n",
 815 |        "\n",
 816 |        "    .dataframe tbody tr th {\n",
 817 |        "        vertical-align: top;\n",
 818 |        "    }\n",
 819 |        "\n",
 820 |        "    .dataframe thead th {\n",
 821 |        "        text-align: right;\n",
 822 |        "    }\n",
 823 |        "</style>\n",
 824 |        "<table border=\"1\" class=\"dataframe\">\n",
 825 |        "  <thead>\n",
 826 |        "    <tr style=\"text-align: right;\">\n",
 827 |        "      <th></th>\n",
 828 |        "      <th>Team</th>\n",
 829 |        "      <th>Rank</th>\n",
 830 |        "      <th>Year</th>\n",
 831 |        "      <th>Points</th>\n",
 832 |        "    </tr>\n",
 833 |        "  </thead>\n",
 834 |        "  <tbody>\n",
 835 |        "    <tr>\n",
 836 |        "      <th>0</th>\n",
 837 |        "      <td>Riders</td>\n",
 838 |        "      <td>1</td>\n",
 839 |        "      <td>2014</td>\n",
 840 |        "      <td>876</td>\n",
 841 |        "    </tr>\n",
 842 |        "    <tr>\n",
 843 |        "      <th>1</th>\n",
 844 |        "      <td>Riders</td>\n",
 845 |        "      <td>2</td>\n",
 846 |        "      <td>2015</td>\n",
 847 |        "      <td>789</td>\n",
 848 |        "    </tr>\n",
 849 |        "    <tr>\n",
 850 |        "      <th>2</th>\n",
 851 |        "      <td>Devils</td>\n",
 852 |        "      <td>2</td>\n",
 853 |        "      <td>2014</td>\n",
 854 |        "      <td>863</td>\n",
 855 |        "    </tr>\n",
 856 |        "    <tr>\n",
 857 |        "      <th>3</th>\n",
 858 |        "      <td>Devils</td>\n",
 859 |        "      <td>3</td>\n",
 860 |        "      <td>2015</td>\n",
 861 |        "      <td>673</td>\n",
 862 |        "    </tr>\n",
 863 |        "    <tr>\n",
 864 |        "      <th>4</th>\n",
 865 |        "      <td>Kings</td>\n",
 866 |        "      <td>3</td>\n",
 867 |        "      <td>2014</td>\n",
 868 |        "      <td>741</td>\n",
 869 |        "    </tr>\n",
 870 |        "    <tr>\n",
 871 |        "      <th>5</th>\n",
 872 |        "      <td>kings</td>\n",
 873 |        "      <td>4</td>\n",
 874 |        "      <td>2015</td>\n",
 875 |        "      <td>812</td>\n",
 876 |        "    </tr>\n",
 877 |        "    <tr>\n",
 878 |        "      <th>6</th>\n",
 879 |        "      <td>Kings</td>\n",
 880 |        "      <td>1</td>\n",
 881 |        "      <td>2016</td>\n",
 882 |        "      <td>756</td>\n",
 883 |        "    </tr>\n",
 884 |        "    <tr>\n",
 885 |        "      <th>7</th>\n",
 886 |        "      <td>Kings</td>\n",
 887 |        "      <td>1</td>\n",
 888 |        "      <td>2017</td>\n",
 889 |        "      <td>788</td>\n",
 890 |        "    </tr>\n",
 891 |        "    <tr>\n",
 892 |        "      <th>8</th>\n",
 893 |        "      <td>Riders</td>\n",
 894 |        "      <td>2</td>\n",
 895 |        "      <td>2016</td>\n",
 896 |        "      <td>694</td>\n",
 897 |        "    </tr>\n",
 898 |        "    <tr>\n",
 899 |        "      <th>9</th>\n",
 900 |        "      <td>Royals</td>\n",
 901 |        "      <td>4</td>\n",
 902 |        "      <td>2014</td>\n",
 903 |        "      <td>701</td>\n",
 904 |        "    </tr>\n",
 905 |        "    <tr>\n",
 906 |        "      <th>10</th>\n",
 907 |        "      <td>Royals</td>\n",
 908 |        "      <td>1</td>\n",
 909 |        "      <td>2015</td>\n",
 910 |        "      <td>804</td>\n",
 911 |        "    </tr>\n",
 912 |        "    <tr>\n",
 913 |        "      <th>11</th>\n",
 914 |        "      <td>Riders</td>\n",
 915 |        "      <td>2</td>\n",
 916 |        "      <td>2017</td>\n",
 917 |        "      <td>690</td>\n",
 918 |        "    </tr>\n",
 919 |        "  </tbody>\n",
 920 |        "</table>\n",
 921 |        "</div>"
 922 |       ],
 923 |       "text/plain": [
 924 |        "      Team  Rank  Year  Points\n",
 925 |        "0   Riders     1  2014     876\n",
 926 |        "1   Riders     2  2015     789\n",
 927 |        "2   Devils     2  2014     863\n",
 928 |        "3   Devils     3  2015     673\n",
 929 |        "4    Kings     3  2014     741\n",
 930 |        "5    kings     4  2015     812\n",
 931 |        "6    Kings     1  2016     756\n",
 932 |        "7    Kings     1  2017     788\n",
 933 |        "8   Riders     2  2016     694\n",
 934 |        "9   Royals     4  2014     701\n",
 935 |        "10  Royals     1  2015     804\n",
 936 |        "11  Riders     2  2017     690"
 937 |       ]
 938 |      },
 939 |      "execution_count": 11,
 940 |      "metadata": {},
 941 |      "output_type": "execute_result"
 942 |     }
 943 |    ],
 944 |    "source": [
 945 |     "ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',\n",
 946 |     "         'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],\n",
 947 |     "         'Rank': [1, 2, 2, 3, 3,4 ,1 ,1,2 , 4,1,2],\n",
 948 |     "         'Year': [2014,2015,2014,2015,2014,2015,2016,2017,2016,2014,2015,2017],\n",
 949 |     "         'Points':[876,789,863,673,741,812,756,788,694,701,804,690]}\n",
 950 |     "df = pd.DataFrame(ipl_data)\n",
 951 |     "df"
 952 |    ]
 953 |   },
 954 |   {
 955 |    "cell_type": "code",
 956 |    "execution_count": 12,
 957 |    "metadata": {},
 958 |    "outputs": [
 959 |     {
 960 |      "name": "stdout",
 961 |      "output_type": "stream",
 962 |      "text": [
 963 |       "2014\n",
 964 |       "     Team  Rank  Year  Points\n",
 965 |       "0  Riders     1  2014     876\n",
 966 |       "2  Devils     2  2014     863\n",
 967 |       "4   Kings     3  2014     741\n",
 968 |       "9  Royals     4  2014     701\n",
 969 |       "2015\n",
 970 |       "      Team  Rank  Year  Points\n",
 971 |       "1   Riders     2  2015     789\n",
 972 |       "3   Devils     3  2015     673\n",
 973 |       "5    kings     4  2015     812\n",
 974 |       "10  Royals     1  2015     804\n",
 975 |       "2016\n",
 976 |       "     Team  Rank  Year  Points\n",
 977 |       "6   Kings     1  2016     756\n",
 978 |       "8  Riders     2  2016     694\n",
 979 |       "2017\n",
 980 |       "      Team  Rank  Year  Points\n",
 981 |       "7    Kings     1  2017     788\n",
 982 |       "11  Riders     2  2017     690\n"
 983 |      ]
 984 |     }
 985 |    ],
 986 |    "source": [
 987 |     "# group by one column\n",
 988 |     "grouped_1 = df.groupby('Year')\n",
 989 |     "for name,group in grouped_1:\n",
 990 |     "    print(name)\n",
 991 |     "    print(group)"
 992 |    ]
 993 |   },
 994 |   {
 995 |    "cell_type": "code",
 996 |    "execution_count": 13,
 997 |    "metadata": {},
 998 |    "outputs": [
 999 |     {
1000 |      "data": {
1001 |       "text/html": [
1002 |        "<div>\n",
1003 |        "<style scoped>\n",
1004 |        "    .dataframe tbody tr th:only-of-type {\n",
1005 |        "        vertical-align: middle;\n",
1006 |        "    }\n",
1007 |        "\n",
1008 |        "    .dataframe tbody tr th {\n",
1009 |        "        vertical-align: top;\n",
1010 |        "    }\n",
1011 |        "\n",
1012 |        "    .dataframe thead th {\n",
1013 |        "        text-align: right;\n",
1014 |        "    }\n",
1015 |        "</style>\n",
1016 |        "<table border=\"1\" class=\"dataframe\">\n",
1017 |        "  <thead>\n",
1018 |        "    <tr style=\"text-align: right;\">\n",
1019 |        "      <th></th>\n",
1020 |        "      <th></th>\n",
1021 |        "      <th></th>\n",
1022 |        "      <th>Team</th>\n",
1023 |        "      <th>Rank</th>\n",
1024 |        "      <th>Year</th>\n",
1025 |        "      <th>Points</th>\n",
1026 |        "    </tr>\n",
1027 |        "    <tr>\n",
1028 |        "      <th>Team</th>\n",
1029 |        "      <th>Year</th>\n",
1030 |        "      <th></th>\n",
1031 |        "      <th></th>\n",
1032 |        "      <th></th>\n",
1033 |        "      <th></th>\n",
1034 |        "      <th></th>\n",
1035 |        "    </tr>\n",
1036 |        "  </thead>\n",
1037 |        "  <tbody>\n",
1038 |        "    <tr>\n",
1039 |        "      <th rowspan=\"2\" valign=\"top\">Devils</th>\n",
1040 |        "      <th>2014</th>\n",
1041 |        "      <th>2</th>\n",
1042 |        "      <td>Devils</td>\n",
1043 |        "      <td>2</td>\n",
1044 |        "      <td>2014</td>\n",
1045 |        "      <td>863</td>\n",
1046 |        "    </tr>\n",
1047 |        "    <tr>\n",
1048 |        "      <th>2015</th>\n",
1049 |        "      <th>3</th>\n",
1050 |        "      <td>Devils</td>\n",
1051 |        "      <td>3</td>\n",
1052 |        "      <td>2015</td>\n",
1053 |        "      <td>673</td>\n",
1054 |        "    </tr>\n",
1055 |        "    <tr>\n",
1056 |        "      <th rowspan=\"3\" valign=\"top\">Kings</th>\n",
1057 |        "      <th>2014</th>\n",
1058 |        "      <th>4</th>\n",
1059 |        "      <td>Kings</td>\n",
1060 |        "      <td>3</td>\n",
1061 |        "      <td>2014</td>\n",
1062 |        "      <td>741</td>\n",
1063 |        "    </tr>\n",
1064 |        "    <tr>\n",
1065 |        "      <th>2016</th>\n",
1066 |        "      <th>6</th>\n",
1067 |        "      <td>Kings</td>\n",
1068 |        "      <td>1</td>\n",
1069 |        "      <td>2016</td>\n",
1070 |        "      <td>756</td>\n",
1071 |        "    </tr>\n",
1072 |        "    <tr>\n",
1073 |        "      <th>2017</th>\n",
1074 |        "      <th>7</th>\n",
1075 |        "      <td>Kings</td>\n",
1076 |        "      <td>1</td>\n",
1077 |        "      <td>2017</td>\n",
1078 |        "      <td>788</td>\n",
1079 |        "    </tr>\n",
1080 |        "    <tr>\n",
1081 |        "      <th rowspan=\"4\" valign=\"top\">Riders</th>\n",
1082 |        "      <th>2014</th>\n",
1083 |        "      <th>0</th>\n",
1084 |        "      <td>Riders</td>\n",
1085 |        "      <td>1</td>\n",
1086 |        "      <td>2014</td>\n",
1087 |        "      <td>876</td>\n",
1088 |        "    </tr>\n",
1089 |        "    <tr>\n",
1090 |        "      <th>2015</th>\n",
1091 |        "      <th>1</th>\n",
1092 |        "      <td>Riders</td>\n",
1093 |        "      <td>2</td>\n",
1094 |        "      <td>2015</td>\n",
1095 |        "      <td>789</td>\n",
1096 |        "    </tr>\n",
1097 |        "    <tr>\n",
1098 |        "      <th>2016</th>\n",
1099 |        "      <th>8</th>\n",
1100 |        "      <td>Riders</td>\n",
1101 |        "      <td>2</td>\n",
1102 |        "      <td>2016</td>\n",
1103 |        "      <td>694</td>\n",
1104 |        "    </tr>\n",
1105 |        "    <tr>\n",
1106 |        "      <th>2017</th>\n",
1107 |        "      <th>11</th>\n",
1108 |        "      <td>Riders</td>\n",
1109 |        "      <td>2</td>\n",
1110 |        "      <td>2017</td>\n",
1111 |        "      <td>690</td>\n",
1112 |        "    </tr>\n",
1113 |        "    <tr>\n",
1114 |        "      <th rowspan=\"2\" valign=\"top\">Royals</th>\n",
1115 |        "      <th>2014</th>\n",
1116 |        "      <th>9</th>\n",
1117 |        "      <td>Royals</td>\n",
1118 |        "      <td>4</td>\n",
1119 |        "      <td>2014</td>\n",
1120 |        "      <td>701</td>\n",
1121 |        "    </tr>\n",
1122 |        "    <tr>\n",
1123 |        "      <th>2015</th>\n",
1124 |        "      <th>10</th>\n",
1125 |        "      <td>Royals</td>\n",
1126 |        "      <td>1</td>\n",
1127 |        "      <td>2015</td>\n",
1128 |        "      <td>804</td>\n",
1129 |        "    </tr>\n",
1130 |        "    <tr>\n",
1131 |        "      <th>kings</th>\n",
1132 |        "      <th>2015</th>\n",
1133 |        "      <th>5</th>\n",
1134 |        "      <td>kings</td>\n",
1135 |        "      <td>4</td>\n",
1136 |        "      <td>2015</td>\n",
1137 |        "      <td>812</td>\n",
1138 |        "    </tr>\n",
1139 |        "  </tbody>\n",
1140 |        "</table>\n",
1141 |        "</div>"
1142 |       ],
1143 |       "text/plain": [
1144 |        "                  Team  Rank  Year  Points\n",
1145 |        "Team   Year                               \n",
1146 |        "Devils 2014 2   Devils     2  2014     863\n",
1147 |        "       2015 3   Devils     3  2015     673\n",
1148 |        "Kings  2014 4    Kings     3  2014     741\n",
1149 |        "       2016 6    Kings     1  2016     756\n",
1150 |        "       2017 7    Kings     1  2017     788\n",
1151 |        "Riders 2014 0   Riders     1  2014     876\n",
1152 |        "       2015 1   Riders     2  2015     789\n",
1153 |        "       2016 8   Riders     2  2016     694\n",
1154 |        "       2017 11  Riders     2  2017     690\n",
1155 |        "Royals 2014 9   Royals     4  2014     701\n",
1156 |        "       2015 10  Royals     1  2015     804\n",
1157 |        "kings  2015 5    kings     4  2015     812"
1158 |       ]
1159 |      },
1160 |      "execution_count": 13,
1161 |      "metadata": {},
1162 |      "output_type": "execute_result"
1163 |     }
1164 |    ],
1165 |    "source": [
1166 |     "# group by multiple columns\n",
1167 |     "grouped_2 = df.groupby(['Team','Year'])\n",
1168 |     "grouped_2.apply(lambda a: a[:])"
1169 |    ]
1170 |   },
1171 |   {
1172 |    "cell_type": "code",
1173 |    "execution_count": 14,
1174 |    "metadata": {},
1175 |    "outputs": [
1176 |     {
1177 |      "name": "stdout",
1178 |      "output_type": "stream",
1179 |      "text": [
1180 |       "Team\n",
1181 |       "Devils    768.000000\n",
1182 |       "Kings     761.666667\n",
1183 |       "Riders    762.250000\n",
1184 |       "Royals    752.500000\n",
1185 |       "kings     812.000000\n",
1186 |       "Name: Points, dtype: float64\n"
1187 |      ]
1188 |     }
1189 |    ],
1190 |    "source": [
1191 |     "# Aggregations\n",
1192 |     "grouped_a = df.groupby('Team')\n",
1193 |     "print(grouped_a['Points'].agg(np.mean))"
1194 |    ]
1195 |   },
1196 |   {
1197 |    "cell_type": "code",
1198 |    "execution_count": 15,
1199 |    "metadata": {},
1200 |    "outputs": [
1201 |     {
1202 |      "name": "stdout",
1203 |      "output_type": "stream",
1204 |      "text": [
1205 |       "         sum        mean         std\n",
1206 |       "Team                                \n",
1207 |       "Devils  1536  768.000000  134.350288\n",
1208 |       "Kings   2285  761.666667   24.006943\n",
1209 |       "Riders  3049  762.250000   88.567771\n",
1210 |       "Royals  1505  752.500000   72.831998\n",
1211 |       "kings    812  812.000000         NaN\n"
1212 |      ]
1213 |     }
1214 |    ],
1215 |    "source": [
1216 |     "# multiple Aggregations\n",
1217 |     "\n",
1218 |     "print(grouped_a['Points'].agg([np.sum, np.mean, np.std]))"
1219 |    ]
1220 |   },
1221 |   {
1222 |    "cell_type": "code",
1223 |    "execution_count": 16,
1224 |    "metadata": {},
1225 |    "outputs": [
1226 |     {
1227 |      "name": "stdout",
1228 |      "output_type": "stream",
1229 |      "text": [
1230 |       "        Rank      Year    Points\n",
1231 |       "0  -4.500000 -3.485685  3.852982\n",
1232 |       "1   1.500000 -1.161895  0.906086\n",
1233 |       "2  -2.121320 -2.121320  2.121320\n",
1234 |       "3   2.121320  2.121320 -2.121320\n",
1235 |       "4   3.464102 -3.273268 -2.582586\n",
1236 |       "5        NaN       NaN       NaN\n",
1237 |       "6  -1.732051  0.654654 -0.708128\n",
1238 |       "7  -1.732051  2.618615  3.290715\n",
1239 |       "8   1.500000  1.161895 -2.311789\n",
1240 |       "9   2.121320 -2.121320 -2.121320\n",
1241 |       "10 -2.121320  2.121320  2.121320\n",
1242 |       "11  1.500000  3.485685 -2.447278\n"
1243 |      ]
1244 |     }
1245 |    ],
1246 |    "source": [
1247 |     "# Transformation\n",
1248 |     "score = lambda x: (x - x.mean()) / x.std()*3\n",
1249 |     "print(grouped_a.transform(score))"
1250 |    ]
1251 |   },
1252 |   {
1253 |    "cell_type": "code",
1254 |    "execution_count": 17,
1255 |    "metadata": {},
1256 |    "outputs": [
1257 |     {
1258 |      "name": "stdout",
1259 |      "output_type": "stream",
1260 |      "text": [
1261 |       "      Team  Rank  Year  Points\n",
1262 |       "0   Riders     1  2014     876\n",
1263 |       "1   Riders     2  2015     789\n",
1264 |       "8   Riders     2  2016     694\n",
1265 |       "11  Riders     2  2017     690\n"
1266 |      ]
1267 |     }
1268 |    ],
1269 |    "source": [
1270 |     "# Filtration\n",
1271 |     "print(df.groupby('Team').filter(lambda x: len(x) >= 4))"
1272 |    ]
1273 |   },
1274 |   {
1275 |    "cell_type": "markdown",
1276 |    "metadata": {},
1277 |    "source": [
1278 |     "## Concatenating Data"
1279 |    ]
1280 |   },
1281 |   {
1282 |    "cell_type": "code",
1283 |    "execution_count": 18,
1284 |    "metadata": {},
1285 |    "outputs": [],
1286 |    "source": [
1287 |     "one = pd.DataFrame({\n",
1288 |     "         'Name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],\n",
1289 |     "         'subject_id':['sub1','sub2','sub4','sub6','sub5'],\n",
1290 |     "         'Marks_scored':[98,90,87,69,78]},\n",
1291 |     "         index=[1,2,3,4,5])\n",
1292 |     "two = pd.DataFrame({\n",
1293 |     "         'Name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],\n",
1294 |     "         'subject_id':['sub2','sub4','sub3','sub6','sub5'],\n",
1295 |     "         'Marks_scored':[89,80,79,97,88]},\n",
1296 |     "         index=[1,2,3,4,5])"
1297 |    ]
1298 |   },
1299 |   {
1300 |    "cell_type": "code",
1301 |    "execution_count": 19,
1302 |    "metadata": {},
1303 |    "outputs": [
1304 |     {
1305 |      "data": {
1306 |       "text/html": [
1307 |        "<div>\n",
1308 |        "<style scoped>\n",
1309 |        "    .dataframe tbody tr th:only-of-type {\n",
1310 |        "        vertical-align: middle;\n",
1311 |        "    }\n",
1312 |        "\n",
1313 |        "    .dataframe tbody tr th {\n",
1314 |        "        vertical-align: top;\n",
1315 |        "    }\n",
1316 |        "\n",
1317 |        "    .dataframe thead th {\n",
1318 |        "        text-align: right;\n",
1319 |        "    }\n",
1320 |        "</style>\n",
1321 |        "<table border=\"1\" class=\"dataframe\">\n",
1322 |        "  <thead>\n",
1323 |        "    <tr style=\"text-align: right;\">\n",
1324 |        "      <th></th>\n",
1325 |        "      <th>Name</th>\n",
1326 |        "      <th>subject_id</th>\n",
1327 |        "      <th>Marks_scored</th>\n",
1328 |        "    </tr>\n",
1329 |        "  </thead>\n",
1330 |        "  <tbody>\n",
1331 |        "    <tr>\n",
1332 |        "      <th>1</th>\n",
1333 |        "      <td>Alex</td>\n",
1334 |        "      <td>sub1</td>\n",
1335 |        "      <td>98</td>\n",
1336 |        "    </tr>\n",
1337 |        "    <tr>\n",
1338 |        "      <th>2</th>\n",
1339 |        "      <td>Amy</td>\n",
1340 |        "      <td>sub2</td>\n",
1341 |        "      <td>90</td>\n",
1342 |        "    </tr>\n",
1343 |        "    <tr>\n",
1344 |        "      <th>3</th>\n",
1345 |        "      <td>Allen</td>\n",
1346 |        "      <td>sub4</td>\n",
1347 |        "      <td>87</td>\n",
1348 |        "    </tr>\n",
1349 |        "    <tr>\n",
1350 |        "      <th>4</th>\n",
1351 |        "      <td>Alice</td>\n",
1352 |        "      <td>sub6</td>\n",
1353 |        "      <td>69</td>\n",
1354 |        "    </tr>\n",
1355 |        "    <tr>\n",
1356 |        "      <th>5</th>\n",
1357 |        "      <td>Ayoung</td>\n",
1358 |        "      <td>sub5</td>\n",
1359 |        "      <td>78</td>\n",
1360 |        "    </tr>\n",
1361 |        "  </tbody>\n",
1362 |        "</table>\n",
1363 |        "</div>"
1364 |       ],
1365 |       "text/plain": [
1366 |        "     Name subject_id  Marks_scored\n",
1367 |        "1    Alex       sub1            98\n",
1368 |        "2     Amy       sub2            90\n",
1369 |        "3   Allen       sub4            87\n",
1370 |        "4   Alice       sub6            69\n",
1371 |        "5  Ayoung       sub5            78"
1372 |       ]
1373 |      },
1374 |      "execution_count": 19,
1375 |      "metadata": {},
1376 |      "output_type": "execute_result"
1377 |     }
1378 |    ],
1379 |    "source": [
1380 |     "one"
1381 |    ]
1382 |   },
1383 |   {
1384 |    "cell_type": "code",
1385 |    "execution_count": 20,
1386 |    "metadata": {},
1387 |    "outputs": [
1388 |     {
1389 |      "data": {
1390 |       "text/html": [
1391 |        "<div>\n",
1392 |        "<style scoped>\n",
1393 |        "    .dataframe tbody tr th:only-of-type {\n",
1394 |        "        vertical-align: middle;\n",
1395 |        "    }\n",
1396 |        "\n",
1397 |        "    .dataframe tbody tr th {\n",
1398 |        "        vertical-align: top;\n",
1399 |        "    }\n",
1400 |        "\n",
1401 |        "    .dataframe thead th {\n",
1402 |        "        text-align: right;\n",
1403 |        "    }\n",
1404 |        "</style>\n",
1405 |        "<table border=\"1\" class=\"dataframe\">\n",
1406 |        "  <thead>\n",
1407 |        "    <tr style=\"text-align: right;\">\n",
1408 |        "      <th></th>\n",
1409 |        "      <th>Name</th>\n",
1410 |        "      <th>subject_id</th>\n",
1411 |        "      <th>Marks_scored</th>\n",
1412 |        "    </tr>\n",
1413 |        "  </thead>\n",
1414 |        "  <tbody>\n",
1415 |        "    <tr>\n",
1416 |        "      <th>1</th>\n",
1417 |        "      <td>Billy</td>\n",
1418 |        "      <td>sub2</td>\n",
1419 |        "      <td>89</td>\n",
1420 |        "    </tr>\n",
1421 |        "    <tr>\n",
1422 |        "      <th>2</th>\n",
1423 |        "      <td>Brian</td>\n",
1424 |        "      <td>sub4</td>\n",
1425 |        "      <td>80</td>\n",
1426 |        "    </tr>\n",
1427 |        "    <tr>\n",
1428 |        "      <th>3</th>\n",
1429 |        "      <td>Bran</td>\n",
1430 |        "      <td>sub3</td>\n",
1431 |        "      <td>79</td>\n",
1432 |        "    </tr>\n",
1433 |        "    <tr>\n",
1434 |        "      <th>4</th>\n",
1435 |        "      <td>Bryce</td>\n",
1436 |        "      <td>sub6</td>\n",
1437 |        "      <td>97</td>\n",
1438 |        "    </tr>\n",
1439 |        "    <tr>\n",
1440 |        "      <th>5</th>\n",
1441 |        "      <td>Betty</td>\n",
1442 |        "      <td>sub5</td>\n",
1443 |        "      <td>88</td>\n",
1444 |        "    </tr>\n",
1445 |        "  </tbody>\n",
1446 |        "</table>\n",
1447 |        "</div>"
1448 |       ],
1449 |       "text/plain": [
1450 |        "    Name subject_id  Marks_scored\n",
1451 |        "1  Billy       sub2            89\n",
1452 |        "2  Brian       sub4            80\n",
1453 |        "3   Bran       sub3            79\n",
1454 |        "4  Bryce       sub6            97\n",
1455 |        "5  Betty       sub5            88"
1456 |       ]
1457 |      },
1458 |      "execution_count": 20,
1459 |      "metadata": {},
1460 |      "output_type": "execute_result"
1461 |     }
1462 |    ],
1463 |    "source": [
1464 |     "two"
1465 |    ]
1466 |   },
1467 |   {
1468 |    "cell_type": "code",
1469 |    "execution_count": 21,
1470 |    "metadata": {},
1471 |    "outputs": [
1472 |     {
1473 |      "name": "stdout",
1474 |      "output_type": "stream",
1475 |      "text": [
1476 |       "     Name subject_id  Marks_scored\n",
1477 |       "1    Alex       sub1            98\n",
1478 |       "2     Amy       sub2            90\n",
1479 |       "3   Allen       sub4            87\n",
1480 |       "4   Alice       sub6            69\n",
1481 |       "5  Ayoung       sub5            78\n",
1482 |       "1   Billy       sub2            89\n",
1483 |       "2   Brian       sub4            80\n",
1484 |       "3    Bran       sub3            79\n",
1485 |       "4   Bryce       sub6            97\n",
1486 |       "5   Betty       sub5            88\n"
1487 |      ]
1488 |     }
1489 |    ],
1490 |    "source": [
1491 |     "print(pd.concat([one,two]))"
1492 |    ]
1493 |   },
1494 |   {
1495 |    "cell_type": "code",
1496 |    "execution_count": 22,
1497 |    "metadata": {},
1498 |    "outputs": [
1499 |     {
1500 |      "name": "stdout",
1501 |      "output_type": "stream",
1502 |      "text": [
1503 |       "     Name subject_id  Marks_scored   Name subject_id  Marks_scored\n",
1504 |       "1    Alex       sub1            98  Billy       sub2            89\n",
1505 |       "2     Amy       sub2            90  Brian       sub4            80\n",
1506 |       "3   Allen       sub4            87   Bran       sub3            79\n",
1507 |       "4   Alice       sub6            69  Bryce       sub6            97\n",
1508 |       "5  Ayoung       sub5            78  Betty       sub5            88\n"
1509 |      ]
1510 |     }
1511 |    ],
1512 |    "source": [
1513 |     "#concat along axis=1, columns\n",
1514 |     "print(pd.concat([one,two],axis=1))"
1515 |    ]
1516 |   },
1517 |   {
1518 |    "cell_type": "code",
1519 |    "execution_count": 23,
1520 |    "metadata": {},
1521 |    "outputs": [
1522 |     {
1523 |      "name": "stdout",
1524 |      "output_type": "stream",
1525 |      "text": [
1526 |       "     Name subject_id  Marks_scored\n",
1527 |       "1    Alex       sub1            98\n",
1528 |       "2     Amy       sub2            90\n",
1529 |       "3   Allen       sub4            87\n",
1530 |       "4   Alice       sub6            69\n",
1531 |       "5  Ayoung       sub5            78\n",
1532 |       "1   Billy       sub2            89\n",
1533 |       "2   Brian       sub4            80\n",
1534 |       "3    Bran       sub3            79\n",
1535 |       "4   Bryce       sub6            97\n",
1536 |       "5   Betty       sub5            88\n"
1537 |      ]
1538 |     }
1539 |    ],
1540 |    "source": [
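1541 |     "# append (note: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat([one, two]) is the replacement)\n",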
1542 |     "print(one.append(two))"
1543 |    ]
1544 |   },
1545 |   {
1546 |    "cell_type": "markdown",
1547 |    "metadata": {},
1548 |    "source": [
1549 |     "References:\n",
1550 |     "- [Tutorials Point - Merging](https://www.tutorialspoint.com/python_pandas/python_pandas_merging_joining.htm)\n",
1551 |     "- [Tutorials Point - Grouping](https://www.tutorialspoint.com/python_pandas/python_pandas_groupby.htm)\n",
1552 |     "- [Tutorials Point - Concatenation](https://www.tutorialspoint.com/python_pandas/python_pandas_concatenation.htm)"
1553 |    ]
1554 |   },
1555 |   {
1556 |    "cell_type": "markdown",
1557 |    "metadata": {},
1558 |    "source": [
1559 |     "# PySpark"
1560 |    ]
1561 |   },
1562 |   {
1563 |    "cell_type": "markdown",
1564 |    "metadata": {},
1565 |    "source": [
1566 |     "The data sets used for this demonstration are access logs from the NASA Kennedy Space Center web server in Florida:\n",
1567 |     "- [Part-1](ftp://ita.ee.lbl.gov/traces/NASA_access_log_Jul95.gz)\n",
1568 |     "- [Part-2](ftp://ita.ee.lbl.gov/traces/NASA_access_log_Aug95.gz)\n",
1569 |     "\n",
1570 |     "Make sure both files are in the same directory as this notebook."
1571 |    ]
1572 |   },
1573 |   {
1574 |    "cell_type": "code",
1575 |    "execution_count": 24,
1576 |    "metadata": {},
1577 |    "outputs": [],
1578 |    "source": [
1579 |     "from pyspark.context import SparkContext\n",
1580 |     "from pyspark.sql.context import SQLContext\n",
1581 |     "from pyspark.sql.session import SparkSession\n",
1582 |     "    \n",
1583 |     "sc = SparkContext()\n",
1584 |     "sqlContext = SQLContext(sc)\n",
1585 |     "spark = SparkSession(sc)"
1586 |    ]
1587 |   },
1588 |   {
1589 |    "cell_type": "code",
1590 |    "execution_count": 25,
1591 |    "metadata": {},
1592 |    "outputs": [
1593 |     {
1594 |      "data": {
1595 |       "text/html": [
1596 |        "\n",
1597 |        "            <div>\n",
1598 |        "                <p><b>SparkSession - in-memory</b></p>\n",
1599 |        "                \n",
1600 |        "        <div>\n",
1601 |        "            <p><b>SparkContext</b></p>\n",
1602 |        "\n",
1603 |        "            <p><a href=\"http://192.168.0.102:4040\">Spark UI</a></p>\n",
1604 |        "\n",
1605 |        "            <dl>\n",
1606 |        "              <dt>Version</dt>\n",
1607 |        "                <dd><code>v2.4.5</code></dd>\n",
1608 |        "              <dt>Master</dt>\n",
1609 |        "                <dd><code>local[*]</code></dd>\n",
1610 |        "              <dt>AppName</dt>\n",
1611 |        "                <dd><code>pyspark-shell</code></dd>\n",
1612 |        "            </dl>\n",
1613 |        "        </div>\n",
1614 |        "        \n",
1615 |        "            </div>\n",
1616 |        "        "
1617 |       ],
1618 |       "text/plain": [
1619 |        "<pyspark.sql.session.SparkSession at 0x17355339f88>"
1620 |       ]
1621 |      },
1622 |      "execution_count": 25,
1623 |      "metadata": {},
1624 |      "output_type": "execute_result"
1625 |     }
1626 |    ],
1627 |    "source": [
1628 |     "spark"
1629 |    ]
1630 |   },
1631 |   {
1632 |    "cell_type": "code",
1633 |    "execution_count": 26,
1634 |    "metadata": {},
1635 |    "outputs": [
1636 |     {
1637 |      "data": {
1638 |       "text/plain": [
1639 |        "<pyspark.sql.context.SQLContext at 0x17354f8f1c8>"
1640 |       ]
1641 |      },
1642 |      "execution_count": 26,
1643 |      "metadata": {},
1644 |      "output_type": "execute_result"
1645 |     }
1646 |    ],
1647 |    "source": [
1648 |     "sqlContext"
1649 |    ]
1650 |   },
1651 |   {
1652 |    "cell_type": "code",
1653 |    "execution_count": 27,
1654 |    "metadata": {},
1655 |    "outputs": [],
1656 |    "source": [
1657 |     "import re"
1658 |    ]
1659 |   },
1660 |   {
1661 |    "cell_type": "markdown",
1662 |    "metadata": {},
1663 |    "source": [
1664 |     "- [Regex](https://www.w3schools.com/python/python_regex.asp)\n",
1665 |     "- [FindIter](https://www.tutorialspoint.com/How-do-we-use-re-finditer-method-in-Python-regular-expression)"
1666 |    ]
1667 |   },
1668 |   {
1669 |    "cell_type": "code",
1670 |    "execution_count": 28,
1671 |    "metadata": {},
1672 |    "outputs": [
1673 |     {
1674 |      "name": "stdout",
1675 |      "output_type": "stream",
1676 |      "text": [
1677 |       "<re.Match object; span=(19, 24), match='spark'> String match \"spark\" at 19:24\n",
1678 |       "<re.Match object; span=(30, 35), match='spark'> String match \"spark\" at 30:35\n"
1679 |      ]
1680 |     }
1681 |    ],
1682 |    "source": [
1683 |     "# python example of how regular expressions can be used\n",
1684 |     "s1 = 'Im searching for a spark in Pyspark'\n",
1685 |     "pattern = 'spark'\n",
1686 |     "for match in re.finditer(pattern, s1):\n",
1687 |     "    s = match.start()\n",
1688 |     "    e = match.end()\n",
1689 |     "    print(match, 'String match \"%s\" at %d:%d' % (s1[s:e], s, e))"
1690 |    ]
1691 |   },
1692 |   {
1693 |    "cell_type": "code",
1694 |    "execution_count": 29,
1695 |    "metadata": {},
1696 |    "outputs": [
1697 |     {
1698 |      "data": {
1699 |       "text/plain": [
1700 |        "['NASA_access_log_Aug95.gz', 'NASA_access_log_Jul95.gz']"
1701 |       ]
1702 |      },
1703 |      "execution_count": 29,
1704 |      "metadata": {},
1705 |      "output_type": "execute_result"
1706 |     }
1707 |    ],
1708 |    "source": [
1709 |     "# Load and View the Dataset using `sqlContext.read.text()` or `spark.read.text()`\n",
1710 |     "import glob\n",
1711 |     "\n",
1712 |     "raw_data_files = glob.glob('*.gz')\n",
1713 |     "raw_data_files"
1714 |    ]
1715 |   },
1716 |   {
1717 |    "cell_type": "code",
1718 |    "execution_count": 30,
1719 |    "metadata": {},
1720 |    "outputs": [
1721 |     {
1722 |      "name": "stdout",
1723 |      "output_type": "stream",
1724 |      "text": [
1725 |       "root\n",
1726 |       " |-- value: string (nullable = true)\n",
1727 |       "\n"
1728 |      ]
1729 |     }
1730 |    ],
1731 |    "source": [
1732 |     "base_df = spark.read.text(raw_data_files)\n",
1733 |     "base_df.printSchema()"
1734 |    ]
1735 |   },
1736 |   {
1737 |    "cell_type": "code",
1738 |    "execution_count": 31,
1739 |    "metadata": {},
1740 |    "outputs": [
1741 |     {
1742 |      "data": {
1743 |       "text/plain": [
1744 |        "pyspark.sql.dataframe.DataFrame"
1745 |       ]
1746 |      },
1747 |      "execution_count": 31,
1748 |      "metadata": {},
1749 |      "output_type": "execute_result"
1750 |     }
1751 |    ],
1752 |    "source": [
1753 |     "type(base_df)"
1754 |    ]
1755 |   },
1756 |   {
1757 |    "cell_type": "markdown",
1758 |    "metadata": {},
1759 |    "source": [
1760 |     "Converting the DataFrame to an RDD, just to show Spark's original data structure.\n",
1761 |     "\n",
1762 |     "[What is RDD?](https://databricks.com/glossary/what-is-rdd#:~:text=RDD%20was%20the%20primary%20user,that%20offers%20transformations%20and%20actions.)"
1763 |    ]
1764 |   },
1765 |   {
1766 |    "cell_type": "code",
1767 |    "execution_count": 32,
1768 |    "metadata": {},
1769 |    "outputs": [
1770 |     {
1771 |      "data": {
1772 |       "text/plain": [
1773 |        "pyspark.rdd.RDD"
1774 |       ]
1775 |      },
1776 |      "execution_count": 32,
1777 |      "metadata": {},
1778 |      "output_type": "execute_result"
1779 |     }
1780 |    ],
1781 |    "source": [
1782 |     "base_df_rdd = base_df.rdd\n",
1783 |     "type(base_df_rdd)"
1784 |    ]
1785 |   },
1786 |   {
1787 |    "cell_type": "code",
1788 |    "execution_count": 33,
1789 |    "metadata": {},
1790 |    "outputs": [
1791 |     {
1792 |      "name": "stdout",
1793 |      "output_type": "stream",
1794 |      "text": [
1795 |       "+-----------------------------------------------------------------------------------------------------------------------+\n",
1796 |       "|value                                                                                                                  |\n",
1797 |       "+-----------------------------------------------------------------------------------------------------------------------+\n",
1798 |       "|199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245                                 |\n",
1799 |       "|unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985                      |\n",
1800 |       "|199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085   |\n",
1801 |       "|burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0               |\n",
1802 |       "|199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179|\n",
1803 |       "|burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0                    |\n",
1804 |       "|burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0        |\n",
1805 |       "|205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985             |\n",
1806 |       "|d104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985                               |\n",
1807 |       "|129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074                                              |\n",
1808 |       "+-----------------------------------------------------------------------------------------------------------------------+\n",
1809 |       "only showing top 10 rows\n",
1810 |       "\n"
1811 |      ]
1812 |     }
1813 |    ],
1814 |    "source": [
1815 |     "#view data\n",
1816 |     "base_df.show(10, truncate=False)"
1817 |    ]
1818 |   },
1819 |   {
1820 |    "cell_type": "code",
1821 |    "execution_count": 34,
1822 |    "metadata": {},
1823 |    "outputs": [
1824 |     {
1825 |      "data": {
1826 |       "text/plain": [
1827 |        "[Row(value='199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245'),\n",
1828 |        " Row(value='unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985'),\n",
1829 |        " Row(value='199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085'),\n",
1830 |        " Row(value='burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0'),\n",
1831 |        " Row(value='199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179'),\n",
1832 |        " Row(value='burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0'),\n",
1833 |        " Row(value='burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0'),\n",
1834 |        " Row(value='205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985'),\n",
1835 |        " Row(value='d104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985'),\n",
1836 |        " Row(value='129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074')]"
1837 |       ]
1838 |      },
1839 |      "execution_count": 34,
1840 |      "metadata": {},
1841 |      "output_type": "execute_result"
1842 |     }
1843 |    ],
1844 |    "source": [
1845 |     "base_df_rdd.take(10)"
1846 |    ]
1847 |   },
1848 |   {
1849 |    "cell_type": "markdown",
1850 |    "metadata": {},
1851 |    "source": [
1852 |     "The log data above, [which is similar to web server logs](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format), is clearly semi-structured and needs some processing and wrangling to make it useful for modelling purposes.\n",
1853 |     "\n",
1854 |     "Let's clean and parse our log dataset to extract structured attributes with meaningful information from each log message."
1855 |    ]
1856 |   },
1857 |   {
1858 |    "cell_type": "markdown",
1859 |    "metadata": {},
1860 |    "source": [
1861 |     "The data has to be parsed into individual columns. The special built-in [regexp\\_extract()](http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.functions.regexp_extract)\n",
1862 |     "function can do the parsing. This function matches a column against a regular expression with one or more [capture groups](http://regexone.com/lesson/capturing_groups) and allows extraction of the matched groups. One regular expression is used for each field to be extracted."
1863 |    ]
1864 |   },
1865 |   {
1866 |    "cell_type": "code",
1867 |    "execution_count": 35,
1868 |    "metadata": {},
1869 |    "outputs": [
1870 |     {
1871 |      "name": "stdout",
1872 |      "output_type": "stream",
1873 |      "text": [
1874 |       "(3461613, 1)\n"
1875 |      ]
1876 |     }
1877 |    ],
1878 |    "source": [
1879 |     "# look at our dataset dimensions\n",
1880 |     "#print((base_df.count(), len(base_df.columns)))\n",
1881 |     "print(\"(3461613, 1)\")"
1882 |    ]
1883 |   },
1884 |   {
1885 |    "cell_type": "code",
1886 |    "execution_count": 36,
1887 |    "metadata": {},
1888 |    "outputs": [
1889 |     {
1890 |      "data": {
1891 |       "text/plain": [
1892 |        "['199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] \"GET /history/apollo/ HTTP/1.0\" 200 6245',\n",
1893 |        " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:06 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985',\n",
1894 |        " '199.120.110.21 - - [01/Jul/1995:00:00:09 -0400] \"GET /shuttle/missions/sts-73/mission-sts-73.html HTTP/1.0\" 200 4085',\n",
1895 |        " 'burger.letters.com - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/countdown/liftoff.html HTTP/1.0\" 304 0',\n",
1896 |        " '199.120.110.21 - - [01/Jul/1995:00:00:11 -0400] \"GET /shuttle/missions/sts-73/sts-73-patch-small.gif HTTP/1.0\" 200 4179',\n",
1897 |        " 'burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 304 0',\n",
1898 |        " 'burger.letters.com - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/video/livevideo.gif HTTP/1.0\" 200 0',\n",
1899 |        " '205.212.115.106 - - [01/Jul/1995:00:00:12 -0400] \"GET /shuttle/countdown/countdown.html HTTP/1.0\" 200 3985',\n",
1900 |        " 'd104.aa.net - - [01/Jul/1995:00:00:13 -0400] \"GET /shuttle/countdown/ HTTP/1.0\" 200 3985',\n",
1901 |        " '129.94.144.152 - - [01/Jul/1995:00:00:13 -0400] \"GET / HTTP/1.0\" 200 7074',\n",
1902 |        " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /shuttle/countdown/count.gif HTTP/1.0\" 200 40310',\n",
1903 |        " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 200 786',\n",
1904 |        " 'unicomp6.unicomp.net - - [01/Jul/1995:00:00:14 -0400] \"GET /images/KSC-logosmall.gif HTTP/1.0\" 200 1204',\n",
1905 |        " 'd104.aa.net - - [01/Jul/1995:00:00:15 -0400] \"GET /shuttle/countdown/count.gif HTTP/1.0\" 200 40310',\n",
1906 |        " 'd104.aa.net - - [01/Jul/1995:00:00:15 -0400] \"GET /images/NASA-logosmall.gif HTTP/1.0\" 200 786']"
1907 |       ]
1908 |      },
1909 |      "execution_count": 36,
1910 |      "metadata": {},
1911 |      "output_type": "execute_result"
1912 |     }
1913 |    ],
1914 |    "source": [
1915 |     "sample_logs = [item['value'] for item in base_df.take(15)]\n",
1916 |     "sample_logs"
1917 |    ]
1918 |   },
1919 |   {
1920 |    "cell_type": "code",
1921 |    "execution_count": 37,
1922 |    "metadata": {},
1923 |    "outputs": [
1924 |     {
1925 |      "data": {
1926 |       "text/plain": [
1927 |        "['199.72.81.55',\n",
1928 |        " 'unicomp6.unicomp.net',\n",
1929 |        " '199.120.110.21',\n",
1930 |        " 'burger.letters.com',\n",
1931 |        " '199.120.110.21',\n",
1932 |        " 'burger.letters.com',\n",
1933 |        " 'burger.letters.com',\n",
1934 |        " '205.212.115.106',\n",
1935 |        " 'd104.aa.net',\n",
1936 |        " '129.94.144.152',\n",
1937 |        " 'unicomp6.unicomp.net',\n",
1938 |        " 'unicomp6.unicomp.net',\n",
1939 |        " 'unicomp6.unicomp.net',\n",
1940 |        " 'd104.aa.net',\n",
1941 |        " 'd104.aa.net']"
1942 |       ]
1943 |      },
1944 |      "execution_count": 37,
1945 |      "metadata": {},
1946 |      "output_type": "execute_result"
1947 |     }
1948 |    ],
1949 |    "source": [
1950 |     "# Extracting hostnames\n",
1951 |     "# Regular expressions to extract the hostname from the logs:\n",
1952 |     "host_pattern = r'(^\\S+\\.[\\S+\\.]+\\S+)\\s'\n",
1953 |     "hosts = [re.search(host_pattern, item).group(1)\n",
1954 |     "           if re.search(host_pattern, item)\n",
1955 |     "           else 'no match'\n",
1956 |     "           for item in sample_logs]\n",
1957 |     "hosts"
1958 |    ]
1959 |   },
1960 |   {
1961 |    "cell_type": "code",
1962 |    "execution_count": 38,
1963 |    "metadata": {},
1964 |    "outputs": [
1965 |     {
1966 |      "data": {
1967 |       "text/plain": [
1968 |        "['01/Jul/1995:00:00:01 -0400',\n",
1969 |        " '01/Jul/1995:00:00:06 -0400',\n",
1970 |        " '01/Jul/1995:00:00:09 -0400',\n",
1971 |        " '01/Jul/1995:00:00:11 -0400',\n",
1972 |        " '01/Jul/1995:00:00:11 -0400',\n",
1973 |        " '01/Jul/1995:00:00:12 -0400',\n",
1974 |        " '01/Jul/1995:00:00:12 -0400',\n",
1975 |        " '01/Jul/1995:00:00:12 -0400',\n",
1976 |        " '01/Jul/1995:00:00:13 -0400',\n",
1977 |        " '01/Jul/1995:00:00:13 -0400',\n",
1978 |        " '01/Jul/1995:00:00:14 -0400',\n",
1979 |        " '01/Jul/1995:00:00:14 -0400',\n",
1980 |        " '01/Jul/1995:00:00:14 -0400',\n",
1981 |        " '01/Jul/1995:00:00:15 -0400',\n",
1982 |        " '01/Jul/1995:00:00:15 -0400']"
1983 |       ]
1984 |      },
1985 |      "execution_count": 38,
1986 |      "metadata": {},
1987 |      "output_type": "execute_result"
1988 |     }
1989 |    ],
1990 |    "source": [
1991 |     "# Extracting timestamps\n",
1992 |     "ts_pattern = r'\\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} -\\d{4})]'\n",
1993 |     "timestamps = [re.search(ts_pattern, item).group(1) for item in sample_logs]\n",
1994 |     "timestamps"
1995 |    ]
1996 |   },
1997 |   {
1998 |    "cell_type": "code",
1999 |    "execution_count": 39,
2000 |    "metadata": {},
2001 |    "outputs": [
2002 |     {
2003 |      "data": {
2004 |       "text/plain": [
2005 |        "[('GET', '/history/apollo/', 'HTTP/1.0'),\n",
2006 |        " ('GET', '/shuttle/countdown/', 'HTTP/1.0'),\n",
2007 |        " ('GET', '/shuttle/missions/sts-73/mission-sts-73.html', 'HTTP/1.0'),\n",
2008 |        " ('GET', '/shuttle/countdown/liftoff.html', 'HTTP/1.0'),\n",
2009 |        " ('GET', '/shuttle/missions/sts-73/sts-73-patch-small.gif', 'HTTP/1.0'),\n",
2010 |        " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0'),\n",
2011 |        " ('GET', '/shuttle/countdown/video/livevideo.gif', 'HTTP/1.0'),\n",
2012 |        " ('GET', '/shuttle/countdown/countdown.html', 'HTTP/1.0'),\n",
2013 |        " ('GET', '/shuttle/countdown/', 'HTTP/1.0'),\n",
2014 |        " ('GET', '/', 'HTTP/1.0'),\n",
2015 |        " ('GET', '/shuttle/countdown/count.gif', 'HTTP/1.0'),\n",
2016 |        " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0'),\n",
2017 |        " ('GET', '/images/KSC-logosmall.gif', 'HTTP/1.0'),\n",
2018 |        " ('GET', '/shuttle/countdown/count.gif', 'HTTP/1.0'),\n",
2019 |        " ('GET', '/images/NASA-logosmall.gif', 'HTTP/1.0')]"
2020 |       ]
2021 |      },
2022 |      "execution_count": 39,
2023 |      "metadata": {},
2024 |      "output_type": "execute_result"
2025 |     }
2026 |    ],
2027 |    "source": [
2028 |     "# Extracting HTTP request method, URIs, and protocol\n",
2029 |     "\n",
2030 |     "method_uri_protocol_pattern = r'\\\"(\\S+)\\s(\\S+)\\s*(\\S*)\\\"'\n",
2031 |     "method_uri_protocol = [re.search(method_uri_protocol_pattern, item).groups()\n",
2032 |     "               if re.search(method_uri_protocol_pattern, item)\n",
2033 |     "               else 'no match'\n",
2034 |     "              for item in sample_logs]\n",
2035 |     "method_uri_protocol"
2036 |    ]
2037 |   },
2038 |   {
2039 |    "cell_type": "code",
2040 |    "execution_count": 40,
2041 |    "metadata": {},
2042 |    "outputs": [
2043 |     {
2044 |      "name": "stdout",
2045 |      "output_type": "stream",
2046 |      "text": [
2047 |       "['200', '200', '200', '304', '200', '304', '200', '200', '200', '200', '200', '200', '200', '200', '200']\n"
2048 |      ]
2049 |     }
2050 |    ],
2051 |    "source": [
2052 |     "# Extracting HTTP status codes\n",
2053 |     "\n",
2054 |     "status_pattern = r'\\s(\\d{3})\\s'\n",
2055 |     "status = [re.search(status_pattern, item).group(1) for item in sample_logs]\n",
2056 |     "print(status)"
2057 |    ]
2058 |   },
2059 |   {
2060 |    "cell_type": "code",
2061 |    "execution_count": 41,
2062 |    "metadata": {},
2063 |    "outputs": [
2064 |     {
2065 |      "name": "stdout",
2066 |      "output_type": "stream",
2067 |      "text": [
2068 |       "['6245', '3985', '4085', '0', '4179', '0', '0', '3985', '3985', '7074', '40310', '786', '1204', '40310', '786']\n"
2069 |      ]
2070 |     }
2071 |    ],
2072 |    "source": [
2073 |     "# Extracting HTTP response content size\n",
2074 |     "\n",
2075 |     "content_size_pattern = r'\\s(\\d+)$'\n",
2076 |     "content_size = [re.search(content_size_pattern, item).group(1) for item in sample_logs]\n",
2077 |     "print(content_size)"
2078 |    ]
2079 |   },
2080 |   {
2081 |    "cell_type": "code",
2082 |    "execution_count": 42,
2083 |    "metadata": {
2084 |     "scrolled": true
2085 |    },
2086 |    "outputs": [
2087 |     {
2088 |      "name": "stdout",
2089 |      "output_type": "stream",
2090 |      "text": [
2091 |       "+--------------------+--------------------+------+--------------------+--------+------+------------+\n",
2092 |       "|                host|           timestamp|method|            endpoint|protocol|status|content_size|\n",
2093 |       "+--------------------+--------------------+------+--------------------+--------+------+------------+\n",
2094 |       "|        199.72.81.55|01/Jul/1995:00:00...|   GET|    /history/apollo/|HTTP/1.0|   200|        6245|\n",
2095 |       "|unicomp6.unicomp.net|01/Jul/1995:00:00...|   GET| /shuttle/countdown/|HTTP/1.0|   200|        3985|\n",
2096 |       "|      199.120.110.21|01/Jul/1995:00:00...|   GET|/shuttle/missions...|HTTP/1.0|   200|        4085|\n",
2097 |       "|  burger.letters.com|01/Jul/1995:00:00...|   GET|/shuttle/countdow...|HTTP/1.0|   304|           0|\n",
2098 |       "|      199.120.110.21|01/Jul/1995:00:00...|   GET|/shuttle/missions...|HTTP/1.0|   200|        4179|\n",
2099 |       "|  burger.letters.com|01/Jul/1995:00:00...|   GET|/images/NASA-logo...|HTTP/1.0|   304|           0|\n",
2100 |       "|  burger.letters.com|01/Jul/1995:00:00...|   GET|/shuttle/countdow...|HTTP/1.0|   200|           0|\n",
2101 |       "|     205.212.115.106|01/Jul/1995:00:00...|   GET|/shuttle/countdow...|HTTP/1.0|   200|        3985|\n",
2102 |       "|         d104.aa.net|01/Jul/1995:00:00...|   GET| /shuttle/countdown/|HTTP/1.0|   200|        3985|\n",
2103 |       "|      129.94.144.152|01/Jul/1995:00:00...|   GET|                   /|HTTP/1.0|   200|        7074|\n",
2104 |       "+--------------------+--------------------+------+--------------------+--------+------+------------+\n",
2105 |       "only showing top 10 rows\n",
2106 |       "\n"
2107 |      ]
2108 |     }
2109 |    ],
2110 |    "source": [
2111 |     "# Putting it all together\n",
2112 |     "\n",
2113 |     "# Use the regexp_extract(...) function to build our DataFrame with all of the log attributes neatly extracted into their own separate columns.\n",
2114 |     "\n",
2115 |     "from pyspark.sql.functions import regexp_extract\n",
2116 |     "\n",
2117 |     "logs_df = base_df.select(regexp_extract('value', host_pattern, 1).alias('host'),\n",
2118 |     "                         regexp_extract('value', ts_pattern, 1).alias('timestamp'),\n",
2119 |     "                         regexp_extract('value', method_uri_protocol_pattern, 1).alias('method'),\n",
2120 |     "                         regexp_extract('value', method_uri_protocol_pattern, 2).alias('endpoint'),\n",
2121 |     "                         regexp_extract('value', method_uri_protocol_pattern, 3).alias('protocol'),\n",
2122 |     "                         regexp_extract('value', status_pattern, 1).cast('integer').alias('status'),\n",
2123 |     "                         regexp_extract('value', content_size_pattern, 1).cast('integer').alias('content_size'))\n",
2124 |     "logs_df.show(10, truncate=True)\n"
2125 |    ]
2126 |   },
2127 |   {
2128 |    "cell_type": "markdown",
2129 |    "metadata": {},
2130 |    "source": [
2131 |     "## Reference:-\n",
2132 |     "\n",
2133 |     " - To learn more about wrangling data with PySpark, refer to this [link](https://opensource.com/article/19/5/log-data-apache-spark)"
2134 |    ]
2135 |   },
2136 |   {
2137 |    "cell_type": "code",
2138 |    "execution_count": null,
2139 |    "metadata": {},
2140 |    "outputs": [],
2141 |    "source": []
2142 |   }
2143 |  ],
2144 |  "metadata": {
2145 |   "kernelspec": {
2146 |    "display_name": "Python 3",
2147 |    "language": "python",
2148 |    "name": "python3"
2149 |   },
2150 |   "language_info": {
2151 |    "codemirror_mode": {
2152 |     "name": "ipython",
2153 |     "version": 3
2154 |    },
2155 |    "file_extension": ".py",
2156 |    "mimetype": "text/x-python",
2157 |    "name": "python",
2158 |    "nbconvert_exporter": "python",
2159 |    "pygments_lexer": "ipython3",
2160 |    "version": "3.7.4"
2161 |   }
2162 |  },
2163 |  "nbformat": 4,
2164 |  "nbformat_minor": 2
2165 | }
2166 | 


--------------------------------------------------------------------------------
/List Comprehensions_Python.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/List Comprehensions_Python.pdf


--------------------------------------------------------------------------------
/Pandas Crosstab vs Pivot table.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "af387817",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# import libraries\n",
 11 |     "import pandas as pd\n",
 12 |     "import numpy as np"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "id": "f8a9e062",
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "#Import dataset\n",
 23 |     "df = pd.read_csv('sample data.csv')"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "id": "ef83769a",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "# Crosstab\n",
 32 |     "\n",
 33 |     "`pd.crosstab()` - It is used to get an initial view of the data. It is a tabular structure showing the relationship between various variables.\n",
 34 |     "\n",
 35 |     "Here you can quickly see the proportion of loan defaulters within each loan grade (each row is normalized to sum to 1)."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 3,
 41 |    "id": "1b57fd9b",
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "data": {
 46 |       "text/html": [
 47 |        "<div>\n",
 48 |        "<style scoped>\n",
 49 |        "    .dataframe tbody tr th:only-of-type {\n",
 50 |        "        vertical-align: middle;\n",
 51 |        "    }\n",
 52 |        "\n",
 53 |        "    .dataframe tbody tr th {\n",
 54 |        "        vertical-align: top;\n",
 55 |        "    }\n",
 56 |        "\n",
 57 |        "    .dataframe thead th {\n",
 58 |        "        text-align: right;\n",
 59 |        "    }\n",
 60 |        "</style>\n",
 61 |        "<table border=\"1\" class=\"dataframe\">\n",
 62 |        "  <thead>\n",
 63 |        "    <tr style=\"text-align: right;\">\n",
 64 |        "      <th>Loan Status</th>\n",
 65 |        "      <th>0</th>\n",
 66 |        "      <th>1</th>\n",
 67 |        "    </tr>\n",
 68 |        "    <tr>\n",
 69 |        "      <th>Grade</th>\n",
 70 |        "      <th></th>\n",
 71 |        "      <th></th>\n",
 72 |        "    </tr>\n",
 73 |        "  </thead>\n",
 74 |        "  <tbody>\n",
 75 |        "    <tr>\n",
 76 |        "      <th>A</th>\n",
 77 |        "      <td>0.903497</td>\n",
 78 |        "      <td>0.096503</td>\n",
 79 |        "    </tr>\n",
 80 |        "    <tr>\n",
 81 |        "      <th>B</th>\n",
 82 |        "      <td>0.904952</td>\n",
 83 |        "      <td>0.095048</td>\n",
 84 |        "    </tr>\n",
 85 |        "    <tr>\n",
 86 |        "      <th>C</th>\n",
 87 |        "      <td>0.905355</td>\n",
 88 |        "      <td>0.094645</td>\n",
 89 |        "    </tr>\n",
 90 |        "    <tr>\n",
 91 |        "      <th>D</th>\n",
 92 |        "      <td>0.907136</td>\n",
 93 |        "      <td>0.092864</td>\n",
 94 |        "    </tr>\n",
 95 |        "    <tr>\n",
 96 |        "      <th>E</th>\n",
 97 |        "      <td>0.902224</td>\n",
 98 |        "      <td>0.097776</td>\n",
 99 |        "    </tr>\n",
100 |        "    <tr>\n",
101 |        "      <th>F</th>\n",
102 |        "      <td>0.914966</td>\n",
103 |        "      <td>0.085034</td>\n",
104 |        "    </tr>\n",
105 |        "    <tr>\n",
106 |        "      <th>G</th>\n",
107 |        "      <td>0.884921</td>\n",
108 |        "      <td>0.115079</td>\n",
109 |        "    </tr>\n",
110 |        "    <tr>\n",
111 |        "      <th>All</th>\n",
112 |        "      <td>0.904991</td>\n",
113 |        "      <td>0.095009</td>\n",
114 |        "    </tr>\n",
115 |        "  </tbody>\n",
116 |        "</table>\n",
117 |        "</div>"
118 |       ],
119 |       "text/plain": [
120 |        "Loan Status         0         1\n",
121 |        "Grade                          \n",
122 |        "A            0.903497  0.096503\n",
123 |        "B            0.904952  0.095048\n",
124 |        "C            0.905355  0.094645\n",
125 |        "D            0.907136  0.092864\n",
126 |        "E            0.902224  0.097776\n",
127 |        "F            0.914966  0.085034\n",
128 |        "G            0.884921  0.115079\n",
129 |        "All          0.904991  0.095009"
130 |       ]
131 |      },
132 |      "execution_count": 3,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "pd.crosstab(index=df['Grade'], columns=df['Loan Status'], margins=True, normalize = 'index')"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "id": "eb6dd7da",
144 |    "metadata": {},
145 |    "source": [
146 |     "# Pivot Table\n",
147 |     "\n",
148 |     "Similar to Excel's pivot table functionality. Data can be viewed at various hierarchies of rows and columns. It lets you calculate, summarize and aggregate your data.\n",
149 |     "\n",
150 |     "Here you can see the aggregation of loan amount (mean) based on loan status and grade"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 32,
156 |    "id": "eb14e4ce",
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "data": {
161 |       "text/html": [
162 |        "<div>\n",
163 |        "<style scoped>\n",
164 |        "    .dataframe tbody tr th:only-of-type {\n",
165 |        "        vertical-align: middle;\n",
166 |        "    }\n",
167 |        "\n",
168 |        "    .dataframe tbody tr th {\n",
169 |        "        vertical-align: top;\n",
170 |        "    }\n",
171 |        "\n",
172 |        "    .dataframe thead th {\n",
173 |        "        text-align: right;\n",
174 |        "    }\n",
175 |        "</style>\n",
176 |        "<table border=\"1\" class=\"dataframe\">\n",
177 |        "  <thead>\n",
178 |        "    <tr style=\"text-align: right;\">\n",
179 |        "      <th></th>\n",
180 |        "      <th></th>\n",
181 |        "      <th>Loan Amount</th>\n",
182 |        "    </tr>\n",
183 |        "    <tr>\n",
184 |        "      <th>Loan Status</th>\n",
185 |        "      <th>Grade</th>\n",
186 |        "      <th></th>\n",
187 |        "    </tr>\n",
188 |        "  </thead>\n",
189 |        "  <tbody>\n",
190 |        "    <tr>\n",
191 |        "      <th rowspan=\"7\" valign=\"top\">0</th>\n",
192 |        "      <th>A</th>\n",
193 |        "      <td>16252.193081</td>\n",
194 |        "    </tr>\n",
195 |        "    <tr>\n",
196 |        "      <th>B</th>\n",
197 |        "      <td>16615.844804</td>\n",
198 |        "    </tr>\n",
199 |        "    <tr>\n",
200 |        "      <th>C</th>\n",
201 |        "      <td>16059.379174</td>\n",
202 |        "    </tr>\n",
203 |        "    <tr>\n",
204 |        "      <th>D</th>\n",
205 |        "      <td>16594.997490</td>\n",
206 |        "    </tr>\n",
207 |        "    <tr>\n",
208 |        "      <th>E</th>\n",
209 |        "      <td>16908.963026</td>\n",
210 |        "    </tr>\n",
211 |        "    <tr>\n",
212 |        "      <th>F</th>\n",
213 |        "      <td>16920.323420</td>\n",
214 |        "    </tr>\n",
215 |        "    <tr>\n",
216 |        "      <th>G</th>\n",
217 |        "      <td>16745.901345</td>\n",
218 |        "    </tr>\n",
219 |        "    <tr>\n",
220 |        "      <th rowspan=\"7\" valign=\"top\">1</th>\n",
221 |        "      <th>A</th>\n",
222 |        "      <td>15789.390688</td>\n",
223 |        "    </tr>\n",
224 |        "    <tr>\n",
225 |        "      <th>B</th>\n",
226 |        "      <td>16442.690789</td>\n",
227 |        "    </tr>\n",
228 |        "    <tr>\n",
229 |        "      <th>C</th>\n",
230 |        "      <td>15737.199478</td>\n",
231 |        "    </tr>\n",
232 |        "    <tr>\n",
233 |        "      <th>D</th>\n",
234 |        "      <td>16458.419619</td>\n",
235 |        "    </tr>\n",
236 |        "    <tr>\n",
237 |        "      <th>E</th>\n",
238 |        "      <td>16335.898039</td>\n",
239 |        "    </tr>\n",
240 |        "    <tr>\n",
241 |        "      <th>F</th>\n",
242 |        "      <td>17092.786667</td>\n",
243 |        "    </tr>\n",
244 |        "    <tr>\n",
245 |        "      <th>G</th>\n",
246 |        "      <td>18612.172414</td>\n",
247 |        "    </tr>\n",
248 |        "  </tbody>\n",
249 |        "</table>\n",
250 |        "</div>"
251 |       ],
252 |       "text/plain": [
253 |        "                    Loan Amount\n",
254 |        "Loan Status Grade              \n",
255 |        "0           A      16252.193081\n",
256 |        "            B      16615.844804\n",
257 |        "            C      16059.379174\n",
258 |        "            D      16594.997490\n",
259 |        "            E      16908.963026\n",
260 |        "            F      16920.323420\n",
261 |        "            G      16745.901345\n",
262 |        "1           A      15789.390688\n",
263 |        "            B      16442.690789\n",
264 |        "            C      15737.199478\n",
265 |        "            D      16458.419619\n",
266 |        "            E      16335.898039\n",
267 |        "            F      17092.786667\n",
268 |        "            G      18612.172414"
269 |       ]
270 |      },
271 |      "execution_count": 32,
272 |      "metadata": {},
273 |      "output_type": "execute_result"
274 |     }
275 |    ],
276 |    "source": [
277 |     "pd.pivot_table(df, values = ['Loan Amount'], index = ['Loan Status', 'Grade'], aggfunc = np.mean)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 33,
283 |    "id": "af45ee5e",
284 |    "metadata": {},
285 |    "outputs": [
286 |     {
287 |      "data": {
288 |       "text/html": [
289 |        "<div>\n",
290 |        "<style scoped>\n",
291 |        "    .dataframe tbody tr th:only-of-type {\n",
292 |        "        vertical-align: middle;\n",
293 |        "    }\n",
294 |        "\n",
295 |        "    .dataframe tbody tr th {\n",
296 |        "        vertical-align: top;\n",
297 |        "    }\n",
298 |        "\n",
299 |        "    .dataframe thead tr th {\n",
300 |        "        text-align: left;\n",
301 |        "    }\n",
302 |        "\n",
303 |        "    .dataframe thead tr:last-of-type th {\n",
304 |        "        text-align: right;\n",
305 |        "    }\n",
306 |        "</style>\n",
307 |        "<table border=\"1\" class=\"dataframe\">\n",
308 |        "  <thead>\n",
309 |        "    <tr>\n",
310 |        "      <th></th>\n",
311 |        "      <th colspan=\"6\" halign=\"left\">Loan Amount</th>\n",
312 |        "    </tr>\n",
313 |        "    <tr>\n",
314 |        "      <th>Employment Duration</th>\n",
315 |        "      <th colspan=\"2\" halign=\"left\">MORTGAGE</th>\n",
316 |        "      <th colspan=\"2\" halign=\"left\">OWN</th>\n",
317 |        "      <th colspan=\"2\" halign=\"left\">RENT</th>\n",
318 |        "    </tr>\n",
319 |        "    <tr>\n",
320 |        "      <th>Loan Status</th>\n",
321 |        "      <th>0</th>\n",
322 |        "      <th>1</th>\n",
323 |        "      <th>0</th>\n",
324 |        "      <th>1</th>\n",
325 |        "      <th>0</th>\n",
326 |        "      <th>1</th>\n",
327 |        "    </tr>\n",
328 |        "    <tr>\n",
329 |        "      <th>Grade</th>\n",
330 |        "      <th></th>\n",
331 |        "      <th></th>\n",
332 |        "      <th></th>\n",
333 |        "      <th></th>\n",
334 |        "      <th></th>\n",
335 |        "      <th></th>\n",
336 |        "    </tr>\n",
337 |        "  </thead>\n",
338 |        "  <tbody>\n",
339 |        "    <tr>\n",
340 |        "      <th>A</th>\n",
341 |        "      <td>16448.631916</td>\n",
342 |        "      <td>16081.432143</td>\n",
343 |        "      <td>15922.343693</td>\n",
344 |        "      <td>15881.758065</td>\n",
345 |        "      <td>16011.862069</td>\n",
346 |        "      <td>15213.743421</td>\n",
347 |        "    </tr>\n",
348 |        "    <tr>\n",
349 |        "      <th>B</th>\n",
350 |        "      <td>17086.940260</td>\n",
351 |        "      <td>17003.886574</td>\n",
352 |        "      <td>16320.042751</td>\n",
353 |        "      <td>15233.765957</td>\n",
354 |        "      <td>15870.433618</td>\n",
355 |        "      <td>15782.131673</td>\n",
356 |        "    </tr>\n",
357 |        "    <tr>\n",
358 |        "      <th>C</th>\n",
359 |        "      <td>16683.954201</td>\n",
360 |        "      <td>16461.773585</td>\n",
361 |        "      <td>15899.363636</td>\n",
362 |        "      <td>14337.859155</td>\n",
363 |        "      <td>15327.377866</td>\n",
364 |        "      <td>15215.772308</td>\n",
365 |        "    </tr>\n",
366 |        "    <tr>\n",
367 |        "      <th>D</th>\n",
368 |        "      <td>17193.639382</td>\n",
369 |        "      <td>17084.956044</td>\n",
370 |        "      <td>16732.007812</td>\n",
371 |        "      <td>15137.250000</td>\n",
372 |        "      <td>15753.386124</td>\n",
373 |        "      <td>16148.000000</td>\n",
374 |        "    </tr>\n",
375 |        "    <tr>\n",
376 |        "      <th>E</th>\n",
377 |        "      <td>17509.311889</td>\n",
378 |        "      <td>17437.475000</td>\n",
379 |        "      <td>16268.364929</td>\n",
380 |        "      <td>16632.739130</td>\n",
381 |        "      <td>16250.251641</td>\n",
382 |        "      <td>15094.678571</td>\n",
383 |        "    </tr>\n",
384 |        "    <tr>\n",
385 |        "      <th>F</th>\n",
386 |        "      <td>17342.458140</td>\n",
387 |        "      <td>18054.820513</td>\n",
388 |        "      <td>16262.329787</td>\n",
389 |        "      <td>12378.888889</td>\n",
390 |        "      <td>16497.473498</td>\n",
391 |        "      <td>17274.481481</td>\n",
392 |        "    </tr>\n",
393 |        "    <tr>\n",
394 |        "      <th>G</th>\n",
395 |        "      <td>16058.171429</td>\n",
396 |        "      <td>18264.285714</td>\n",
397 |        "      <td>16221.810811</td>\n",
398 |        "      <td>16140.600000</td>\n",
399 |        "      <td>17876.802469</td>\n",
400 |        "      <td>20335.000000</td>\n",
401 |        "    </tr>\n",
402 |        "  </tbody>\n",
403 |        "</table>\n",
404 |        "</div>"
405 |       ],
406 |       "text/plain": [
407 |        "                      Loan Amount                                            \\\n",
408 |        "Employment Duration      MORTGAGE                         OWN                 \n",
409 |        "Loan Status                     0             1             0             1   \n",
410 |        "Grade                                                                         \n",
411 |        "A                    16448.631916  16081.432143  15922.343693  15881.758065   \n",
412 |        "B                    17086.940260  17003.886574  16320.042751  15233.765957   \n",
413 |        "C                    16683.954201  16461.773585  15899.363636  14337.859155   \n",
414 |        "D                    17193.639382  17084.956044  16732.007812  15137.250000   \n",
415 |        "E                    17509.311889  17437.475000  16268.364929  16632.739130   \n",
416 |        "F                    17342.458140  18054.820513  16262.329787  12378.888889   \n",
417 |        "G                    16058.171429  18264.285714  16221.810811  16140.600000   \n",
418 |        "\n",
419 |        "                                                 \n",
420 |        "Employment Duration          RENT                \n",
421 |        "Loan Status                     0             1  \n",
422 |        "Grade                                            \n",
423 |        "A                    16011.862069  15213.743421  \n",
424 |        "B                    15870.433618  15782.131673  \n",
425 |        "C                    15327.377866  15215.772308  \n",
426 |        "D                    15753.386124  16148.000000  \n",
427 |        "E                    16250.251641  15094.678571  \n",
428 |        "F                    16497.473498  17274.481481  \n",
429 |        "G                    17876.802469  20335.000000  "
430 |       ]
431 |      },
432 |      "execution_count": 33,
433 |      "metadata": {},
434 |      "output_type": "execute_result"
435 |     }
436 |    ],
437 |    "source": [
438 |     "pd.pivot_table(df, values = ['Loan Amount'], index = ['Grade'], columns = ['Employment Duration','Loan Status'], aggfunc = np.mean)"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "id": "e65c286d",
444 |    "metadata": {},
445 |    "source": [
446 |     "- `Crosstab` works with series or list of variables whereas `Pivot table` works only with dataframe\n",
447 |     "- `Pivot table` does not have the normalize argument. In `crosstab`, the normalize argument converts counts to proportions by dividing each cell by a total that depends on the value passed, as described below:\n",
448 |     "\n",
449 |     "   - normalize = `'index'` divides each cell by the sum of its row\n",
450 |     "   - normalize = `'columns'` divides each cell by the sum of its column\n",
451 |     "   - normalize = `True` (or `'all'`) divides each cell by the total of all cells in the table"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "code",
456 |    "execution_count": null,
457 |    "id": "eac6d495",
458 |    "metadata": {},
459 |    "outputs": [],
460 |    "source": []
461 |   }
462 |  ],
463 |  "metadata": {
464 |   "kernelspec": {
465 |    "display_name": "Python 3 (ipykernel)",
466 |    "language": "python",
467 |    "name": "python3"
468 |   },
469 |   "language_info": {
470 |    "codemirror_mode": {
471 |     "name": "ipython",
472 |     "version": 3
473 |    },
474 |    "file_extension": ".py",
475 |    "mimetype": "text/x-python",
476 |    "name": "python",
477 |    "nbconvert_exporter": "python",
478 |    "pygments_lexer": "ipython3",
479 |    "version": "3.9.7"
480 |   }
481 |  },
482 |  "nbformat": 4,
483 |  "nbformat_minor": 5
484 | }
485 | 


--------------------------------------------------------------------------------
/Pandas_Cheat_Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Pandas_Cheat_Sheet.pdf


--------------------------------------------------------------------------------
/Pandas_Data Wrangling_CheatSheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Pandas_Data Wrangling_CheatSheet.pdf


--------------------------------------------------------------------------------
/Polynomial Regression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Polynomial Regression in Python\n",
  8 |     "- Regression analysis is a form of predictive modelling technique which investigates the relationship between a dependent and independent variable\n",
  9 |     "- Comparison with Linear Regression\n",
 10 |     "- [Dataset](https://media.geeksforgeeks.org/wp-content/uploads/data.csv)"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "### Import libraries and dataset\n",
 18 |     "- Import the important libraries and the dataset we are using to perform Polynomial Regression"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 1,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "# Importing the libraries \n",
 28 |     "import numpy as np \n",
 29 |     "import matplotlib.pyplot as plt \n",
 30 |     "import pandas as pd"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {},
 37 |    "outputs": [
 38 |     {
 39 |      "data": {
 40 |       "text/html": [
 41 |        "<div>\n",
 42 |        "<style scoped>\n",
 43 |        "    .dataframe tbody tr th:only-of-type {\n",
 44 |        "        vertical-align: middle;\n",
 45 |        "    }\n",
 46 |        "\n",
 47 |        "    .dataframe tbody tr th {\n",
 48 |        "        vertical-align: top;\n",
 49 |        "    }\n",
 50 |        "\n",
 51 |        "    .dataframe thead th {\n",
 52 |        "        text-align: right;\n",
 53 |        "    }\n",
 54 |        "</style>\n",
 55 |        "<table border=\"1\" class=\"dataframe\">\n",
 56 |        "  <thead>\n",
 57 |        "    <tr style=\"text-align: right;\">\n",
 58 |        "      <th></th>\n",
 59 |        "      <th>sno</th>\n",
 60 |        "      <th>Temperature</th>\n",
 61 |        "      <th>Pressure</th>\n",
 62 |        "    </tr>\n",
 63 |        "  </thead>\n",
 64 |        "  <tbody>\n",
 65 |        "    <tr>\n",
 66 |        "      <th>0</th>\n",
 67 |        "      <td>1</td>\n",
 68 |        "      <td>0</td>\n",
 69 |        "      <td>0.0002</td>\n",
 70 |        "    </tr>\n",
 71 |        "    <tr>\n",
 72 |        "      <th>1</th>\n",
 73 |        "      <td>2</td>\n",
 74 |        "      <td>20</td>\n",
 75 |        "      <td>0.0012</td>\n",
 76 |        "    </tr>\n",
 77 |        "    <tr>\n",
 78 |        "      <th>2</th>\n",
 79 |        "      <td>3</td>\n",
 80 |        "      <td>40</td>\n",
 81 |        "      <td>0.0060</td>\n",
 82 |        "    </tr>\n",
 83 |        "    <tr>\n",
 84 |        "      <th>3</th>\n",
 85 |        "      <td>4</td>\n",
 86 |        "      <td>60</td>\n",
 87 |        "      <td>0.0300</td>\n",
 88 |        "    </tr>\n",
 89 |        "    <tr>\n",
 90 |        "      <th>4</th>\n",
 91 |        "      <td>5</td>\n",
 92 |        "      <td>80</td>\n",
 93 |        "      <td>0.0900</td>\n",
 94 |        "    </tr>\n",
 95 |        "    <tr>\n",
 96 |        "      <th>5</th>\n",
 97 |        "      <td>6</td>\n",
 98 |        "      <td>100</td>\n",
 99 |        "      <td>0.2700</td>\n",
100 |        "    </tr>\n",
101 |        "  </tbody>\n",
102 |        "</table>\n",
103 |        "</div>"
104 |       ],
105 |       "text/plain": [
106 |        "   sno  Temperature  Pressure\n",
107 |        "0    1            0    0.0002\n",
108 |        "1    2           20    0.0012\n",
109 |        "2    3           40    0.0060\n",
110 |        "3    4           60    0.0300\n",
111 |        "4    5           80    0.0900\n",
112 |        "5    6          100    0.2700"
113 |       ]
114 |      },
115 |      "execution_count": 3,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "# Importing the dataset \n",
122 |     "datas = pd.read_csv('C:/Users/arock.000/Downloads/data.csv') \n",
123 |     "datas"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "### Dividing the dataset into 2 components\n",
131 |     "- Divide the dataset into two components, X and y. X holds the column at index 1 (Temperature), sliced as `1:2` so that it stays two-dimensional; y holds the column at index 2 (Pressure)."
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 5,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "X = datas.iloc[:, 1:2].values \n",
141 |     "y = datas.iloc[:, 2].values"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "### Fitting Linear Regression to the dataset\n",
149 |     "- Fitting the Linear Regression model on the two components X and y."
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 6,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "data": {
159 |       "text/plain": [
160 |        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
161 |       ]
162 |      },
163 |      "execution_count": 6,
164 |      "metadata": {},
165 |      "output_type": "execute_result"
166 |     }
167 |    ],
168 |    "source": [
169 |     "# Fitting Linear Regression to the dataset \n",
170 |     "from sklearn.linear_model import LinearRegression \n",
171 |     "lin = LinearRegression() \n",
172 |     "  \n",
173 |     "lin.fit(X, y)"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "### Fitting Polynomial Regression to the dataset\n",
181 |     "- Fitting the Polynomial Regression model on two components X and y."
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 7,
187 |    "metadata": {},
188 |    "outputs": [
189 |     {
190 |      "data": {
191 |       "text/plain": [
192 |        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
193 |       ]
194 |      },
195 |      "execution_count": 7,
196 |      "metadata": {},
197 |      "output_type": "execute_result"
198 |     }
199 |    ],
200 |    "source": [
201 |     "# Fitting Polynomial Regression to the dataset \n",
202 |     "from sklearn.preprocessing import PolynomialFeatures \n",
203 |     "  \n",
204 |     "poly = PolynomialFeatures(degree = 4) \n",
205 |     "X_poly = poly.fit_transform(X) \n",
206 |     "  \n",
207 |     "poly.fit(X_poly, y) \n",
208 |     "lin2 = LinearRegression() \n",
209 |     "lin2.fit(X_poly, y)"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "metadata": {},
215 |    "source": [
216 |     "### Visualization:\n",
217 |     "- Visualizing the Linear Regression results using scatter plot\n",
218 |     "- Visualising the Polynomial Regression results using scatter plot"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": 8,
224 |    "metadata": {},
225 |    "outputs": [
226 |     {
227 |      "data": {
228 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEWCAYAAABIVsEJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZyVdfn/8dcbEBXDpcQlVi3U0NRyIs3MJTU0Sy3Nhcwt0cpc0hS1+tmCueGO4qgoFea+kBuSlfJ1ZXBhUUlEEAQFFUUlWa/fH5978oQzcubMnLlnznk/H4/zmHOv5/qMcq65P6siAjMzs6bqkHcAZmbWPjmBmJlZSZxAzMysJE4gZmZWEicQMzMriROImZmVxAnEKoqknSRNzTuOSiBpiqRd8o7D2i4nEGuXJM2QtPvK+yNiXERsnkdMK5N0tqSlkt6X9I6kxyTtkHdcxYqILSPiX3nHYW2XE4hZC5DUqZFDN0fEp4D1gX8Ct5bhsyXJ/5at1fl/OqsoknaRNLtge4akUyVNlPSupJslrVFwfB9JzxY8IWxdcGywpJclvSfpeUn7Fxw7QtKjki6W9DZw9ifFFRHLgFFAd0ndsnusI+k6SXMlvSbpD5I6Zsc6Shoq6U1Jr0g6XlLUJypJ/5I0RNKjwCJgU0lbSBor6W1JUyX9oCDevbMyvJd91qnZ/vUl3ZOV/21J4+qTUeFTnqTVJV0iaU72ukTS6oW/c0mnSJqXlefI0v4LWnviBGLV4AfAAGATYGvgCABJXwZGAMcCnwGuBkbXfzECLwM7AesAvwX+Imnjgvt+FZgObAAM+aQAJHUGfgS8BSzIdo8ElgGfB74E7An8ODt2DLAXsC3wZWC/Bm57GDAI6ArMB8YCN2bxHAJcKWnL7NzrgGMjoiuwFfCPbP8pwGygG7AhcCbQ0PxGZwHbZ/FsA/QHflVwfCPS76k7cDQwTNJ6n/Q7sfbPCcSqwWURMSci3gb+RvoShPQlfXVEPBkRyyNiJLCY9EVJRNyaXbciIm4GXiJ9cdabExGXR8SyiPhPI5/9A0nvAP/JPu+AiFgmaUNSgjgpIj6IiHnAxcDB9dcBl0bE7IhYAJzbwL1viIgp2dPNAGBGRFyfxfM0cDtwQHbuUqCfpLUjYkF2vH7/xkDviFiatSE1lEAGAr+LiHkRMZ+UUA8rOL40O740Iu4D3gfaRFuUlY8TiFWD1wveLwI+lb3vDZySVd+8k33R9wQ+CyDpRwXVW++Q/nJfv+Bes4r47FsiYl3SX/eTge0KPns1YG7B/a8mPT2QxVB4/4Y+q3Bfb+CrK5VlIOnJAOD7wN7ATEkPFzTmXwBMAx6UNF3S4EbK8VlgZsH2zGxfvbeyRFav8PdsFaqxhj+zajALGBIRH6t+ktQbuAb4JvB4RCyX9CyggtOKnso6It6UdCwwXtKN2WcvBtZf6Yu33lygR8F2z4Zuu1JZHo6IPRr5/PHAvpJWA44HbgF6RsR7pGqsU7Lqrn9KGh8RD610izmkJDUl2+6V7bMq5icQa89Wk7RGwaupfxBdAxwn6atZT6a1JH1bUldgLdIX9HyArFF4q+YEGxEvAmOA0yJiLvAgMFTS2pI6SPqcpJ2z028BTpTUXdK6wOmruP09wGaSDpO0Wvb6iqQvSOosaaCkdSJiKbAQWJ6Vax9Jn5ekgv3LG7j/X4FfSeomaX3gN8BfmvP7sPbPCcTas/tIbQv1r7ObcnFE1JHaJa4gNWxPI2tgj4jngaHA48AbwBeBR1sg5guAQZI2IDWqdwaezz7/NlJ7BKTk9iAwEXiGVNZlNPzlTvYksSepDWUOqdruPKC+Q8BhwAxJC4HjgB9m+/sCfye1WTwOXNnI2I8/AHVZPJOAp7N9VsXkBaXM2j5JewHDI6J33rGY1fMTiFkbJGnNbOxGJ0ndgf8H3Jl3XGaF/ARi1gZJ6gI8DGxBqp67Fz
gxIhbmGphZAScQMzMriauwzMysJFU1DmT99dePPn365B2GmVm7MmHChDcjotvK+6sqgfTp04e6urq8wzAza1ckzWxov6uwzMysJE4gZmZWEicQMzMriROImZmVxAnEzMxK4gRiZlbBRo2CPn2gQ4f0c9Solrt3VXXjNTOrJqNGwaBBsGhR2p45M20DDBzY/Pv7CcTMrEKdddZHyaPeokVpf0twAjEzq1Cvvtq0/U3lBGJmVqF69Wra/qZyAjEzq1BDhkCXLv+7r0uXtL8lOIGYmVWogQOhthZ69wYp/aytbZkGdHAvLDOzijZwYMsljJX5CcTMzEriBGJmZiVxAjEzs5I4gZiZWUlyTSCSBkiaKmmapMENHB8oaWL2ekzSNgXHZkiaJOlZSV5m0MysleXWC0tSR2AYsAcwGxgvaXREPF9w2ivAzhGxQNJeQC3w1YLju0bEm60WtJmZ/VeeTyD9gWkRMT0ilgA3AfsWnhARj0XEgmzzCaBHK8doZmaNyDOBdAdmFWzPzvY15mjg/oLtAB6UNEHSoDLEZ2ZmnyDPgYRqYF80eKK0KymBfL1g944RMUfSBsBYSS9GxCMNXDsIGATQq6UmgDEzs1yfQGYDPQu2ewBzVj5J0tbAtcC+EfFW/f6ImJP9nAfcSaoS+5iIqI2Imoio6datWwuGb2ZW3fJMIOOBvpI2kdQZOBgYXXiCpF7AHcBhEfHvgv1rSepa/x7YE5jcapGbmVl+VVgRsUzS8cAYoCMwIiKmSDouOz4c+A3wGeBKSQDLIqIG2BC4M9vXCbgxIh7IoRhmZlVLEQ02O1SkmpqaqKvzkBEzs6aQNCH74/1/eCS6mZmVxAnEzMxK4gRiZmYlcQIxM7OSOIGYmVlJnEDMzKwkTiBmZlYSJxAzMyuJE4iZmZXECcTMzEriBGJmZiVxAjEzs5I4gZiZWUmcQMzMrCROIGZmVhInEDMzK4kTiJmZlcQJxMzMSuIEYmZmJXECMTOzkjiBmJlZSZxAzMysJLkmEEkDJE2VNE3S4AaOD5Q0MXs9JmmbYq81M7Pyyi2BSOoIDAP2AvoBh0jqt9JprwA7R8TWwO+B2iZca2ZmZZTnE0h/YFpETI+IJcBNwL6FJ0TEYxGxINt8AuhR7LVmZlZeeSaQ7sCsgu3Z2b7GHA3c39RrJQ2SVCepbv78+c0I18zMCuWZQNTAvmjwRGlXUgI5vanXRkRtRNRERE23bt1KCtTMzD6uU46fPRvoWbDdA5iz8kmStgauBfaKiLeacq2ZmZVPnk8g44G+kjaR1Bk4GBhdeIKkXsAdwGER8e+mXGtmZuWV2xNIRCyTdDwwBugIjIiIKZKOy44PB34DfAa4UhLAsqw6qsFrcymImVmVUkSDTQcVqaamJurq6vIOw8ysXZE0ISJqVt7vkehmZlYSJxAzMyuJE4iZmZXECcTMrNJNmQJlaO92AjEzq1TjxsHee8NWW8G997b47Z1AzMwqSQTcdx/stBN84xswfjz84Q/w9a+3+EflORLdzMxayvLlcNtt8Mc/wnPPQc+ecNllcPTR0KVLWT7SCcTMrD1bvBj+/Gc47zyYNg023xyuvx4OPRQ6dy7rRzuBmJm1R++/D7W1MHQozJkD222XnkD22w86dmyVEJxAzMzak7ffhssvT9VTb78Nu+ySnjj22APU0ETl5eMEYmbWHsyZAxddBMOHwwcfwHe+A2ecATvskFtITiBmZm3Zyy/D+efDDTfAsmVwyCFw+unwxS/mHZkTiJlZmzRxYupRdcstsNpqcNRR8Mtfwqab5h3ZfzmBmJm1JY8+mhLHvffCpz4Fp5wCJ58MG2+cd2Qf4wRiZpa3CBgzJiWORx6Bz3wGfvc7OP54WG+9vKNrlBOImVleli+H22+Hc8+FZ56BHj3gkkvgxz+GtdbKO7pVcgIxM2ttS5Z8NPjvpZdgs81gxAgYOLDsg/9akhOImVlr+eADuOYauPBCeO01+NKX4NZbYf/9W23wX0
tyAjEzK7cFC+CKK+DSS+Gtt2DnneG662DPPVt98F9LcgIxMyuXuXM/Gvz3/vuwzz5p8N/XvpZ3ZC3CCcTMrKVNn54G/11/fRr8d9BBMHgwbL113pG1KCcQM7OWMmlS6lF1003QqRMceWQa/Pe5z+UdWVnkuqCUpAGSpkqaJmlwA8e3kPS4pMWSTl3p2AxJkyQ9K6mu9aI2M1vJ44+nuam23hruvht+8Qt45ZVUdVWhyQNyfAKR1BEYBuwBzAbGSxodEc8XnPY2cAKwXyO32TUi3ixvpGZmDYiABx9Mg/8efhg+/Wn47W/T4L9Pfzrv6FpFnk8g/YFpETE9IpYANwH7Fp4QEfMiYjywNI8Azcw+pn7lv5oaGDAgLeJ00UUwcyb85jdVkzwg3wTSHZhVsD0721esAB6UNEHSoMZOkjRIUp2kuvnz55cYqplVvSVLUqP4llvCgQfCwoVw7bVpttyTT07zVlWZPBvRG+r8HE24fseImCNpA2CspBcj4pGP3TCiFqgFqKmpacr9zcxg0aKUKC68EGbNgm23hZtvhu9/v10O/mtJeSaQ2UDPgu0ewJxiL46IOdnPeZLuJFWJfSyBmJmVZMECGDYsDf57803YaSe4+upUbdWOB/+1pDyrsMYDfSVtIqkzcDAwupgLJa0lqWv9e2BPYHLZIjWz6vH662nBpt694de/hv79Ydy4NEvuXns5eRTI7QkkIpZJOh4YA3QERkTEFEnHZceHS9oIqAPWBlZIOgnoB6wP3Kn0H7ITcGNEPJBHOcysQrzyClxwQZrUcOnS1M4xeHCqsrIG5TqQMCLuA+5bad/wgvevk6q2VrYQ2Ka80ZlZVZg8+aPBfx07wuGHw2mnwec/n3dkbV7RCUTSmkCviJhaxnjMzFrHE0+kMRyjR6e1N048MQ0A7N6UzqDVrag2EEnfAZ4FHsi2t5VUVHuFmVmbEQFjx8Juu8EOO6S2jbPPTmM4hg518miiYp9Azib1cvoXQEQ8K6lPWSIyM2tpK1bAXXfBOefAhAnw2c+mhDFoUFWO32gpxSaQZRHxrtz7wMzak6VLYdSotPLfiy+mealqa+FHP4LVV887unav2AQyWdKhQEdJfUnzUz1WvrDMzJph0aK0YNOFF8Krr6ZJDv/6VzjggDRLrrWIYseB/BzYElgM3Ai8C5xUrqDMzEryzjupmqpPHzjhBOjZE+69F559Fg4+2Mmjha3yt5nNmjs6InYHzip/SGZmTfTGG3DJJXDllWmOqr32Siv/7bRT3pFVtFUmkIhYLmmRpHUi4t3WCMrMrCgzZnw0+G/x4o8G/33pS3lHVhWKfZ77EJgkaSzwQf3OiDihLFGZmX2S559Pg/9uvBE6dEiN4qedBpttlndkVaXYBHJv9jIzy89TT6XBf3fdBV26wM9/DqecAj0amrDCyq2oBBIRI8sdiJlZgyLgH/9IieOhh2DdddPCTT//Oay/ft7RVbWiEoikV2hgrY6I2LTFIzIzgzT4b/TolDieego22ii1dxx7LHTtmnd0RvFVWDUF79cADgSqZ91GM2s9S5emiQ3PPTe1dWyyCQwfniY5XGONvKOzAkWNA4mItwper0XEJcBuZY7NzKrJf/6TFnDq2zc1infokEaR//vf6anDyaPNKbYK68sFmx1ITyR+hjSz5nv3XbjqKrj4Ypg3L01yePnl8O1vpyRibVaxVVhDC94vA2YAP2jxaMysesybl5aLHTYsJZE994Qzz4RvfMOr/rUTxfbC2rXcgZhZlXj11TRH1bXXwocfwve/nwb/bbdd3pFZExW7HsiJktZWcq2kpyXtWe7gzKyCvPACHHFEmhH3qqvS3FTPPw+33urk0U4VW8F4VEQsBPYENgCOBM4tW1RmVjnq6tJTxpZbwi23wE9/Ci+/nKYf2WKLvKOzZii2DaS+QnJv4PqIeE5eHMTMGhMB//pXGsMxdiyss05q3zjxROjWLe/orIUUm0AmSHoQ2AQ4Q1JXYEX5wj
KzdmnFCrjnnpQ4nngCNtwwLeZ03HGw9tp5R2ctrNgEcjSwLTA9IhZJ+jSpGsvMDJYtg5tvToljypS0HseVV6Y2jzXXzDs6K5Ni20B2AKZGxDuSfgj8irSoVLNIGiBpqqRpkgY3cHwLSY9LWizp1KZca2at4MMPU4P4ZpvBD3+Y9v35z/DSS/CTnzh5VLhiE8hVwCJJ2wCnATOBPzXng7OFqoYBewH9gEMk9VvptLdJy+deWMK1ZlYuCxfC+eenaUZ++lPYYIM0Q+7EiSmReOW/qlBsAlkWEQHsC1waEZfS/JHo/YFpETE9IpYAN2X3/6+ImBcR44GlTb3WzMpg/nz49a+hd284/XTYaqs0U+7jj8O++3rkeJUp9s+E9ySdARwG7JQ9AazWzM/uDswq2J4NfLWlr5U0CBgE0KtXr6ZHaWYwaxYMHQq1tWnOqu99Lw3++8pX8o7MclTsnwsHAYtJ40FeJ32BX9DMz26oG/DHpoxv7rURURsRNRFR083dB82aZupUOOqoNPhv2DD4wQ/S4L/bb3fysKKnMnld0u1A32zXm8Cdzfzs2UDPgu0ewJxWuNbMVuXpp1OPqttvh9VXT7PhnnpqqroyyxQ7lckxwG3A1dmu7sBdzfzs8UBfSZtI6gwcDIxuhWvNrCER8PDDMGBAmlrkwQdTNdXMmWl23ApIHqNGpR7GHTqkn6NG5R1R+1ZsG8jPSA3XTwJExEuSNmjOB0fEMknHA2OAjsCIiJgi6bjs+HBJGwF1wNrACkknAf0iYmFD1zYnHrOqFQH33gvnnJMawzfYID19/OQnaQR5hRg1CgYNgkWL0vbMmWkbYODA/OJqz5Q6V63iJOnJiPiqpGci4kuSOgFPR8TW5Q+x5dTU1ERdXV3eYZi1DcuWpYkM//hHmDQpPWH88pepzaMCx2/06ZOSxsp694YZM1o7mvZF0oSIqFl5f7GN6A9LOhNYU9IewK3A31oyQDNrJR9+CFdfDZtvDocemhLJyJFp8N/PflaRyQPSLPJN2W+rVmwCOR2YD0wCjgXuI41GN7P24r330jocm26a5qb6zGfgjjtg8uS0hOxqze2Z37Y11ovfvftLt8o2EEkdgIkRsRVwTflDMrMW9dZbcNllqSF8wQLYbbc03chuu1XVyn9DhvxvGwhAly5pv5VmlU8gEbECeE6S87RZezJ7Npx8cvoT+3e/g513hiefhIcegm9+s6qSB6SG8tra1OYhpZ+1tW5Ab45ie2FtDEyR9BTwQf3OiPhuWaIys9K99FKaQv1Pf0rTqx96aJp2ZMst844sdwMHOmG0pGITyG/LGoWZNd+zz6YeVbfdltozjjkm9arq0yfvyKxCfWICkbQGcBzweVID+nURsaw1AjOzIo0blxLH/fdD165w2mlw0klpMSezMlrVE8hI0ky44/ho6vQTyx2Uma1CREoY55wDjz6alokdMiRNrb7uunlHZ1ViVQmkX0R8EUDSdcBT5Q/JzBq1fHka/HfuufDcc9CzZ+phdfTRqUuRWStaVQL57zoc2dQjZQ7HzBq0eHFqFD//fJg2DbbYAq6/PjWQd+6cd3RWpVaVQLaRtDB7L9JI9IXZ+4iItcsanVm1e//91Nd06FCYMydNcnj77bDffl68yXL3iQkkIjq2ViBmVuDtt9PAv8suS+933RVuuAF2373qxm9Y2+WFi83aktdeg4suSnNVffABfPe7cMYZsP32eUdm9jFOIGZtwbRpqX1j5MjUUH7IIR+tOW7WRjmBmOXpuedSj6pbbkmD/44+Og3+22STvCMzWyUnELM8PPpoGvx3771p8N+pp6Z5qzbaKO/IzIrmBGLWWiJgzJg0+G/cOFh/ffj979MaHOutl3d0Zk3mBGJWbsuXp663554LzzwDPXrApZem6qq11so7OrOSOYGYlcuSJWndjfPOSzPkbr45jBiRpoP14D+rAE4gZi3tgw/gmmvS6n+vvQZf/nKafmT//aGjh1ZZ5XACMWspCx
bAFVek6qm33koLOI0YAXvs4cF/VpGcQMyaa+7cNPhv+PA09cg++6TBf1/7Wt6RmZVVrpPpSBogaaqkaZIGN3Bcki7Ljk+U9OWCYzMkTZL0rKS61o3cDHj5ZTjuuLRg00UXpVHjEyfC3/7m5GFVIbcnEEkdgWHAHsBsYLyk0RHxfMFpewF9s9dXgauyn/V2jYg3Wylks2TixNSj6uaboVMnOPLINPjvc5/LOzKzVpXnE0h/YFpETI+IJcBNwL4rnbMv8KdIngDWlbRxawdqBsBjj8F3vgPbbJOeMn7xC3jllVR15eRhVSjPBNIdmFWwPTvbV+w5ATwoaYKkQY19iKRBkuok1c2fP78FwraqUj/4b5ddYMcd4fHH4Xe/g5kz4YIL4LOfzTtCs9zk2YjeULeUaMI5O0bEHEkbAGMlvRgRj3zs5IhaoBagpqZm5fubNWz5crjzzjTdyNNPQ/fucPHFcMwxHvxnlsnzCWQ20LNguwcwp9hzIqL+5zzgTlKVmFnzLFmSut726wcHHgjvvQfXXQfTp8NJJzl5mBXIM4GMB/pK2kRSZ+BgYPRK54wGfpT1xtoeeDci5kpaS1JXAElrAXsCk1szeKswH3yQxm987nMfrS9+yy3wwgtw1FEeOW7WgNyqsLI11o8HxgAdgRERMUXScdnx4cB9wN7ANGARcGR2+YbAndka7Z2AGyPigVYuglWCBQtg2LCUPN58E3baKY0i/9a3PPjPbBUUUT3NAjU1NVFX5yEjBrz+emrTuOqqVE317W+nwX877ph3ZGZtjqQJEVGz8n6PRLfqMn166j11/fWwdCn84AcweHDqmmtmTeIEYtVh8uQ0+O+mm9KEhkcckQb/ff7zeUdm1m45gVhle+KJ1BV39OjUg+qkk9IAQI/fMGu2XOfCMiuLCBg7FnbbDXbYAf7v/+Dss9PgvwsvdPLIjBqVpvHq0CH9HDUq74isvfETiFWOFSvgrrvSkrETJqREMXQoDBoEn/pU3tG1KaNGpV/LokVpe+bMtA1pvSuzYvgJxNq/pUvhhhtgyy3h+9+Hd99NXXGnT0/VVU4eH3PWWR8lj3qLFqX9ZsXyE4i1X4sWpVHiF14Ir76aelLddBMccIBX/luFV19t2n6zhvgJxNqfd95J1VR9+sAJJ0CvXnDvvfDMM3DQQU4eRejVq2n7zRriBGLtxxtvpDEbvXunupaaGnjkERg3Dvbe2yPHm2DIkDRbS6EuXdJ+s2I5gVjbN2MG/OxnKXGcfz4MGJBmyL3vvjT1iDXZwIFQW5t+pVL6WVvrBnRrGreBWNs1ZQqcdx7ceGPqa3r44XDaadC3b96RVYSBA50wrHmcQKztefLJNPjv7rtTvcoJJ6TeVD165B2ZmRVwArG2IQIeeigljn/8A9ZbD37zG/j5z2H99fOOzswa4ARi+VqxIj1p/PGPMH48bLxx6pY7aBB07Zp3dGb2CZxALB9Ll8Jf/5omOHzhBdh0U7j6avjRj2CNNfKOzsyK4ARires//0lLxl5wQZo/44tfTI3kBx4Infy/o1l74n+x1jrefReuvBIuuQTmzYOvfQ2uuCIt5OTxG2btkhOIlde8eSlpDBsGCxempWLPPDON33DiMGvXPJDQymPmzNSDqnfv1M7xrW+lGXIfeAC+8Y02lzw8tblZ0/kJxFrWCy+khHHjjSlJHHZYGvy3+eZ5R9YoT21uVho/gVjLGD8evve9NKX6bbfB8cfDyy+n2XLbcPIAT21uVqpcE4ikAZKmSpomaXADxyXpsuz4RElfLvballKNVRtFlzkiDfrbYw/o3x/++U/41a/Sn/AXXww9e7Zi1KXz1OZmJYqIXF5AR+BlYFOgM/Ac0G+lc/YG7gcEbA88Wey1Db222267aIq//CWiS5eI9E2ZXl26pP2VqqgyL18ecdddEf37pxM22iji/PMj3n03t7ibo3fv/y1v/at377wjM2sbgLpo4Ds1zyeQ/sC0iJgeEUuAm4B9VzpnX+
BPWRmeANaVtHGR1zZbNVZtfGKZly6FP/85jd3Ybz+YPx+uugpeeQV++UtYe+1cYm4uT21uVpo8E0h3YFbB9uxsXzHnFHNts1Vj1UZDZVuD//DtmVfCZpulkeIdOqR6rX//G447rt2PHPfU5malybMXVkP9OKPIc4q5Nt1AGgQMAujVxOXWevVK1fkN7a9UhWXuykJ+wlWczMVsxBuw0fZw2WVp8F+Hyup/4anNzZouz2+B2UBhK2sPYE6R5xRzLQARURsRNRFR061btyYFWI1VG0OGQK815/N7fsWr9OI8BjO5wzaMPfOf8Nhj8J3vVFzyMLPS5PlNMB7oK2kTSZ2Bg4HRK50zGvhR1htre+DdiJhb5LXNVnVVG6++ysAnT+Dl5b05k3P4O7uzz0Z1vPGnMewxZJc2N/jPzPKVWxVWRCyTdDwwhtSrakRETJF0XHZ8OHAfqSfWNGARcOQnXVuOOKuiauPFF9PKf3/5CwCdfvhDOP10DthiCw7IOTQza7uUemhVh5qamqirq8s7jLZjwoS0Dscdd6SG8GOOgVNOqexGHjNrMkkTIqJm5f2eyqTaRMDDD8M558DYsbDOOmlywxNPhCa2EZlZdXMCqRYrVsA996QnjieegA03THNW/eQn7Xb8hpnlywmk0i1bBjffnJLF5MlpbpJhw+DII2HNNfOOzszaMSeQSvXhh3DDDXD++WmkeL9+aRT5QQfBaqvlHZ2ZVQAnkErz3nswfDhcdBG8/nqa5PDiiz1+w8xanBNIpXjzTbj00rRM7DvvwO67p+lGdt3V4zfMrCycQNq7WbNg6FC45po06+H++8MZZ8BXvpJ3ZGZW4ZxA2qupUz8a/BeRRjuefjp84Qt5R2ZmVcIJpL15+unUFff222H11eHYY+HUU9M8K2ZmrcgJpD2IgEceSYljzJg0bmPwYDjpJNhgg7yjM7Mq5QTSlkXAvfemUeOPP56SxR//mAb/rbNO3tGZWZVzAmmLli2DW29NyWLSpFQ9dcUVcNRRHvxnZm2GE0hb8uGHMHJkGvw3fXpqEB85Eg45xIP/zKzNcQJpC957D66+Og3+mzs3dcEdOhS++10P/jOzNssJJE9vvZWWiL38cliwAHbbLU03sttuHvxnZm2eE0geZg/oo2AAAAkCSURBVM9OTxi1tWnw3377pcF//fvnHZmZWdGcQFrTSy+lwX9/+lOaXv3QQ9Pgvy23zDsyM7MmcwJpDc8+m3pU3XZbagw/5hj45S/T1OpmZu2UE0g5jRuXEsf990PXrnDaaWnw34Yb5h2ZmVmzOYG0tIiUMM45Bx59NC0TO2QI/PSnsO66eUdnZtZinEBayvLlafDfuefCc89Bz56ph9XRR0OXLnlHZ2bW4pxAmmvx4tQoft558PLLsMUWcP31qYG8c+e8ozMzK5tcRqlJ+rSksZJeyn6u18h5AyRNlTRN0uCC/WdLek3Ss9lr79aLPvP++6kr7qabwqBBsN56aYbcKVPgiCOcPMys4uU1zHkw8FBE9AUeyrb/h6SOwDBgL6AfcIikfgWnXBwR22av+1ojaCAN/jv77DQ/1amnwuabw4MPwlNPwfe+55HjZlY18vq22xcYmb0fCezXwDn9gWkRMT0ilgA3Zdfl47XX4JRTUuL47W/h619PM+T+4x+wxx4eOW5mVSevBLJhRMwFyH42tKhFd2BWwfbsbF+94yVNlDSisSowAEmDJNVJqps/f35p0f7+96mq6tJL05KxkybB3XfD9tuXdj8zswpQtgQi6e+SJjfwKvYpoqE/6SP7eRXwOWBbYC4wtLGbRERtRNRERE23bt2aVIb/6tMn9aZ66aU0V9VWW5V2HzOzClK2XlgRsXtjxyS9IWnjiJgraWNgXgOnzQZ6Fmz3AOZk936j4F7XAPe0TNSNOOyw9DIzs//KqwprNHB49v5w4O4GzhkP9JW0iaTOwMHZdWRJp97+wOQyxmpmZg3IaxzIucAtko4GXgUOBJD0WeDaiN
g7IpZJOh4YA3QERkTElOz68yVtS6rSmgEc29oFMDOrdoqIVZ9VIWpqaqKuri7vMMzM2hVJEyKiZuX9HrRgZmYlcQIxM7OSOIGYmVlJnEDMzKwkTiBmZlaSquqFJWk+MLPEy9cH3mzBcNoDl7k6uMzVoTll7h0RH5vKo6oSSHNIqmuoG1slc5mrg8tcHcpRZldhmZlZSZxAzMysJE4gxavNO4AcuMzVwWWuDi1eZreBmJlZSfwEYmZmJXECMTOzkjiBFEHSAElTJU2TNDjveFqapJ6S/inpBUlTJJ2Y7f+0pLGSXsp+Nrp0cHslqaOkZyTdk21XdJklrSvpNkkvZv+9d6iCMp+c/X89WdJfJa1RaWXOlvaeJ2lywb5GyyjpjOz7bKqkb5X6uU4gqyCpIzAM2AvoBxwiqV++UbW4ZcApEfEFYHvgZ1kZBwMPRURf4KFsu9KcCLxQsF3pZb4UeCAitgC2IZW9YsssqTtwAlATEVuR1hY6mMor8w3AgJX2NVjG7N/2wcCW2TVXZt9zTeYEsmr9gWkRMT0ilgA3AcWu694uRMTciHg6e/8e6UulO6mcI7PTRgL75RNheUjqAXwbuLZgd8WWWdLawDeA6wAiYklEvEMFlznTCVhTUiegC2lp7Ioqc0Q8Ary90u7GyrgvcFNELI6IV4BppO+5JnMCWbXuwKyC7dnZvookqQ/wJeBJYMOImAspyQAb5BdZWVwCnAasKNhXyWXeFJgPXJ9V210raS0quMwR8RpwIWnl07nAuxHxIBVc5gKNlbHFvtOcQFZNDeyryL7Pkj4F3A6cFBEL846nnCTtA8yLiAl5x9KKOgFfBq6KiC8BH9D+q24+UVbvvy+wCfBZYC1JP8w3qty12HeaE8iqzQZ6Fmz3ID0CVxRJq5GSx6iIuCPb/YakjbPjGwPz8oqvDHYEvitpBqlacjdJf6GyyzwbmB0RT2bbt5ESSiWXeXfglYiYHxFLgTuAr1HZZa7XWBlb7DvNCWTVxgN9JW0iqTOp8Wl0zjG1KEki1Yu/EBEXFRwaDRyevT8cuLu1YyuXiDgjInpERB/Sf9N/RMQPqewyvw7MkrR5tuubwPNUcJlJVVfbS+qS/X/+TVIbXyWXuV5jZRwNHCxpdUmbAH2Bp0r5AI9EL4KkvUn15R2BERExJOeQWpSkrwPjgEl81B5wJqkd5BagF+kf4oERsXJDXbsnaRfg1IjYR9JnqOAyS9qW1GmgMzAdOJL0h2Qll/m3wEGk3obPAD8GPkUFlVnSX4FdSFO2vwH8P+AuGimjpLOAo0i/k5Mi4v6SPtcJxMzMSuEqLDMzK4kTiJmZlcQJxMzMSuIEYmZmJXECMTOzknTKOwCztiLrwvtQtrkRsJw09QdA/2wutDYh63q8JCIeyzsWq15OIGaZiHgL2BZA0tnA+xFxYV7xSOoUEcsaObwL8D5QdAKR1DEilrdEbGbgKiyzTyRpO0kPS5ogaUzB1BD/knSxpEeydTW+IumObO2FP2Tn9MnW3RgpaWK2DkeXIu57jqSHgRMlfUfSk9nkh3+XtGE24eVxwMmSnpW0k6QbJB1QEPf72c9dlNZ6uRGYpLT+yQWSxmcxHduav0+rLE4gZo0TcDlwQERsB4wACmchWBIR3wCGk6aJ+BmwFXBEVh0GsDlQGxFbAwuBn2bzjn3SfdeNiJ0jYijwf8D22eSHNwGnRcSM7DMvjohtI2LcKsrRHzgrIvoBR5NmpP0K8BXgmGw6C7MmcxWWWeNWJyWEsWkaJTqSpgSvVz8n2iRgSv3U2ZKmkyareweYFRGPZuf9hbS40QOruO/NBe97ADdnTyidgVdKKMdT2boPAHsCWxc8raxDmguplPtalXMCMWucSIlhh0aOL85+rih4X79d/29r5bmCooj7flDw/nLgoogYnTWcn93INcvIahSySQM7N3I/AT+PiDGN3MesaK7CMmvcYqCbpB0gTXkvacsm3qNX/fXAIaQqqalNuO86wGvZ+8
ML9r8HdC3YngFsl73fF1itkfuNAX6SVaMhabNsUSmzJnMCMWvcCuAA4DxJzwHPktaSaIoXgMMlTQQ+TVrMaUkT7ns2cKukccCbBfv/Buxf34gOXAPsLOkp4Kv871NHoWtJU7g/LWkycDWuibASeTZeszLJekvdExFb5RyKWVn4CcTMzEriJxAzMyuJn0DMzKwkTiBmZlYSJxAzMyuJE4iZmZXECcTMzEry/wFcSPPp2HMABwAAAABJRU5ErkJggg==\n",
229 |       "text/plain": [
230 |        "<Figure size 432x288 with 1 Axes>"
231 |       ]
232 |      },
233 |      "metadata": {
234 |       "needs_background": "light"
235 |      },
236 |      "output_type": "display_data"
237 |     }
238 |    ],
239 |    "source": [
240 |     "# Visualising the Linear Regression results \n",
241 |     "plt.scatter(X, y, color = 'blue') \n",
242 |     "  \n",
243 |     "plt.plot(X, lin.predict(X), color = 'red') \n",
244 |     "plt.title('Linear Regression') \n",
245 |     "plt.xlabel('Temperature') \n",
246 |     "plt.ylabel('Pressure') \n",
247 |     "  \n",
248 |     "plt.show()"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 9,
254 |    "metadata": {},
255 |    "outputs": [
256 |     {
257 |      "data": {
258 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5xU5dn/8c+XxQZ2RUWQYqJJ1FhXsIuPGks0xsQYDRo7mmgsKUZjfnnMk2jEXmJDLCRir0QT0Rg1KhYWY8NIQhAFsYAdERD2+v1xnw0jzi6zZfbMzH7fr9e8Zk6/7lk419z3Oee+FRGYmZktrlveAZiZWWVygjAzs6KcIMzMrCgnCDMzK8oJwszMinKCMDOzopwgrOJImippl7zjKCTpL5IOKXHdiou/o0iaLWndvOOwzuEEYWWTnSg/yU4qb0m6VtLyecfVFhGxR0SMau9+JF0naX72nbwr6QFJX+6IGDtDRCwfEVPyjsM6hxOEldveEbE8sDmwJfDLnOOpBGdn30kf4HXg6o4+gKTuHb1P63qcIKxTRMTrwF+AjQAkfUPSREnvS3pY0lcW30bSWpLmSFqtYN4WkmZKWkrSoZIek3SupPckvSJpj4J115Y0JvulPlnSUQXLTpd0q6TrJX0k6QVJ60s6VdLbkqZJ+lrB+g9LOjL7/AVJf5P0jqRZkkZLWrkN38knwC3ApovFfHtWxlckHV+wbDlJo7Ky/lPSyZKmFyyfKunnkp4HPpbUXdJWksZl3/NzkoYUrH+opClZ+V+RNDSb/0VJj0j6ICvfzQXbhKQvZp9XkvSHLNZXJf1SUreCfTf7t7Hq4ARhnULSOsCewD8krQ/cCJwI9AL+DPxJ0tKF20TEm8DDwP4Fsw8CboqIT7PpwcAkYHXgbOBqScqW3QhMB9YG9gPOlLRzwb72Bv4IrAL8AxhL+j/RB/g/4MrmigP8LtvvV4B1gNNL+yYKdiL1BA4EJmfT3YA/Ac9lMewMnChpt2yT/wUGAOsCu2bfxeIOBL4OrAysCdwL/BZYFfgpcLukXtmxLwb2iIgVgG2AZ7N9/Aa4n/S99AUuaaYIlwArZfHsCHwfOKxgeUt/G6sGEeGXX2V5AVOB2cD7wKvAZcBywP8DbilYrxupqWVIwXa7ZJ+/Czyefa4D3gQGZdOHApML9tMDCGAt0kl7IbBCwfLfAddln08HHihYtncWa102vUK2r5Wz6YeBI5sp5zeBfyxW7l2aWfc6YG72nTQCrwAbZ8sGA68ttv6pwLXZ5ynAbgXLjgSmL3bcwwumfw78cbH9jQUOAXpmMXwbWG6xdf4AjAD6Fok/gC9mf4t5wAYFy44GHl7S3ybvf5d+lf5yDcLK7ZsRsXJE9I+IH0ZqVlmblDAAiIhGYBrpV/Pi7gY2yO6c2RX4ICKeLlj+ZsF+5mQfl8+O8W5EfFSw7quLHeOtgs+fALMiYmHBdNO+PkPSGpJukvS6pA+B60m/kkt1bkSsTKoNfAJ8KZvfH1g7aw56X9L7wC9INQGyMk0r2E/h52Lz+gPfWWx/2wG9I+JjUvI9BnhD0r0FF8tPJtWSns6aAQ8vcpzVgaUp+Dvy+e+3ub+NVQknCMvDDNLJC4Cs2WEdUi3iMyJiLqmdfihwMKlJqNRjrCpphYJ5/Yodow1+R/o1vHFErEhq6ml100lEvAacAFwkaTnSyf2VLKE2vVaIiD2zTd4gNfk0WafYbgs+TyPVIAr31zMizsqOPzYidgV6Ay8DV2Xz34yIoyJibVKt4LKm6w4FZgGfUvB3pOO+X6sQThCWh1uAr0vaWdJSwE9IzRXjmln/D6Qmi2+Qfq0vUURMy/b3O0nLStoYOAIY3c7YITU/zQbel9QH+FlbdxQRD5CS2TDgaeDD7ELzcpLqJG0kacts9VuAUyWtkh33uCXs/npgb0m7ZftaVtIQSX0lral0o0BP0nc/m9
Qkh6TvSGpKRO+Rks7Cwh1nNa1bgDMkrSCpP/BjSvz7WHVwgrBOFxGTSL+6LyH9Et2bdDvs/GbWf5zUXv9MRExtxaEOJDXjzADuBP43OyG3169Jt+1+QLoIfEc793cOqVmnO+m72JR0bWIWMJJ0IRjShfPp2bK/AreRTu5FZUlyH1Iz1UxSjeJnpP/33UiJeQbwLuki8w+zTbcEnpI0GxgDnBARrxQ5xI+Aj0nXRh4DbgCuaW3hrXIpwgMGWeWT9DfghogYmXcslULSD4ADImLHvGOx2uQahFW8rIllc+DmJa1byyT1lrStpG6SvkSqAdyZd1xWu/y0pVU0SaNIt5GesNgdSV3R0qRnMwaSblG9iXTrsFlZuInJzMyKchOTmZkVVVNNTKuvvnoMGDAg7zDMzKrGhAkTZkVEr2LLaipBDBgwgIaGhrzDMDOrGpJebW6Zm5jMzKwoJwgzMyvKCcLMzIpygjAzs6KcIMzMrCgnCDOzKjV6NAwYAN26pffRHdFXcYGaus3VzKyrGD0ahg2DOdlQTK++mqYBhg7tmGO4BmFmVoVOOy0lh415jm15DEjTp53WccdwgjAzq0KvvZbez+QX3Mm+LMPcz8zvCE4QZmZVqF8/2IgX+Dp/5iJOYB7L/nd+R/E1CDOzKnTGGdDtkLOZvbAnl2WDAfbokeZ3FCcIM7MqNHTbqTTGjVyzwvG8P3tV+vdLyaGjLlCDE4SZWXU6/3y61XXjyJd+zJF9y3MIX4MwM6s2s2bByJGputC3TNkBJwgzs+pzySXwySdw8sllPYwThJlZNfn4Y/j972GffeArXynroZwgzMyqyciR8O678POfl/1QThBmZtXi00/hvPNg++1h663LfjjfxWRmVi1uvBGmTYMrruiUw7kGYWZWDRob4eyz4atfhT326JRDljVBSNpd0iRJkyWdUmT5UEnPZ69xkjYpWDZV0guSnpXUUM44zcwq3r33wsSJ6c4lqVMOWbYmJkl1wKXArsB0YLykMRHxUsFqrwA7RsR7kvYARgCDC5bvFBGzyhWjmVnVGD4c+veH73630w5ZzhrEIGByREyJiPnATcA+hStExLiIeC+bfBIo3xMfZmbV6rHH4PHH4Sc/gaWW6rTDljNB9AGmFUxPz+Y15wjgLwXTAdwvaYKkYc1tJGmYpAZJDTNnzmxXwGZmFWn4cFhtNTj88E49bDnvYirWSBZFV5R2IiWI7QpmbxsRMyStATwg6eWI+PvndhgxgtQ0RX19fdH9m5lVrRdfhHvugV//Gnr27NRDl7MGMR1Yp2C6LzBj8ZUkbQyMBPaJiHea5kfEjOz9beBOUpOVmVnXcs45qR/vY4/t9EOXM0GMB9aTNFDS0sABwJjCFST1A+4ADo6IfxXM7ylphabPwNeAF8sYq5lZ5XntNbjhBjjqqNTE1MnK1sQUEQskHQeMBeqAayJioqRjsuVXAL8CVgMuU7pta0FE1ANrAndm87oDN0TEfeWK1cysIp1/fnr/8Y9zObwiaqfZvr6+Phoa/MiEmdWAd95J44futx+MGlW2w0iakP0w/xw/SW1mVol+/3uYM6fsXXq3xAnCzKzSfPxxGvNh771hww1zC8MJwsys0lxzTWpi6oQuvVviBGFmVkk+/RTOPRe23Ta9cuTuvs3MKsnNN6fbWy+9NO9IXIMwM6sYEalbjQ03hD33zDsa1yDMzCrGn/+cutYYNQq65f/7Pf8IzMwsGT4c1lkHDjww70gA1yDMzCrDuHHw6KNw4YWd2qV3S1yDMDOrBMOHw6qrwpFH5h3JfzlBmJnl7aWXYMwY+NGPOr1L75Y4QZiZ5e2cc2C55eC44/KO5DOcIMzM8jRtGlx/fWpaWn31vKP5DCcIM7M8XXBBev7hJz/JO5LPcYIwM8vLu+/CiBHpttb+/fOO5nOcIMzM8nLppann1hy79G6JE4SZWR7mzIGLL4avfx2++tW8oynKCcLMLA/XXguzZuXepXdLnCDMzDrbgg
WpS++tt4bttss7mma5qw0zs852yy0wdSpcdBFIeUfTLNcgzMw6U1OX3htsAHvtlXc0LXINwsysM913Hzz/fLoGUQFdereksqMzM6s1w4dD377wve/lHckSOUGYmXWWJ5+ERx6BH/8Yll4672iWyAnCzKyzDB8Oq6wCRx2VdyQlcYIwM+sML78Md9+demxdfvm8oymJE4SZWWc45xxYdtk05kOVKGuCkLS7pEmSJks6pcjyoZKez17jJG1S6rZmZlVj+nT44x/h8MOhV6+8oylZ2RKEpDrgUmAPYAPgQEkbLLbaK8COEbEx8BtgRCu2NTOrDhdeCI2NFdmld0vKWYMYBEyOiCkRMR+4CdincIWIGBcR72WTTwJ9S93WzKwqvPceXHklfPe7MHBg3tG0SjkTRB9gWsH09Gxec44A/tLabSUNk9QgqWHmzJntCNfMrAwuvxxmz67YLr1bUs4EUayDkSi6orQTKUE0dWtY8rYRMSIi6iOivlcVte2ZWRfwySepeWmPPWCTTZa8foUpZ1cb04F1Cqb7AjMWX0nSxsBIYI+IeKc125qZVbTrroOZMyu6S++WlLMGMR5YT9JASUsDBwBjCleQ1A+4Azg4Iv7Vmm3NzCpaU5fegwfDDjvkHU2blK0GERELJB0HjAXqgGsiYqKkY7LlVwC/AlYDLlPq8nZB1lxUdNtyxWpm1uFuuw2mTIHzzqvoLr1booiiTftVqb6+PhoaGvIOw8y6ugjYfHOYOxcmTqzoXlslTYiI+mLL3N23mVlHe+ABePZZuPrqik4OS1K9kZuZVaqzzoI+fWDo0LwjaRcnCDOzjjR+PDz0EJx0EiyzTN7RtIsThJlZRxo+HFZeGYYNyzuSdnOCMDPrKJMmwR13wLHHwgor5B1NuzlBmJl1lHPPTc1Kxx+fdyQdwgnCzKwjzJgBf/gDHHYYrLFG3tF0CCcIM7OOcOGF6enpn/4070g6jBOEmVl7vf8+XHEF7L8/rLtu3tF0GCcIM7P2uuIK+OijquzSuyVOEGZm7TF3bmpe2m032GyzvKPpUE4QZmbtMWoUvPVW1Xbp3RInCDOztlq4EM45B7bcEoYMyTuaDufO+szM2ur22+E//4Gzz67aLr1b4hqEmVlbRKRuNdZfH/bZJ+9oysI1CDOztnjwQXjmGbjqKqiryzuasnANwsysLc46C3r3hoMPzjuSsnGCMDNrrQkTUg2iBrr0bokThJlZaw0fDiutBEcfnXckZeUEYWbWGv/+N9x2G/zwh7DiinlHU1ZOEGZmrXHuubD00nDCCXlHUnZOEGZmpXrjDbjuOjj0UFhzzbyjKTsnCDOzUl10Uc116d2SkhOEpOUkfamcwZiZVawPPoDLL4f99oMvfjHvaDpFSQlC0t7As8B92fSmksaUMzAzs4py5ZXw4Yc12Slfc0qtQZwODALeB4iIZ4EB5QnJzKzCzJ0LF1wAu+4Km2+edzSdptSuNhZExAeqwc6ozMyW6I9/hDffhOuvzzuSTlVqgnhR0veAOknrAccD48oXlplZhWjq0nuLLeB//ifvaDpVqU1MPwI2BOYBNwAfACcuaSNJu0uaJGmypFOKLP+ypCckzZP008WWTZX0gqRnJTWUGKeZWce68870cNwpp9Rkl94tWWINQlIdMCYidgFOK3XH2XaXArsC04HxksZExEsFq71Lqo18s5nd7BQRs0o9pplZh2rq0nu99WDfffOOptMtsQYREQuBOZJWauW+BwGTI2JKRMwHbgI+02l6RLwdEeOBT1u5bzOz8nvoIWhoSM891GiX3i0p9RrEXOAFSQ8AHzfNjIjjW9imDzCtYHo6MLgVsQVwv6QAroyIEcVWkjQMGAbQr1+/VuzezGwJzjoL1loLvv/9vCPJRakJ4t7s1RrFGuuiFdtvGxEzJK0BPCDp5Yj4++d2mBLHCID6+vrW7N/MrHnPPAMPPJCSxLLL5h1NLkpKEBExqg37ng6sUzDdF5hR6sYRMSN7f1
vSnaQmq88lCDOzsjj77NRb6zHH5B1JbkpKEJJeociv/4hYt4XNxgPrSRoIvA4cAHyvxOP1BLpFxEfZ568B/1fKtmZm7faf/8Ctt8LPfpbGfeiiSm1iqi/4vCzwHWDVljaIiAWSjgPGAnXANRExUdIx2fIrJK0FNAArAo2STgQ2AFYH7swezOsO3BAR95VeLDOzdjj3XOjevUt06d0SRbSt2V7SYxGxXQfH0y719fXR0OBHJsysHd56C/r3TxemRxS9N6amSJoQEfXFlpXaxFTY+Ug3Uo1ihQ6Izcysslx0Ecyfn5qXurhSm5jOK/i8AJgK7N/h0ZiZ5enDD+Gyy+Db304Px3Vxpd7FtFO5AzEzy92IEWnchy7UpXdLSh0P4gRJKyoZKekZSV8rd3BmZp1m3jw4/3zYeWeoL9ok3+WU2lnf4RHxIel20zWAw4CzyhaVmVlnu/76NOa0aw//VWqCaHoqek/g2oh4juJPSpuZVZ+mLr032wx22SXvaCpGqRepJ0i6HxgInCppBaCxfGGZmXWiu++GSZPgppu6XJfeLSk1QRwBbApMiYg5klYlNTOZmVW3pi69v/CFdPeS/VepTUxbA5Mi4n1JBwG/JA0aZGZW3R55BJ5+OnXp3b3U38xdQ6kJ4nLSmBCbACcDrwJ/KFtUZmad5ayzYI014JBD8o6k4pSaIBZE6pNjH+CiiLgIP0ltZtXu2Wdh7Fg48URYbrm8o6k4pdanPpJ0KnAwsH02nOhS5QvLzKwTnH02rLAC/OAHeUdSkUqtQXwXmEd6HuJN0mhx55QtKjOzcpsyBW6+GY4+GlZeOe9oKlJJCSJLCrcDy2SzZgF3lisoM7OyO++8dFH6pJPyjqRildrVxlHAbcCV2aw+wF3lCsrMrKzefhuuuQYOPhjWXjvvaCpWqU1MxwLbAh8CRMS/SV1umJlVn4svTn0vuUvvFpWaIOZFxPymCUndKTIEqZlZxfvoI7j0Uth3X/jSl/KOpqKVmiAekfQLYDlJuwK3An8qX1hmZmVy1VXw/vvulK8EpSaInwMzgReAo4E/k56mNjOrHvPnpy69hwyBQYPyjqbiLfE5CEndgOcjYiPgqvKHZGZWJqNHw+uvw9VX5x1JVVhiDSIiGoHnJPXrhHjMzMqjsTE9GLfppvA1j3dWilKfpO4NTJT0NPBx08yI+EZZojIz62hjxsDLL8MNN7hL7xKVmiB+XdYozMzKqalL74ED4TvfyTuaqtFigpC0LHAM8EXSBeqrI2JBZwRmZtZhHn0Unnwy3d7qLr1LtqRrEKOAelJy2AM4r+wRmZl1tLPOgl694DCPc9YaS0qlG0TEVwEkXQ08Xf6QzMw60PPPw1/+Ar/9rbv0bqUl1SA+bfrgpiUzq0pnnw3LLw8//GHekVSdJSWITSR9mL0+AjZu+izpwyXtXNLukiZJmizplCLLvyzpCUnzJP20NduamS3R1Klw000wbBisskre0VSdFpuYIqKurTvOBhW6FNgVmA6MlzQmIl4qWO1d4Hjgm23Y1sysZeedB926uUvvNiq1q422GARMjogpWUd/N5GGLP2viHg7IsZT0JRV6rZmZi2aOTM9MX3QQdC3b97RVKVyJog+wLSC6enZvHJva2YGl1wCn3ziLr3boZwJotijiqV2EV7ytpKGSWqQ1DBz5sySgzOzGjZ7Nvz+9/DNb8JXvpJ3NFWrnAliOrBOwXRfYEZHbxsRIyKiPiLqe/Xq1aZAzazGjBwJ773nLr3bqZwJYjywnqSBkpYGDgDGdMK2ZtaVzZ+fLk7vsANstVXe0VS1sj1zHhELJB0HjAXqgGsiYqKkY7LlV0haC2gAVgQaJZ1Iejjvw2LblitWM6shN94I06fDiBF5R1L1FFE7I4fW19dHQ0ND3mGYWQ5Gj4Zf/qKRe177Kt2W6s4z1zzL0IPca+uSSJoQEfXFlrnXKjOreqNHp2fhvjfnajbkJYZ+ej13HS0QDB2ad3TVq5zXIMzMOs
X//uJTfjvnJK5iGI+yHTfzXebMgdNOyzuy6uYEYWbVbfp0Rr02hJO4kIs4np15kIVZ48hrr+UbWrVzE5OZVa+//hUOPJBNNJf942ZuZf/PLO7ngZLbxTUIM6s+jY3wm9+ksaXXXJOHho/n3h6fTQ49esAZZ+QUX41wgjCz6vLOO7DXXvCrX8H3vgdPPcXeP/syI0ZA//5puOn+/dNdrr5A3T5uYjKz6jF+POy3H7z5Jlx+ORx9dMoIpGTghNCxXIMws8oXkRLCdtulhPDYY3DMMf9NDlYeThBmVtk+/hgOPjiNCLfzzjBhAmy5Zd5RdQlOEGZWuV5+GQYNSt1n/Pa3cM89sNpqeUfVZfgahJlVpptvhiOOSLcj3X9/qj1Yp3INwswqy/z5cPzxcMABsMkm8I9/ODnkxAnCzCrHtGmpm+5LLknjSD/8MPTxYJJ5cROTmVWG++9PzzXMnw+33ppuZ7VcuQZhZvlqbIRf/xp23x1694aGBieHCuEahJnlZ9YsOOggGDs23cp6+eXQs2feUVnGCcLM8vHUU/Cd78Bbb8GVV8JRR/nBtwrjJiYz61wR6SL09ttDXR2MG5dG+3FyqDhOEGbWeWbPTheijz8edtsNnnkGttgi76isGU4QZtY5XnopdZFxyy1w5plw992wyip5R2Ut8DUIMyu/G29M1xh69kyD/Oy0U94RWQlcgzCz8pk3D449NjUrbbZZeirayaFqOEGYWXm8+mp6Kvqyy+CnP4W//Q3WXjvvqKwV3MRkZh3vvvvS6D0LFsAdd8C+++YdkbWBaxBm1nEWLkxDge65J/Ttm56KdnKoWq5BmFnHmDkz1RoeeAAOOwwuvRSWWy7vqKwdnCDMrP2eeCI9FT1rFowcmcZxsKrnJiYza7sIuOiidDF6mWVSonByqBllTRCSdpc0SdJkSacUWS5JF2fLn5e0ecGyqZJekPSspIZyxmlmbfDRR2lQnxNPTNccJkxIt7JazShbE5OkOuBSYFdgOjBe0piIeKlgtT2A9bLXYODy7L3JThExq1wxmlkbTZwI3/42TJ4Mw4fDz37mvpRqUDlrEIOAyRExJSLmAzcB+yy2zj7AHyJ5ElhZUu8yxmRm7XX99TBoEHzwATz4IJx8spNDjSpngugDTCuYnp7NK3WdAO6XNEHSsOYOImmYpAZJDTNnzuyAsM2sqLlz4Qc/SOM21NenjvZ23DHvqKyMypkgiv2kiFass21EbE5qhjpW0g7FDhIRIyKiPiLqe/Xq1fZozax5U6fCdtvBFVekGsODD6bR36ymlfM21+nAOgXTfYEZpa4TEU3vb0u6k9Rk9feyRWtmxd17b6o1NDbCXXfBPou3FFutKmcNYjywnqSBkpYGDgDGLLbOGOD72d1MWwEfRMQbknpKWgFAUk/ga8CLZYzVzBa3cCH88pew117Qv3+6S8nJoUspWw0iIhZIOg4YC9QB10TEREnHZMuvAP4M7AlMBuYAh2WbrwncqXThqztwQ0TcV65YzWwxb78NBx6YOtg78ki4+GI/Fd0FKWLxywLVq76+Phoa/MiEWbs8/jjsvz+8+y5cfjkcemjeEVkZSZoQEfXFlvlJajNLIuCCC2DIEOjRA5580smhi3NfTGYGH34Ihx8Ot9+eel+99lpYaaW8o7KcuQZh1tW98EJ6ruGuu+Dcc1OScHIwXIMw69pGjUoPv628Mjz0EGy/fd4RWQVxDcKsK5o7F4YNS9cYBg9OT0U7OdhinCDMuppXXoFtt4WrroJTT00D/Ky1Vt5RWQVygjCrQaNHw4AB0K1beh89Olvwpz/B5pvDlCnp85lnQne3NFtxThBmNWb06NR69Oqr6c7VV1+FHxy1gInfOBW+8Q1Yd93UpLTXXnmHahXOCcKsxpx2GsyZs2h6Td7k7k92ZcM/nZUyx+OPw8CB+QVoVcMJwqzGvPbaos/b8SjPsDmDeYpDGAVXXgnLLptfcFZV3PhoVkvefJOjVn+CL84cxzaMYz
BPMYV12Y2xfNT/q3lHZ1XGCcKsWi1YAC++COPGLXq98gpXAvNYmgbqOZuTOYtTWNhjRUackXfAVm2cIMyqxXvvpf6RmpLB00/D7NlpWe/esM02cNxxsM023PHyZpx6+jK89hr06wdnnAFDh+YbvlUfJwizStTYCP/616Jk8MQT8NJLaVldHWyySXrIbZtt0qtfv8+MC33gVnDgoblEbjXECcKsEsyeDePHL0oITz6ZutsGWHVV2HrrVAXYZhvYckvo2TPfeK1LcIIw62xNDycUXjt4/vk0ghvABhvAt76VksHWW8P666cn3sw6mROEWbnNm5ceTCtsLnrjjbRs+eVTX0i/+EVKBlttBauskm+8ZhknCLOO9sYbKQk0JYOGBpg/Py1bd13YeedFtYONNnJXF1ax/C/TrD0WLEjjKTQlg+xWUwCWWSaNs3DCCSkZbL21O8WzquIEYdYa77676FbTJ56Ap56Cjz9Oy3r3Tr2kZreastlmKUmYVSknCLPmNDbCpEmfrR38859pWV0dbLopHHZYs7eamlU7JwizJrNnp4fPCq8fvPdeWtZ0q+lBB/lWU+synCCsa4qAqVMXJYNx4+C551KtAWDDDeHb315UO1h/fdcOrMtxgrDat2ABd1z2JjecOZUBbz3Fzj3GMWSpcSz3wZtpedOtpqedlpLB4MG+1dQMJwirZhHwwQfw+uuLXjNmfHb69deJt97iWxF8K9vsP3PW5a66XfjCodsw6MRt0q2mdXW5FsWsEjlBWGWaPz89T1DkhP+ZRFA4Mk6T1VaDPn3Sa9NNufi2Pkz8oA/T6csEtuBt1oSF0P8hmHpt5xfNrFo4QVjnikgXfps74Te93n7789suswysvXY68W++Oey996JE0DR/7bU/NyDOSddAFAmlcGAdM/s8J4guaPTo1Nze4V1Bz5u36ETf0i//uXM/v22vXotO9vX1iz4XvlZdtU0Xivv1S10fFZtvZs0ra4KQtDtwEVAHjIyIsxZbrmz5nsAc4NCIeKaUbTtK2U6WFappQPumlplXX03T0EK5I2DWrOIn/cJ5s2Z9fttll110gh88+LMn/KZf/b17l/WBsjPO+GyZAXr0SPPNrHllSxCS6oBLgV2B6cB4SWMi4qWC1fYA1n6bquYAAAhESURBVMteg4HLgcElbttuTSfLLeb8nf4Eja9249ojxOr/6sZue3RLPWhK6b1bG6fbuo+mVwdbfED7ZfmEtebM4O6fvM7Qbs388p8xY1FfQk0kWGONdILv1y91MlfsV//KK+d+e2hT4utKPwTMOoIiirXOdsCOpa2B0yNit2z6VICI+F3BOlcCD0fEjdn0JGAIMGBJ2xZTX18fDQ0NJcc4YED6Bf0xPejBJyVv12maEkd7k1TB9KTJ3QjSCbsXM1mNdz9/3B49ip/sF//Vv9RSnfyFmFlHkzQhIuqLLStnE1MfYFrB9HRSLWFJ6/QpcVsAJA0DhgH0a2WjctNFyt25jzoW0o3G7BWM/UtjemiqsTE1sTR24HQ59lni9KQZjXwypxERzGJ1XqcPr9OHBWv04fqHsiSw4oq5/+o3s/yVM0EUO8MsXl1pbp1Stk0zI0YAIyDVIFoTYNPFy0fZ4TPz+/cHdm/NnqrHR6OLt8ePOB/YILewzKwClXOYqunAOgXTfYEZJa5TyrbtdsYZ6eRYqNYvXg4dCiNGpCQopfcRI9web2afV84EMR5YT9JASUsDBwBjFltnDPB9JVsBH0TEGyVu225d9WQ5dGjqhqixMb3XennNrG3K1sQUEQskHQeMJd2qek1ETJR0TLb8CuDPpFtcJ5Nucz2spW3LEefQoT5BmpkVU7a7mPLQ2ruYzMy6upbuYipnE5OZmVUxJwgzMyvKCcLMzIpygjAzs6Jq6iK1pJlAkX47S7I6UKS3uZrmMte+rlZecJlbq39E9Cq2oKYSRHtIamjuSn6tcplrX1crL7jMHclNTGZmVpQThJmZFeUEsciIvAPIgc
tc+7paecFl7jC+BmFmZkW5BmFmZkU5QZiZWVFdPkFI2l3SJEmTJZ2SdzzlIGkdSQ9J+qekiZJOyOavKukBSf/O3lfJO9aOJqlO0j8k3ZNN13SZJa0s6TZJL2d/7627QJlPyv5dvyjpRknL1lqZJV0j6W1JLxbMa7aMkk7NzmmTJO3W1uN26QQhqQ64FNiDNJ7agZJqcVy1BcBPIuIrwFbAsVk5TwEejIj1gAez6VpzAvDPgulaL/NFwH0R8WVgE1LZa7bMkvoAxwP1EbERaXiAA6i9Ml/H58e5LFrG7P/2AcCG2TaXZee6VuvSCQIYBEyOiCkRMR+4Cdgn55g6XES8ERHPZJ8/Ip00+pDKOipbbRTwzXwiLA9JfYGvAyMLZtdsmSWtCOwAXA0QEfMj4n1quMyZ7sBykroDPUijT9ZUmSPi78C7i81uroz7ADdFxLyIeIU03s6gthy3qyeIPsC0gunp2byaJWkAsBnwFLBmNoIf2fsa+UVWFhcCJwONBfNquczrAjOBa7NmtZGSelLDZY6I14FzgdeAN0ijUt5PDZe5QHNl7LDzWldPECoyr2bv+5W0PHA7cGJEfJh3POUkaS/g7YiYkHcsnag7sDlweURsBnxM9TettChrd98HGAisDfSUdFC+UeWuw85rXT1BTAfWKZjuS6qe1hxJS5GSw+iIuCOb/Zak3tny3sDbecVXBtsC35A0ldR0+D+Srqe2yzwdmB4RT2XTt5ESRi2XeRfglYiYGRGfAncA21DbZW7SXBk77LzW1RPEeGA9SQMlLU26sDMm55g6nCSR2qX/GRHnFywaAxySfT4EuLuzYyuXiDg1IvpGxADS3/VvEXEQtV3mN4Fpkr6UzdoZeIkaLjOpaWkrST2yf+c7k66x1XKZmzRXxjHAAZKWkTQQWA94uk1HiIgu/QL2BP4F/Ac4Le94ylTG7UhVzOeBZ7PXnsBqpLsf/p29r5p3rGUq/xDgnuxzTZcZ2BRoyP7WdwGrdIEy/xp4GXgR+COwTK2VGbiRdI3lU1IN4YiWygiclp3TJgF7tPW47mrDzMyK6upNTGZm1gwnCDMzK8oJwszMinKCMDOzopwgzMysqO55B2DWGSQ13RIIsBawkNQtBcCgSH1xVQRJQ4D5ETEu71isa3OCsC4hIt4hPSOApNOB2RFxbl7xSOoeEQuaWTwEmA2UnCAk1UXEwo6IzayJm5isy5K0haRHJE2QNLag24KHJV0g6e/ZmApbSroj63f/t9k6A7IxF0ZJej4bg6FHCfs9U9IjwAmS9pb0VNax3l8lrZl1pngMcJKkZyVtL+k6SfsVxD07ex+iNM7HDcALSmNfnCNpfBbT0Z35fVrtcYKwrkrAJcB+EbEFcA1wRsHy+RGxA3AFqQuDY4GNgEOz5iqALwEjImJj4EPgh1mfVy3td+WI2DEizgMeA7aK1LHeTcDJETE1O+YFEbFpRDy6hHIMIvUAsAHp6doPImJLYEvgqKyrBbM2cROTdVXLkE74D6QufKgjdWXQpKlPrheAiZF1qyxpCqkjtPeBaRHxeLbe9aSBa+5bwn5vLvjcF7g5q2EsDbzShnI8HanPf4CvARsX1DZWIvXD05b9mjlBWJcl0ol/62aWz8veGws+N003/b9ZvJ+aKGG/Hxd8vgQ4PyLGZBemT29mmwVktf2sQ7qlm9mfgB9FxNhm9mPWKm5isq5qHtBL0taQukOXtGEr99GvaXvgQFKT0aRW7Hcl4PXs8yEF8z8CViiYngpskX3eB1iqmf2NBX6QNXMhaf1swCCzNnGCsK6qEdgPGC7pOVIPt9u0ch//BA6R9DywKmmgnvmt2O/pwK2SHgVmFcz/E7Bv00Vq4CpgR0lPA4P5bK2h0EhS997PKA1ufyVuJbB2cG+uZm2Q3W10T0RslHMoZmXjGoSZmRXlGoSZmRXlGoSZmRXlBGFmZkU5QZiZWVFOEGZmVpQThJmZFfX/AUBPu4zpZN8GAAAAAElFTkSuQm
CC\n",
259 |       "text/plain": [
260 |        "<Figure size 432x288 with 1 Axes>"
261 |       ]
262 |      },
263 |      "metadata": {
264 |       "needs_background": "light"
265 |      },
266 |      "output_type": "display_data"
267 |     }
268 |    ],
269 |    "source": [
270 |     "# Visualising the Polynomial Regression results \n",
271 |     "plt.scatter(X, y, color = 'blue') \n",
272 |     "  \n",
273 |     "plt.plot(X, lin2.predict(poly.fit_transform(X)), color = 'red') \n",
274 |     "plt.title('Polynomial Regression') \n",
275 |     "plt.xlabel('Temperature') \n",
276 |     "plt.ylabel('Pressure') \n",
277 |     "  \n",
278 |     "plt.show() "
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "markdown",
283 |    "metadata": {},
284 |    "source": [
285 |     "### Prediction\n",
286 |     "- Predicting new result with both Linear and Polynomial Regression"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 13,
292 |    "metadata": {},
293 |    "outputs": [
294 |     {
295 |      "data": {
296 |       "text/plain": [
297 |        "array([0.20675333])"
298 |       ]
299 |      },
300 |      "execution_count": 13,
301 |      "metadata": {},
302 |      "output_type": "execute_result"
303 |     }
304 |    ],
305 |    "source": [
306 |     "# Predicting a new result with Linear Regression \n",
307 |     "lin.predict([[110.0]])"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": 14,
313 |    "metadata": {},
314 |    "outputs": [
315 |     {
316 |      "data": {
317 |       "text/plain": [
318 |        "array([0.43295877])"
319 |       ]
320 |      },
321 |      "execution_count": 14,
322 |      "metadata": {},
323 |      "output_type": "execute_result"
324 |     }
325 |    ],
326 |    "source": [
327 |     "# Predicting a new result with Polynomial Regression \n",
328 |     "lin2.predict(poly.fit_transform([[110.0]]))"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "markdown",
333 |    "metadata": {},
334 |    "source": [
335 |     "- [Reference](https://www.geeksforgeeks.org/python-implementation-of-polynomial-regression/)\n",
336 |     "\n",
337 |     "### More examples\n",
338 |     "\n",
339 |     "- [link1](https://www.geeksforgeeks.org/polynomial-regression-for-non-linear-data-ml/?ref=rp)\n",
340 |     "- [link2](https://towardsdatascience.com/machine-learning-polynomial-regression-with-python-5328e4e8a386)\n",
341 |     "- [link3](https://medium.com/kharpann/performing-polynomial-regression-using-python-840eb666bfd8)\n",
342 |     "- [link4](https://towardsdatascience.com/introduction-to-linear-regression-and-polynomial-regression-f8adc96f31cb)\n",
343 |     "- [link5](https://github.com/rickwierenga/MLFundamentals/blob/master/1_Polynomial_Regression.ipynb)"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "code",
348 |    "execution_count": null,
349 |    "metadata": {},
350 |    "outputs": [],
351 |    "source": []
352 |   }
353 |  ],
354 |  "metadata": {
355 |   "kernelspec": {
356 |    "display_name": "Python 3",
357 |    "language": "python",
358 |    "name": "python3"
359 |   },
360 |   "language_info": {
361 |    "codemirror_mode": {
362 |     "name": "ipython",
363 |     "version": 3
364 |    },
365 |    "file_extension": ".py",
366 |    "mimetype": "text/x-python",
367 |    "name": "python",
368 |    "nbconvert_exporter": "python",
369 |    "pygments_lexer": "ipython3",
370 |    "version": "3.7.4"
371 |   }
372 |  },
373 |  "nbformat": 4,
374 |  "nbformat_minor": 2
375 | }
376 | 


--------------------------------------------------------------------------------
/Python Cheat Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Python Cheat Sheet.pdf


--------------------------------------------------------------------------------
/Python Functions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Functions\n",
  8 |     "\n",
  9 |     "- A function is a block of reusable code that is used to perform a specific action\n",
 10 |     "- It helps in eliminating the need to write the same code over and over again\n",
 11 |     "- Built-in functions | User defined functions\n",
 12 |     "#### General syntax:\n",
 13 |     "  - def function_name(parameter list/argument):\n",
 14 |     "       - statements, i.e. the function body\n",
 15 |     "       \n",
 16 |     "- Refer to this link for details of the built-in functions: https://www.w3schools.com/python/python_ref_functions.asp"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 1,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "Welcome to Small Group Coaching Session\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "def my_function():\n",
 34 |     "    print(\"Welcome to Small Group Coaching Session\")\n",
 35 |     "\n",
 36 |     "my_function()"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "name": "stdout",
 46 |      "output_type": "stream",
 47 |      "text": [
 48 |       "Hello Learners\n",
 49 |       "Hello Everyone\n"
 50 |      ]
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "# Passing Arguments\n",
 55 |     "def my_function(name):\n",
 56 |     "    print(\"Hello \" + name)\n",
 57 |     "\n",
 58 |     "my_function(\"Learners\")\n",
 59 |     "my_function(\"Everyone\")\n",
 60 |     "\n",
 61 |     "# Always remember to pass all the arguments that were defined in the function when calling it"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 3,
 67 |    "metadata": {},
 68 |    "outputs": [
 69 |     {
 70 |      "data": {
 71 |       "text/plain": [
 72 |        "15"
 73 |       ]
 74 |      },
 75 |      "execution_count": 3,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "# Return values\n",
 82 |     "\n",
 83 |     "def add_10(x):\n",
 84 |     "    return 10 + x\n",
 85 |     "\n",
 86 |     "add_10(5)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 4,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "Welcome Harry\n",
 99 |       "Welcome Rachel\n",
100 |       "Welcome Linda\n"
101 |      ]
102 |     }
103 |    ],
104 |    "source": [
105 |     "# Arbitrary arguments\n",
106 |     "# Use a starred parameter (e.g. *names) when you do not know how many arguments will be passed into a function\n",
107 |     "\n",
108 |     "def greet(*names):\n",
109 |     "    for name in names:\n",
110 |     "        print(\"Welcome\",name)\n",
111 |     "\n",
112 |     "greet(\"Harry\",\"Rachel\",\"Linda\") #argument is a tuple"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 5,
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "Dog is a pet\n",
125 |       "Cat is a pet\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "# Default Arguments\n",
131 |     "\n",
132 |     "def default_arg(disease = \"Dog\"):\n",
133 |     "    print(disease + \" is a pet\")\n",
134 |     "\n",
135 |     "default_arg()\n",
136 |     "default_arg(\"Cat\")"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "#### Anonymous functions - Lambda\n",
144 |     "\n",
145 |     "- Functions that are defined without the def keyword (and without a name) are called anonymous functions\n",
146 |     "- The keyword for an anonymous function is `lambda`.\n",
147 |     "- A lambda can take any number of arguments but consists of a single expression, whose value it returns.\n",
148 |     "- It can be directly printed\n",
149 |     "\n",
150 |     "##### Syntax of lambda Functions\n",
151 |     "- `lambda argument1, argument2, argument3, ...: expression`"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 6,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       "9\n"
164 |      ]
165 |     }
166 |    ],
167 |    "source": [
168 |     "#Anonymous functions - lambda\n",
169 |     "\n",
170 |     "square = lambda x: x*x\n",
171 |     "print(square(3))"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 7,
177 |    "metadata": {},
178 |    "outputs": [
179 |     {
180 |      "data": {
181 |       "text/plain": [
182 |        "[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]"
183 |       ]
184 |      },
185 |      "execution_count": 7,
186 |      "metadata": {},
187 |      "output_type": "execute_result"
188 |     }
189 |    ],
190 |    "source": [
191 |     "squares = list(map(lambda x: x**2, range(10)))\n",
192 |     "squares"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 8,
198 |    "metadata": {},
199 |    "outputs": [
200 |     {
201 |      "data": {
202 |       "text/plain": [
203 |        "[16, 4, 4, 9]"
204 |       ]
205 |      },
206 |      "execution_count": 8,
207 |      "metadata": {},
208 |      "output_type": "execute_result"
209 |     }
210 |    ],
211 |    "source": [
212 |     "vec = [4, 2, -2, 3]\n",
213 |     "squares_1 = list(map(lambda x: x**2, vec))\n",
214 |     "squares_1 "
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {},
220 |    "source": [
221 |     "#### Recursive functions\n",
222 |     "\n",
223 |     "- A Python function can call other functions. It can even call itself. A function that calls itself is called a recursive function\n",
224 |     "- Example-1: Finding the factorial of an integer\n",
225 |     "- Example-2: Finding the fibonacci series\n",
226 |     "  - The factorial of a number is the product of all the integers from 1 to that number. For example, the factorial of 4 is `4! = 1*2*3*4 = 24`\n",
227 |     "  - The Fibonacci series starts with 0 and 1; each subsequent number is found by adding up the two numbers before it: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, ..."
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 9,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "name": "stdout",
237 |      "output_type": "stream",
238 |      "text": [
239 |       "Factorial of  4 is 24\n"
240 |      ]
241 |     }
242 |    ],
243 |    "source": [
244 |     "# Factorial\n",
245 |     "def find_factorial(x):\n",
246 |     "    if x == 1:\n",
247 |     "        return 1\n",
248 |     "    else:\n",
249 |     "        return (x * find_factorial(x-1))\n",
250 |     "\n",
251 |     "\n",
252 |     "a = 4\n",
253 |     "print(\"Factorial of \", a, \"is\", find_factorial(a))"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": 10,
259 |    "metadata": {},
260 |    "outputs": [
261 |     {
262 |      "name": "stdout",
263 |      "output_type": "stream",
264 |      "text": [
265 |       "Recursion Results\n",
266 |       "1\n",
267 |       "3\n",
268 |       "6\n",
269 |       "10\n",
270 |       "15\n"
271 |      ]
272 |     },
273 |     {
274 |      "data": {
275 |       "text/plain": [
276 |        "15"
277 |       ]
278 |      },
279 |      "execution_count": 10,
280 |      "metadata": {},
281 |      "output_type": "execute_result"
282 |     }
283 |    ],
284 |    "source": [
285 |     "# Recursive running sum: recur(k) = k + (k-1) + ... + 1 (triangular numbers, not the Fibonacci series)\n",
286 |     "def recur(k):\n",
287 |     "    if(k > 0):\n",
288 |     "        result = k + recur(k-1)\n",
289 |     "        print(result)\n",
290 |     "    else:\n",
291 |     "        result = 0\n",
292 |     "    return result\n",
293 |     "\n",
294 |     "print(\"Recursion Results\")\n",
295 |     "recur(5)"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "##### Note:\n",
303 |     "- If the base case of the recursion is not defined properly, the function keeps calling itself until Python raises a RecursionError (maximum recursion depth exceeded)\n",
304 |     "- Debugging is difficult because the function calls itself repeatedly\n",
305 |     "- Reference: https://beginnersbook.com/2018/02/python-recursion/"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": 11,
311 |    "metadata": {},
312 |    "outputs": [
313 |     {
314 |      "name": "stdout",
315 |      "output_type": "stream",
316 |      "text": [
317 |       "please enter value for key no 0 :1234567890\n",
318 |       "please enter value for key no 1 :23\n",
319 |       "please enter value for key no 2 :2\n",
320 |       "please enter value for key no 3 :234\n",
321 |       "please enter value for key no 4 :1234\n",
322 |       "please enter value for key no 5 :1236\n",
323 |       "please enter value for key no 6 :677\n",
324 |       "please enter value for key no 7 :78\n",
325 |       "please enter value for key no 8 :89\n",
326 |       "please enter value for key no 9 :67\n",
327 |       "{'key no 0': '1234567890', 'key no 1': '23', 'key no 2': '2', 'key no 3': '234', 'key no 4': '1234', 'key no 5': '1236', 'key no 6': '677', 'key no 7': '78', 'key no 8': '89', 'key no 9': '67'}\n"
328 |      ]
329 |     }
330 |    ],
331 |    "source": [
332 |     "# Extras: Restricting the value entry in a dictionary\n",
333 |     "# Challenge: Find the bug in this code and post it in the comments\n",
334 |     "\n",
335 |     "dict1={}\n",
336 |     "key='key no '\n",
337 |     "i=0\n",
338 |     "while i<10:\n",
339 |     "    try:\n",
340 |     "        value=input('please enter value for '+key+str(i)+' :')\n",
341 |     "        if len(value)<=10 and value.isnumeric()==True:\n",
342 |     "            dict1[key+str(i)]=value\n",
343 |     "            i+=1\n",
344 |     "    except:\n",
345 |     "        print('wrong value try again')\n",
346 |     "        i-=1\n",
347 |     "print(dict1)"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 12,
353 |    "metadata": {},
354 |    "outputs": [
355 |     {
356 |      "name": "stdout",
357 |      "output_type": "stream",
358 |      "text": [
359 |       "13\n"
360 |      ]
361 |     }
362 |    ],
363 |    "source": [
364 |     "# Extras: function to return the nth value of the Fibonacci series (1-indexed: Fibonacci(1) is 0, Fibonacci(2) is 1)\n",
365 |     "def Fibonacci(n):\n",
366 |     "    if n<0:\n",
367 |     "        print(\"Incorrect input\")\n",
368 |     "    elif n==1:\n",
369 |     "        return 0\n",
370 |     "    elif n==2:\n",
371 |     "        return 1\n",
372 |     "    else:\n",
373 |     "        return Fibonacci(n-1)+Fibonacci(n-2) \n",
374 |     "\n",
375 |     "print(Fibonacci(8))"
376 |    ]
377 |   }
378 |  ],
379 |  "metadata": {
380 |   "kernelspec": {
381 |    "display_name": "Python 3",
382 |    "language": "python",
383 |    "name": "python3"
384 |   },
385 |   "language_info": {
386 |    "codemirror_mode": {
387 |     "name": "ipython",
388 |     "version": 3
389 |    },
390 |    "file_extension": ".py",
391 |    "mimetype": "text/x-python",
392 |    "name": "python",
393 |    "nbconvert_exporter": "python",
394 |    "pygments_lexer": "ipython3",
395 |    "version": "3.7.4"
396 |   }
397 |  },
398 |  "nbformat": 4,
399 |  "nbformat_minor": 2
400 | }
401 | 


--------------------------------------------------------------------------------
/Python Loops.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python Loops\n",
  8 |     "\n",
  9 |     "### While Loop\n",
 10 |     " - A while loop is used to execute a set of statements repeatedly as long as a given condition is true. When the condition becomes false, the code immediately after the loop is executed"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "# Simple program to illustrate while loop\n",
 20 |     "\n",
 21 |     "i = 0\n",
 22 |     "while (i < 4):     \n",
 23 |     "    i = i + 1\n",
 24 |     "    print(\"Good Luck Learning Python Loops!\")"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# Print i as long as i is less than 5:\n",
 34 |     "\n",
 35 |     "i = 1\n",
 36 |     "while i < 5:\n",
 37 |     "    print(i)\n",
 38 |     "    i += 1"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "### While + Break\n",
 46 |     " - Including break statement in while loop can stop the loop even if the while condition is true"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 3,
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "# Exit the loop when i is 4:\n",
 56 |     "\n",
 57 |     "i = 1\n",
 58 |     "while i < 5:\n",
 59 |     "    print(i)\n",
 60 |     "    if i == 4:\n",
 61 |     "        break\n",
 62 |     "        #i += 1, try this code by removing the # to learn the importance of break statement\n",
 63 |     "        # and falling into a trap of never ending loop \n",
 64 |     "    i += 1"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "### While + Continue\n",
 72 |     "- Including continue statement with while loop can stop the current iteration and continue with the next"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 4,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "2\n",
 85 |       "3\n",
 86 |       "5\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "# Continue to the next iteration if i is 4:\n",
 92 |     "i = 1\n",
 93 |     "while i < 5:\n",
 94 |     "    i += 1\n",
 95 |     "    if i == 4:\n",
 96 |     "        continue\n",
 97 |     "    print(i)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "### While + Else\n",
105 |     "- The else clause is executed only when the while condition becomes false; it is skipped if the loop is exited with a break statement."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 5,
111 |    "metadata": {},
112 |    "outputs": [
113 |     {
114 |      "name": "stdout",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "Good Luck Learning Python Loops!\n",
118 |       "Good Luck Learning Python Loops!\n",
119 |       "Good Luck Learning Python Loops!\n",
120 |       "Good Luck Learning Python Loops!\n",
121 |       "Good Luck Learning Python Loops!\n",
122 |       "Keep it up!\n"
123 |      ]
124 |     }
125 |    ],
126 |    "source": [
127 |     "i = 0\n",
128 |     "while (i < 5):     \n",
129 |     "    i = i + 1\n",
130 |     "    print(\"Good Luck Learning Python Loops!\") \n",
131 |     "else: \n",
132 |     "    print(\"Keep it up!\")"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 6,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "1\n",
145 |       "2\n",
146 |       "3\n",
147 |       "4\n",
148 |       "i is not less than 5\n"
149 |      ]
150 |     }
151 |    ],
152 |    "source": [
153 |     "# Print a message once the condition is false:\n",
154 |     "i = 1\n",
155 |     "while i < 5:\n",
156 |     "    print(i)\n",
157 |     "    i += 1\n",
158 |     "else:\n",
159 |     "    print(\"i is not less than 5\")"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "### For\n",
167 |     "- A for loop is used for iterating over a sequence (i.e. a list, a tuple, a dictionary, a set, or a string). With a for loop we can execute a set of statements once for each item in a list, tuple, set, etc.\n",
168 |     "- A for loop does not require an indexing variable to be set beforehand"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 7,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "List Iteration:\n",
181 |       "1. Good\n",
182 |       "2. Better\n",
183 |       "3. Best\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "# List Iteration\n",
189 |     "print(\"List Iteration:\") \n",
190 |     "a = [\"1. Good\", \"2. Better\", \"3. Best\"] \n",
191 |     "for i in a: \n",
192 |     "    print(i)"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 8,
198 |    "metadata": {},
199 |    "outputs": [
200 |     {
201 |      "name": "stdout",
202 |      "output_type": "stream",
203 |      "text": [
204 |       "Tuple Iteration:\n",
205 |       "1. Good\n",
206 |       "2. Better\n",
207 |       "3. Best\n"
208 |      ]
209 |     }
210 |    ],
211 |    "source": [
212 |     "# Tuple Iteration\n",
213 |     "print(\"Tuple Iteration:\") \n",
214 |     "a = (\"1. Good\", \"2. Better\", \"3. Best\")\n",
215 |     "for i in a:\n",
216 |     "    print(i)"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 9,
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "String Iteration:\n",
229 |       "G\n",
230 |       "o\n",
231 |       "o\n",
232 |       "d\n"
233 |      ]
234 |     }
235 |    ],
236 |    "source": [
237 |     "# String Iteration\n",
238 |     "print(\"String Iteration:\") \n",
239 |     "a = \"Good\"\n",
240 |     "for i in a:\n",
241 |     "    print(i)"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 10,
247 |    "metadata": {},
248 |    "outputs": [
249 |     {
250 |      "name": "stdout",
251 |      "output_type": "stream",
252 |      "text": [
253 |       "Dictionary Iteration:\n",
254 |       "Good  1\n",
255 |       "Better  2\n",
256 |       "Best  3\n"
257 |      ]
258 |     }
259 |    ],
260 |    "source": [
261 |     "# Dictionary Iteration\n",
262 |     "print(\"Dictionary Iteration:\")    \n",
263 |     "a = dict()  \n",
264 |     "a['Good'] = 1\n",
265 |     "a['Better'] = 2\n",
266 |     "a['Best'] = 3\n",
267 |     "for i in a : \n",
268 |     "    print(\"%s  %d\" %(i, a[i]))\n",
269 |     "# Refer to this link to know more about %s & %d\n",
270 |     "# https://stackoverflow.com/questions/4288973/whats-the-difference-between-s-and-d-in-python-string-formatting"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "### For + Break\n",
278 |     "- Including a break statement in a for loop stops the loop before it has looped through all the items"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 11,
284 |    "metadata": {},
285 |    "outputs": [
286 |     {
287 |      "name": "stdout",
288 |      "output_type": "stream",
289 |      "text": [
290 |       "yellow\n",
291 |       "orange\n"
292 |      ]
293 |     }
294 |    ],
295 |    "source": [
296 |     "# Exit the loop when x is \"orange\"\n",
297 |     "color = [\"yellow\", \"orange\", \"red\"]\n",
298 |     "for x in color:\n",
299 |     "    print(x)\n",
300 |     "    if x == \"orange\":\n",
301 |     "        break"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "### For + Continue\n",
309 |     "- Including a continue statement in a for loop stops the current iteration of the loop and continues with the next"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": 12,
315 |    "metadata": {},
316 |    "outputs": [
317 |     {
318 |      "name": "stdout",
319 |      "output_type": "stream",
320 |      "text": [
321 |       "yellow\n",
322 |       "red\n"
323 |      ]
324 |     }
325 |    ],
326 |    "source": [
327 |     "# Do not print orange\n",
328 |     "color = [\"yellow\", \"orange\", \"red\"]\n",
329 |     "for x in color:\n",
330 |     "    if x == \"orange\":\n",
331 |     "        continue\n",
332 |     "    print(x)"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "markdown",
337 |    "metadata": {},
338 |    "source": [
339 |     "### For + Range\n",
340 |     "- range() function can be used in for loop to loop through a set of code a specified number of times\n",
341 |     "- range() function defaults to 0 as a starting value\n",
342 |     "- However it is possible to specify the starting value by adding a parameter\n",
343 |     "- Ex: range(1, 5) means values from 1 up to, but not including, 5\n",
344 |     "- range() function defaults to increment the sequence by 1\n",
345 |     "- However it is possible to specify the increment value by adding a third parameter - Ex: range(2, 20, 2)"
346 |    ]
347 |   },
348 |   {
349 |    "cell_type": "code",
350 |    "execution_count": 13,
351 |    "metadata": {},
352 |    "outputs": [
353 |     {
354 |      "name": "stdout",
355 |      "output_type": "stream",
356 |      "text": [
357 |       "0\n",
358 |       "1\n",
359 |       "2\n",
360 |       "3\n",
361 |       "4\n"
362 |      ]
363 |     }
364 |    ],
365 |    "source": [
366 |     "# print numbers within specified range\n",
367 |     "for x in range(5):\n",
368 |     "    print(x)"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 14,
374 |    "metadata": {},
375 |    "outputs": [
376 |     {
377 |      "name": "stdout",
378 |      "output_type": "stream",
379 |      "text": [
380 |       "1\n",
381 |       "2\n",
382 |       "3\n",
383 |       "4\n"
384 |      ]
385 |     }
386 |    ],
387 |    "source": [
388 |     "# specify the starting value\n",
389 |     "for x in range(1, 5):\n",
390 |     "    print(x)"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 15,
396 |    "metadata": {},
397 |    "outputs": [
398 |     {
399 |      "name": "stdout",
400 |      "output_type": "stream",
401 |      "text": [
402 |       "2\n",
403 |       "4\n",
404 |       "6\n",
405 |       "8\n",
406 |       "10\n",
407 |       "12\n",
408 |       "14\n",
409 |       "16\n",
410 |       "18\n"
411 |      ]
412 |     }
413 |    ],
414 |    "source": [
415 |     "# specify the increment value\n",
416 |     "for x in range(2, 20, 2):\n",
417 |     "    print(x)"
418 |    ]
419 |   },
420 |   {
421 |    "cell_type": "markdown",
422 |    "metadata": {},
423 |    "source": [
424 |     "### For + Else\n",
425 |     "- Including else in a for loop specifies a block of code to be executed when the loop finishes normally; the else block is skipped if the loop is exited with break"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": 16,
431 |    "metadata": {},
432 |    "outputs": [
433 |     {
434 |      "name": "stdout",
435 |      "output_type": "stream",
436 |      "text": [
437 |       "0\n",
438 |       "1\n",
439 |       "2\n",
440 |       "3\n",
441 |       "4\n",
442 |       "Going good so far\n"
443 |      ]
444 |     }
445 |    ],
446 |    "source": [
447 |     "# Print all numbers from 0 to 4 and print a message when the loop has ended:\n",
448 |     "for x in range(5):\n",
449 |     "    print(x)\n",
450 |     "else:\n",
451 |     "    print(\"Going good so far\")"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "markdown",
456 |    "metadata": {},
457 |    "source": [
458 |     "### Nested Loop\n",
459 |     "- A loop inside a loop is called a nested loop\n",
460 |     "- The inner loop will be executed one time for each iteration of the outer loop"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "code",
465 |    "execution_count": 17,
466 |    "metadata": {},
467 |    "outputs": [
468 |     {
469 |      "name": "stdout",
470 |      "output_type": "stream",
471 |      "text": [
472 |       "red sedan\n",
473 |       "red hatchback\n",
474 |       "red wagon\n",
475 |       "blue sedan\n",
476 |       "blue hatchback\n",
477 |       "blue wagon\n",
478 |       "white sedan\n",
479 |       "white hatchback\n",
480 |       "white wagon\n"
481 |      ]
482 |     }
483 |    ],
484 |    "source": [
485 |     "color = [\"red\", \"blue\", \"white\"]\n",
486 |     "car = [\"sedan\", \"hatchback\", \"wagon\"]\n",
487 |     "\n",
488 |     "for x in color:\n",
489 |     "    for y in car:\n",
490 |     "        print(x, y)"
491 |    ]
492 |   },
493 |   {
494 |    "cell_type": "code",
495 |    "execution_count": 18,
496 |    "metadata": {},
497 |    "outputs": [
498 |     {
499 |      "name": "stdout",
500 |      "output_type": "stream",
501 |      "text": [
502 |       "1 \n",
503 |       "2 2 \n",
504 |       "3 3 3 \n",
505 |       "4 4 4 4 \n"
506 |      ]
507 |     }
508 |    ],
509 |    "source": [
510 |     "# Nested For loop - Printing pattern using a range of numbers\n",
511 |     "for i in range(1, 5): \n",
512 |     "    for j in range(i): \n",
513 |     "         print(i, end=' ') \n",
514 |     "    print()"
515 |    ]
516 |   },
517 |   {
518 |    "cell_type": "code",
519 |    "execution_count": 19,
520 |    "metadata": {},
521 |    "outputs": [
522 |     {
523 |      "name": "stdout",
524 |      "output_type": "stream",
525 |      "text": [
526 |       "* \n",
527 |       "* * \n",
528 |       "* * * \n",
529 |       "* * * * \n",
530 |       "* * * * * \n",
531 |       "* * * * \n",
532 |       "* * * \n",
533 |       "* * \n",
534 |       "* \n"
535 |      ]
536 |     }
537 |    ],
538 |    "source": [
539 |     "# Printing pattern - a single for loop that grows and then shrinks a string\n",
540 |     "str1=''\n",
541 |     "for i in range(0,9):\n",
542 |     "    if i<5:\n",
543 |     "        str1 += '* '\n",
544 |     "        print(str1)\n",
545 |     "    elif i>4:\n",
546 |     "        str1 = str1[:-2]\n",
547 |     "        print(str1)"
548 |    ]
549 |   },
550 |   {
551 |    "cell_type": "code",
552 |    "execution_count": 20,
553 |    "metadata": {},
554 |    "outputs": [
555 |     {
556 |      "name": "stdout",
557 |      "output_type": "stream",
558 |      "text": [
559 |       "1 , 5\n",
560 |       "2 , 6\n",
561 |       "3 , 7\n"
562 |      ]
563 |     }
564 |    ],
565 |    "source": [
566 |     "# Nested while loop\n",
567 |     "i = 1\n",
568 |     "j = 5\n",
569 |     "while i < 4:\n",
570 |     "    while j < 8:\n",
571 |     "        print(i, \",\", j)\n",
572 |     "        j = j + 1\n",
573 |     "        i = i + 1"
574 |    ]
575 |   },
576 |   {
577 |    "cell_type": "markdown",
578 |    "metadata": {},
579 |    "source": [
580 |     "#### Extras\n",
581 |     "1. Iterating using index\n",
582 |     "   - Index of elements can be used to iterate. Find the length of the list and then iterate within the range of length\n",
583 |     "2. Pass statement to skip empty for loop \n",
584 |     "   - A for loop body cannot be empty. However, a pass statement can be used as a placeholder when the loop has nothing to do. Pass is also used for empty control statements, functions, etc."
585 |    ]
586 |   },
587 |   {
588 |    "cell_type": "code",
589 |    "execution_count": 21,
590 |    "metadata": {},
591 |    "outputs": [
592 |     {
593 |      "name": "stdout",
594 |      "output_type": "stream",
595 |      "text": [
596 |       "Have\n",
597 |       "Fun\n",
598 |       "Learning\n"
599 |      ]
600 |     }
601 |    ],
602 |    "source": [
603 |     "# 1. Iterating using index\n",
604 |     "  \n",
605 |     "mylist = [\"Have\", \"Fun\", \"Learning\"] \n",
606 |     "for index in range(len(mylist)): \n",
607 |     "    print(mylist[index])"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": 22,
613 |    "metadata": {},
614 |    "outputs": [],
615 |    "source": [
616 |     "# 2. Pass statement in for loop\n",
617 |     "for x in [0, 2, 1]:\n",
618 |     "    pass"
619 |    ]
620 |   },
621 |   {
622 |    "cell_type": "code",
623 |    "execution_count": 23,
624 |    "metadata": {},
625 |    "outputs": [
626 |     {
627 |      "name": "stdout",
628 |      "output_type": "stream",
629 |      "text": [
630 |       "Last Letter : y\n"
631 |      ]
632 |     }
633 |    ],
634 |    "source": [
635 |     "for letter in 'have a great day': \n",
636 |     "    pass\n",
637 |     "print('Last Letter :', letter)"
638 |    ]
639 |   }
640 |  ],
641 |  "metadata": {
642 |   "kernelspec": {
643 |    "display_name": "Python 3",
644 |    "language": "python",
645 |    "name": "python3"
646 |   },
647 |   "language_info": {
648 |    "codemirror_mode": {
649 |     "name": "ipython",
650 |     "version": 3
651 |    },
652 |    "file_extension": ".py",
653 |    "mimetype": "text/x-python",
654 |    "name": "python",
655 |    "nbconvert_exporter": "python",
656 |    "pygments_lexer": "ipython3",
657 |    "version": "3.7.4"
658 |   }
659 |  },
660 |  "nbformat": 4,
661 |  "nbformat_minor": 2
662 | }
663 | 


--------------------------------------------------------------------------------
/Python-Patterns.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Pattern programs in Python"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Triangle Pattern"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 18,
 20 |    "metadata": {},
 21 |    "outputs": [
 22 |     {
 23 |      "name": "stdout",
 24 |      "output_type": "stream",
 25 |      "text": [
 26 |       "        * \r\n",
 27 |       "       * * \r\n",
 28 |       "      * * * \r\n",
 29 |       "     * * * * \r\n",
 30 |       "    * * * * * \r\n"
 31 |      ]
 32 |     }
 33 |    ],
 34 |    "source": [
 35 |     "def trianglepattern(n): \n",
 36 |     "      \n",
 37 |     "    # number of spaces \n",
 38 |     "    k = 2*n - 2\n",
 39 |     "  \n",
 40 |     "    # loop to handle number of rows \n",
 41 |     "    for i in range(0, n): \n",
 42 |     "      \n",
 43 |     "        # loop to handle number spaces \n",
 44 |     "        # change the value as per need\n",
 45 |     "        for j in range(0, k): \n",
 46 |     "            print(end=\" \") \n",
 47 |     "      \n",
 48 |     "        # decreasing k after each loop \n",
 49 |     "        k = k - 1\n",
 50 |     "      \n",
 51 |     "        # loop to handle number of columns \n",
 52 |     "        # change the value as per need\n",
 53 |     "        for j in range(0, i+1): \n",
 54 |     "          \n",
 55 |     "            # printing stars \n",
 56 |     "            print(\"* \", end=\"\") \n",
 57 |     "      \n",
 58 |     "        # ending line after each row \n",
 59 |     "        print(\"\\r\") \n",
 60 |     "\n",
 61 |     "        # Driver Code \n",
 62 |     "n = 5\n",
 63 |     "trianglepattern(n)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "### Inverted Triangle Pattern"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 22,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "name": "stdout",
 80 |      "output_type": "stream",
 81 |      "text": [
 82 |       "        * * * * * * \r\n",
 83 |       "         * * * * * \r\n",
 84 |       "          * * * * \r\n",
 85 |       "           * * * \r\n",
 86 |       "            * * \r\n",
 87 |       "             * \r\n"
 88 |      ]
 89 |     }
 90 |    ],
 91 |    "source": [
 92 |     "def pattern(n):\n",
 93 |     "    k = 2*n -2\n",
 94 |     "    for i in range(n,-1,-1):\n",
 95 |     "        for j in range(k,0,-1):\n",
 96 |     "            print(end=\" \")\n",
 97 |     "        k = k +1\n",
 98 |     "        for j in range(0, i+1):\n",
 99 |     "            print(\"*\", end=\" \")\n",
100 |     "        print(\"\\r\")\n",
101 |     "\n",
102 |     "pattern(5)"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "### Right Angled Triangle"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 57,
115 |    "metadata": {},
116 |    "outputs": [
117 |     {
118 |      "name": "stdout",
119 |      "output_type": "stream",
120 |      "text": [
121 |       "* \r\n",
122 |       "* * \r\n",
123 |       "* * * \r\n",
124 |       "* * * * \r\n",
125 |       "* * * * * \r\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "def patternRT(n):\n",
131 |     "    for i in range(0,n):\n",
132 |     "        for j in range(0, i+1):\n",
133 |     "            print(\"* \" , end=\"\")\n",
134 |     "        print(\"\\r\")\n",
135 |     "patternRT(5)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "### Reverse Right Angled Triangle"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 30,
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "name": "stdout",
152 |      "output_type": "stream",
153 |      "text": [
154 |       "        * \r\n",
155 |       "      * * \r\n",
156 |       "    * * * \r\n",
157 |       "  * * * * \r\n",
158 |       "* * * * * \r\n"
159 |      ]
160 |     }
161 |    ],
162 |    "source": [
163 |     "def traingleL(n):\n",
164 |     "    k = 2*n - 2\n",
165 |     "    for i in range(0, n):\n",
166 |     "        for j in range(0, k):\n",
167 |     "            print(end=\" \")\n",
168 |     "        k = k - 2\n",
169 |     "        for j in range(0, i+1):\n",
170 |     "            print(\"* \", end=\"\")\n",
171 |     "        print(\"\\r\") \n",
172 |     "traingleL(5)"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {},
178 |    "source": [
179 |     "### 90 degree rotated triangle - Right"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 47,
185 |    "metadata": {},
186 |    "outputs": [
187 |     {
188 |      "name": "stdout",
189 |      "output_type": "stream",
190 |      "text": [
191 |       "* \r\n",
192 |       "* * \r\n",
193 |       "* * * \r\n",
194 |       "* * * * \r\n",
195 |       "* * * * * \r\n",
196 |       "* * * * \r\n",
197 |       "* * * \r\n",
198 |       "* * \r\n",
199 |       "* \r\n",
200 |       "\r\n"
201 |      ]
202 |     }
203 |    ],
204 |    "source": [
205 |     "def triangle90R(n):\n",
206 |     "    for i in range (0, n):\n",
207 |     "        for j in range(0, i + 1):\n",
208 |     "            print(\"* \", end='')\n",
209 |     "        print(\"\\r\")\n",
210 |     "    for i in range (n, 0, -1):\n",
211 |     "        for j in range(0, i -1):\n",
212 |     "            print(\"* \", end='')\n",
213 |     "        print(\"\\r\")\n",
214 |     "triangle90R(5)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {},
220 |    "source": [
221 |     "### 90 degree rotated triangle - Left"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 49,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "name": "stdout",
231 |      "output_type": "stream",
232 |      "text": [
233 |       "        * \r\n",
234 |       "      * * \r\n",
235 |       "    * * * \r\n",
236 |       "  * * * * \r\n",
237 |       "* * * * * \r\n",
238 |       "  * * * * \r\n",
239 |       "    * * * \r\n",
240 |       "      * * \r\n",
241 |       "        * \r\n"
242 |      ]
243 |     }
244 |    ],
245 |    "source": [
246 |     "def triangle90L(n):\n",
247 |     "    k = 2 * n - 2\n",
248 |     "    for i in range(0, n-1):\n",
249 |     "        for j in range(0, k):\n",
250 |     "            print(end=\" \")\n",
251 |     "        k = k - 2\n",
252 |     "        for j in range(0, i + 1):\n",
253 |     "            print(\"* \", end=\"\")\n",
254 |     "        print(\"\\r\")\n",
255 |     "    k = -1\n",
256 |     "    for i in range(n-1,-1,-1):\n",
257 |     "        for j in range(k,-1,-1):\n",
258 |     "            print(end=\" \")\n",
259 |     "        k = k + 2\n",
260 |     "        for j in range(0, i + 1):\n",
261 |     "            print(\"* \", end=\"\")\n",
262 |     "        print(\"\\r\")\n",
263 |     " \n",
264 |     " \n",
265 |     "triangle90L(5)"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {},
271 |    "source": [
272 |     "### Hour Glass Pattern"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": 55,
278 |    "metadata": {},
279 |    "outputs": [
280 |     {
281 |      "name": "stdout",
282 |      "output_type": "stream",
283 |      "text": [
284 |       "* * * * * * * * * * \r\n",
285 |       " * * * * * * * * * \r\n",
286 |       "  * * * * * * * * \r\n",
287 |       "   * * * * * * * \r\n",
288 |       "    * * * * * * \r\n",
289 |       "     * * * * * \r\n",
290 |       "      * * * * \r\n",
291 |       "       * * * \r\n",
292 |       "        * * \r\n",
293 |       "         * \r\n",
294 |       "        * * \r\n",
295 |       "       * * * \r\n",
296 |       "      * * * * \r\n",
297 |       "     * * * * * \r\n",
298 |       "    * * * * * * \r\n",
299 |       "   * * * * * * * \r\n",
300 |       "  * * * * * * * * \r\n",
301 |       " * * * * * * * * * \r\n",
302 |       "* * * * * * * * * * \r\n"
303 |      ]
304 |     }
305 |    ],
306 |    "source": [
307 |     "def hourglass(n):\n",
308 |     "    for i in range (0,n):\n",
309 |     "        for j in range (0,i):\n",
310 |     "            print(\" \", end=\"\")\n",
311 |     "        for z in range (0,n-i):\n",
312 |     "            print(\"* \", end=\"\")\n",
313 |     "        print(\"\\r\")\n",
314 |     "    for l in range (0,n-1):\n",
315 |     "        for m in range (0,n-2-l):\n",
316 |     "            print(\" \", end=\"\")\n",
317 |     "        for a in range (0,l+2):\n",
318 |     "            print(\"* \", end=\"\")\n",
319 |     "        print(\"\\r\")\n",
320 |     "hourglass(10)"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "markdown",
325 |    "metadata": {},
326 |    "source": [
327 |     "### Downward Right Angled Triangle"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": 58,
333 |    "metadata": {},
334 |    "outputs": [
335 |     {
336 |      "name": "stdout",
337 |      "output_type": "stream",
338 |      "text": [
339 |       "* * * * * * \r\n",
340 |       "* * * * * \r\n",
341 |       "* * * * \r\n",
342 |       "* * * \r\n",
343 |       "* * \r\n",
344 |       "* \r\n"
345 |      ]
346 |     }
347 |    ],
348 |    "source": [
349 |     "def triangleD(n):\n",
350 |     "    for i in range(n, -1, -1):\n",
351 |     "        for j in range(0, i + 1):\n",
352 |     "            print(\"* \", end=\"\")\n",
353 |     "        print(\"\\r\")\n",
354 |     "\n",
355 |     "triangleD(5)"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "markdown",
360 |    "metadata": {},
361 |    "source": [
362 |     "### Diamond Pattern"
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": 60,
368 |    "metadata": {},
369 |    "outputs": [
370 |     {
371 |      "name": "stdout",
372 |      "output_type": "stream",
373 |      "text": [
374 |       "        * \r\n",
375 |       "       * * \r\n",
376 |       "      * * * \r\n",
377 |       "     * * * * \r\n",
378 |       "    * * * * * \r\n",
379 |       "   * * * * * * \r\n",
380 |       "    * * * * * \r\n",
381 |       "     * * * * \r\n",
382 |       "      * * * \r\n",
383 |       "       * * \r\n",
384 |       "        * \r\n"
385 |      ]
386 |     }
387 |    ],
388 |    "source": [
389 |     "def diamond(n):\n",
390 |     "    k = 2 * n - 2\n",
391 |     "    for i in range(0, n):\n",
392 |     "        for j in range(0 , k):\n",
393 |     "            print(end=\" \")\n",
394 |     "        k = k - 1\n",
395 |     "        for j in range(0 , i + 1 ):\n",
396 |     "            print(\"* \", end=\"\")\n",
397 |     "        print(\"\\r\")\n",
398 |     "    k = n - 2\n",
399 |     "    for i in range(n , -1, -1):\n",
400 |     "        for j in range(k , 0 , -1):\n",
401 |     "            print(end=\" \")\n",
402 |     "        k = k + 1\n",
403 |     "        for j in range(0 , i + 1):\n",
404 |     "            print(\"* \", end=\"\")\n",
405 |     "        print(\"\\r\")\n",
406 |     "\n",
407 |     "diamond(5)"
408 |    ]
409 |   },
410 |   {
411 |    "cell_type": "markdown",
412 |    "metadata": {},
413 |    "source": [
414 |     "### Pattern  - Pant"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "code",
419 |    "execution_count": 84,
420 |    "metadata": {},
421 |    "outputs": [
422 |     {
423 |      "name": "stdout",
424 |      "output_type": "stream",
425 |      "text": [
426 |       "**************\n",
427 |       "******__******\n",
428 |       "*****____*****\n",
429 |       "****______****\n",
430 |       "***________***\n",
431 |       "**__________**\n",
432 |       "*____________*\n"
433 |      ]
434 |     }
435 |    ],
436 |    "source": [
437 |     "rows = 14\n",
438 |     "print(\"*\" * rows, end=\"\\n\")\n",
439 |     "i = (rows // 2) - 1\n",
440 |     "j = 2\n",
441 |     "while i != 0:\n",
442 |     "    while j <= (rows - 2):\n",
443 |     "        print(\"*\" * i, end=\"\")\n",
444 |     "        print(\"_\" * j, end=\"\")\n",
445 |     "        print(\"*\" * i, end=\"\\n\")\n",
446 |     "        i = i - 1\n",
447 |     "        j = j + 2\n"
448 |    ]
449 |   },
450 |   {
451 |    "cell_type": "markdown",
452 |    "metadata": {},
453 |    "source": [
454 |     "### Pascal’s Triangle\n",
455 |     "[link](https://www.mathsisfun.com/pascals-triangle.html)"
456 |    ]
457 |   },
458 |   {
459 |    "cell_type": "code",
460 |    "execution_count": 69,
461 |    "metadata": {},
462 |    "outputs": [
463 |     {
464 |      "name": "stdout",
465 |      "output_type": "stream",
466 |      "text": [
467 |       "1  \n",
468 |       "1  1  \n",
469 |       "1  2  1  \n",
470 |       "1  3  3  1  \n",
471 |       "1  4  6  4  1  \n",
472 |       "1  5  10  10  5  1  \n",
473 |       "1  6  15  20  15  6  1  \n",
474 |       "1  7  21  35  35  21  7  1  \n"
475 |      ]
476 |     }
477 |    ],
478 |    "source": [
479 |     "def pascal(n):\n",
480 |     "    for i in range(0, n):\n",
481 |     "        for j in range(0, i + 1):\n",
482 |     "            print(function(i, j),\" \", end=\"\")\n",
483 |     "        print()\n",
484 |     "    \n",
485 |     "def function(n, k):\n",
486 |     "    res = 1\n",
487 |     "    if (k > n - k):\n",
488 |     "        k = n - k\n",
489 |     "    for i in range(0, k):\n",
490 |     "        res = res * (n - i)\n",
491 |     "        res = res // (i + 1)\n",
492 |     "        \n",
493 |     "    return res\n",
494 |     "\n",
495 |     "pascal(8)"
496 |    ]
497 |   },
498 |   {
499 |    "cell_type": "code",
500 |    "execution_count": 76,
501 |    "metadata": {},
502 |    "outputs": [
503 |     {
504 |      "name": "stdout",
505 |      "output_type": "stream",
506 |      "text": [
507 |       "1 \r\n",
508 |       "1 2 \r\n",
509 |       "1 2 3 \r\n",
510 |       "1 2 3 4 \r\n"
511 |      ]
512 |     }
513 |    ],
514 |    "source": [
515 |     "def patternNum(n):\n",
516 |     "    for i in range(1,n):\n",
517 |     "        for j in range(1, i+1):\n",
518 |     "            print(j , end=\" \")\n",
519 |     "        print(\"\\r\")\n",
520 |     "patternNum(5)"
521 |    ]
522 |   },
523 |   {
524 |    "cell_type": "code",
525 |    "execution_count": 75,
526 |    "metadata": {},
527 |    "outputs": [
528 |     {
529 |      "name": "stdout",
530 |      "output_type": "stream",
531 |      "text": [
532 |       "1 \r\n",
533 |       "2 2 \r\n",
534 |       "3 3 3 \r\n",
535 |       "4 4 4 4 \r\n"
536 |      ]
537 |     }
538 |    ],
539 |    "source": [
540 |     "def patternNum2(n):\n",
541 |     "    for i in range(1,n):\n",
542 |     "        for j in range(1, i+1):\n",
543 |     "            print(i , end=\" \")\n",
544 |     "        print(\"\\r\")\n",
545 |     "patternNum2(5)"
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "code",
550 |    "execution_count": 77,
551 |    "metadata": {},
552 |    "outputs": [
553 |     {
554 |      "name": "stdout",
555 |      "output_type": "stream",
556 |      "text": [
557 |       "A \r\n",
558 |       "B B \r\n",
559 |       "C C C \r\n",
560 |       "D D D D \r\n",
561 |       "E E E E E \r\n"
562 |      ]
563 |     }
564 |    ],
565 |    "source": [
566 |     "def alphatriangle(n):\n",
567 |     "    x = 65\n",
568 |     "    for i in range(0, n):\n",
569 |     "        ch = chr(x)\n",
570 |     "        x += 1\n",
571 |     "        for j in range(0, i + 1):\n",
572 |     "            print(ch, end=\" \")\n",
573 |     "        print(\"\\r\")\n",
574 |     "\n",
575 |     "alphatriangle(5)"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": 79,
581 |    "metadata": {},
582 |    "outputs": [
583 |     {
584 |      "name": "stdout",
585 |      "output_type": "stream",
586 |      "text": [
587 |       "1 1 1 1 1 \r\n",
588 |       "2 2 2 2 \r\n",
589 |       "3 3 3 \r\n",
590 |       "4 4 \r\n",
591 |       "5 \r\n"
592 |      ]
593 |     }
594 |    ],
595 |    "source": [
596 |     "rows = 5\n",
597 |     "b = 0\n",
598 |     "for i in range(rows, 0, -1):\n",
599 |     "    b += 1\n",
600 |     "    for j in range(1, i + 1):\n",
601 |     "        print(b, end=' ')\n",
602 |     "    print('\\r')"
603 |    ]
604 |   },
605 |   {
606 |    "cell_type": "markdown",
607 |    "metadata": {},
608 |    "source": [
609 |     "### Inverted Triangle with same number"
610 |    ]
611 |   },
612 |   {
613 |    "cell_type": "code",
614 |    "execution_count": 80,
615 |    "metadata": {},
616 |    "outputs": [
617 |     {
618 |      "name": "stdout",
619 |      "output_type": "stream",
620 |      "text": [
621 |       "5 5 5 5 5 \r\n",
622 |       "5 5 5 5 \r\n",
623 |       "5 5 5 \r\n",
624 |       "5 5 \r\n",
625 |       "5 \r\n"
626 |      ]
627 |     }
628 |    ],
629 |    "source": [
630 |     "rows = 5\n",
631 |     "num = rows\n",
632 |     "for i in range(rows, 0, -1):\n",
633 |     "    for j in range(0, i):\n",
634 |     "        print(num, end=' ')\n",
635 |     "    print(\"\\r\")"
636 |    ]
637 |   },
638 |   {
639 |    "cell_type": "markdown",
640 |    "metadata": {},
641 |    "source": [
642 |     "### Inverted Triangle with descending order of numbers"
643 |    ]
644 |   },
645 |   {
646 |    "cell_type": "code",
647 |    "execution_count": 81,
648 |    "metadata": {},
649 |    "outputs": [
650 |     {
651 |      "name": "stdout",
652 |      "output_type": "stream",
653 |      "text": [
654 |       "5 5 5 5 5 \r\n",
655 |       "4 4 4 4 \r\n",
656 |       "3 3 3 \r\n",
657 |       "2 2 \r\n",
658 |       "1 \r\n"
659 |      ]
660 |     }
661 |    ],
662 |    "source": [
663 |     "rows = 5\n",
664 |     "for i in range(rows, 0, -1):\n",
665 |     "    num = i\n",
666 |     "    for j in range(0, i):\n",
667 |     "        print(num, end=' ')\n",
668 |     "    print(\"\\r\")"
669 |    ]
670 |   },
671 |   {
672 |    "cell_type": "markdown",
673 |    "metadata": {},
674 |    "source": [
675 |     "### Triangle with numbers in reverse order"
676 |    ]
677 |   },
678 |   {
679 |    "cell_type": "code",
680 |    "execution_count": 82,
681 |    "metadata": {},
682 |    "outputs": [
683 |     {
684 |      "name": "stdout",
685 |      "output_type": "stream",
686 |      "text": [
687 |       "1 \n",
688 |       "2 1 \n",
689 |       "3 2 1 \n",
690 |       "4 3 2 1 \n",
691 |       "5 4 3 2 1 \n"
692 |      ]
693 |     }
694 |    ],
695 |    "source": [
696 |     "rows = 6\n",
697 |     "for row in range(1, rows):\n",
698 |     "    for column in range(row, 0, -1):\n",
699 |     "        print(column, end=' ')\n",
700 |     "    print(\"\")\n"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "markdown",
705 |    "metadata": {},
706 |    "source": [
707 |     "### Display numbers 1 to 10 in a triangle pattern"
708 |    ]
709 |   },
710 |   {
711 |    "cell_type": "code",
712 |    "execution_count": 83,
713 |    "metadata": {},
714 |    "outputs": [
715 |     {
716 |      "name": "stdout",
717 |      "output_type": "stream",
718 |      "text": [
719 |       "1 \n",
720 |       "2 3 \n",
721 |       "4 5 6 \n",
722 |       "7 8 9 10 \n"
723 |      ]
724 |     }
725 |    ],
726 |    "source": [
727 |     "currentNumber = 1\n",
728 |     "rows = 4  # Rows you want in your pattern\n",
729 |     "stop = 2\n",
730 |     "for i in range(rows):\n",
731 |     "    for column in range(1, stop):\n",
732 |     "        print(currentNumber, end=' ')\n",
733 |     "        currentNumber += 1\n",
734 |     "    print(\"\")\n",
735 |     "    stop += 1"
736 |    ]
737 |   },
738 |   {
739 |    "cell_type": "markdown",
740 |    "metadata": {},
741 |    "source": [
742 |     "### Pattern with alternate numbers"
743 |    ]
744 |   },
745 |   {
746 |    "cell_type": "code",
747 |    "execution_count": 85,
748 |    "metadata": {},
749 |    "outputs": [
750 |     {
751 |      "name": "stdout",
752 |      "output_type": "stream",
753 |      "text": [
754 |       "1 \n",
755 |       "3 3 \n",
756 |       "5 5 5 \n",
757 |       "7 7 7 7 \n",
758 |       "9 9 9 9 9 \n"
759 |      ]
760 |     }
761 |    ],
762 |    "source": [
763 |     "rows = 5\n",
764 |     "i = 1\n",
765 |     "while i <= rows:\n",
766 |     "    j = 1\n",
767 |     "    while j <= i:\n",
768 |     "        print((i * 2 - 1), end=\" \")\n",
769 |     "        j = j + 1\n",
770 |     "    i = i + 1\n",
771 |     "    print()"
772 |    ]
773 |   },
774 |   {
775 |    "cell_type": "markdown",
776 |    "metadata": {},
777 |    "source": [
778 |     "### Pattern with Even numbers"
779 |    ]
780 |   },
781 |   {
782 |    "cell_type": "code",
783 |    "execution_count": 86,
784 |    "metadata": {},
785 |    "outputs": [
786 |     {
787 |      "name": "stdout",
788 |      "output_type": "stream",
789 |      "text": [
790 |       "10 \r\n",
791 |       "10 8 \r\n",
792 |       "10 8 6 \r\n",
793 |       "10 8 6 4 \r\n",
794 |       "10 8 6 4 2 \r\n"
795 |      ]
796 |     }
797 |    ],
798 |    "source": [
799 |     "rows = 5\n",
800 |     "LastEvenNumber = 2 * rows\n",
801 |     "evenNumber = LastEvenNumber\n",
802 |     "for i in range(1, rows+1):\n",
803 |     "    evenNumber = LastEvenNumber\n",
804 |     "    for j in range(i):\n",
805 |     "        print(evenNumber, end=' ')\n",
806 |     "        evenNumber -= 2\n",
807 |     "    print(\"\\r\")"
808 |    ]
809 |   },
810 |   {
811 |    "cell_type": "code",
812 |    "execution_count": null,
813 |    "metadata": {},
814 |    "outputs": [],
815 |    "source": []
816 |   }
817 |  ],
818 |  "metadata": {
819 |   "kernelspec": {
820 |    "display_name": "Python 3",
821 |    "language": "python",
822 |    "name": "python3"
823 |   },
824 |   "language_info": {
825 |    "codemirror_mode": {
826 |     "name": "ipython",
827 |     "version": 3
828 |    },
829 |    "file_extension": ".py",
830 |    "mimetype": "text/x-python",
831 |    "name": "python",
832 |    "nbconvert_exporter": "python",
833 |    "pygments_lexer": "ipython3",
834 |    "version": "3.7.4"
835 |   }
836 |  },
837 |  "nbformat": 4,
838 |  "nbformat_minor": 2
839 | }
840 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bite-Sized-Learning-Python
2 | This contains key Python & ML topics in a condensed format to quickly revise the basics
3 | 


--------------------------------------------------------------------------------
/Regular Expressions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# RegEx:\n",
  8 |     "- RegEx (Regular Expression) is a sequence of characters that forms a search pattern\n",
  9 |     "- RegEx can be used to check if a string contains the specified search pattern\n",
 10 |     "\n",
 11 |     "## How to install?\n",
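 12 |     "- The built-in `re` module used below ships with Python and needs no installation; the optional third-party `regex` package can be installed by running `!pip install regex` in a Jupyter notebook"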
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 1,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import re"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "## RegEx Functions\n",
 29 |     "\n",
 30 |     "The re module offers a set of functions that allows us to search a string for a match:\n",
 31 |     "\n",
 32 |     "- findall - Returns a list containing all matches\n",
 33 |     "- search - Returns a Match object if there is a match anywhere in the string\n",
 34 |     "- split\t- Returns a list where the string has been split at each match\n",
 35 |     "- sub\t- Replaces one or many matches with a string"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "### Metacharacters\n",
 43 |     "|Character         | Description          | Example  |\n",
 44 |     "| ------------- |:-------------:| ------:|\n",
 45 |     "`[]`|\tA set of characters\t|\"[a-m]\"\t\n",
 46 |     "`\\`\t|Signals a special sequence (can also be used to escape special characters)\t|\"\\d\"\t\n",
 47 |     "`.`\t|Any character (except newline character)\t|\"he..o\"\t\n",
 48 |     "`^`\t|Starts with\t|\"^hello\"\t\n",
 49 |     "`$`\t|Ends with\t|\"world$\"\t\n",
 50 |     "`*`\t|Zero or more occurrences\t|\"aix*\"\t\n",
 51 |     "`+`\t|One or more occurrences\t|\"aix+\"\t\n",
 52 |     "`{}` |Exactly the specified number of occurrences\t|\"al{2}\"\t\n",
 53 |     "`|`\t|Either or\t|\"falls|stays\"\t\n",
 54 |     "`()`| Capture and group\t "
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "### Special Sequences\n",
 62 |     "|Character       | Description          | Example  |\n",
 63 |     "| ------------- |:-------------:| ------:|\n",
 64 |     "`\\A`\t| Returns a match if the specified characters are at the beginning of the string\t| \"\\AThe\"\t\n",
 65 |     "`\\b`\t| Returns a match where the specified characters are at the beginning or at the end of a word\t| r\"\\bain\", r\"ain\\b\"\n",
 66 |     "`\\B`\t| Returns a match where the specified characters are present, but NOT at the beginning (or at the end) of a word| r\"\\Bain\" , r\"ain\\B\"\t\n",
 67 |     "`\\d`\t| Returns a match where the string contains digits (numbers from 0-9)\t| \"\\d\"\t\n",
 68 |     "`\\D`\t| Returns a match where the string DOES NOT contain digits\t| \"\\D\"\t\n",
 69 |     "`\\s`\t| Returns a match where the string contains a white space character\t| \"\\s\"\t\n",
 70 |     "`\\S`\t| Returns a match where the string DOES NOT contain a white space character\t| \"\\S\"\t\n",
 71 |     "`\\w`\t| Returns a match where the string contains any word characters (characters from a to Z, digits from 0-9, and the underscore _ character)\t| \"\\w\"\t\n",
 72 |     "`\\W`\t| Returns a match where the string DOES NOT contain any word characters\t| \"\\W\"\t\n",
 73 |     "`\\Z`\t| Returns a match if the specified characters are at the end of the string\t| \"India\\Z\""
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "### Sets\n",
 81 |     "|Set        | Description          |\n",
 82 |     "| ------------- |:-------------:|\n",
 83 |     "`[arn]`\t|Returns a match where one of the specified characters (a, r, or n) are present\t\n",
 84 |     "`[a-n]`\t|Returns a match for any lower case character, alphabetically between a and n\t\n",
 85 |     "`[^arn]`\t|Returns a match for any character EXCEPT a, r, and n\t\n",
 86 |     "`[0123]`\t|Returns a match where any of the specified digits (0, 1, 2, or 3) are present\t\n",
 87 |     "`[0-9]`\t|Returns a match for any digit between 0 and 9\t\n",
 88 |     "`[0-5][0-9]`\t|Returns a match for any two-digit number from 00 to 59\t\n",
 89 |     "`[a-zA-Z]`\t|Returns a match for any character alphabetically between a and z, lower case OR upper case\t\n",
 90 |     "`[+]`\t|In sets, +, *, ., |, (), $, {} have no special meaning, so [+] means: return a match for any + character in the string"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "### Supported Regular Expression Flags\n",
 98 |     "|Short Name      | Long Name        | Effect |\n",
 99 |     "| ------------- |:-------------:| ------:|\n",
100 |     "re.I\t|re.IGNORECASE\t|Makes matching of alphabetic characters case-insensitive\n",
101 |     "re.M\t|re.MULTILINE\t|Causes start-of-string and end-of-string anchors to match embedded newlines\n",
102 |     "re.S\t|re.DOTALL\t|Causes the dot metacharacter to match a newline\n",
103 |     "re.X\t|re.VERBOSE\t|Allows inclusion of whitespace and comments within a regular expression\n",
104 |     "----\t|re.DEBUG\t|Causes the regex parser to display debugging information to the console\n",
105 |     "re.A\t|re.ASCII\t|Specifies ASCII encoding for character classification\n",
106 |     "re.U\t|re.UNICODE\t|Specifies Unicode encoding for character classification\n",
107 |     "re.L    |re.LOCALE\t|Specifies encoding for character classification based on the current locale"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "### search() function\n",
115 |     "Search the string to see if it starts with \"The\" and ends with \"good\":"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 2,
121 |    "metadata": {
122 |     "scrolled": true
123 |    },
124 |    "outputs": [
125 |     {
126 |      "name": "stdout",
127 |      "output_type": "stream",
128 |      "text": [
129 |       "Found a match\n"
130 |      ]
131 |     }
132 |    ],
133 |    "source": [
134 |     "text = \"The food was good\"\n",
135 |     "x = re.search(\"^The.*good$\", text)\n",
136 |     "\n",
137 |     "if x:\n",
138 |     "    print(\"Found a match\")\n",
139 |     "else:\n",
140 |     "    print(\"No match\")"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "metadata": {},
146 |    "source": [
147 |     "### findall() function\n",
148 |     "The findall() function returns a list containing all matches"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 3,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "name": "stdout",
158 |      "output_type": "stream",
159 |      "text": [
160 |       "['oo', 'oo']\n"
161 |      ]
162 |     }
163 |    ],
164 |    "source": [
165 |     "text = \"The food was good\"\n",
166 |     "x = re.findall(\"oo\", text)\n",
167 |     "print(x)"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 4,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "The first white-space character is located in position: 6\n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "# Search for the first white-space character in the string:\n",
185 |     "text = \"Indian food was good\"\n",
186 |     "x = re.search(\"\\s\", text)\n",
187 |     "\n",
188 |     "print(\"The first white-space character is located in position:\", x.start())"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 17,
194 |    "metadata": {},
195 |    "outputs": [
196 |     {
197 |      "name": "stdout",
198 |      "output_type": "stream",
199 |      "text": [
200 |       "['1729', '44']\n"
201 |      ]
202 |     }
203 |    ],
204 |    "source": [
205 |     "# Program to extract numbers from a string\n",
206 |     "\n",
207 |     "import re\n",
208 |     "\n",
209 |     "string = 'hello 1729. How is 44'\n",
210 |     "pattern = '\\d+'\n",
211 |     "\n",
212 |     "result = re.findall(pattern, string) \n",
213 |     "print(result)"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 6,
219 |    "metadata": {},
220 |    "outputs": [
221 |     {
222 |      "name": "stdout",
223 |      "output_type": "stream",
224 |      "text": [
225 |       "a#12@gmail.com\n",
226 |       "invalid\n"
227 |      ]
228 |     }
229 |    ],
230 |    "source": [
231 |     "# Valid email ID\n",
232 |     "# Given an email ID, you have to determine if it is valid or not\n",
233 |     "import re\n",
234 |     "def checkmail(email):\n",
235 |     "    check =  bool(re.search(r\"^[\\w\\.\\+\\-]+\\@[A-Za-z]+\\.[a-z]{2,3}$\", email))\n",
236 |     "    if check:\n",
237 |     "        return 'valid'\n",
238 |     "    else:\n",
239 |     "        return 'invalid'\n",
240 |     "email=input()\n",
241 |     "print(checkmail(email))"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "### split() function\n",
249 |     "Split at each white-space character"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "code",
254 |    "execution_count": 7,
255 |    "metadata": {},
256 |    "outputs": [
257 |     {
258 |      "name": "stdout",
259 |      "output_type": "stream",
260 |      "text": [
261 |       "['Indian', 'food', 'was', 'good']\n"
262 |      ]
263 |     }
264 |    ],
265 |    "source": [
266 |     "text = \"Indian food was good\"\n",
267 |     "x = re.split(\"\\s\", text)\n",
268 |     "print(x)"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": 8,
274 |    "metadata": {},
275 |    "outputs": [
276 |     {
277 |      "name": "stdout",
278 |      "output_type": "stream",
279 |      "text": [
280 |       "['Indian', 'food was good']\n"
281 |      ]
282 |     }
283 |    ],
284 |    "source": [
285 |     "# You can control the number of occurrences by specifying the maxsplit parameter\n",
286 |     "text = \"Indian food was good\"\n",
287 |     "x = re.split(\"\\s\", text, 1)\n",
288 |     "print(x)"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "markdown",
293 |    "metadata": {},
294 |    "source": [
295 |     "### sub() Function\n",
296 |     "The sub() function replaces the matches with the text of your choice"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 9,
302 |    "metadata": {},
303 |    "outputs": [
304 |     {
305 |      "name": "stdout",
306 |      "output_type": "stream",
307 |      "text": [
308 |       "Indian$food$was$good\n"
309 |      ]
310 |     }
311 |    ],
312 |    "source": [
313 |     "# Replace every white-space character with the character $\n",
314 |     "text = \"Indian food was good\"\n",
315 |     "x = re.sub(\"\\s\", \"$\", text)\n",
316 |     "print(x)"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 10,
322 |    "metadata": {},
323 |    "outputs": [
324 |     {
325 |      "name": "stdout",
326 |      "output_type": "stream",
327 |      "text": [
328 |       "Indian$food was good\n"
329 |      ]
330 |     }
331 |    ],
332 |    "source": [
333 |     "# You can control the number of replacements by specifying the count parameter\n",
334 |     "text = \"Indian food was good\"\n",
335 |     "x = re.sub(\"\\s\", \"$\", text, 1)\n",
336 |     "print(x)"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": 11,
342 |    "metadata": {},
343 |    "outputs": [
344 |     {
345 |      "name": "stdout",
346 |      "output_type": "stream",
347 |      "text": [
348 |       "abc12de23f456\n"
349 |      ]
350 |     }
351 |    ],
352 |    "source": [
353 |     "# Program to remove all whitespaces\n",
354 |     "import re\n",
355 |     "\n",
356 |     "# multiline string\n",
357 |     "string = 'abc 12\\\n",
358 |     "de 23 \\n f45 6'\n",
359 |     "\n",
360 |     "# matches all whitespace characters\n",
361 |     "pattern = '\\s+'\n",
362 |     "\n",
363 |     "# empty string\n",
364 |     "replace = ''\n",
365 |     "\n",
366 |     "new_string = re.sub(pattern, replace, string) \n",
367 |     "print(new_string)"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "code",
372 |    "execution_count": 12,
373 |    "metadata": {},
374 |    "outputs": [
375 |     {
376 |      "name": "stdout",
377 |      "output_type": "stream",
378 |      "text": [
379 |       "Invalid\n"
380 |      ]
381 |     }
382 |    ],
383 |    "source": [
384 |     "'''\n",
385 |     "# Password Validation\n",
386 |     "The characteristics of a strong password include:\n",
387 |     "1. it should be at least 8 characters long\n",
388 |     "2. it should have at least one lowercase alphabet.\n",
389 |     "3.it should have at least one uppercase alphabet\n",
390 |     "4. it should have at least one number(0-9)\n",
391 |     "5. it should have at least one special character( a special character is considered among the following: [@%$*])\n",
392 |     "\n",
393 |     "'''\n",
394 |     "pwd= 'DataScience123'\n",
395 |     "#write your code here\n",
396 |     "import re \n",
397 |     "flag = 0\n",
398 |     "while True:   \n",
399 |     "    if (len(pwd)<8): \n",
400 |     "        flag = -1\n",
401 |     "        break\n",
402 |     "    elif not re.search(\"[a-z]\", pwd): \n",
403 |     "        flag = -1\n",
404 |     "        break\n",
405 |     "    elif not re.search(\"[A-Z]\", pwd): \n",
406 |     "        flag = -1\n",
407 |     "        break\n",
408 |     "    elif not re.search(\"[0-9]\", pwd): \n",
409 |     "        flag = -1\n",
410 |     "        break\n",
411 |     "    elif not re.search(\"[@%$*]\", pwd): \n",
412 |     "        flag = -1\n",
413 |     "        break\n",
414 |     "    elif re.search(\"\\s\", pwd): \n",
415 |     "        flag = -1\n",
416 |     "        break\n",
417 |     "    else: \n",
418 |     "        flag = 0\n",
419 |     "        print(\"Valid\") \n",
420 |     "        break\n",
421 |     "\n",
422 |     "if flag ==-1:\n",
423 |     "    print(\"Invalid\")"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "markdown",
428 |    "metadata": {},
429 |    "source": [
430 |     "### Match Object\n",
431 |     "A Match Object is an object containing information about the search and the result"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 13,
437 |    "metadata": {},
438 |    "outputs": [
439 |     {
440 |      "name": "stdout",
441 |      "output_type": "stream",
442 |      "text": [
443 |       "<re.Match object; span=(0, 2), match='In'>\n"
444 |      ]
445 |     }
446 |    ],
447 |    "source": [
448 |     "text = \"Indian food was good\"\n",
449 |     "x = re.search(\"In\", text)\n",
450 |     "print(x)"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "markdown",
455 |    "metadata": {},
456 |    "source": [
457 |     "For more details, refer to the links below:\n",
458 |     "- [RegEx_1](https://www.w3schools.com/python/python_regex.asp)\n",
459 |     "- [RegEx_2](https://realpython.com/regex-python/)"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "code",
464 |    "execution_count": 14,
465 |    "metadata": {},
466 |    "outputs": [
467 |     {
468 |      "name": "stdout",
469 |      "output_type": "stream",
470 |      "text": [
471 |       "[1, 2, 3, 4, 5, 6, 7, 8, 9]\n"
472 |      ]
473 |     }
474 |    ],
475 |    "source": [
476 |     "# Extras\n",
477 |     "# Some common interview questions\n",
478 |     "\n",
479 |     "input_list = [[1,2,3],[4,5],[6,7,8,9]]\n",
480 |     "flat = []\n",
481 |     "for sublist in input_list:\n",
482 |     "    for item in sublist:\n",
483 |     "        flat.append(item)\n",
484 |     "print(flat)"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": 15,
490 |    "metadata": {},
491 |    "outputs": [
492 |     {
493 |      "name": "stdout",
494 |      "output_type": "stream",
495 |      "text": [
496 |       "['a', 'c', 'd']\n"
497 |      ]
498 |     }
499 |    ],
500 |    "source": [
501 |     "# Given a string, you have to find the first n most frequent characters in it.\n",
502 |     "# You have to print those n letters in alphabetically sorted order.\n",
503 |     "from collections import Counter\n",
504 |     "string= 'ddddaacccb'\n",
505 |     "n=3\n",
506 |     "\n",
507 |     "a = Counter(string).most_common(n)\n",
508 |     "b = [i[0] for i in a]\n",
509 |     "b.sort()\n",
510 |     "print(b)"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {},
517 |    "outputs": [],
518 |    "source": []
519 |   }
520 |  ],
521 |  "metadata": {
522 |   "kernelspec": {
523 |    "display_name": "Python 3",
524 |    "language": "python",
525 |    "name": "python3"
526 |   },
527 |   "language_info": {
528 |    "codemirror_mode": {
529 |     "name": "ipython",
530 |     "version": 3
531 |    },
532 |    "file_extension": ".py",
533 |    "mimetype": "text/x-python",
534 |    "name": "python",
535 |    "nbconvert_exporter": "python",
536 |    "pygments_lexer": "ipython3",
537 |    "version": "3.7.4"
538 |   }
539 |  },
540 |  "nbformat": 4,
541 |  "nbformat_minor": 2
542 | }
543 | 


--------------------------------------------------------------------------------
/Seaborn_Cheatsheet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsual/Bite-Sized-Learning-Python/6c5ebe36f2bd1a2e8cdd97181a50d6c9088b7371/Seaborn_Cheatsheet.png


--------------------------------------------------------------------------------
/Text to Speech.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Text To Speech\n",
  8 |     "\n",
  9 |     "## Text-to-Speech Technology\n",
 10 |     "\n",
 11 |     "- Text-to-speech (TTS) technology reads aloud digital text—the words on computers, smartphones and tablets\n",
 12 |     "- TTS can help kids who struggle with reading\n",
 13 |     "- There are TTS tools available for nearly every digital device\n",
 14 |     "\n",
 15 |     "#### Reference:-\n",
 16 |     " - [TTS](https://www.understood.org/en/school-learning/assistive-technology/assistive-technologies-basics/text-to-speech-technology-what-it-is-and-how-it-works)\n",
 17 |     " - [Speech Synthesis](https://en.wikipedia.org/wiki/Speech_synthesis)\n",
 18 |     "\n"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "#### Libraries to install\n",
 26 |     "\n",
 27 |     " - `!pip install gTTS`"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "### Speech Synthesis - Process\n",
 35 |     "\n",
 36 |     "<img align = \"left\" width=\"700\" height =\"700\" src = \"https://upload.wikimedia.org/wikipedia/commons/thumb/b/b5/TTS_System.svg/825px-TTS_System.svg.png\">"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 1,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "# Loading libraries\n",
 46 |     "\n",
 47 |     "from gtts import gTTS \n",
 48 |     "import os"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "- gTTS module needs internet and depends on google to get the audio data/conversion\n",
 56 |     "- [gTTS Documentation](https://gtts.readthedocs.io/en/latest/module.html)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "## Text to audio"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 2,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "text = \"Text-to-speech or TTS is a type of  assistive technology that reads digital text aloud. It’s sometimes called “read aloud” technology.With a click of a button or the touch of a finger, TTS can take words on a computer or other digital device and convert them into audio\""
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "##### Selecting language"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 3,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "language = 'en'"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "##### Creating a variable and passing the text & language"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 4,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "speech = gTTS(text = text, lang = language, slow = False) \n",
105 |     "#\" slow = False\" tells the module that after conversion the audio should have a high/normal speed"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "##### Saving the converted audio as a .mp3 file"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 5,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "speech.save(\"text.mp3\")"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "##### Playback the mp3 file"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 6,
134 |    "metadata": {},
135 |    "outputs": [
136 |     {
137 |      "data": {
138 |       "text/plain": [
139 |        "0"
140 |       ]
141 |      },
142 |      "execution_count": 6,
143 |      "metadata": {},
144 |      "output_type": "execute_result"
145 |     }
146 |    ],
147 |    "source": [
148 |     "os.system(\"start text.mp3\") # start command initiates to play the audio from your local windows media player"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "## Text file to audio\n",
156 |     "\n",
157 |     "##### Reading the text file and storing it in a variable"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 7,
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": [
166 |     "file = open(\"Test-1.txt\", \"r\").read().replace(\"\\n\", \" \")"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "##### Language selection"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 8,
179 |    "metadata": {},
180 |    "outputs": [],
181 |    "source": [
182 |     "language = 'en'"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "metadata": {},
188 |    "source": [
189 |     "##### Passing the text file to the module"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 9,
195 |    "metadata": {},
196 |    "outputs": [],
197 |    "source": [
198 |     "speech = gTTS(text = str(file), lang = language, slow = False)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "metadata": {},
204 |    "source": [
205 |     "##### Saving the converted audio as a .mp3 file"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": 10,
211 |    "metadata": {},
212 |    "outputs": [],
213 |    "source": [
214 |     "speech.save(\"Text1.mp3\")"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "markdown",
219 |    "metadata": {},
220 |    "source": [
221 |     "##### Playback the mp3 file"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 11,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "data": {
231 |       "text/plain": [
232 |        "0"
233 |       ]
234 |      },
235 |      "execution_count": 11,
236 |      "metadata": {},
237 |      "output_type": "execute_result"
238 |     }
239 |    ],
240 |    "source": [
241 |     "os.system(\"start Text1.mp3\")"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "For more details, refer to [KDnuggets](https://www.kdnuggets.com/2020/05/easy-text-speech-python.html)"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "markdown",
253 |    "metadata": {},
254 |    "source": [
255 |     "## Text from an online source to audio"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 12,
261 |    "metadata": {},
262 |    "outputs": [],
263 |    "source": [
264 |     "#Loading libraries\n",
265 |     "from newspaper import Article\n",
266 |     "import nltk"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "metadata": {},
272 |    "source": [
273 |     "##### Input online article link"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 13,
279 |    "metadata": {},
280 |    "outputs": [],
281 |    "source": [
282 |     "article = Article('https://bernardmarr.com/default.asp?contentID=2066')"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "metadata": {},
288 |    "source": [
289 |     "##### Download & Parse the article"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": 14,
295 |    "metadata": {},
296 |    "outputs": [],
297 |    "source": [
298 |     "article.download()\n",
299 |     "article.parse()"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "markdown",
304 |    "metadata": {},
305 |    "source": [
306 |     "##### Download the ‘punkt’ package"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": 15,
312 |    "metadata": {},
313 |    "outputs": [
314 |     {
315 |      "name": "stderr",
316 |      "output_type": "stream",
317 |      "text": [
318 |       "[nltk_data] Downloading package punkt to\n",
319 |       "[nltk_data]     C:\\Users\\arock.000\\AppData\\Roaming\\nltk_data...\n",
320 |       "[nltk_data]   Package punkt is already up-to-date!\n"
321 |      ]
322 |     },
323 |     {
324 |      "data": {
325 |       "text/plain": [
326 |        "True"
327 |       ]
328 |      },
329 |      "execution_count": 15,
330 |      "metadata": {},
331 |      "output_type": "execute_result"
332 |     }
333 |    ],
334 |    "source": [
335 |     "nltk.download('punkt')"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "markdown",
340 |    "metadata": {},
341 |    "source": [
342 |     "##### Implement Natural Language Processing (NLP)"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": 16,
348 |    "metadata": {},
349 |    "outputs": [],
350 |    "source": [
351 |     "article.nlp()"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "metadata": {},
357 |    "source": [
358 |     "##### Creating a variable and storing the article's text in it"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": 17,
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": [
367 |     "art_text = article.text"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "markdown",
372 |    "metadata": {},
373 |    "source": [
374 |     "##### Language selection"
375 |    ]
376 |   },
377 |   {
378 |    "cell_type": "code",
379 |    "execution_count": 18,
380 |    "metadata": {},
381 |    "outputs": [],
382 |    "source": [
383 |     "language = 'en' #English"
384 |    ]
385 |   },
386 |   {
387 |    "cell_type": "markdown",
388 |    "metadata": {},
389 |    "source": [
390 |     "##### Passing the text file to the module"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 19,
396 |    "metadata": {},
397 |    "outputs": [],
398 |    "source": [
399 |     "aiarticle = gTTS(text=art_text, lang=language, slow=False)"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": 20,
405 |    "metadata": {},
406 |    "outputs": [],
407 |    "source": [
408 |     "aiarticle .save(\"ai_article.mp3\")"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "code",
413 |    "execution_count": 21,
414 |    "metadata": {},
415 |    "outputs": [
416 |     {
417 |      "data": {
418 |       "text/plain": [
419 |        "0"
420 |       ]
421 |      },
422 |      "execution_count": 21,
423 |      "metadata": {},
424 |      "output_type": "execute_result"
425 |     }
426 |    ],
427 |    "source": [
428 |     "# Playing the converted file\n",
429 |     "os.system(\"start ai_article.mp3\")"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "[Reference](https://medium.com/@randerson112358/build-a-text-to-speech-program-using-python-b70de7105383)"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "metadata": {},
442 |    "source": [
443 |     "# Speech to Text"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "markdown",
448 |    "metadata": {},
449 |    "source": [
450 |     "[Speech Recognition](https://en.wikipedia.org/wiki/Speech_recognition)"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "markdown",
455 |    "metadata": {},
456 |    "source": [
457 |     "#### Libraries to install\n",
458 |     "\n",
459 |     " - `!pip install speechrecognition`\n",
460 |     " - `!pip install pyttsx3`\n",
461 |     " - `!pip install pipwin`\n",
462 |     " - `!pipwin install pyaudio`\n",
463 |     "\n",
464 |     "- [Documentation for SpeechRecognition Library](https://pypi.org/project/SpeechRecognition)\n",
465 |     " \n",
466 |     "##### Errors\n",
467 |     "- [Pyaudio Installation Errors](https://stackoverflow.com/questions/53866104/pyaudio-failed-to-install-windows-10)"
468 |    ]
469 |   },
470 |   {
471 |    "cell_type": "code",
472 |    "execution_count": 22,
473 |    "metadata": {},
474 |    "outputs": [
475 |     {
476 |      "name": "stdout",
477 |      "output_type": "stream",
478 |      "text": [
479 |       "Talk\n",
480 |       "Time over, thanks\n",
481 |       "Text: speech recognition system basically translate spoken languages into text one of the classic example is Apple Siri thank you\n"
482 |      ]
483 |     }
484 |    ],
485 |    "source": [
486 |     "#import library\n",
487 |     "\n",
488 |     "import speech_recognition as sr\n",
489 |     "\n",
490 |     "# Initialize recognizer to recognize the speech\n",
491 |     "\n",
492 |     "r = sr.Recognizer()\n",
493 |     "\n",
494 |     "# Reading from the microphone, listening, and storing the audio in a variable\n",
495 |     "\n",
496 |     "with sr.Microphone() as source:\n",
497 |     "    print(\"Talk\")\n",
498 |     "    audio_text = r.listen(source)\n",
499 |     "    print(\"Time over, thanks\")\n",
500 |     "# The recognize_*() methods raise a request error if the API is unreachable, hence the exception handling\n",
501 |     "    \n",
502 |     "    try:\n",
503 |     "        # using google speech recognition\n",
504 |     "        print(\"Text: \"+r.recognize_google(audio_text))\n",
505 |     "    except:\n",
506 |     "         print(\"Sorry, I did not get that\")"
507 |    ]
508 |   },
509 |   {
510 |    "cell_type": "markdown",
511 |    "metadata": {},
512 |    "source": [
513 |     "#### Reference:\n",
514 |     "- [Speech to Text](https://www.geeksforgeeks.org/python-convert-speech-to-text-and-text-to-speech/)\n",
515 |     "- [Google Language Support](https://cloud.google.com/speech-to-text/docs/languages)\n",
516 |     "- [More](https://stackabuse.com/introduction-to-speech-recognition-with-python/)"
517 |    ]
518 |   },
519 |   {
520 |    "cell_type": "code",
521 |    "execution_count": null,
522 |    "metadata": {},
523 |    "outputs": [],
524 |    "source": []
525 |   }
526 |  ],
527 |  "metadata": {
528 |   "kernelspec": {
529 |    "display_name": "Python 3",
530 |    "language": "python",
531 |    "name": "python3"
532 |   },
533 |   "language_info": {
534 |    "codemirror_mode": {
535 |     "name": "ipython",
536 |     "version": 3
537 |    },
538 |    "file_extension": ".py",
539 |    "mimetype": "text/x-python",
540 |    "name": "python",
541 |    "nbconvert_exporter": "python",
542 |    "pygments_lexer": "ipython3",
543 |    "version": "3.7.4"
544 |   }
545 |  },
546 |  "nbformat": 4,
547 |  "nbformat_minor": 2
548 | }
549 | 


--------------------------------------------------------------------------------