├── 0. datasets link.txt ├── 2.1. Google_Colaboratory_Basics.ipynb ├── 2.2. Python_Basics.ipynb ├── 2.3. Basic_Data_Types_in_Python.ipynb ├── 2.4. List_Tuple_Set_Dictionary.ipynb ├── 2.5. Operators_in_Python.ipynb ├── 2.6. if_else_statement_in_Python.ipynb ├── 2.7. Loops_in_Python.ipynb ├── 2.8. Functions_in_Python.ipynb ├── 3.1.Complete_Numpy_Tutorial_in_Python.ipynb ├── 3.2. Complete_Pandas_Tutorial_in_Python.ipynb ├── 3.3. Matplotlib_Tutorial_in_Python.ipynb ├── 3.4. Seaborn_Tutorial_in_Python.ipynb ├── 4.10. Text_Data_Pre_Processing_Use_Case.ipynb ├── 4.2. Importing_Datasets_through_Kaggle_API.ipynb ├── 4.3. Handling_Missing_Values.ipynb ├── 4.4. Data_Standardization.ipynb ├── 4.5. Label_Encoding.ipynb ├── 4.6. Train_Test_Split.ipynb ├── 4.7. Handling_imbalanced_Dataset.ipynb ├── 4.8. Feature_extraction_of_Text_data_using_Tf_idf_Vectorizer.ipynb ├── 4.9. Numerical_Dataset_Pre_Processing_Use_Case.ipynb ├── ML Use Case 1. Rock_vs_Mine_Prediction.ipynb ├── ML Use Case 3. Spam_Mail_Prediction_using_Machine_Learning.ipynb └── ML Use case 2. Diabetes_Prediction.ipynb /0. datasets link.txt: -------------------------------------------------------------------------------- 1 | All Datasets link: https://drive.google.com/drive/folders/1NEs0rpFelfzSWAJ6y832EDpW9ImQH4QJ?usp=sharing 2 | -------------------------------------------------------------------------------- /2.1. 
Google_Colaboratory_Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "5vIKHkjTlaOT" 21 | }, 22 | "source": [ 23 | "System Specifications" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "metadata": { 29 | "colab": { 30 | "base_uri": "https://localhost:8080/" 31 | }, 32 | "id": "TyiPSq1olCpS", 33 | "outputId": "98abecda-ce70-4bc1-aec6-b6138433c2b7" 34 | }, 35 | "source": [ 36 | "!cat /proc/cpuinfo" 37 | ], 38 | "execution_count": null, 39 | "outputs": [ 40 | { 41 | "output_type": "stream", 42 | "text": [ 43 | "processor\t: 0\n", 44 | "vendor_id\t: GenuineIntel\n", 45 | "cpu family\t: 6\n", 46 | "model\t\t: 63\n", 47 | "model name\t: Intel(R) Xeon(R) CPU @ 2.30GHz\n", 48 | "stepping\t: 0\n", 49 | "microcode\t: 0x1\n", 50 | "cpu MHz\t\t: 2299.998\n", 51 | "cache size\t: 46080 KB\n", 52 | "physical id\t: 0\n", 53 | "siblings\t: 2\n", 54 | "core id\t\t: 0\n", 55 | "cpu cores\t: 1\n", 56 | "apicid\t\t: 0\n", 57 | "initial apicid\t: 0\n", 58 | "fpu\t\t: yes\n", 59 | "fpu_exception\t: yes\n", 60 | "cpuid level\t: 13\n", 61 | "wp\t\t: yes\n", 62 | "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities\n", 63 | "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs\n", 
64 | "bogomips\t: 4599.99\n", 65 | "clflush size\t: 64\n", 66 | "cache_alignment\t: 64\n", 67 | "address sizes\t: 46 bits physical, 48 bits virtual\n", 68 | "power management:\n", 69 | "\n", 70 | "processor\t: 1\n", 71 | "vendor_id\t: GenuineIntel\n", 72 | "cpu family\t: 6\n", 73 | "model\t\t: 63\n", 74 | "model name\t: Intel(R) Xeon(R) CPU @ 2.30GHz\n", 75 | "stepping\t: 0\n", 76 | "microcode\t: 0x1\n", 77 | "cpu MHz\t\t: 2299.998\n", 78 | "cache size\t: 46080 KB\n", 79 | "physical id\t: 0\n", 80 | "siblings\t: 2\n", 81 | "core id\t\t: 0\n", 82 | "cpu cores\t: 1\n", 83 | "apicid\t\t: 1\n", 84 | "initial apicid\t: 1\n", 85 | "fpu\t\t: yes\n", 86 | "fpu_exception\t: yes\n", 87 | "cpuid level\t: 13\n", 88 | "wp\t\t: yes\n", 89 | "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities\n", 90 | "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs\n", 91 | "bogomips\t: 4599.99\n", 92 | "clflush size\t: 64\n", 93 | "cache_alignment\t: 64\n", 94 | "address sizes\t: 46 bits physical, 48 bits virtual\n", 95 | "power management:\n", 96 | "\n" 97 | ], 98 | "name": "stdout" 99 | } 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "colab": { 106 | "base_uri": "https://localhost:8080/" 107 | }, 108 | "id": "0Zun3WHplom1", 109 | "outputId": "bfb6719c-9520-4db4-ae2f-59a71f0f0f8e" 110 | }, 111 | "source": [ 112 | "!cat /proc/meminfo" 113 | ], 114 | "execution_count": null, 115 | "outputs": [ 116 | { 117 | "output_type": "stream", 118 | "text": [ 119 | "MemTotal: 13305332 kB\n", 120 | "MemFree: 10739068 kB\n", 121 | "MemAvailable: 
12518448 kB\n", 122 | "Buffers: 83580 kB\n", 123 | "Cached: 1841060 kB\n", 124 | "SwapCached: 0 kB\n", 125 | "Active: 984448 kB\n", 126 | "Inactive: 1350192 kB\n", 127 | "Active(anon): 382636 kB\n", 128 | "Inactive(anon): 416 kB\n", 129 | "Active(file): 601812 kB\n", 130 | "Inactive(file): 1349776 kB\n", 131 | "Unevictable: 0 kB\n", 132 | "Mlocked: 0 kB\n", 133 | "SwapTotal: 0 kB\n", 134 | "SwapFree: 0 kB\n", 135 | "Dirty: 760 kB\n", 136 | "Writeback: 0 kB\n", 137 | "AnonPages: 410028 kB\n", 138 | "Mapped: 226724 kB\n", 139 | "Shmem: 1144 kB\n", 140 | "KReclaimable: 138524 kB\n", 141 | "Slab: 182956 kB\n", 142 | "SReclaimable: 138524 kB\n", 143 | "SUnreclaim: 44432 kB\n", 144 | "KernelStack: 4320 kB\n", 145 | "PageTables: 5524 kB\n", 146 | "NFS_Unstable: 0 kB\n", 147 | "Bounce: 0 kB\n", 148 | "WritebackTmp: 0 kB\n", 149 | "CommitLimit: 6652664 kB\n", 150 | "Committed_AS: 3161348 kB\n", 151 | "VmallocTotal: 34359738367 kB\n", 152 | "VmallocUsed: 6668 kB\n", 153 | "VmallocChunk: 0 kB\n", 154 | "Percpu: 1400 kB\n", 155 | "AnonHugePages: 0 kB\n", 156 | "ShmemHugePages: 0 kB\n", 157 | "ShmemPmdMapped: 0 kB\n", 158 | "FileHugePages: 0 kB\n", 159 | "FilePmdMapped: 0 kB\n", 160 | "HugePages_Total: 0\n", 161 | "HugePages_Free: 0\n", 162 | "HugePages_Rsvd: 0\n", 163 | "HugePages_Surp: 0\n", 164 | "Hugepagesize: 2048 kB\n", 165 | "Hugetlb: 0 kB\n", 166 | "DirectMap4k: 89288 kB\n", 167 | "DirectMap2M: 5152768 kB\n", 168 | "DirectMap1G: 10485760 kB\n" 169 | ], 170 | "name": "stdout" 171 | } 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": { 177 | "id": "dTDRst8jmOLk" 178 | }, 179 | "source": [ 180 | "Installing Libraries" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/" 188 | }, 189 | "id": "X1CUZFiolw_0", 190 | "outputId": "8a60e585-2168-4a6b-a0c1-772eac5826f7" 191 | }, 192 | "source": [ 193 | "!pip install pandas" 194 | ], 195 | "execution_count": null, 196 | 
"outputs": [ 197 | { 198 | "output_type": "stream", 199 | "text": [ 200 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (1.1.5)\n", 201 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas) (2018.9)\n", 202 | "Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas) (1.19.5)\n", 203 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2.8.1)\n", 204 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n" 205 | ], 206 | "name": "stdout" 207 | } 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "metadata": { 213 | "id": "l0l4wa_amYId" 214 | }, 215 | "source": [ 216 | "import pandas as pd" 217 | ], 218 | "execution_count": null, 219 | "outputs": [] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "metadata": { 224 | "id": "vxc28ALHmenl" 225 | }, 226 | "source": [ 227 | "df = pd.read_csv('/content/BostonHousing.csv')" 228 | ], 229 | "execution_count": null, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "metadata": { 235 | "colab": { 236 | "base_uri": "https://localhost:8080/", 237 | "height": 196 238 | }, 239 | "id": "A2ohNqWimrI7", 240 | "outputId": "5df5156d-40a5-49cb-ee63-a8755d4c376b" 241 | }, 242 | "source": [ 243 | "df.head()" 244 | ], 245 | "execution_count": null, 246 | "outputs": [ 247 | { 248 | "output_type": "execute_result", 249 | "data": { 250 | "text/html": [ 251 | "
\n", 252 | "\n", 265 | "\n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | "
crimzninduschasnoxrmagedisradtaxptratioblstatprice
00.0063218.02.3100.5386.57565.24.0900129615.3396.904.9824.0
10.027310.07.0700.4696.42178.94.9671224217.8396.909.1421.6
20.027290.07.0700.4697.18561.14.9671224217.8392.834.0334.7
30.032370.02.1800.4586.99845.86.0622322218.7394.632.9433.4
40.069050.02.1800.4587.14754.26.0622322218.7396.905.3336.2
\n", 373 | "
" 374 | ], 375 | "text/plain": [ 376 | " crim zn indus chas nox ... tax ptratio b lstat price\n", 377 | "0 0.00632 18.0 2.31 0 0.538 ... 296 15.3 396.90 4.98 24.0\n", 378 | "1 0.02731 0.0 7.07 0 0.469 ... 242 17.8 396.90 9.14 21.6\n", 379 | "2 0.02729 0.0 7.07 0 0.469 ... 242 17.8 392.83 4.03 34.7\n", 380 | "3 0.03237 0.0 2.18 0 0.458 ... 222 18.7 394.63 2.94 33.4\n", 381 | "4 0.06905 0.0 2.18 0 0.458 ... 222 18.7 396.90 5.33 36.2\n", 382 | "\n", 383 | "[5 rows x 14 columns]" 384 | ] 385 | }, 386 | "metadata": { 387 | "tags": [] 388 | }, 389 | "execution_count": 6 390 | } 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "metadata": { 396 | "colab": { 397 | "base_uri": "https://localhost:8080/" 398 | }, 399 | "id": "wN1PzbbKmu4i", 400 | "outputId": "963c27ce-c37c-4315-f122-3af59a3a414b" 401 | }, 402 | "source": [ 403 | "print('Machine Learning')" 404 | ], 405 | "execution_count": null, 406 | "outputs": [ 407 | { 408 | "output_type": "stream", 409 | "text": [ 410 | "Machine Learning\n" 411 | ], 412 | "name": "stdout" 413 | } 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "metadata": { 419 | "colab": { 420 | "base_uri": "https://localhost:8080/" 421 | }, 422 | "id": "j6NenuZfm2if", 423 | "outputId": "585f53e5-faa6-405b-b75b-c22bbebb13e2" 424 | }, 425 | "source": [ 426 | "!ls" 427 | ], 428 | "execution_count": null, 429 | "outputs": [ 430 | { 431 | "output_type": "stream", 432 | "text": [ 433 | "BostonHousing.csv sample_data\n" 434 | ], 435 | "name": "stdout" 436 | } 437 | ] 438 | } 439 | ] 440 | } -------------------------------------------------------------------------------- /2.2. 
Python_Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [ 8 | "-WvGy_yEMZqy", 9 | "VoF-mwOrNd47", 10 | "TF5JGYWwOCZ9", 11 | "0Z9JChvaPpsm" 12 | ] 13 | }, 14 | "kernelspec": { 15 | "name": "python3", 16 | "display_name": "Python 3" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "Osmh3yTEEv9q" 24 | }, 25 | "source": [ 26 | "Programming Languages used for Machine Learning:\n", 27 | "\n", 28 | "\n", 29 | "1. Python\n", 30 | "2. R\n", 31 | "\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "-WvGy_yEMZqy" 38 | }, 39 | "source": [ 40 | "#print function" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "GtMDf70aAf6y", 47 | "colab": { 48 | "base_uri": "https://localhost:8080/" 49 | }, 50 | "outputId": "2490e348-2413-4793-94fc-6b59a3d16b6d" 51 | }, 52 | "source": [ 53 | "print(\"Machine Learning\")" 54 | ], 55 | "execution_count": null, 56 | "outputs": [ 57 | { 58 | "output_type": "stream", 59 | "text": [ 60 | "Machine Learning\n" 61 | ], 62 | "name": "stdout" 63 | } 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "colab": { 70 | "base_uri": "https://localhost:8080/" 71 | }, 72 | "id": "jUmCKDnfMwQI", 73 | "outputId": "a4fdd49f-7662-4fed-87f6-9a447485b8c3" 74 | }, 75 | "source": [ 76 | "print(\"Machine Learning\" + \" Projects\")" 77 | ], 78 | "execution_count": null, 79 | "outputs": [ 80 | { 81 | "output_type": "stream", 82 | "text": [ 83 | "Machine Learning Projects\n" 84 | ], 85 | "name": "stdout" 86 | } 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "colab": { 93 | "base_uri": "https://localhost:8080/" 94 | }, 95 | "id": "cALbaqqGNBHZ", 96 | "outputId": "dd9ff49b-cc05-419d-8b5b-4fe0c2389de3" 97 | }, 98 | "source": [ 99 | "print(8)" 100 
| ], 101 | "execution_count": null, 102 | "outputs": [ 103 | { 104 | "output_type": "stream", 105 | "text": [ 106 | "8\n" 107 | ], 108 | "name": "stdout" 109 | } 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/" 117 | }, 118 | "id": "VIIKdqyhNJct", 119 | "outputId": "363e9cc6-6a7a-4c14-ffde-3550abd9cb64" 120 | }, 121 | "source": [ 122 | "print(8+3)" 123 | ], 124 | "execution_count": null, 125 | "outputs": [ 126 | { 127 | "output_type": "stream", 128 | "text": [ 129 | "11\n" 130 | ], 131 | "name": "stdout" 132 | } 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "id": "VoF-mwOrNd47" 139 | }, 140 | "source": [ 141 | "#Basic Data types:\n", 142 | "\n", 143 | "\n", 144 | "1. int\n", 145 | "2. float\n", 146 | "3. str\n", 147 | "\n", 148 | "\n", 149 | "\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "metadata": { 155 | "colab": { 156 | "base_uri": "https://localhost:8080/" 157 | }, 158 | "id": "c2C2CWT2NpAD", 159 | "outputId": "693ca04f-08c3-485f-dfe4-b81611cc7d12" 160 | }, 161 | "source": [ 162 | "type(8)" 163 | ], 164 | "execution_count": null, 165 | "outputs": [ 166 | { 167 | "output_type": "execute_result", 168 | "data": { 169 | "text/plain": [ 170 | "int" 171 | ] 172 | }, 173 | "metadata": { 174 | "tags": [] 175 | }, 176 | "execution_count": 10 177 | } 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "colab": { 184 | "base_uri": "https://localhost:8080/" 185 | }, 186 | "id": "ruZzAZODN171", 187 | "outputId": "94a876c3-3bcd-4bc1-85a2-5b10be2d9646" 188 | }, 189 | "source": [ 190 | "type(5.3)" 191 | ], 192 | "execution_count": null, 193 | "outputs": [ 194 | { 195 | "output_type": "execute_result", 196 | "data": { 197 | "text/plain": [ 198 | "float" 199 | ] 200 | }, 201 | "metadata": { 202 | "tags": [] 203 | }, 204 | "execution_count": 11 205 | } 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | 
"metadata": { 211 | "colab": { 212 | "base_uri": "https://localhost:8080/" 213 | }, 214 | "id": "UM8_vIwfN4dn", 215 | "outputId": "a58c3967-088c-4b38-c280-dce8831cc6aa" 216 | }, 217 | "source": [ 218 | "type(\"english\")" 219 | ], 220 | "execution_count": null, 221 | "outputs": [ 222 | { 223 | "output_type": "execute_result", 224 | "data": { 225 | "text/plain": [ 226 | "str" 227 | ] 228 | }, 229 | "metadata": { 230 | "tags": [] 231 | }, 232 | "execution_count": 12 233 | } 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": { 239 | "id": "TF5JGYWwOCZ9" 240 | }, 241 | "source": [ 242 | "#Constants & Variables" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "metadata": { 248 | "colab": { 249 | "base_uri": "https://localhost:8080/" 250 | }, 251 | "id": "ln8ikHIZN8mZ", 252 | "outputId": "79f115f1-fe00-4124-c487-7f24919d75e7" 253 | }, 254 | "source": [ 255 | "marvel_super_hero = \"Iron Man\"\n", 256 | "print(marvel_super_hero)" 257 | ], 258 | "execution_count": null, 259 | "outputs": [ 260 | { 261 | "output_type": "stream", 262 | "text": [ 263 | "Iron Man\n" 264 | ], 265 | "name": "stdout" 266 | } 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "metadata": { 272 | "colab": { 273 | "base_uri": "https://localhost:8080/" 274 | }, 275 | "id": "2MT4Km5gOecW", 276 | "outputId": "586a1876-c3a0-4fbb-c05b-2f7ac065fc9e" 277 | }, 278 | "source": [ 279 | "marvel_super_hero = \"Captain America\"\n", 280 | "print(marvel_super_hero)" 281 | ], 282 | "execution_count": null, 283 | "outputs": [ 284 | { 285 | "output_type": "stream", 286 | "text": [ 287 | "Captain America\n" 288 | ], 289 | "name": "stdout" 290 | } 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "metadata": { 296 | "colab": { 297 | "base_uri": "https://localhost:8080/" 298 | }, 299 | "id": "26ZG7J36O1Gm", 300 | "outputId": "4a1473ac-f3ec-4b72-fce0-584e3bd2c5d6" 301 | }, 302 | "source": [ 303 | "hero1 , hero2 , hero3 = \"Iron Man\" , \"Captain America\" , \"Bat Man\"\n", 
304 | "print(hero1)\n", 305 | "print(hero2)\n", 306 | "print(hero3)" 307 | ], 308 | "execution_count": null, 309 | "outputs": [ 310 | { 311 | "output_type": "stream", 312 | "text": [ 313 | "Iron Man\n", 314 | "Captain America\n", 315 | "Bat Man\n" 316 | ], 317 | "name": "stdout" 318 | } 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "metadata": { 324 | "colab": { 325 | "base_uri": "https://localhost:8080/" 326 | }, 327 | "id": "Y9nqEMNPPM26", 328 | "outputId": "283f2b35-9761-4baa-fd9b-75842416d2a3" 329 | }, 330 | "source": [ 331 | "x = y = z = 23\n", 332 | "print(x)\n", 333 | "print(y)\n", 334 | "print(z)" 335 | ], 336 | "execution_count": null, 337 | "outputs": [ 338 | { 339 | "output_type": "stream", 340 | "text": [ 341 | "23\n", 342 | "23\n", 343 | "23\n" 344 | ], 345 | "name": "stdout" 346 | } 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": { 352 | "id": "0Z9JChvaPpsm" 353 | }, 354 | "source": [ 355 | "# input Function" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "metadata": { 361 | "colab": { 362 | "base_uri": "https://localhost:8080/" 363 | }, 364 | "id": "roZJU-IXPrvk", 365 | "outputId": "44179667-ddfc-4d42-bf5b-606397aea2ee" 366 | }, 367 | "source": [ 368 | "number_1 = int(input(\"Enter the first number : \"))\n", 369 | "number_2 = int(input(\"Enter the second number : \"))\n", 370 | "\n", 371 | "sum = number_1 + number_2\n", 372 | "print(sum)" 373 | ], 374 | "execution_count": null, 375 | "outputs": [ 376 | { 377 | "output_type": "stream", 378 | "text": [ 379 | "Enter the first number : 23\n", 380 | "Enter the second number : 23\n", 381 | "46\n" 382 | ], 383 | "name": "stdout" 384 | } 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "metadata": { 390 | "colab": { 391 | "base_uri": "https://localhost:8080/" 392 | }, 393 | "id": "HKcz9StaP-PF", 394 | "outputId": "001eeff8-e36b-4286-a12a-e26e54ab7ed9" 395 | }, 396 | "source": [ 397 | "# changing the data type in python:\n", 398 | "num = 5\n", 399 | 
"print(float(num))\n" 400 | ], 401 | "execution_count": null, 402 | "outputs": [ 403 | { 404 | "output_type": "stream", 405 | "text": [ 406 | "5.0\n" 407 | ], 408 | "name": "stdout" 409 | } 410 | ] 411 | } 412 | ] 413 | } -------------------------------------------------------------------------------- /2.3. Basic_Data_Types_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "apWclQaRyr9i" 21 | }, 22 | "source": [ 23 | "Basic Data Types in Python:\n", 24 | "1. Integer\n", 25 | "2. Floating Point\n", 26 | "3. Complex\n", 27 | "4. Boolean\n", 28 | "5. String" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/" 36 | }, 37 | "id": "uQkDMpadyPlK", 38 | "outputId": "c17f5897-256e-46cc-c418-6b92928ce0b1" 39 | }, 40 | "source": [ 41 | "# integers\n", 42 | "a = 8\n", 43 | "print(a)" 44 | ], 45 | "execution_count": null, 46 | "outputs": [ 47 | { 48 | "output_type": "stream", 49 | "text": [ 50 | "8\n" 51 | ], 52 | "name": "stdout" 53 | } 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "metadata": { 59 | "colab": { 60 | "base_uri": "https://localhost:8080/" 61 | }, 62 | "id": "ckHsQXyk0eel", 63 | "outputId": "f7aa3afe-16f9-4e72-a669-8d0fb0524b62" 64 | }, 65 | "source": [ 66 | "type(a)" 67 | ], 68 | "execution_count": null, 69 | "outputs": [ 70 | { 71 | "output_type": "execute_result", 72 | "data": { 73 | "text/plain": [ 74 | "int" 75 | ] 76 | }, 77 | "metadata": { 78 | "tags": [] 79 | }, 80 | "execution_count": 2 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "colab": { 88 | "base_uri": 
"https://localhost:8080/" 89 | }, 90 | "id": "xVOI4yFx0mhC", 91 | "outputId": "db02e23c-dd5f-4e71-80ba-3a8148a6cd74" 92 | }, 93 | "source": [ 94 | "# floating point\n", 95 | "b = 2.3\n", 96 | "print(b)\n", 97 | "type(b)" 98 | ], 99 | "execution_count": null, 100 | "outputs": [ 101 | { 102 | "output_type": "stream", 103 | "text": [ 104 | "2.3\n" 105 | ], 106 | "name": "stdout" 107 | }, 108 | { 109 | "output_type": "execute_result", 110 | "data": { 111 | "text/plain": [ 112 | "float" 113 | ] 114 | }, 115 | "metadata": { 116 | "tags": [] 117 | }, 118 | "execution_count": 3 119 | } 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "metadata": { 125 | "colab": { 126 | "base_uri": "https://localhost:8080/" 127 | }, 128 | "id": "WzWhjFIE0vkS", 129 | "outputId": "73623a3d-f2b4-4003-a7c9-4bdaa7d9a1ac" 130 | }, 131 | "source": [ 132 | "# complex numbers\n", 133 | "c = 1 + 3j\n", 134 | "print(c)\n", 135 | "type(c)" 136 | ], 137 | "execution_count": null, 138 | "outputs": [ 139 | { 140 | "output_type": "stream", 141 | "text": [ 142 | "(1+3j)\n" 143 | ], 144 | "name": "stdout" 145 | }, 146 | { 147 | "output_type": "execute_result", 148 | "data": { 149 | "text/plain": [ 150 | "complex" 151 | ] 152 | }, 153 | "metadata": { 154 | "tags": [] 155 | }, 156 | "execution_count": 4 157 | } 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": { 163 | "id": "71h0zZTz1Cl7" 164 | }, 165 | "source": [ 166 | "Conversion of one data type to another" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "metadata": { 172 | "colab": { 173 | "base_uri": "https://localhost:8080/" 174 | }, 175 | "id": "gx311AYb09AG", 176 | "outputId": "247fa75c-199c-4f28-cd39-280d974a8fef" 177 | }, 178 | "source": [ 179 | "# int to float\n", 180 | "x = 10\n", 181 | "print(x)\n", 182 | "type(x)" 183 | ], 184 | "execution_count": null, 185 | "outputs": [ 186 | { 187 | "output_type": "stream", 188 | "text": [ 189 | "10\n" 190 | ], 191 | "name": "stdout" 192 | }, 193 | { 194 | 
"output_type": "execute_result", 195 | "data": { 196 | "text/plain": [ 197 | "int" 198 | ] 199 | }, 200 | "metadata": { 201 | "tags": [] 202 | }, 203 | "execution_count": 5 204 | } 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "metadata": { 210 | "colab": { 211 | "base_uri": "https://localhost:8080/" 212 | }, 213 | "id": "w_XTYMqg1MWk", 214 | "outputId": "016aba8f-9d9e-43d9-8a5e-f9f0c07d144c" 215 | }, 216 | "source": [ 217 | "y = float(x)\n", 218 | "print(y)\n", 219 | "type(y)" 220 | ], 221 | "execution_count": null, 222 | "outputs": [ 223 | { 224 | "output_type": "stream", 225 | "text": [ 226 | "10.0\n" 227 | ], 228 | "name": "stdout" 229 | }, 230 | { 231 | "output_type": "execute_result", 232 | "data": { 233 | "text/plain": [ 234 | "float" 235 | ] 236 | }, 237 | "metadata": { 238 | "tags": [] 239 | }, 240 | "execution_count": 6 241 | } 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "metadata": { 247 | "colab": { 248 | "base_uri": "https://localhost:8080/" 249 | }, 250 | "id": "hE3F0HUx1XnD", 251 | "outputId": "3483c6b6-c42c-47a8-ad5f-b0b2daf04d15" 252 | }, 253 | "source": [ 254 | "# float to int\n", 255 | "x = 5.88\n", 256 | "print(x)\n", 257 | "type(x)" 258 | ], 259 | "execution_count": null, 260 | "outputs": [ 261 | { 262 | "output_type": "stream", 263 | "text": [ 264 | "5.88\n" 265 | ], 266 | "name": "stdout" 267 | }, 268 | { 269 | "output_type": "execute_result", 270 | "data": { 271 | "text/plain": [ 272 | "float" 273 | ] 274 | }, 275 | "metadata": { 276 | "tags": [] 277 | }, 278 | "execution_count": 7 279 | } 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "colab": { 286 | "base_uri": "https://localhost:8080/" 287 | }, 288 | "id": "7DQnV6kO1jG1", 289 | "outputId": "88458029-b5de-4291-d443-20f5cce74ff9" 290 | }, 291 | "source": [ 292 | "y = int(x)\n", 293 | "print(y)\n", 294 | "type(y)" 295 | ], 296 | "execution_count": null, 297 | "outputs": [ 298 | { 299 | "output_type": "stream", 300 | "text": [ 301 | 
"5\n" 302 | ], 303 | "name": "stdout" 304 | }, 305 | { 306 | "output_type": "execute_result", 307 | "data": { 308 | "text/plain": [ 309 | "int" 310 | ] 311 | }, 312 | "metadata": { 313 | "tags": [] 314 | }, 315 | "execution_count": 8 316 | } 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": { 322 | "id": "XkbofyMg103l" 323 | }, 324 | "source": [ 325 | "Boolean" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": { 331 | "id": "-ZIfAmQ9121L" 332 | }, 333 | "source": [ 334 | "1. True\n", 335 | "2. False" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "metadata": { 341 | "colab": { 342 | "base_uri": "https://localhost:8080/" 343 | }, 344 | "id": "t_qqKUR91qEX", 345 | "outputId": "da4017f7-65b3-4832-f182-78032f939863" 346 | }, 347 | "source": [ 348 | "a = True\n", 349 | "print(a)\n", 350 | "type(a)" 351 | ], 352 | "execution_count": null, 353 | "outputs": [ 354 | { 355 | "output_type": "stream", 356 | "text": [ 357 | "True\n" 358 | ], 359 | "name": "stdout" 360 | }, 361 | { 362 | "output_type": "execute_result", 363 | "data": { 364 | "text/plain": [ 365 | "bool" 366 | ] 367 | }, 368 | "metadata": { 369 | "tags": [] 370 | }, 371 | "execution_count": 9 372 | } 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "metadata": { 378 | "colab": { 379 | "base_uri": "https://localhost:8080/" 380 | }, 381 | "id": "43oxnDeO2BLR", 382 | "outputId": "0914b00f-efcf-4ab0-e357-420e5a9c91f0" 383 | }, 384 | "source": [ 385 | "b = False\n", 386 | "print(b)\n", 387 | "type(b)" 388 | ], 389 | "execution_count": null, 390 | "outputs": [ 391 | { 392 | "output_type": "stream", 393 | "text": [ 394 | "False\n" 395 | ], 396 | "name": "stdout" 397 | }, 398 | { 399 | "output_type": "execute_result", 400 | "data": { 401 | "text/plain": [ 402 | "bool" 403 | ] 404 | }, 405 | "metadata": { 406 | "tags": [] 407 | }, 408 | "execution_count": 10 409 | } 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "metadata": { 415 | "colab": { 
416 | "base_uri": "https://localhost:8080/" 417 | }, 418 | "id": "Sgjm0EoQ2GzH", 419 | "outputId": "cc029bff-4376-4ac6-d161-76a82352ea71" 420 | }, 421 | "source": [ 422 | "a = 7 < 3\n", 423 | "print(a)\n", 424 | "type(a)" 425 | ], 426 | "execution_count": null, 427 | "outputs": [ 428 | { 429 | "output_type": "stream", 430 | "text": [ 431 | "False\n" 432 | ], 433 | "name": "stdout" 434 | }, 435 | { 436 | "output_type": "execute_result", 437 | "data": { 438 | "text/plain": [ 439 | "bool" 440 | ] 441 | }, 442 | "metadata": { 443 | "tags": [] 444 | }, 445 | "execution_count": 12 446 | } 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "colab": { 453 | "base_uri": "https://localhost:8080/" 454 | }, 455 | "id": "dUfCMr5h2Vly", 456 | "outputId": "676f19ce-6b97-4ab5-d872-1c868a15d668" 457 | }, 458 | "source": [ 459 | "a = 7 > 3\n", 460 | "print(a)\n", 461 | "type(a)" 462 | ], 463 | "execution_count": null, 464 | "outputs": [ 465 | { 466 | "output_type": "stream", 467 | "text": [ 468 | "True\n" 469 | ], 470 | "name": "stdout" 471 | }, 472 | { 473 | "output_type": "execute_result", 474 | "data": { 475 | "text/plain": [ 476 | "bool" 477 | ] 478 | }, 479 | "metadata": { 480 | "tags": [] 481 | }, 482 | "execution_count": 13 483 | } 484 | ] 485 | }, 486 | { 487 | "cell_type": "markdown", 488 | "metadata": { 489 | "id": "duVUENIZ2jwq" 490 | }, 491 | "source": [ 492 | "String" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "metadata": { 498 | "colab": { 499 | "base_uri": "https://localhost:8080/" 500 | }, 501 | "id": "e_LEA7R52fUz", 502 | "outputId": "ccb4be4a-8b78-4395-9bd4-9e16322e5c5e" 503 | }, 504 | "source": [ 505 | "print(\"Machine Learning\")" 506 | ], 507 | "execution_count": null, 508 | "outputs": [ 509 | { 510 | "output_type": "stream", 511 | "text": [ 512 | "Machine Learning\n" 513 | ], 514 | "name": "stdout" 515 | } 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "metadata": { 521 | "colab": { 522 | "base_uri": 
"https://localhost:8080/" 523 | }, 524 | "id": "CSs8QHe32oP1", 525 | "outputId": "7ec108ea-42cd-49dd-d1bf-b17038632c60" 526 | }, 527 | "source": [ 528 | "print('Machine Learning')" 529 | ], 530 | "execution_count": null, 531 | "outputs": [ 532 | { 533 | "output_type": "stream", 534 | "text": [ 535 | "Machine Learning\n" 536 | ], 537 | "name": "stdout" 538 | } 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "metadata": { 544 | "colab": { 545 | "base_uri": "https://localhost:8080/" 546 | }, 547 | "id": "H3ye_W-52tJL", 548 | "outputId": "957d63a3-289d-4c9c-92a4-8c6d309fb868" 549 | }, 550 | "source": [ 551 | "my_string = \"Machine Learning\"\n", 552 | "print(my_string)\n", 553 | "type(my_string)" 554 | ], 555 | "execution_count": null, 556 | "outputs": [ 557 | { 558 | "output_type": "stream", 559 | "text": [ 560 | "Machine Learning\n" 561 | ], 562 | "name": "stdout" 563 | }, 564 | { 565 | "output_type": "execute_result", 566 | "data": { 567 | "text/plain": [ 568 | "str" 569 | ] 570 | }, 571 | "metadata": { 572 | "tags": [] 573 | }, 574 | "execution_count": 16 575 | } 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "metadata": { 581 | "colab": { 582 | "base_uri": "https://localhost:8080/" 583 | }, 584 | "id": "mZ9rRFBv29c6", 585 | "outputId": "53d15686-897e-4f86-94ac-971585df85b2" 586 | }, 587 | "source": [ 588 | "print(\"Hello\"*5)" 589 | ], 590 | "execution_count": null, 591 | "outputs": [ 592 | { 593 | "output_type": "stream", 594 | "text": [ 595 | "HelloHelloHelloHelloHello\n" 596 | ], 597 | "name": "stdout" 598 | } 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": { 604 | "id": "8uj-eOPM3LEX" 605 | }, 606 | "source": [ 607 | "Slicing" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "metadata": { 613 | "id": "mKF6ngiV3FiZ" 614 | }, 615 | "source": [ 616 | "my_string = \"Programming\"" 617 | ], 618 | "execution_count": null, 619 | "outputs": [] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "metadata": { 624 | 
"colab": { 625 | "base_uri": "https://localhost:8080/" 626 | }, 627 | "id": "Rj636qLe3QUi", 628 | "outputId": "dd0f7061-7411-42ff-b879-7185e1502775" 629 | }, 630 | "source": [ 631 | "print(my_string[1:5]) # values from index 1 to 5-1 will be sliced" 632 | ], 633 | "execution_count": null, 634 | "outputs": [ 635 | { 636 | "output_type": "stream", 637 | "text": [ 638 | "rogr\n" 639 | ], 640 | "name": "stdout" 641 | } 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "metadata": { 647 | "colab": { 648 | "base_uri": "https://localhost:8080/" 649 | }, 650 | "id": "SeLPwiOx3kri", 651 | "outputId": "7e6e534b-98aa-49d8-d7b5-bca91f4cf2cb" 652 | }, 653 | "source": [ 654 | "# step\n", 655 | "print(my_string[0:10:2])" 656 | ], 657 | "execution_count": null, 658 | "outputs": [ 659 | { 660 | "output_type": "stream", 661 | "text": [ 662 | "Pormi\n" 663 | ], 664 | "name": "stdout" 665 | } 666 | ] 667 | }, 668 | { 669 | "cell_type": "markdown", 670 | "metadata": { 671 | "id": "NxwxzlVV4HgP" 672 | }, 673 | "source": [ 674 | "String Concatenation" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "metadata": { 680 | "colab": { 681 | "base_uri": "https://localhost:8080/" 682 | }, 683 | "id": "TYqgwM5U390H", 684 | "outputId": "f1b883f4-3f34-4d8e-8b15-e44ffcfbd645" 685 | }, 686 | "source": [ 687 | "word_1 = 'Machine '\n", 688 | "word_2 = 'Learning'\n", 689 | "\n", 690 | "print(word_1+word_2)" 691 | ], 692 | "execution_count": null, 693 | "outputs": [ 694 | { 695 | "output_type": "stream", 696 | "text": [ 697 | "Machine Learning\n" 698 | ], 699 | "name": "stdout" 700 | } 701 | ] 702 | } 703 | ] 704 | } -------------------------------------------------------------------------------- /2.4. 
List_Tuple_Set_Dictionary.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "M6FYZhxWa-2t" 19 | }, 20 | "source": [ 21 | "Types of Objects in Python:\n", 22 | "1. Immutable Objects\n", 23 | "2. Mutable Objects" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "jtGu0nbsbDOW" 30 | }, 31 | "source": [ 32 | "Immutable Objects:\n", 33 | "1. int\n", 34 | "2. float\n", 35 | "3. string\n", 36 | "4. bool\n", 37 | "5. tuple" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "ekg3pl-kbFQR" 44 | }, 45 | "source": [ 46 | "Mutable Objects:\n", 47 | "1. List\n", 48 | "2. Set \n", 49 | "3. Dictionary" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "id": "yLbT5WmhfD1C" 56 | }, 57 | "source": [ 58 | "List" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "metadata": { 64 | "colab": { 65 | "base_uri": "https://localhost:8080/" 66 | }, 67 | "id": "UMGYv02qa8yY", 68 | "outputId": "ab9a8c00-da19-43c0-9f40-8909b5f36116" 69 | }, 70 | "source": [ 71 | "# list should be included in the square brackets\n", 72 | "my_list = [1,2,3,4,5]\n", 73 | "print(my_list)\n", 74 | "type(my_list)" 75 | ], 76 | "execution_count": null, 77 | "outputs": [ 78 | { 79 | "output_type": "stream", 80 | "text": [ 81 | "[1, 2, 3, 4, 5]\n" 82 | ], 83 | "name": "stdout" 84 | }, 85 | { 86 | "output_type": "execute_result", 87 | "data": { 88 | "text/plain": [ 89 | "list" 90 | ] 91 | }, 92 | "metadata": { 93 | "tags": [] 94 | }, 95 | "execution_count": 1 96 | } 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "metadata": { 102 | "colab": { 103 | "base_uri": 
"https://localhost:8080/" 104 | }, 105 | "id": "vNOsdYcOfZrM", 106 | "outputId": "57f8576e-16c0-4d2b-aea3-dfb1aa90c459" 107 | }, 108 | "source": [ 109 | "# lists can have multiple data types\n", 110 | "my_list = [2, 3, 1.8, 'English', True]\n", 111 | "print(my_list)" 112 | ], 113 | "execution_count": null, 114 | "outputs": [ 115 | { 116 | "output_type": "stream", 117 | "text": [ 118 | "[2, 3, 1.8, 'English', True]\n" 119 | ], 120 | "name": "stdout" 121 | } 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": { 127 | "id": "kR0CJMjefx3n" 128 | }, 129 | "source": [ 130 | "Lists are Mutable --> Changeable" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "metadata": { 136 | "colab": { 137 | "base_uri": "https://localhost:8080/" 138 | }, 139 | "id": "cDqAI-_tfvrD", 140 | "outputId": "2fb2c7bf-ee32-4324-da7e-2160337af74c" 141 | }, 142 | "source": [ 143 | "# add elements to a list\n", 144 | "my_list = [2, 3, 1.8, 'English', True]\n", 145 | "my_list.append(6)\n", 146 | "print(my_list)" 147 | ], 148 | "execution_count": null, 149 | "outputs": [ 150 | { 151 | "output_type": "stream", 152 | "text": [ 153 | "[2, 3, 1.8, 'English', True, 6]\n" 154 | ], 155 | "name": "stdout" 156 | } 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "colab": { 163 | "base_uri": "https://localhost:8080/" 164 | }, 165 | "id": "HGY-f8TxgEuB", 166 | "outputId": "9b8e0118-f59e-45d4-b5f8-e2640ccdeb31" 167 | }, 168 | "source": [ 169 | "# print elements of a list using their index\n", 170 | "print(my_list[0])\n", 171 | "print(my_list[2])" 172 | ], 173 | "execution_count": null, 174 | "outputs": [ 175 | { 176 | "output_type": "stream", 177 | "text": [ 178 | "2\n", 179 | "1.8\n" 180 | ], 181 | "name": "stdout" 182 | } 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "metadata": { 188 | "colab": { 189 | "base_uri": "https://localhost:8080/" 190 | }, 191 | "id": "SRJrejNegWyM", 192 | "outputId": "27e590f9-2120-4889-8847-3d16bc65b532" 193 | 
}, 194 | "source": [ 195 | "# lists allow duplicate values\n", 196 | "list_1 = [1,2,3,4,5,12,2,3]\n", 197 | "print(list_1)" 198 | ], 199 | "execution_count": null, 200 | "outputs": [ 201 | { 202 | "output_type": "stream", 203 | "text": [ 204 | "[1, 2, 3, 4, 5, 12, 2, 3]\n" 205 | ], 206 | "name": "stdout" 207 | } 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "metadata": { 213 | "colab": { 214 | "base_uri": "https://localhost:8080/" 215 | }, 216 | "id": "OhSx_qZkgk3R", 217 | "outputId": "361672c3-066f-403e-e1b3-e7fc9705f50a" 218 | }, 219 | "source": [ 220 | "print(len(list_1))" 221 | ], 222 | "execution_count": null, 223 | "outputs": [ 224 | { 225 | "output_type": "stream", 226 | "text": [ 227 | "8\n" 228 | ], 229 | "name": "stdout" 230 | } 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "metadata": { 236 | "colab": { 237 | "base_uri": "https://localhost:8080/" 238 | }, 239 | "id": "KoBSERrlgvn4", 240 | "outputId": "3ee49f45-da7b-4725-f266-e08db56c8f0f" 241 | }, 242 | "source": [ 243 | "# initiating an empty list\n", 244 | "list_2 = []\n", 245 | "print(list_2)" 246 | ], 247 | "execution_count": null, 248 | "outputs": [ 249 | { 250 | "output_type": "stream", 251 | "text": [ 252 | "[]\n" 253 | ], 254 | "name": "stdout" 255 | } 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "metadata": { 261 | "colab": { 262 | "base_uri": "https://localhost:8080/" 263 | }, 264 | "id": "-PJaFQltg4YM", 265 | "outputId": "5e84ec9a-b23f-4423-884c-6dfc5f6ba47a" 266 | }, 267 | "source": [ 268 | "list_2.append(5)\n", 269 | "print(list_2)" 270 | ], 271 | "execution_count": null, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "text": [ 276 | "[5]\n" 277 | ], 278 | "name": "stdout" 279 | } 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "colab": { 286 | "base_uri": "https://localhost:8080/" 287 | }, 288 | "id": "nU3FsBd6g-B2", 289 | "outputId": "5696a372-ca3b-4ee3-8b3c-6b775037588f" 290 | }, 291 | "source": [ 292 | "# 
delete an item in a list\n", 293 | "list_2 = [2, 3, 1.8, 'English', True, 6]\n", 294 | "print(list_2)\n", 295 | "\n", 296 | "del list_2[2]\n", 297 | "print(list_2)" 298 | ], 299 | "execution_count": null, 300 | "outputs": [ 301 | { 302 | "output_type": "stream", 303 | "text": [ 304 | "[2, 3, 1.8, 'English', True, 6]\n", 305 | "[2, 3, 'English', True, 6]\n" 306 | ], 307 | "name": "stdout" 308 | } 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "metadata": { 314 | "colab": { 315 | "base_uri": "https://localhost:8080/" 316 | }, 317 | "id": "XbQAGCA0hX3Q", 318 | "outputId": "68e91d96-56ad-4c7e-b7e2-653c247eceff" 319 | }, 320 | "source": [ 321 | "# join two lists\n", 322 | "list_3 = [1,2,3,4,5]\n", 323 | "list_4 = [6,7,8,9,10]\n", 324 | "\n", 325 | "list_5 = list_3 + list_4\n", 326 | "print(list_5)" 327 | ], 328 | "execution_count": null, 329 | "outputs": [ 330 | { 331 | "output_type": "stream", 332 | "text": [ 333 | "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n" 334 | ], 335 | "name": "stdout" 336 | } 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": { 342 | "id": "XoQULhPEhsSk" 343 | }, 344 | "source": [ 345 | "Tuple" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "metadata": { 351 | "colab": { 352 | "base_uri": "https://localhost:8080/" 353 | }, 354 | "id": "VSTos6r1hoAo", 355 | "outputId": "b5c78c19-51bb-4732-cdef-7aeb0ff0683b" 356 | }, 357 | "source": [ 358 | "tuple_1 = (2,3,4,5)\n", 359 | "print(tuple_1)\n", 360 | "type(tuple_1)" 361 | ], 362 | "execution_count": null, 363 | "outputs": [ 364 | { 365 | "output_type": "stream", 366 | "text": [ 367 | "(2, 3, 4, 5)\n" 368 | ], 369 | "name": "stdout" 370 | }, 371 | { 372 | "output_type": "execute_result", 373 | "data": { 374 | "text/plain": [ 375 | "tuple" 376 | ] 377 | }, 378 | "metadata": { 379 | "tags": [] 380 | }, 381 | "execution_count": 11 382 | } 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "metadata": { 388 | "colab": { 389 | "base_uri": "https://localhost:8080/" 
390 | }, 391 | "id": "Tdrh8JsAh37C", 392 | "outputId": "a0a52f85-7058-4001-d799-b8a6fc298153" 393 | }, 394 | "source": [ 395 | "# tuple allows multiple data types\n", 396 | "tuple_2 = (1,2,3.5, 'Machine Learning', False)\n", 397 | "print(tuple_2)" 398 | ], 399 | "execution_count": null, 400 | "outputs": [ 401 | { 402 | "output_type": "stream", 403 | "text": [ 404 | "(1, 2, 3.5, 'Machine Learning', False)\n" 405 | ], 406 | "name": "stdout" 407 | } 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "metadata": { 413 | "colab": { 414 | "base_uri": "https://localhost:8080/" 415 | }, 416 | "id": "tHxHt22fiIAg", 417 | "outputId": "c09d925e-9289-4cd5-9fff-52b47d8e75ef" 418 | }, 419 | "source": [ 420 | "# converting a list to a tuple\n", 421 | "\n", 422 | "my_list = [3,4,5,6]\n", 423 | "print(my_list)\n", 424 | "\n", 425 | "my_tuple = tuple(my_list)\n", 426 | "print(my_tuple)" 427 | ], 428 | "execution_count": null, 429 | "outputs": [ 430 | { 431 | "output_type": "stream", 432 | "text": [ 433 | "[3, 4, 5, 6]\n", 434 | "(3, 4, 5, 6)\n" 435 | ], 436 | "name": "stdout" 437 | } 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "metadata": { 443 | "colab": { 444 | "base_uri": "https://localhost:8080/" 445 | }, 446 | "id": "9QRTalV1iay1", 447 | "outputId": "9f983fcd-54f8-42f4-93fb-b468e78b39df" 448 | }, 449 | "source": [ 450 | "print(my_tuple[0])\n", 451 | "print(my_tuple[1])" 452 | ], 453 | "execution_count": null, 454 | "outputs": [ 455 | { 456 | "output_type": "stream", 457 | "text": [ 458 | "3\n", 459 | "4\n" 460 | ], 461 | "name": "stdout" 462 | } 463 | ] 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": { 468 | "id": "nzLtSOHCirRc" 469 | }, 470 | "source": [ 471 | "Tuples are immutable --> Unchangeable" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "metadata": { 477 | "colab": { 478 | "base_uri": "https://localhost:8080/", 479 | "height": 171 480 | }, 481 | "id": "OXIstbesipDG", 482 | "outputId": 
"39957510-c58e-4568-b548-1921f85435b1" 483 | }, 484 | "source": [ 485 | "my_tuple.append(6)" 486 | ], 487 | "execution_count": null, 488 | "outputs": [ 489 | { 490 | "output_type": "error", 491 | "ename": "AttributeError", 492 | "evalue": "ignored", 493 | "traceback": [ 494 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 495 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 496 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmy_tuple\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 497 | "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'append'" 498 | ] 499 | } 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "metadata": { 505 | "colab": { 506 | "base_uri": "https://localhost:8080/" 507 | }, 508 | "id": "AlOVmz6Sizsu", 509 | "outputId": "8426c1ba-c637-413f-bf99-9a4f6423d99e" 510 | }, 511 | "source": [ 512 | "print(len(my_tuple))" 513 | ], 514 | "execution_count": null, 515 | "outputs": [ 516 | { 517 | "output_type": "stream", 518 | "text": [ 519 | "4\n" 520 | ], 521 | "name": "stdout" 522 | } 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "id": "ORc0vJDHjAwS" 529 | }, 530 | "source": [ 531 | "Set" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "metadata": { 537 | "colab": { 538 | "base_uri": "https://localhost:8080/" 539 | }, 540 | "id": "RjaxKroji9mw", 541 | "outputId": "97cfe499-855c-46a8-dc3f-15d6d17ffd9b" 542 | }, 543 | "source": [ 544 | "# set --> Curly brackets\n", 545 | "my_set = {1,2,3,4,5}\n", 546 | "print(my_set)\n", 547 | "type(my_set)" 548 | ], 549 | "execution_count": null, 550 | "outputs": [ 551 | { 552 | "output_type": "stream", 553 | "text": [ 554 | "{1, 2, 3, 4, 5}\n" 555 | ], 556 | "name": "stdout" 557 | 
}, 558 | { 559 | "output_type": "execute_result", 560 | "data": { 561 | "text/plain": [ 562 | "set" 563 | ] 564 | }, 565 | "metadata": { 566 | "tags": [] 567 | }, 568 | "execution_count": 17 569 | } 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "metadata": { 575 | "colab": { 576 | "base_uri": "https://localhost:8080/", 577 | "height": 171 578 | }, 579 | "id": "8ngbZxf0jMXk", 580 | "outputId": "f928e779-a7ee-453b-fbe3-9af839c25356" 581 | }, 582 | "source": [ 583 | "print(my_set[0])" 584 | ], 585 | "execution_count": null, 586 | "outputs": [ 587 | { 588 | "output_type": "error", 589 | "ename": "TypeError", 590 | "evalue": "ignored", 591 | "traceback": [ 592 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 593 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 594 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmy_set\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 595 | "\u001b[0;31mTypeError\u001b[0m: 'set' object does not support indexing" 596 | ] 597 | } 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "metadata": { 603 | "colab": { 604 | "base_uri": "https://localhost:8080/" 605 | }, 606 | "id": "65IgJYA5jVzr", 607 | "outputId": "77a0b6cc-72d2-495e-f4cc-a1cf60175622" 608 | }, 609 | "source": [ 610 | "# convert a list to a set\n", 611 | "list_5 = [4,5,6,7,8]\n", 612 | "\n", 613 | "x = set(list_5)\n", 614 | "print(x)" 615 | ], 616 | "execution_count": null, 617 | "outputs": [ 618 | { 619 | "output_type": "stream", 620 | "text": [ 621 | "{4, 5, 6, 7, 8}\n" 622 | ], 623 | "name": "stdout" 624 | } 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "metadata": { 630 | "colab": { 631 | "base_uri": "https://localhost:8080/" 632 | }, 633 | "id": "_0D3WDuZjrI_", 
634 | "outputId": "a56d3ed5-0c3c-4993-f46b-a7314862e915" 635 | }, 636 | "source": [ 637 | "# set does not allow duplicate values\n", 638 | "set_3 = {1,2,3,4,5,1,2,3}\n", 639 | "print(set_3)" 640 | ], 641 | "execution_count": null, 642 | "outputs": [ 643 | { 644 | "output_type": "stream", 645 | "text": [ 646 | "{1, 2, 3, 4, 5}\n" 647 | ], 648 | "name": "stdout" 649 | } 650 | ] 651 | }, 652 | { 653 | "cell_type": "markdown", 654 | "metadata": { 655 | "id": "W6i0Eo53j7Sx" 656 | }, 657 | "source": [ 658 | "Dictionary" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": { 664 | "id": "8zHFSdZdj-o9" 665 | }, 666 | "source": [ 667 | "Key-Value Pair" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "metadata": { 673 | "colab": { 674 | "base_uri": "https://localhost:8080/" 675 | }, 676 | "id": "RZJjR0oYj3iK", 677 | "outputId": "928c40b2-ad74-4f8f-d941-aa9afff89b7d" 678 | }, 679 | "source": [ 680 | "my_dictionary = {'name':'David','age':30,'country':'India'}\n", 681 | "print(my_dictionary)\n", 682 | "type(my_dictionary)" 683 | ], 684 | "execution_count": null, 685 | "outputs": [ 686 | { 687 | "output_type": "stream", 688 | "text": [ 689 | "{'name': 'David', 'age': 30, 'country': 'India'}\n" 690 | ], 691 | "name": "stdout" 692 | }, 693 | { 694 | "output_type": "execute_result", 695 | "data": { 696 | "text/plain": [ 697 | "dict" 698 | ] 699 | }, 700 | "metadata": { 701 | "tags": [] 702 | }, 703 | "execution_count": 21 704 | } 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "metadata": { 710 | "colab": { 711 | "base_uri": "https://localhost:8080/" 712 | }, 713 | "id": "gOCf8w7OkVjE", 714 | "outputId": "f781a022-30aa-4022-a996-fee5b54df4b0" 715 | }, 716 | "source": [ 717 | "print(my_dictionary['name'])\n", 718 | "print(my_dictionary['age'])\n", 719 | "print(my_dictionary['country'])" 720 | ], 721 | "execution_count": null, 722 | "outputs": [ 723 | { 724 | "output_type": "stream", 725 | "text": [ 726 | "David\n", 727 | "30\n", 728 | 
"India\n" 729 | ], 730 | "name": "stdout" 731 | } 732 | ] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "metadata": { 737 | "colab": { 738 | "base_uri": "https://localhost:8080/" 739 | }, 740 | "id": "8AOHTq6bkoq2", 741 | "outputId": "8954d823-76bf-4bdc-b7bf-50e668bc19e5" 742 | }, 743 | "source": [ 744 | "# dictionary does not allow duplicate keys\n", 745 | "dictionary_2 = {'name':'David','age':30,'country':'India','name':'David','age':30,'country':'India'}\n", 746 | "print(dictionary_2)" 747 | ], 748 | "execution_count": null, 749 | "outputs": [ 750 | { 751 | "output_type": "stream", 752 | "text": [ 753 | "{'name': 'David', 'age': 30, 'country': 'India'}\n" 754 | ], 755 | "name": "stdout" 756 | } 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "metadata": { 762 | "id": "zwQtG_KYk6q9" 763 | }, 764 | "source": [], 765 | "execution_count": null, 766 | "outputs": [] 767 | } 768 | ] 769 | } -------------------------------------------------------------------------------- /2.5. Operators_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "z8iVDs11-ogJ" 19 | }, 20 | "source": [ 21 | "Operators in Python:\n", 22 | "1. Arithmetic Operators\n", 23 | "2. Assignment Operators\n", 24 | "3. Comparison Operators\n", 25 | "4. Logical Operators\n", 26 | "5. Identity Operators\n", 27 | "6. Membership Operators" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "5NTu_QYw_212" 34 | }, 35 | "source": [ 36 | "1. 
Arithmetic Operators" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "colab": { 43 | "base_uri": "https://localhost:8080/" 44 | }, 45 | "id": "CXYtGkkZ59n1", 46 | "outputId": "ed373db5-6740-40fa-c477-d0f2f67fddc3" 47 | }, 48 | "source": [ 49 | "num_1 = 20\n", 50 | "num_2 = 10\n", 51 | "\n", 52 | "# addition\n", 53 | "sum = num_1 + num_2\n", 54 | "print('sum = ',sum)\n", 55 | "\n", 56 | "# subtraction\n", 57 | "diff = num_1 - num_2\n", 58 | "print('difference = ',diff)\n", 59 | "\n", 60 | "# multiplication\n", 61 | "pro = num_1 * num_2\n", 62 | "print('product = ',pro)\n", 63 | "\n", 64 | "# division\n", 65 | "quo = num_1 / num_2\n", 66 | "print('quotient = ',quo)\n", 67 | "\n", 68 | "# exponent\n", 69 | "exp = num_1**num_2 # 20^10\n", 70 | "print('exponent = ',exp)\n", 71 | "\n", 72 | "# modulus\n", 73 | "mod = num_1 % num_2\n", 74 | "print('reminder = ',mod)" 75 | ], 76 | "execution_count": null, 77 | "outputs": [ 78 | { 79 | "output_type": "stream", 80 | "text": [ 81 | "sum = 30\n", 82 | "difference = 10\n", 83 | "product = 200\n", 84 | "quotient = 2.0\n", 85 | "exponent = 10240000000000\n", 86 | "reminder = 0\n" 87 | ], 88 | "name": "stdout" 89 | } 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "id": "E12rvExzBIh7" 96 | }, 97 | "source": [ 98 | "2. 
Assignment Operators" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "id": "K-LS4tHQBiPZ" 105 | }, 106 | "source": [ 107 | "+=\n", 108 | "\n", 109 | "-=\n", 110 | "\n", 111 | "*=\n", 112 | "\n", 113 | "**=\n", 114 | "\n", 115 | "/=\n", 116 | "\n", 117 | "%=" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "metadata": { 123 | "colab": { 124 | "base_uri": "https://localhost:8080/" 125 | }, 126 | "id": "YwWiUzaNBDaD", 127 | "outputId": "a187ca00-043d-466f-a074-f182ccc4496e" 128 | }, 129 | "source": [ 130 | "a = 5\n", 131 | "print(a)" 132 | ], 133 | "execution_count": null, 134 | "outputs": [ 135 | { 136 | "output_type": "stream", 137 | "text": [ 138 | "5\n" 139 | ], 140 | "name": "stdout" 141 | } 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "metadata": { 147 | "colab": { 148 | "base_uri": "https://localhost:8080/" 149 | }, 150 | "id": "LxJoSCRVBPrv", 151 | "outputId": "cdcc508a-8abf-476b-f952-a708de062360" 152 | }, 153 | "source": [ 154 | "a = 5\n", 155 | "a += 5 # a = a + 5\n", 156 | "print(a)" 157 | ], 158 | "execution_count": null, 159 | "outputs": [ 160 | { 161 | "output_type": "stream", 162 | "text": [ 163 | "10\n" 164 | ], 165 | "name": "stdout" 166 | } 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "metadata": { 172 | "colab": { 173 | "base_uri": "https://localhost:8080/" 174 | }, 175 | "id": "F5hy5OvoBY0t", 176 | "outputId": "c1b988e4-bee0-4faa-c76f-491cffc4ee0e" 177 | }, 178 | "source": [ 179 | "b = 5\n", 180 | "b -= 2 # b = b-2\n", 181 | "print(b)" 182 | ], 183 | "execution_count": null, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "text": [ 188 | "3\n" 189 | ], 190 | "name": "stdout" 191 | } 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "id": "0B6StO-HBv1E" 198 | }, 199 | "source": [ 200 | "3. 
Comparison Operators" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "metadata": { 206 | "colab": { 207 | "base_uri": "https://localhost:8080/" 208 | }, 209 | "id": "9L2aTaKUBftz", 210 | "outputId": "b70b186f-7cc5-4887-aa81-418e1e1cac59" 211 | }, 212 | "source": [ 213 | "a = 5\n", 214 | "b = 10\n", 215 | "\n", 216 | "print(a == b) # equal to\n", 217 | "print(a != b) # not equal to\n", 218 | "print(a > b) # greater than\n", 219 | "print(a < b)\n", 220 | "print(a <= b)\n", 221 | "print(a >= b) " 222 | ], 223 | "execution_count": null, 224 | "outputs": [ 225 | { 226 | "output_type": "stream", 227 | "text": [ 228 | "False\n", 229 | "True\n", 230 | "False\n", 231 | "True\n", 232 | "True\n", 233 | "False\n" 234 | ], 235 | "name": "stdout" 236 | } 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "id": "5vG_Gp-DCc-u" 243 | }, 244 | "source": [ 245 | "4. Logical Operators" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": { 251 | "id": "0k1QjwN7Cfx5" 252 | }, 253 | "source": [ 254 | "and\n", 255 | "\n", 256 | "or\n", 257 | "\n", 258 | "not" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "metadata": { 264 | "colab": { 265 | "base_uri": "https://localhost:8080/" 266 | }, 267 | "id": "XKVEPY3UCVqy", 268 | "outputId": "48152426-7541-4483-a272-85090c9c20f3" 269 | }, 270 | "source": [ 271 | "a = 10\n", 272 | "\n", 273 | "print( a>20 and a>5)\n", 274 | "print( a>20 or a>5)\n", 275 | "print( not( a>8 and a>5))" 276 | ], 277 | "execution_count": null, 278 | "outputs": [ 279 | { 280 | "output_type": "stream", 281 | "text": [ 282 | "False\n", 283 | "True\n", 284 | "False\n" 285 | ], 286 | "name": "stdout" 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": { 293 | "id": "xzOpIj7eDKrz" 294 | }, 295 | "source": [ 296 | "5. 
Identity Operator:\n", 297 | "\n", 298 | "is \n", 299 | "\n", 300 | "is not" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "metadata": { 306 | "colab": { 307 | "base_uri": "https://localhost:8080/" 308 | }, 309 | "id": "005EmRZMDERy", 310 | "outputId": "7c75b240-62d6-4aaa-be75-51da8afc2795" 311 | }, 312 | "source": [ 313 | "x = 5\n", 314 | "y = 5\n", 315 | "\n", 316 | "print(x is y)" 317 | ], 318 | "execution_count": null, 319 | "outputs": [ 320 | { 321 | "output_type": "stream", 322 | "text": [ 323 | "True\n" 324 | ], 325 | "name": "stdout" 326 | } 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "metadata": { 332 | "colab": { 333 | "base_uri": "https://localhost:8080/" 334 | }, 335 | "id": "ZQy2V1ViDT5x", 336 | "outputId": "3782b1dc-8dda-4598-9d45-b3f9e859a6f2" 337 | }, 338 | "source": [ 339 | "x = 5\n", 340 | "y = 10\n", 341 | "\n", 342 | "print(x is y)" 343 | ], 344 | "execution_count": null, 345 | "outputs": [ 346 | { 347 | "output_type": "stream", 348 | "text": [ 349 | "False\n" 350 | ], 351 | "name": "stdout" 352 | } 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "metadata": { 358 | "colab": { 359 | "base_uri": "https://localhost:8080/" 360 | }, 361 | "id": "bSZFIncfDYVc", 362 | "outputId": "c301afff-7c5d-45e6-eac8-ed31719e8804" 363 | }, 364 | "source": [ 365 | "x = 5\n", 366 | "y = 5\n", 367 | "\n", 368 | "print(x is not y)" 369 | ], 370 | "execution_count": null, 371 | "outputs": [ 372 | { 373 | "output_type": "stream", 374 | "text": [ 375 | "False\n" 376 | ], 377 | "name": "stdout" 378 | } 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "metadata": { 384 | "colab": { 385 | "base_uri": "https://localhost:8080/" 386 | }, 387 | "id": "uVAX_AejDcsX", 388 | "outputId": "abbad84c-bc89-44e0-fff6-b21b7539743a" 389 | }, 390 | "source": [ 391 | "x = 5\n", 392 | "y = 10\n", 393 | "\n", 394 | "print(x is not y)" 395 | ], 396 | "execution_count": null, 397 | "outputs": [ 398 | { 399 | "output_type": "stream", 400 | "text": [ 
401 | "True\n" 402 | ], 403 | "name": "stdout" 404 | } 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": { 410 | "id": "mvIH9bSEDlSw" 411 | }, 412 | "source": [ 413 | "6. Membership Operator:\n", 414 | "\n", 415 | "in \n", 416 | "\n", 417 | "not in" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "metadata": { 423 | "colab": { 424 | "base_uri": "https://localhost:8080/" 425 | }, 426 | "id": "IkUTVRTmDfic", 427 | "outputId": "afc0d8cd-4e9e-4516-e23e-569752a176cb" 428 | }, 429 | "source": [ 430 | "a = 5\n", 431 | "b =10\n", 432 | "\n", 433 | "c = [1,2,3,4,5]\n", 434 | "print( a in c)\n", 435 | "print( b in c)" 436 | ], 437 | "execution_count": null, 438 | "outputs": [ 439 | { 440 | "output_type": "stream", 441 | "text": [ 442 | "True\n", 443 | "False\n" 444 | ], 445 | "name": "stdout" 446 | } 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "colab": { 453 | "base_uri": "https://localhost:8080/" 454 | }, 455 | "id": "LOYpdrbnD4Fb", 456 | "outputId": "782c2ff9-cc82-4797-c9af-1020a7bf037d" 457 | }, 458 | "source": [ 459 | "a = 5\n", 460 | "b =10\n", 461 | "\n", 462 | "c = [1,2,3,4,5]\n", 463 | "print( a not in c)\n", 464 | "print( b not in c)" 465 | ], 466 | "execution_count": null, 467 | "outputs": [ 468 | { 469 | "output_type": "stream", 470 | "text": [ 471 | "False\n", 472 | "True\n" 473 | ], 474 | "name": "stdout" 475 | } 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "metadata": { 481 | "id": "qkbtedVdEATX" 482 | }, 483 | "source": [], 484 | "execution_count": null, 485 | "outputs": [] 486 | } 487 | ] 488 | } -------------------------------------------------------------------------------- /2.6. 
if_else_statement_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "9yPP5tJoRu3j" 19 | }, 20 | "source": [ 21 | "simple if else statement" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "metadata": { 27 | "colab": { 28 | "base_uri": "https://localhost:8080/" 29 | }, 30 | "id": "aGRxtmdPQgBb", 31 | "outputId": "f90436e0-dd24-40a4-f3ba-644ee4a6ca49" 32 | }, 33 | "source": [ 34 | "a = 30\n", 35 | "b = 50\n", 36 | "\n", 37 | "if (a>b):\n", 38 | " print('a is the greatest number')\n", 39 | "else:\n", 40 | " print('b is the greatest number')" 41 | ], 42 | "execution_count": null, 43 | "outputs": [ 44 | { 45 | "output_type": "stream", 46 | "text": [ 47 | "b is the greatest number\n" 48 | ], 49 | "name": "stdout" 50 | } 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "metadata": { 56 | "colab": { 57 | "base_uri": "https://localhost:8080/" 58 | }, 59 | "id": "nUFrXZovSGIZ", 60 | "outputId": "6ecee5ab-551c-4cf4-cc2d-0d9c541ebd7f" 61 | }, 62 | "source": [ 63 | "a = int(input('Enter the first number : '))\n", 64 | "b = int(input('Enter the second number : '))\n", 65 | "\n", 66 | "if (a>b):\n", 67 | " print('First number is the greatest')\n", 68 | "else:\n", 69 | " print('Second number is the greatest')" 70 | ], 71 | "execution_count": null, 72 | "outputs": [ 73 | { 74 | "output_type": "stream", 75 | "text": [ 76 | "Enter the first number : 16\n", 77 | "Enter the second number : 20\n", 78 | "Second number is the greatest\n" 79 | ], 80 | "name": "stdout" 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "id": "GJixx9xSSoo-" 88 | }, 89 | "source": [ 90 | "if elif else statement" 
91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "metadata": { 96 | "colab": { 97 | "base_uri": "https://localhost:8080/" 98 | }, 99 | "id": "y_LUudPwSX6I", 100 | "outputId": "d4e25b8f-cba9-405a-b34e-d003b14e277a" 101 | }, 102 | "source": [ 103 | "a = 15\n", 104 | "b = 25\n", 105 | "c = 30\n", 106 | "\n", 107 | "if (a>b and a>c):\n", 108 | " print('a is the greatest number')\n", 109 | "elif (b>a and b>c):\n", 110 | " print('b is the greatest number')\n", 111 | "else:\n", 112 | " print('c is the greatest number')\n" 113 | ], 114 | "execution_count": null, 115 | "outputs": [ 116 | { 117 | "output_type": "stream", 118 | "text": [ 119 | "c is the greatest number\n" 120 | ], 121 | "name": "stdout" 122 | } 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "id": "cr5QqpJBTe2N" 129 | }, 130 | "source": [ 131 | "nested if statement" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "colab": { 138 | "base_uri": "https://localhost:8080/" 139 | }, 140 | "id": "YXooMdY1Tb8Z", 141 | "outputId": "ea061bc4-ec14-4383-d342-abbf1d1419aa" 142 | }, 143 | "source": [ 144 | "a = 20\n", 145 | "b = 40\n", 146 | "c = 60\n", 147 | "\n", 148 | "if (a>b):\n", 149 | " if (a>c):\n", 150 | " print('a is the greatest number')\n", 151 | " else:\n", 152 | " print('c is the greatest number')\n", 153 | "else:\n", 154 | " if (b>c):\n", 155 | " print('b is the greatest number')\n", 156 | " else:\n", 157 | " print('c is the greatest number') " 158 | ], 159 | "execution_count": null, 160 | "outputs": [ 161 | { 162 | "output_type": "stream", 163 | "text": [ 164 | "c is the greatest number\n" 165 | ], 166 | "name": "stdout" 167 | } 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "metadata": { 173 | "id": "d1jIWFJQUaa_" 174 | }, 175 | "source": [], 176 | "execution_count": null, 177 | "outputs": [] 178 | } 179 | ] 180 | } -------------------------------------------------------------------------------- /2.7. 
Loops_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | } 12 | }, 13 | "cells": [ 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "id": "l169EevJ4PYv" 18 | }, 19 | "source": [ 20 | "For loop" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "metadata": { 26 | "colab": { 27 | "base_uri": "https://localhost:8080/" 28 | }, 29 | "id": "uy9loXNN3yon", 30 | "outputId": "2754a351-7c82-42db-f525-70b101937bfd" 31 | }, 32 | "source": [ 33 | "laptop1 = int(input('Enter the price of the laptop : '))\n", 34 | "laptop2 = int(input('Enter the price of the laptop : '))\n", 35 | "laptop3 = int(input('Enter the price of the laptop : '))\n", 36 | "laptop4 = int(input('Enter the price of the laptop : '))\n", 37 | "laptop5 = int(input('Enter the price of the laptop : '))" 38 | ], 39 | "execution_count": null, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "text": [ 44 | "Enter the price of the laptop : 20000\n", 45 | "Enter the price of the laptop : 30000\n", 46 | "Enter the price of the laptop : 40000\n", 47 | "Enter the price of the laptop : 50000\n", 48 | "Enter the price of the laptop : 60000\n" 49 | ], 50 | "name": "stdout" 51 | } 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "metadata": { 57 | "colab": { 58 | "base_uri": "https://localhost:8080/" 59 | }, 60 | "id": "W0YAxl-e4XfP", 61 | "outputId": "56f97315-444b-4618-b15c-5f177c795bd7" 62 | }, 63 | "source": [ 64 | "for i in range(7): # 0,1,2,3,4,5,6\n", 65 | " laptop_price = int(input('Enter the price of the laptop : '))" 66 | ], 67 | "execution_count": null, 68 | "outputs": [ 69 | { 70 | "output_type": "stream", 71 | "text": [ 72 | "Enter the price of the laptop : 6\n", 73 | "Enter the price of the laptop : 7\n", 74 | "Enter the price of the laptop : 8\n", 
75 | "Enter the price of the laptop : 9\n", 76 | "Enter the price of the laptop : 2\n", 77 | "Enter the price of the laptop : 3\n", 78 | "Enter the price of the laptop : 4\n" 79 | ], 80 | "name": "stdout" 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "colab": { 88 | "base_uri": "https://localhost:8080/" 89 | }, 90 | "id": "-9bvL0U_5QnH", 91 | "outputId": "89a12700-fe97-4870-8ebe-5ad59de4c9ed" 92 | }, 93 | "source": [ 94 | "numbers = [50, 100, 150, 200]\n", 95 | "\n", 96 | "print(numbers[0])\n", 97 | "print(numbers[1])\n", 98 | "print(numbers[2])\n", 99 | "print(numbers[3])" 100 | ], 101 | "execution_count": null, 102 | "outputs": [ 103 | { 104 | "output_type": "stream", 105 | "text": [ 106 | "50\n", 107 | "100\n", 108 | "150\n", 109 | "200\n" 110 | ], 111 | "name": "stdout" 112 | } 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "metadata": { 118 | "colab": { 119 | "base_uri": "https://localhost:8080/" 120 | }, 121 | "id": "I2OmCKt_6J3g", 122 | "outputId": "9d248bca-1edd-4605-a195-4b5c7c9fee63" 123 | }, 124 | "source": [ 125 | "numbers = [50, 100, 150, 200]\n", 126 | "\n", 127 | "for i in numbers:\n", 128 | " print(i)" 129 | ], 130 | "execution_count": null, 131 | "outputs": [ 132 | { 133 | "output_type": "stream", 134 | "text": [ 135 | "50\n", 136 | "100\n", 137 | "150\n", 138 | "200\n" 139 | ], 140 | "name": "stdout" 141 | } 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "id": "LZw0F_Ga6aeC" 148 | }, 149 | "source": [ 150 | "While Loop" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": { 156 | "id": "mjmPzJiV6src" 157 | }, 158 | "source": [ 159 | "while condition:\n", 160 | "\n", 161 | " statement" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "colab": { 168 | "base_uri": "https://localhost:8080/" 169 | }, 170 | "id": "7UCze9SW6RO6", 171 | "outputId": "f8ae520b-14bd-47aa-fd6d-7870824a84c5" 172 | }, 173 | "source": [ 174 | "i = 
0\n", 175 | "\n", 176 | "while i<10:\n", 177 | " print(i)\n", 178 | " i += 1 " 179 | ], 180 | "execution_count": null, 181 | "outputs": [ 182 | { 183 | "output_type": "stream", 184 | "text": [ 185 | "0\n", 186 | "1\n", 187 | "2\n", 188 | "3\n", 189 | "4\n", 190 | "5\n", 191 | "6\n", 192 | "7\n", 193 | "8\n", 194 | "9\n" 195 | ], 196 | "name": "stdout" 197 | } 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "5aYvGqVj7Lxw" 204 | }, 205 | "source": [ 206 | "i = 5\n", 207 | "\n", 208 | "while i<3:\n", 209 | " print(i)\n", 210 | " i +=1" 211 | ], 212 | "execution_count": null, 213 | "outputs": [] 214 | } 215 | ] 216 | } -------------------------------------------------------------------------------- /2.8. Functions_in_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | } 12 | }, 13 | "cells": [ 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "id": "P6doAW5l8ba2" 18 | }, 19 | "source": [ 20 | "Functions: \n", 21 | "\n", 22 | "Function is a block of code that can be reused in a Program" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "id": "3YuBEtBR86X8" 29 | }, 30 | "source": [ 31 | "Factorial of a Number" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "id": "Z8ySi-x58_8w" 38 | }, 39 | "source": [ 40 | "Factorial of a number is the product of all the positive integers less than or equal to the given number" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "3tUJEQyC9LLh" 47 | }, 48 | "source": [ 49 | "Factorial of 5 = 5 x 4 x 3 x 2 x 1 = 120" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "colab": { 56 | "base_uri": "https://localhost:8080/" 57 | }, 58 | "id": "Z3F1xOof8LbY", 59 | 
"outputId": "76a7da9c-11c8-4732-f49d-25a164242fe9" 60 | }, 61 | "source": [ 62 | "number = int(input(' Enter a number to find its factorial : '))\n", 63 | "\n", 64 | "factorial = 1\n", 65 | "\n", 66 | "if number == 0:\n", 67 | " print(' The Factorial of 0 is 1')\n", 68 | "\n", 69 | "else:\n", 70 | " for i in range(1, number+1): \n", 71 | " factorial = factorial*i\n", 72 | " print('The factorial of ',number,' is',factorial) " 73 | ], 74 | "execution_count": null, 75 | "outputs": [ 76 | { 77 | "output_type": "stream", 78 | "text": [ 79 | " Enter a number to find its factorial : 10\n", 80 | "The factorial of 10 is 3628800\n" 81 | ], 82 | "name": "stdout" 83 | } 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "id": "3Rv9GbOh-7XI" 90 | }, 91 | "source": [ 92 | "Factorial Function" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "id": "pZH6ilR8-uQh" 99 | }, 100 | "source": [ 101 | "def factorial_value(num):\n", 102 | "\n", 103 | " factorial = 1\n", 104 | "\n", 105 | " if num == 0:\n", 106 | " return factorial\n", 107 | "\n", 108 | " else:\n", 109 | " for i in range(1, num+1):\n", 110 | " factorial = factorial*i\n", 111 | " return factorial" 112 | ], 113 | "execution_count": null, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "Nwf8f6su_ige", 123 | "outputId": "983dcfbf-e5fd-4770-918a-795fccc59eb2" 124 | }, 125 | "source": [ 126 | "print(factorial_value(5))" 127 | ], 128 | "execution_count": null, 129 | "outputs": [ 130 | { 131 | "output_type": "stream", 132 | "text": [ 133 | "120\n" 134 | ], 135 | "name": "stdout" 136 | } 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "metadata": { 142 | "colab": { 143 | "base_uri": "https://localhost:8080/" 144 | }, 145 | "id": "YXEVZyna_oKL", 146 | "outputId": "e9513dbd-4314-4d9c-d719-4eccb05867cd" 147 | }, 148 | "source": [ 149 | 
"print(factorial_value(10))" 150 | ], 151 | "execution_count": null, 152 | "outputs": [ 153 | { 154 | "output_type": "stream", 155 | "text": [ 156 | "3628800\n" 157 | ], 158 | "name": "stdout" 159 | } 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "metadata": { 165 | "colab": { 166 | "base_uri": "https://localhost:8080/" 167 | }, 168 | "id": "Gk0kwebb_y3U", 169 | "outputId": "f7532445-7844-4ffb-cf00-ee1086416724" 170 | }, 171 | "source": [ 172 | "print(factorial_value(6))" 173 | ], 174 | "execution_count": null, 175 | "outputs": [ 176 | { 177 | "output_type": "stream", 178 | "text": [ 179 | "720\n" 180 | ], 181 | "name": "stdout" 182 | } 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "metadata": { 188 | "id": "n9R58coF_8Nr" 189 | }, 190 | "source": [], 191 | "execution_count": null, 192 | "outputs": [] 193 | } 194 | ] 195 | } -------------------------------------------------------------------------------- /4.2. Importing_Datasets_through_Kaggle_API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "VY1GEpb4HiNh" 21 | }, 22 | "source": [ 23 | "API - Application Programming Interface" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "metadata": { 29 | "colab": { 30 | "base_uri": "https://localhost:8080/" 31 | }, 32 | "id": "B63zSIF-HhJr", 33 | "outputId": "fc28310c-56da-485e-e95e-67eeaa1ebc23" 34 | }, 35 | "source": [ 36 | "# installing the Kaggle library\n", 37 | "!pip install kaggle" 38 | ], 39 | "execution_count": null, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "text": [ 44 | "Requirement already satisfied: kaggle in 
/usr/local/lib/python3.7/dist-packages (1.5.10)\n", 45 | "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2020.12.5)\n", 46 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0)\n", 47 | "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3)\n", 48 | "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n", 49 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.41.1)\n", 50 | "Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.0.1)\n", 51 | "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.1)\n", 52 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n", 53 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4)\n", 54 | "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3)\n" 55 | ], 56 | "name": "stdout" 57 | } 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "id": "DvywBbPPMR7p" 64 | }, 65 | "source": [ 66 | "Upload your Kaggle.json file" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "metadata": { 72 | "id": "zzVfbVRJMOs5" 73 | }, 74 | "source": [ 75 | "# configuring the path of Kaggle.json file\n", 76 | "!mkdir -p ~/.kaggle\n", 77 | "!cp kaggle.json ~/.kaggle/\n", 78 | "!chmod 600 ~/.kaggle/kaggle.json" 79 | ], 80 | "execution_count": null, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "id": "aYMRBfj6NV0F" 87 | }, 88 | "source": [ 89 | "Importing the Earthquake Dataset" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 
| "metadata": { 95 | "colab": { 96 | "base_uri": "https://localhost:8080/" 97 | }, 98 | "id": "MSWENgDtNUHG", 99 | "outputId": "13d67e37-db51-4070-ba35-3ca0fba750a5" 100 | }, 101 | "source": [ 102 | "# API to fetch the dataset from Kaggle\n", 103 | "!kaggle competitions download -c LANL-Earthquake-Prediction" 104 | ], 105 | "execution_count": null, 106 | "outputs": [ 107 | { 108 | "output_type": "stream", 109 | "text": [ 110 | "Warning: Looks like you're using an outdated API Version, please consider updating (server 1.5.12 / client 1.5.4)\n", 111 | "Downloading seg_010eab.csv to /content\n", 112 | " 0% 0.00/316k [00:00\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", 175 | "" 176 | ], 177 | "text/plain": [ 178 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n", 179 | "0 6 148 72 ... 0.627 50 1\n", 180 | "1 1 85 66 ... 0.351 31 0\n", 181 | "2 8 183 64 ... 0.672 32 1\n", 182 | "3 1 89 66 ... 0.167 21 0\n", 183 | "4 0 137 40 ... 2.288 33 1\n", 184 | "\n", 185 | "[5 rows x 9 columns]" 186 | ] 187 | }, 188 | "metadata": { 189 | "tags": [] 190 | }, 191 | "execution_count": 3 192 | } 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "metadata": { 198 | "colab": { 199 | "base_uri": "https://localhost:8080/" 200 | }, 201 | "id": "lynParo6pEMB", 202 | "outputId": "265e8eec-578c-4ab8-f0af-8d7889e183d5" 203 | }, 204 | "source": [ 205 | "# number of rows and Columns in this dataset\n", 206 | "diabetes_dataset.shape" 207 | ], 208 | "execution_count": null, 209 | "outputs": [ 210 | { 211 | "output_type": "execute_result", 212 | "data": { 213 | "text/plain": [ 214 | "(768, 9)" 215 | ] 216 | }, 217 | "metadata": { 218 | "tags": [] 219 | }, 220 | "execution_count": 4 221 | } 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "metadata": { 227 | "colab": { 228 | "base_uri": "https://localhost:8080/", 229 | "height": 288 230 | }, 231 | "id": "3NDJOlrEpmoL", 232 | "outputId": "fbe0a905-1de9-44d7-9901-840578263c0f" 233 | }, 234 | "source": [ 235 | "# getting the statistical measures of the data\n", 236 | "diabetes_dataset.describe()" 237 | ], 238 | "execution_count": null, 239 | "outputs": [ 240 | { 241 | "output_type": "execute_result", 242 | "data": { 243 | "text/html": [ 244 | "
\n", 245 | "\n", 258 | "\n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
count768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000
mean3.845052120.89453169.10546920.53645879.79947931.9925780.47187633.2408850.348958
std3.36957831.97261819.35580715.952218115.2440027.8841600.33132911.7602320.476951
min0.0000000.0000000.0000000.0000000.0000000.0000000.07800021.0000000.000000
25%1.00000099.00000062.0000000.0000000.00000027.3000000.24375024.0000000.000000
50%3.000000117.00000072.00000023.00000030.50000032.0000000.37250029.0000000.000000
75%6.000000140.25000080.00000032.000000127.25000036.6000000.62625041.0000001.000000
max17.000000199.000000122.00000099.000000846.00000067.1000002.42000081.0000001.000000
\n", 372 | "
" 373 | ], 374 | "text/plain": [ 375 | " Pregnancies Glucose ... Age Outcome\n", 376 | "count 768.000000 768.000000 ... 768.000000 768.000000\n", 377 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n", 378 | "std 3.369578 31.972618 ... 11.760232 0.476951\n", 379 | "min 0.000000 0.000000 ... 21.000000 0.000000\n", 380 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n", 381 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n", 382 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n", 383 | "max 17.000000 199.000000 ... 81.000000 1.000000\n", 384 | "\n", 385 | "[8 rows x 9 columns]" 386 | ] 387 | }, 388 | "metadata": { 389 | "tags": [] 390 | }, 391 | "execution_count": 5 392 | } 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "metadata": { 398 | "colab": { 399 | "base_uri": "https://localhost:8080/" 400 | }, 401 | "id": "LrpHzaGpp5dQ", 402 | "outputId": "cdf3f133-5fdc-4a59-e202-267e358bc831" 403 | }, 404 | "source": [ 405 | "diabetes_dataset['Outcome'].value_counts()" 406 | ], 407 | "execution_count": null, 408 | "outputs": [ 409 | { 410 | "output_type": "execute_result", 411 | "data": { 412 | "text/plain": [ 413 | "0 500\n", 414 | "1 268\n", 415 | "Name: Outcome, dtype: int64" 416 | ] 417 | }, 418 | "metadata": { 419 | "tags": [] 420 | }, 421 | "execution_count": 6 422 | } 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": { 428 | "id": "cB1qRaNcqeh5" 429 | }, 430 | "source": [ 431 | "0 --> Non-Diabetic\n", 432 | "\n", 433 | "1 --> Diabetic" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "metadata": { 439 | "colab": { 440 | "base_uri": "https://localhost:8080/", 441 | "height": 138 442 | }, 443 | "id": "I6MWR0k_qSCK", 444 | "outputId": "710f530f-638b-4b27-a818-dc1d063435f8" 445 | }, 446 | "source": [ 447 | "diabetes_dataset.groupby('Outcome').mean()" 448 | ], 449 | "execution_count": null, 450 | "outputs": [ 451 | { 452 | "output_type": "execute_result", 453 | "data": { 454 | "text/html": [ 455 | "
\n", 456 | "\n", 469 | "\n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAge
Outcome
03.298000109.98000068.18400019.66400068.79200030.3042000.42973431.190000
14.865672141.25746370.82462722.164179100.33582135.1425370.55050037.067164
\n", 519 | "
" 520 | ], 521 | "text/plain": [ 522 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n", 523 | "Outcome ... \n", 524 | "0 3.298000 109.980000 ... 0.429734 31.190000\n", 525 | "1 4.865672 141.257463 ... 0.550500 37.067164\n", 526 | "\n", 527 | "[2 rows x 8 columns]" 528 | ] 529 | }, 530 | "metadata": { 531 | "tags": [] 532 | }, 533 | "execution_count": 7 534 | } 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "metadata": { 540 | "id": "RoDW7l9mqqHZ" 541 | }, 542 | "source": [ 543 | "# separating the data and labels\n", 544 | "X = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n", 545 | "Y = diabetes_dataset['Outcome']" 546 | ], 547 | "execution_count": null, 548 | "outputs": [] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "metadata": { 553 | "colab": { 554 | "base_uri": "https://localhost:8080/" 555 | }, 556 | "id": "3eiRW9M9raMm", 557 | "outputId": "9e149494-2c46-4a1f-dd2b-f2c90e00dd69" 558 | }, 559 | "source": [ 560 | "print(X)" 561 | ], 562 | "execution_count": null, 563 | "outputs": [ 564 | { 565 | "output_type": "stream", 566 | "text": [ 567 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n", 568 | "0 6 148 72 ... 33.6 0.627 50\n", 569 | "1 1 85 66 ... 26.6 0.351 31\n", 570 | "2 8 183 64 ... 23.3 0.672 32\n", 571 | "3 1 89 66 ... 28.1 0.167 21\n", 572 | "4 0 137 40 ... 43.1 2.288 33\n", 573 | ".. ... ... ... ... ... ... ...\n", 574 | "763 10 101 76 ... 32.9 0.171 63\n", 575 | "764 2 122 70 ... 36.8 0.340 27\n", 576 | "765 5 121 72 ... 26.2 0.245 30\n", 577 | "766 1 126 60 ... 30.1 0.349 47\n", 578 | "767 1 93 70 ... 
30.4 0.315 23\n", 579 | "\n", 580 | "[768 rows x 8 columns]\n" 581 | ], 582 | "name": "stdout" 583 | } 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "metadata": { 589 | "colab": { 590 | "base_uri": "https://localhost:8080/" 591 | }, 592 | "id": "AoxgTJAMrcCl", 593 | "outputId": "874e0649-124b-4f96-e899-b36d2b5b2680" 594 | }, 595 | "source": [ 596 | "print(Y)" 597 | ], 598 | "execution_count": null, 599 | "outputs": [ 600 | { 601 | "output_type": "stream", 602 | "text": [ 603 | "0 1\n", 604 | "1 0\n", 605 | "2 1\n", 606 | "3 0\n", 607 | "4 1\n", 608 | " ..\n", 609 | "763 0\n", 610 | "764 0\n", 611 | "765 0\n", 612 | "766 1\n", 613 | "767 0\n", 614 | "Name: Outcome, Length: 768, dtype: int64\n" 615 | ], 616 | "name": "stdout" 617 | } 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "metadata": { 623 | "id": "umAbo_kqrlzI" 624 | }, 625 | "source": [ 626 | "Data Standardization" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "metadata": { 632 | "id": "njfM5X60rgnc" 633 | }, 634 | "source": [ 635 | "scaler = StandardScaler()" 636 | ], 637 | "execution_count": null, 638 | "outputs": [] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "metadata": { 643 | "colab": { 644 | "base_uri": "https://localhost:8080/" 645 | }, 646 | "id": "g0ai5ARbr53p", 647 | "outputId": "b9816a11-a5a6-4c93-844d-1caec72b1c0b" 648 | }, 649 | "source": [ 650 | "scaler.fit(X)" 651 | ], 652 | "execution_count": null, 653 | "outputs": [ 654 | { 655 | "output_type": "execute_result", 656 | "data": { 657 | "text/plain": [ 658 | "StandardScaler(copy=True, with_mean=True, with_std=True)" 659 | ] 660 | }, 661 | "metadata": { 662 | "tags": [] 663 | }, 664 | "execution_count": 12 665 | } 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "metadata": { 671 | "id": "FHxNwPuZr-kD" 672 | }, 673 | "source": [ 674 | "standardized_data = scaler.transform(X)" 675 | ], 676 | "execution_count": null, 677 | "outputs": [] 678 | }, 679 | { 680 | "cell_type": "code", 681 | 
"metadata": { 682 | "colab": { 683 | "base_uri": "https://localhost:8080/" 684 | }, 685 | "id": "fjMwZ5x6sPUJ", 686 | "outputId": "2f0d65d3-01cd-48ed-f009-e998fd3f1174" 687 | }, 688 | "source": [ 689 | "print(standardized_data)" 690 | ], 691 | "execution_count": null, 692 | "outputs": [ 693 | { 694 | "output_type": "stream", 695 | "text": [ 696 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n", 697 | " 1.4259954 ]\n", 698 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n", 699 | " -0.19067191]\n", 700 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n", 701 | " -0.10558415]\n", 702 | " ...\n", 703 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n", 704 | " -0.27575966]\n", 705 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n", 706 | " 1.17073215]\n", 707 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n", 708 | " -0.87137393]]\n" 709 | ], 710 | "name": "stdout" 711 | } 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "metadata": { 717 | "id": "ZxWSl4SGsRjE" 718 | }, 719 | "source": [ 720 | "X = standardized_data\n", 721 | "Y = diabetes_dataset['Outcome']" 722 | ], 723 | "execution_count": null, 724 | "outputs": [] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "metadata": { 729 | "colab": { 730 | "base_uri": "https://localhost:8080/" 731 | }, 732 | "id": "lhJF_7QjsjmP", 733 | "outputId": "b6abea49-bd15-46d0-ef64-e7f6f8e008cc" 734 | }, 735 | "source": [ 736 | "print(X)\n", 737 | "print(Y)" 738 | ], 739 | "execution_count": null, 740 | "outputs": [ 741 | { 742 | "output_type": "stream", 743 | "text": [ 744 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n", 745 | " 1.4259954 ]\n", 746 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n", 747 | " -0.19067191]\n", 748 | " [ 1.23388019 1.94372388 -0.26394125 ... 
-1.10325546 0.60439732\n", 749 | " -0.10558415]\n", 750 | " ...\n", 751 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n", 752 | " -0.27575966]\n", 753 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n", 754 | " 1.17073215]\n", 755 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n", 756 | " -0.87137393]]\n", 757 | "0 1\n", 758 | "1 0\n", 759 | "2 1\n", 760 | "3 0\n", 761 | "4 1\n", 762 | " ..\n", 763 | "763 0\n", 764 | "764 0\n", 765 | "765 0\n", 766 | "766 1\n", 767 | "767 0\n", 768 | "Name: Outcome, Length: 768, dtype: int64\n" 769 | ], 770 | "name": "stdout" 771 | } 772 | ] 773 | }, 774 | { 775 | "cell_type": "markdown", 776 | "metadata": { 777 | "id": "A7CNR7qr2mYr" 778 | }, 779 | "source": [ 780 | "SPLITTING THE DATA INTO TRAINING DATA & TESTING DATA" 781 | ] 782 | }, 783 | { 784 | "cell_type": "code", 785 | "metadata": { 786 | "id": "LqQB0FZg2rPE" 787 | }, 788 | "source": [ 789 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)" 790 | ], 791 | "execution_count": null, 792 | "outputs": [] 793 | }, 794 | { 795 | "cell_type": "code", 796 | "metadata": { 797 | "colab": { 798 | "base_uri": "https://localhost:8080/" 799 | }, 800 | "id": "89adFns13NtL", 801 | "outputId": "9fe4ecf5-869a-4a16-d836-a500497fa497" 802 | }, 803 | "source": [ 804 | "print(X.shape, X_train.shape, X_test.shape)" 805 | ], 806 | "execution_count": null, 807 | "outputs": [ 808 | { 809 | "output_type": "stream", 810 | "text": [ 811 | "(768, 8) (614, 8) (154, 8)\n" 812 | ], 813 | "name": "stdout" 814 | } 815 | ] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "metadata": { 820 | "id": "qSCgPmrF3S_2" 821 | }, 822 | "source": [], 823 | "execution_count": null, 824 | "outputs": [] 825 | } 826 | ] 827 | } -------------------------------------------------------------------------------- /4.8. 
Feature_extraction_of_Text_data_using_Tf_idf_Vectorizer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "id": "FaIBmnXCknPl" 19 | }, 20 | "source": [ 21 | "About the Dataset:\n", 22 | "\n", 23 | "1. id: unique id for a news article\n", 24 | "2. title: the title of a news article\n", 25 | "3. author: author of the news article\n", 26 | "4. text: the text of the article; could be incomplete\n", 27 | "5. label: a label that marks whether the news article is real or fake:\n", 28 | " 1: Fake news\n", 29 | " 0: real News\n", 30 | "\n", 31 | "\n", 32 | "\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "id": "k399dHafvL5N" 39 | }, 40 | "source": [ 41 | "Importing the Dependencies" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "metadata": { 47 | "id": "-fetC5yqkPVe" 48 | }, 49 | "source": [ 50 | "import numpy as np\n", 51 | "import pandas as pd\n", 52 | "import re\n", 53 | "from nltk.corpus import stopwords\n", 54 | "from nltk.stem.porter import PorterStemmer\n", 55 | "from sklearn.feature_extraction.text import TfidfVectorizer" 56 | ], 57 | "execution_count": null, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "colab": { 64 | "base_uri": "https://localhost:8080/" 65 | }, 66 | "id": "1AC1YpmGwIDw", 67 | "outputId": "98e60824-d963-4486-bf45-4c9a13bb3f33" 68 | }, 69 | "source": [ 70 | "import nltk\n", 71 | "nltk.download('stopwords')" 72 | ], 73 | "execution_count": null, 74 | "outputs": [ 75 | { 76 | "output_type": "stream", 77 | "text": [ 78 | "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", 79 | "[nltk_data] Unzipping 
corpora/stopwords.zip.\n" 80 | ], 81 | "name": "stdout" 82 | }, 83 | { 84 | "output_type": "execute_result", 85 | "data": { 86 | "text/plain": [ 87 | "True" 88 | ] 89 | }, 90 | "metadata": { 91 | "tags": [] 92 | }, 93 | "execution_count": 2 94 | } 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "metadata": { 100 | "colab": { 101 | "base_uri": "https://localhost:8080/" 102 | }, 103 | "id": "dxIOt3DowpUR", 104 | "outputId": "96f9e591-c086-4958-f15d-bf6d9cf16fcb" 105 | }, 106 | "source": [ 107 | "# printing the stopwords in English\n", 108 | "print(stopwords.words('english'))" 109 | ], 110 | "execution_count": null, 111 | "outputs": [ 112 | { 113 | "output_type": "stream", 114 | "text": [ 115 | "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', 
\"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n" 116 | ], 117 | "name": "stdout" 118 | } 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": { 124 | "id": "NjeGd1CLw_6R" 125 | }, 126 | "source": [ 127 | "Data Pre-processing" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "metadata": { 133 | "id": "nCGcpu_1wzLw" 134 | }, 135 | "source": [ 136 | "# loading the dataset to a pandas DataFrame\n", 137 | "news_dataset = pd.read_csv('/content/train.csv')" 138 | ], 139 | "execution_count": null, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "colab": { 146 | "base_uri": "https://localhost:8080/" 147 | }, 148 | "id": "aRgmbYSbxV4-", 149 | "outputId": "dfa04f84-b122-45bb-e561-967c49dd9213" 150 | }, 151 | "source": [ 152 | "news_dataset.shape" 153 | ], 154 | "execution_count": null, 155 | "outputs": [ 156 | { 157 | "output_type": "execute_result", 158 | "data": { 159 | "text/plain": [ 160 | "(20800, 5)" 161 | ] 162 | }, 163 | "metadata": { 164 | "tags": [] 165 | }, 166 | "execution_count": 5 167 | } 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "metadata": { 173 | "colab": { 174 | "base_uri": "https://localhost:8080/", 175 | "height": 196 176 | }, 177 | "id": "jjJ1eB6RxZaS", 178 | "outputId": "e37737d1-94ca-479d-b59a-982c8d1fd666" 179 | }, 180 | "source": [ 181 | "# print the first 5 rows of the dataframe\n", 182 | "news_dataset.head()" 183 | ], 184 | "execution_count": null, 185 | "outputs": [ 186 | { 187 | "output_type": "execute_result", 188 | "data": { 189 | "text/html": [ 190 | "
\n", 191 | "\n", 204 | "\n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | "
idtitleauthortextlabel
00House Dem Aide: We Didn’t Even See Comey’s Let...Darrell LucusHouse Dem Aide: We Didn’t Even See Comey’s Let...1
11FLYNN: Hillary Clinton, Big Woman on Campus - ...Daniel J. FlynnEver get the feeling your life circles the rou...0
22Why the Truth Might Get You FiredConsortiumnews.comWhy the Truth Might Get You Fired October 29, ...1
3315 Civilians Killed In Single US Airstrike Hav...Jessica PurkissVideos 15 Civilians Killed In Single US Airstr...1
44Iranian woman jailed for fictional unpublished...Howard PortnoyPrint \\nAn Iranian woman has been sentenced to...1
\n", 258 | "
" 259 | ], 260 | "text/plain": [ 261 | " id ... label\n", 262 | "0 0 ... 1\n", 263 | "1 1 ... 0\n", 264 | "2 2 ... 1\n", 265 | "3 3 ... 1\n", 266 | "4 4 ... 1\n", 267 | "\n", 268 | "[5 rows x 5 columns]" 269 | ] 270 | }, 271 | "metadata": { 272 | "tags": [] 273 | }, 274 | "execution_count": 6 275 | } 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "id": "QYkDi4SwxlKi", 285 | "outputId": "85e27df0-e210-4d0b-c8df-167199c5e108" 286 | }, 287 | "source": [ 288 | "# counting the number of missing values in the dataset\n", 289 | "news_dataset.isnull().sum()" 290 | ], 291 | "execution_count": null, 292 | "outputs": [ 293 | { 294 | "output_type": "execute_result", 295 | "data": { 296 | "text/plain": [ 297 | "id 0\n", 298 | "title 558\n", 299 | "author 1957\n", 300 | "text 39\n", 301 | "label 0\n", 302 | "dtype: int64" 303 | ] 304 | }, 305 | "metadata": { 306 | "tags": [] 307 | }, 308 | "execution_count": 7 309 | } 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "metadata": { 315 | "id": "Mc04lQrhx57m" 316 | }, 317 | "source": [ 318 | "# replacing the null values with empty string\n", 319 | "news_dataset = news_dataset.fillna('')" 320 | ], 321 | "execution_count": null, 322 | "outputs": [] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "metadata": { 327 | "id": "H7TZgHszygxj" 328 | }, 329 | "source": [ 330 | "# merging the author name and news title\n", 331 | "news_dataset['content'] = news_dataset['author']+' '+news_dataset['title']" 332 | ], 333 | "execution_count": null, 334 | "outputs": [] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "metadata": { 339 | "colab": { 340 | "base_uri": "https://localhost:8080/" 341 | }, 342 | "id": "cbF6GBBpzBey", 343 | "outputId": "52ea9eee-2c59-4831-c676-8b667b3f6496" 344 | }, 345 | "source": [ 346 | "print(news_dataset['content'])" 347 | ], 348 | "execution_count": null, 349 | "outputs": [ 350 | { 351 | "output_type": 
"stream", 352 | "text": [ 353 | "0 Darrell Lucus House Dem Aide: We Didn’t Even S...\n", 354 | "1 Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo...\n", 355 | "2 Consortiumnews.com Why the Truth Might Get You...\n", 356 | "3 Jessica Purkiss 15 Civilians Killed In Single ...\n", 357 | "4 Howard Portnoy Iranian woman jailed for fictio...\n", 358 | " ... \n", 359 | "20795 Jerome Hudson Rapper T.I.: Trump a ’Poster Chi...\n", 360 | "20796 Benjamin Hoffman N.F.L. Playoffs: Schedule, Ma...\n", 361 | "20797 Michael J. de la Merced and Rachel Abrams Macy...\n", 362 | "20798 Alex Ansary NATO, Russia To Hold Parallel Exer...\n", 363 | "20799 David Swanson What Keeps the F-35 Alive\n", 364 | "Name: content, Length: 20800, dtype: object\n" 365 | ], 366 | "name": "stdout" 367 | } 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "metadata": { 373 | "id": "LfBtAvLtzEo6" 374 | }, 375 | "source": [ 376 | "# separating the data & label\n", 377 | "X = news_dataset.drop(columns='label', axis=1)\n", 378 | "Y = news_dataset['label']" 379 | ], 380 | "execution_count": null, 381 | "outputs": [] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "metadata": { 386 | "colab": { 387 | "base_uri": "https://localhost:8080/" 388 | }, 389 | "id": "oHPBr540zl1h", 390 | "outputId": "5940cef9-760c-4030-ae66-7ff51787ec2e" 391 | }, 392 | "source": [ 393 | "print(X)\n", 394 | "print(Y)" 395 | ], 396 | "execution_count": null, 397 | "outputs": [ 398 | { 399 | "output_type": "stream", 400 | "text": [ 401 | " id ... content\n", 402 | "0 0 ... Darrell Lucus House Dem Aide: We Didn’t Even S...\n", 403 | "1 1 ... Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo...\n", 404 | "2 2 ... Consortiumnews.com Why the Truth Might Get You...\n", 405 | "3 3 ... Jessica Purkiss 15 Civilians Killed In Single ...\n", 406 | "4 4 ... Howard Portnoy Iranian woman jailed for fictio...\n", 407 | "... ... ... ...\n", 408 | "20795 20795 ... Jerome Hudson Rapper T.I.: Trump a ’Poster Chi...\n", 409 | "20796 20796 ... 
Benjamin Hoffman N.F.L. Playoffs: Schedule, Ma...\n", 410 | "20797 20797 ... Michael J. de la Merced and Rachel Abrams Macy...\n", 411 | "20798 20798 ... Alex Ansary NATO, Russia To Hold Parallel Exer...\n", 412 | "20799 20799 ... David Swanson What Keeps the F-35 Alive\n", 413 | "\n", 414 | "[20800 rows x 5 columns]\n", 415 | "0 1\n", 416 | "1 0\n", 417 | "2 1\n", 418 | "3 1\n", 419 | "4 1\n", 420 | " ..\n", 421 | "20795 0\n", 422 | "20796 0\n", 423 | "20797 0\n", 424 | "20798 1\n", 425 | "20799 1\n", 426 | "Name: label, Length: 20800, dtype: int64\n" 427 | ], 428 | "name": "stdout" 429 | } 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": { 435 | "id": "0NwFcpqcz37a" 436 | }, 437 | "source": [ 438 | "Stemming:\n", 439 | "\n", 440 | "Stemming is the process of reducing a word to its root form (its stem).\n", 441 | "\n", 442 | "example:\n", 443 | "acting, acted, acts --> act" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "metadata": { 449 | "id": "Ga_DaZxhzoWM" 450 | }, 451 | "source": [ 452 | "port_stem = PorterStemmer()" 453 | ], 454 | "execution_count": null, 455 | "outputs": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "metadata": { 460 | "id": "zY-n0dCh0e-y" 461 | }, 462 | "source": [ 463 | "def stemming(content):\n", 464 | " stemmed_content = re.sub('[^a-zA-Z]',' ',content)\n", 465 | " stemmed_content = stemmed_content.lower()\n", 466 | " stemmed_content = stemmed_content.split()\n", 467 | " stemmed_content = [port_stem.stem(word) for word in stemmed_content \n", 468 | " if not word in stopwords.words('english')]\n", 469 | " stemmed_content = ' '.join(stemmed_content)\n", 470 | " return stemmed_content" 471 | ], 472 | "execution_count": null, 473 | "outputs": [] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "metadata": { 478 | "id": "MBUIk4c94yTL" 479 | }, 480 | "source": [ 481 | "news_dataset['content'] = news_dataset['content'].apply(stemming)" 482 | ], 483 | "execution_count": null, 484 | "outputs": [] 485 | }, 486 | 
{ 487 | "cell_type": "code", 488 | "metadata": { 489 | "colab": { 490 | "base_uri": "https://localhost:8080/" 491 | }, 492 | "id": "xmwK-zyO5Stg", 493 | "outputId": "58ca7971-cb4c-44de-aa95-d49f85c3a748" 494 | }, 495 | "source": [ 496 | "print(news_dataset['content'])" 497 | ], 498 | "execution_count": null, 499 | "outputs": [ 500 | { 501 | "output_type": "stream", 502 | "text": [ 503 | "0 darrel lucu hous dem aid even see comey letter...\n", 504 | "1 daniel j flynn flynn hillari clinton big woman...\n", 505 | "2 consortiumnew com truth might get fire\n", 506 | "3 jessica purkiss civilian kill singl us airstri...\n", 507 | "4 howard portnoy iranian woman jail fiction unpu...\n", 508 | " ... \n", 509 | "20795 jerom hudson rapper trump poster child white s...\n", 510 | "20796 benjamin hoffman n f l playoff schedul matchup...\n", 511 | "20797 michael j de la merc rachel abram maci said re...\n", 512 | "20798 alex ansari nato russia hold parallel exercis ...\n", 513 | "20799 david swanson keep f aliv\n", 514 | "Name: content, Length: 20800, dtype: object\n" 515 | ], 516 | "name": "stdout" 517 | } 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "metadata": { 523 | "id": "5ZIidnta5k5h" 524 | }, 525 | "source": [ 526 | "#separating the data and label\n", 527 | "X = news_dataset['content'].values\n", 528 | "Y = news_dataset['label'].values" 529 | ], 530 | "execution_count": null, 531 | "outputs": [] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "metadata": { 536 | "colab": { 537 | "base_uri": "https://localhost:8080/" 538 | }, 539 | "id": "3nA_SBZX6BeH", 540 | "outputId": "c14ccd15-cc79-4ecc-ef24-f33fdd4d3a52" 541 | }, 542 | "source": [ 543 | "print(X)" 544 | ], 545 | "execution_count": null, 546 | "outputs": [ 547 | { 548 | "output_type": "stream", 549 | "text": [ 550 | "['darrel lucu hous dem aid even see comey letter jason chaffetz tweet'\n", 551 | " 'daniel j flynn flynn hillari clinton big woman campu breitbart'\n", 552 | " 'consortiumnew com truth might 
get fire' ...\n", 553 | " 'michael j de la merc rachel abram maci said receiv takeov approach hudson bay new york time'\n", 554 | " 'alex ansari nato russia hold parallel exercis balkan'\n", 555 | " 'david swanson keep f aliv']\n" 556 | ], 557 | "name": "stdout" 558 | } 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "metadata": { 564 | "colab": { 565 | "base_uri": "https://localhost:8080/" 566 | }, 567 | "id": "NgkFGXkg6HS4", 568 | "outputId": "57984eb9-1d8c-4090-abfe-7dc7f3358a71" 569 | }, 570 | "source": [ 571 | "print(Y)" 572 | ], 573 | "execution_count": null, 574 | "outputs": [ 575 | { 576 | "output_type": "stream", 577 | "text": [ 578 | "[1 0 1 ... 0 1 1]\n" 579 | ], 580 | "name": "stdout" 581 | } 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "metadata": { 587 | "colab": { 588 | "base_uri": "https://localhost:8080/" 589 | }, 590 | "id": "Iu2ZEBkL6QTm", 591 | "outputId": "a16d34f4-7e56-4458-8e77-3546fcff7124" 592 | }, 593 | "source": [ 594 | "Y.shape" 595 | ], 596 | "execution_count": null, 597 | "outputs": [ 598 | { 599 | "output_type": "execute_result", 600 | "data": { 601 | "text/plain": [ 602 | "(20800,)" 603 | ] 604 | }, 605 | "metadata": { 606 | "tags": [] 607 | }, 608 | "execution_count": 20 609 | } 610 | ] 611 | }, 612 | { 613 | "cell_type": "markdown", 614 | "metadata": { 615 | "id": "-UloskM52Fc4" 616 | }, 617 | "source": [ 618 | "Tf-Idf" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "metadata": { 624 | "id": "8cESLGsJxxDA" 625 | }, 626 | "source": [ 627 | "# convert the textual data to Feature Vectors\n", 628 | "vectorizer = TfidfVectorizer()" 629 | ], 630 | "execution_count": null, 631 | "outputs": [] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "metadata": { 636 | "id": "Q_LP0Hwr2SfR" 637 | }, 638 | "source": [ 639 | "vectorizer.fit(X)\n", 640 | "\n", 641 | "X = vectorizer.transform(X)" 642 | ], 643 | "execution_count": null, 644 | "outputs": [] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "metadata": { 
649 | "colab": { 650 | "base_uri": "https://localhost:8080/" 651 | }, 652 | "id": "5qW7RTiy2jUH", 653 | "outputId": "2f62b77c-56d4-4abd-c011-125c60be753f" 654 | }, 655 | "source": [ 656 | "print(X)" 657 | ], 658 | "execution_count": null, 659 | "outputs": [ 660 | { 661 | "output_type": "stream", 662 | "text": [ 663 | " (0, 15686)\t0.28485063562728646\n", 664 | " (0, 13473)\t0.2565896679337957\n", 665 | " (0, 8909)\t0.3635963806326075\n", 666 | " (0, 8630)\t0.29212514087043684\n", 667 | " (0, 7692)\t0.24785219520671603\n", 668 | " (0, 7005)\t0.21874169089359144\n", 669 | " (0, 4973)\t0.233316966909351\n", 670 | " (0, 3792)\t0.2705332480845492\n", 671 | " (0, 3600)\t0.3598939188262559\n", 672 | " (0, 2959)\t0.2468450128533713\n", 673 | " (0, 2483)\t0.3676519686797209\n", 674 | " (0, 267)\t0.27010124977708766\n", 675 | " (1, 16799)\t0.30071745655510157\n", 676 | " (1, 6816)\t0.1904660198296849\n", 677 | " (1, 5503)\t0.7143299355715573\n", 678 | " (1, 3568)\t0.26373768806048464\n", 679 | " (1, 2813)\t0.19094574062359204\n", 680 | " (1, 2223)\t0.3827320386859759\n", 681 | " (1, 1894)\t0.15521974226349364\n", 682 | " (1, 1497)\t0.2939891562094648\n", 683 | " (2, 15611)\t0.41544962664721613\n", 684 | " (2, 9620)\t0.49351492943649944\n", 685 | " (2, 5968)\t0.3474613386728292\n", 686 | " (2, 5389)\t0.3866530551182615\n", 687 | " (2, 3103)\t0.46097489583229645\n", 688 | " :\t:\n", 689 | " (20797, 13122)\t0.2482526352197606\n", 690 | " (20797, 12344)\t0.27263457663336677\n", 691 | " (20797, 12138)\t0.24778257724396507\n", 692 | " (20797, 10306)\t0.08038079000566466\n", 693 | " (20797, 9588)\t0.174553480255222\n", 694 | " (20797, 9518)\t0.2954204003420313\n", 695 | " (20797, 8988)\t0.36160868928090795\n", 696 | " (20797, 8364)\t0.22322585870464118\n", 697 | " (20797, 7042)\t0.21799048897828688\n", 698 | " (20797, 3643)\t0.21155500613623743\n", 699 | " (20797, 1287)\t0.33538056804139865\n", 700 | " (20797, 699)\t0.30685846079762347\n", 701 | " (20797, 
43)\t0.29710241860700626\n", 702 | " (20798, 13046)\t0.22363267488270608\n", 703 | " (20798, 11052)\t0.4460515589182236\n", 704 | " (20798, 10177)\t0.3192496370187028\n", 705 | " (20798, 6889)\t0.32496285694299426\n", 706 | " (20798, 5032)\t0.4083701450239529\n", 707 | " (20798, 1125)\t0.4460515589182236\n", 708 | " (20798, 588)\t0.3112141524638974\n", 709 | " (20798, 350)\t0.28446937819072576\n", 710 | " (20799, 14852)\t0.5677577267055112\n", 711 | " (20799, 8036)\t0.45983893273780013\n", 712 | " (20799, 3623)\t0.37927626273066584\n", 713 | " (20799, 377)\t0.5677577267055112\n" 714 | ], 715 | "name": "stdout" 716 | } 717 | ] 718 | } 719 | ] 720 | } -------------------------------------------------------------------------------- /4.9. Numerical_Dataset_Pre_Processing_Use_Case.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "hu0h4VviKzRu" 21 | }, 22 | "source": [ 23 | "Importing the Dependencies" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "metadata": { 29 | "id": "R9KKo3H1HzTT" 30 | }, 31 | "source": [ 32 | "import numpy as np\n", 33 | "import pandas as pd\n", 34 | "from sklearn.preprocessing import StandardScaler\n", 35 | "from sklearn.model_selection import train_test_split" 36 | ], 37 | "execution_count": null, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "na-dUNZPLMgV" 44 | }, 45 | "source": [ 46 | "Data Collection & Pre-Processing" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "metadata": { 52 | "id": "iiOFXfCcLKeW" 53 | }, 54 | "source": [ 55 | "# loading the data from csv file to a pandas dataframe\n", 56 | "diabetes_data = 
pd.read_csv('/content/diabetes.csv')" 57 | ], 58 | "execution_count": null, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "metadata": { 64 | "colab": { 65 | "base_uri": "https://localhost:8080/", 66 | "height": 196 67 | }, 68 | "id": "UsdxwqGwLfFK", 69 | "outputId": "cbd7646d-0290-4581-a293-3393dcea51f4" 70 | }, 71 | "source": [ 72 | "# first 5 rows of the dataframe\n", 73 | "diabetes_data.head()" 74 | ], 75 | "execution_count": null, 76 | "outputs": [ 77 | { 78 | "output_type": "execute_result", 79 | "data": { 80 | "text/html": [ 81 | "
\n", 82 | "\n", 95 | "\n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", 173 | "
" 174 | ], 175 | "text/plain": [ 176 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n", 177 | "0 6 148 72 ... 0.627 50 1\n", 178 | "1 1 85 66 ... 0.351 31 0\n", 179 | "2 8 183 64 ... 0.672 32 1\n", 180 | "3 1 89 66 ... 0.167 21 0\n", 181 | "4 0 137 40 ... 2.288 33 1\n", 182 | "\n", 183 | "[5 rows x 9 columns]" 184 | ] 185 | }, 186 | "metadata": { 187 | "tags": [] 188 | }, 189 | "execution_count": 3 190 | } 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "colab": { 197 | "base_uri": "https://localhost:8080/" 198 | }, 199 | "id": "8SZEYDmELlsN", 200 | "outputId": "9ba2d65d-ad25-4c96-9a25-203c92621729" 201 | }, 202 | "source": [ 203 | "# number of rows & columns\n", 204 | "diabetes_data.shape" 205 | ], 206 | "execution_count": null, 207 | "outputs": [ 208 | { 209 | "output_type": "execute_result", 210 | "data": { 211 | "text/plain": [ 212 | "(768, 9)" 213 | ] 214 | }, 215 | "metadata": { 216 | "tags": [] 217 | }, 218 | "execution_count": 4 219 | } 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "metadata": { 225 | "colab": { 226 | "base_uri": "https://localhost:8080/", 227 | "height": 286 228 | }, 229 | "id": "jR6mgreIL-JL", 230 | "outputId": "3cd34a55-8679-490f-fc2c-7ce50f8133f7" 231 | }, 232 | "source": [ 233 | "diabetes_data.describe()" 234 | ], 235 | "execution_count": null, 236 | "outputs": [ 237 | { 238 | "output_type": "execute_result", 239 | "data": { 240 | "text/html": [ 241 | "
\n", 242 | "\n", 255 | "\n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
count768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000
mean3.845052120.89453169.10546920.53645879.79947931.9925780.47187633.2408850.348958
std3.36957831.97261819.35580715.952218115.2440027.8841600.33132911.7602320.476951
min0.0000000.0000000.0000000.0000000.0000000.0000000.07800021.0000000.000000
25%1.00000099.00000062.0000000.0000000.00000027.3000000.24375024.0000000.000000
50%3.000000117.00000072.00000023.00000030.50000032.0000000.37250029.0000000.000000
75%6.000000140.25000080.00000032.000000127.25000036.6000000.62625041.0000001.000000
max17.000000199.000000122.00000099.000000846.00000067.1000002.42000081.0000001.000000
\n", 369 | "
" 370 | ], 371 | "text/plain": [ 372 | " Pregnancies Glucose ... Age Outcome\n", 373 | "count 768.000000 768.000000 ... 768.000000 768.000000\n", 374 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n", 375 | "std 3.369578 31.972618 ... 11.760232 0.476951\n", 376 | "min 0.000000 0.000000 ... 21.000000 0.000000\n", 377 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n", 378 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n", 379 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n", 380 | "max 17.000000 199.000000 ... 81.000000 1.000000\n", 381 | "\n", 382 | "[8 rows x 9 columns]" 383 | ] 384 | }, 385 | "metadata": { 386 | "tags": [] 387 | }, 388 | "execution_count": 5 389 | } 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "9goF-f7bMXLe" 396 | }, 397 | "source": [ 398 | "Separating Features and Target" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "metadata": { 404 | "id": "we2FkbpkMI1b" 405 | }, 406 | "source": [ 407 | "X = diabetes_data.drop(columns='Outcome', axis =1)\n", 408 | "Y = diabetes_data['Outcome']" 409 | ], 410 | "execution_count": null, 411 | "outputs": [] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "metadata": { 416 | "colab": { 417 | "base_uri": "https://localhost:8080/" 418 | }, 419 | "id": "kv-xFACDMo_t", 420 | "outputId": "208f9f4e-c615-47e2-8887-28d759d17e8d" 421 | }, 422 | "source": [ 423 | "print(X)" 424 | ], 425 | "execution_count": null, 426 | "outputs": [ 427 | { 428 | "output_type": "stream", 429 | "text": [ 430 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n", 431 | "0 6 148 72 ... 33.6 0.627 50\n", 432 | "1 1 85 66 ... 26.6 0.351 31\n", 433 | "2 8 183 64 ... 23.3 0.672 32\n", 434 | "3 1 89 66 ... 28.1 0.167 21\n", 435 | "4 0 137 40 ... 43.1 2.288 33\n", 436 | ".. ... ... ... ... ... ... ...\n", 437 | "763 10 101 76 ... 32.9 0.171 63\n", 438 | "764 2 122 70 ... 36.8 0.340 27\n", 439 | "765 5 121 72 ... 26.2 0.245 30\n", 440 | "766 1 126 60 ... 
30.1 0.349 47\n", 441 | "767 1 93 70 ... 30.4 0.315 23\n", 442 | "\n", 443 | "[768 rows x 8 columns]\n" 444 | ], 445 | "name": "stdout" 446 | } 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "colab": { 453 | "base_uri": "https://localhost:8080/" 454 | }, 455 | "id": "rv6wGnC6MqYu", 456 | "outputId": "f27ffa9f-db2f-4341-ae46-3447f0a1b1e8" 457 | }, 458 | "source": [ 459 | "print(Y)" 460 | ], 461 | "execution_count": null, 462 | "outputs": [ 463 | { 464 | "output_type": "stream", 465 | "text": [ 466 | "0 1\n", 467 | "1 0\n", 468 | "2 1\n", 469 | "3 0\n", 470 | "4 1\n", 471 | " ..\n", 472 | "763 0\n", 473 | "764 0\n", 474 | "765 0\n", 475 | "766 1\n", 476 | "767 0\n", 477 | "Name: Outcome, Length: 768, dtype: int64\n" 478 | ], 479 | "name": "stdout" 480 | } 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": { 486 | "id": "7lHxwsgFMvPp" 487 | }, 488 | "source": [ 489 | "0 --> Non - Diabetic\n", 490 | "\n", 491 | "1 --> Diabetic" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": { 497 | "id": "LCSwJAQrNIC5" 498 | }, 499 | "source": [ 500 | "Data Standardization" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "metadata": { 506 | "id": "u4_i4PvqMs2N" 507 | }, 508 | "source": [ 509 | "scaler = StandardScaler()" 510 | ], 511 | "execution_count": null, 512 | "outputs": [] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "metadata": { 517 | "id": "FjqSqB1VNdBC" 518 | }, 519 | "source": [ 520 | "standardized_data = scaler.fit_transform(X)" 521 | ], 522 | "execution_count": null, 523 | "outputs": [] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "metadata": { 528 | "colab": { 529 | "base_uri": "https://localhost:8080/" 530 | }, 531 | "id": "EWDTbdqWNlIR", 532 | "outputId": "bffc90af-1a62-4322-8c46-8a291f42c600" 533 | }, 534 | "source": [ 535 | "print(standardized_data)" 536 | ], 537 | "execution_count": null, 538 | "outputs": [ 539 | { 540 | "output_type": "stream", 541 | "text": [ 542 | 
"[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n", 543 | " 1.4259954 ]\n", 544 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n", 545 | " -0.19067191]\n", 546 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n", 547 | " -0.10558415]\n", 548 | " ...\n", 549 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n", 550 | " -0.27575966]\n", 551 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n", 552 | " 1.17073215]\n", 553 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n", 554 | " -0.87137393]]\n" 555 | ], 556 | "name": "stdout" 557 | } 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "metadata": { 563 | "id": "_Ne5Lr4PNy-9" 564 | }, 565 | "source": [ 566 | "X = standardized_data" 567 | ], 568 | "execution_count": null, 569 | "outputs": [] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "metadata": { 574 | "colab": { 575 | "base_uri": "https://localhost:8080/" 576 | }, 577 | "id": "md1uJwDON5Bz", 578 | "outputId": "62b14f3e-402a-404d-91d3-68c5e2eb56b2" 579 | }, 580 | "source": [ 581 | "print(X)" 582 | ], 583 | "execution_count": null, 584 | "outputs": [ 585 | { 586 | "output_type": "stream", 587 | "text": [ 588 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n", 589 | " 1.4259954 ]\n", 590 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n", 591 | " -0.19067191]\n", 592 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n", 593 | " -0.10558415]\n", 594 | " ...\n", 595 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n", 596 | " -0.27575966]\n", 597 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n", 598 | " 1.17073215]\n", 599 | " [-0.84488505 -0.8730192 0.04624525 ... 
-0.20212881 -0.47378505\n", 600 | " -0.87137393]]\n" 601 | ], 602 | "name": "stdout" 603 | } 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "metadata": { 609 | "colab": { 610 | "base_uri": "https://localhost:8080/" 611 | }, 612 | "id": "7gaS5qPkN6Fg", 613 | "outputId": "22d2bee5-f5d3-4043-c37c-33ca1823f532" 614 | }, 615 | "source": [ 616 | "print(Y)" 617 | ], 618 | "execution_count": null, 619 | "outputs": [ 620 | { 621 | "output_type": "stream", 622 | "text": [ 623 | "0 1\n", 624 | "1 0\n", 625 | "2 1\n", 626 | "3 0\n", 627 | "4 1\n", 628 | " ..\n", 629 | "763 0\n", 630 | "764 0\n", 631 | "765 0\n", 632 | "766 1\n", 633 | "767 0\n", 634 | "Name: Outcome, Length: 768, dtype: int64\n" 635 | ], 636 | "name": "stdout" 637 | } 638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": { 643 | "id": "8lZAGBiGOAYt" 644 | }, 645 | "source": [ 646 | "Splitting the dataset into Training data & Testing Data" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "metadata": { 652 | "id": "MVtlCAEqN7tq" 653 | }, 654 | "source": [ 655 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)" 656 | ], 657 | "execution_count": null, 658 | "outputs": [] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "metadata": { 663 | "colab": { 664 | "base_uri": "https://localhost:8080/" 665 | }, 666 | "id": "zLr8D8JTOj3a", 667 | "outputId": "907bbe92-5629-4403-b0b9-555dc6641688" 668 | }, 669 | "source": [ 670 | "print(X.shape, X_train.shape, X_test.shape)" 671 | ], 672 | "execution_count": null, 673 | "outputs": [ 674 | { 675 | "output_type": "stream", 676 | "text": [ 677 | "(768, 8) (614, 8) (154, 8)\n" 678 | ], 679 | "name": "stdout" 680 | } 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "metadata": { 686 | "id": "LP8FiNo2OqLc" 687 | }, 688 | "source": [], 689 | "execution_count": null, 690 | "outputs": [] 691 | } 692 | ] 693 | } -------------------------------------------------------------------------------- /ML 
Use Case 3. Spam_Mail_Prediction_using_Machine_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "collapsed_sections": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "kqB21QOgMg-G" 22 | }, 23 | "source": [ 24 | "Importing the Dependencies" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "rALI06-oHusw" 31 | }, 32 | "source": [ 33 | "import numpy as np\n", 34 | "import pandas as pd\n", 35 | "from sklearn.model_selection import train_test_split\n", 36 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 37 | "from sklearn.linear_model import LogisticRegression\n", 38 | "from sklearn.metrics import accuracy_score" 39 | ], 40 | "execution_count": null, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "YyKe9o2ONeFv" 47 | }, 48 | "source": [ 49 | "Data Collection & Pre-Processing" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "id": "CpStHH8KNcYB" 56 | }, 57 | "source": [ 58 | "# loading the data from csv file to a pandas Dataframe\n", 59 | "raw_mail_data = pd.read_csv('/content/mail_data.csv')" 60 | ], 61 | "execution_count": null, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "colab": { 68 | "base_uri": "https://localhost:8080/" 69 | }, 70 | "id": "pdn-7VE2NxsZ", 71 | "outputId": "28c19d96-23a2-43c0-86ad-5c1aee7f1b58" 72 | }, 73 | "source": [ 74 | "print(raw_mail_data)" 75 | ], 76 | "execution_count": null, 77 | "outputs": [ 78 | { 79 | "output_type": "stream", 80 | "name": "stdout", 81 | "text": [ 82 | " Category Message\n", 83 | "0 ham Go until jurong point, crazy.. 
Available only ...\n", 84 | "1 ham Ok lar... Joking wif u oni...\n", 85 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 86 | "3 ham U dun say so early hor... U c already then say...\n", 87 | "4 ham Nah I don't think he goes to usf, he lives aro...\n", 88 | "... ... ...\n", 89 | "5567 spam This is the 2nd time we have tried 2 contact u...\n", 90 | "5568 ham Will ü b going to esplanade fr home?\n", 91 | "5569 ham Pity, * was in mood for that. So...any other s...\n", 92 | "5570 ham The guy did some bitching but I acted like i'd...\n", 93 | "5571 ham Rofl. Its true to its name\n", 94 | "\n", 95 | "[5572 rows x 2 columns]\n" 96 | ] 97 | } 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "yhakjIE1N011" 104 | }, 105 | "source": [ 106 | "# replace the null values with a null string\n", 107 | "mail_data = raw_mail_data.where((pd.notnull(raw_mail_data)),'')" 108 | ], 109 | "execution_count": null, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "colab": { 116 | "base_uri": "https://localhost:8080/", 117 | "height": 202 118 | }, 119 | "id": "SJey6H-SOWeK", 120 | "outputId": "af1b0dfd-2ff9-4af9-cfcd-d0c177dd6ab9" 121 | }, 122 | "source": [ 123 | "# printing the first 5 rows of the dataframe\n", 124 | "mail_data.head()" 125 | ], 126 | "execution_count": null, 127 | "outputs": [ 128 | { 129 | "output_type": "execute_result", 130 | "data": { 131 | "text/html": [ 132 | "
\n", 133 | "\n", 146 | "\n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
CategoryMessage
0hamGo until jurong point, crazy.. Available only ...
1hamOk lar... Joking wif u oni...
2spamFree entry in 2 a wkly comp to win FA Cup fina...
3hamU dun say so early hor... U c already then say...
4hamNah I don't think he goes to usf, he lives aro...
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " Category Message\n", 186 | "0 ham Go until jurong point, crazy.. Available only ...\n", 187 | "1 ham Ok lar... Joking wif u oni...\n", 188 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", 189 | "3 ham U dun say so early hor... U c already then say...\n", 190 | "4 ham Nah I don't think he goes to usf, he lives aro..." 191 | ] 192 | }, 193 | "metadata": {}, 194 | "execution_count": 5 195 | } 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "colab": { 202 | "base_uri": "https://localhost:8080/" 203 | }, 204 | "id": "IbK82N2gOdar", 205 | "outputId": "4d1840a1-22b5-468f-d4d0-a4528ef4313c" 206 | }, 207 | "source": [ 208 | "# checking the number of rows and columns in the dataframe\n", 209 | "mail_data.shape" 210 | ], 211 | "execution_count": null, 212 | "outputs": [ 213 | { 214 | "output_type": "execute_result", 215 | "data": { 216 | "text/plain": [ 217 | "(5572, 2)" 218 | ] 219 | }, 220 | "metadata": {}, 221 | "execution_count": 6 222 | } 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": { 228 | "id": "vhR4U3ATPBdk" 229 | }, 230 | "source": [ 231 | "Label Encoding" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "metadata": { 237 | "id": "9EW7QSgeOt4p" 238 | }, 239 | "source": [ 240 | "# label spam mail as 0; ham mail as 1;\n", 241 | "\n", 242 | "mail_data.loc[mail_data['Category'] == 'spam', 'Category',] = 0\n", 243 | "mail_data.loc[mail_data['Category'] == 'ham', 'Category',] = 1" 244 | ], 245 | "execution_count": null, 246 | "outputs": [] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": { 251 | "id": "uxZK1fWwPwII" 252 | }, 253 | "source": [ 254 | "spam - 0\n", 255 | "\n", 256 | "ham - 1" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "metadata": { 262 | "id": "t8Rt-FaNPtPE" 263 | }, 264 | "source": [ 265 | "# separating the data as texts and label\n", 266 | "\n", 267 | "X = mail_data['Message']\n", 268 | "\n", 269 | "Y = 
mail_data['Category']" 270 | ], 271 | "execution_count": null, 272 | "outputs": [] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "metadata": { 277 | "colab": { 278 | "base_uri": "https://localhost:8080/" 279 | }, 280 | "id": "QnQeUBGtQPP7", 281 | "outputId": "a2640f4b-2a1d-4742-9742-3ecbb6017668" 282 | }, 283 | "source": [ 284 | "print(X)" 285 | ], 286 | "execution_count": null, 287 | "outputs": [ 288 | { 289 | "output_type": "stream", 290 | "name": "stdout", 291 | "text": [ 292 | "0 Go until jurong point, crazy.. Available only ...\n", 293 | "1 Ok lar... Joking wif u oni...\n", 294 | "2 Free entry in 2 a wkly comp to win FA Cup fina...\n", 295 | "3 U dun say so early hor... U c already then say...\n", 296 | "4 Nah I don't think he goes to usf, he lives aro...\n", 297 | " ... \n", 298 | "5567 This is the 2nd time we have tried 2 contact u...\n", 299 | "5568 Will ü b going to esplanade fr home?\n", 300 | "5569 Pity, * was in mood for that. So...any other s...\n", 301 | "5570 The guy did some bitching but I acted like i'd...\n", 302 | "5571 Rofl. 
Its true to its name\n", 303 | "Name: Message, Length: 5572, dtype: object\n" 304 | ] 305 | } 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "metadata": { 311 | "colab": { 312 | "base_uri": "https://localhost:8080/" 313 | }, 314 | "id": "cuWDNy5KQQjY", 315 | "outputId": "1a0a109b-d63a-4cf0-fe4e-b486f1d3d623" 316 | }, 317 | "source": [ 318 | "print(Y)" 319 | ], 320 | "execution_count": null, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "name": "stdout", 325 | "text": [ 326 | "0 1\n", 327 | "1 1\n", 328 | "2 0\n", 329 | "3 1\n", 330 | "4 1\n", 331 | " ..\n", 332 | "5567 0\n", 333 | "5568 1\n", 334 | "5569 1\n", 335 | "5570 1\n", 336 | "5571 1\n", 337 | "Name: Category, Length: 5572, dtype: object\n" 338 | ] 339 | } 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "id": "jvHyqdH8QZPH" 346 | }, 347 | "source": [ 348 | "Splitting the data into training data & test data" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "metadata": { 354 | "id": "RO2GmbSNQSQH" 355 | }, 356 | "source": [ 357 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=3)" 358 | ], 359 | "execution_count": null, 360 | "outputs": [] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "metadata": { 365 | "colab": { 366 | "base_uri": "https://localhost:8080/" 367 | }, 368 | "id": "tS2c7A4NRa46", 369 | "outputId": "5d44247f-65d0-457d-8a94-0fd8b45a3b72" 370 | }, 371 | "source": [ 372 | "print(X.shape)\n", 373 | "print(X_train.shape)\n", 374 | "print(X_test.shape)" 375 | ], 376 | "execution_count": null, 377 | "outputs": [ 378 | { 379 | "output_type": "stream", 380 | "name": "stdout", 381 | "text": [ 382 | "(5572,)\n", 383 | "(4457,)\n", 384 | "(1115,)\n" 385 | ] 386 | } 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": { 392 | "id": "wYQpiACGSBYM" 393 | }, 394 | "source": [ 395 | "Feature Extraction" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "metadata": { 401 
| "id": "nLs847nSRibm" 402 | }, 403 | "source": [ 404 | "# transform the text data to feature vectors that can be used as input to the Logistic regression\n", 405 | "\n", 406 | "feature_extraction = TfidfVectorizer(min_df = 1, stop_words='english', lowercase=True)\n", 407 | "\n", 408 | "X_train_features = feature_extraction.fit_transform(X_train)\n", 409 | "X_test_features = feature_extraction.transform(X_test)\n", 410 | "\n", 411 | "# convert Y_train and Y_test values to integers\n", 412 | "\n", 413 | "Y_train = Y_train.astype('int')\n", 414 | "Y_test = Y_test.astype('int')" 415 | ], 416 | "execution_count": null, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "dBMAcw9RUkUY" 423 | }, 424 | "source": [ 425 | "print(X_train)" 426 | ], 427 | "execution_count": null, 428 | "outputs": [] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "metadata": { 433 | "id": "1NFuGogZUpt0" 434 | }, 435 | "source": [ 436 | "print(X_train_features)" 437 | ], 438 | "execution_count": null, 439 | "outputs": [] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": { 444 | "id": "q86FvELbU_SV" 445 | }, 446 | "source": [ 447 | "Training the Model" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": { 453 | "id": "hV6BAIZQVBbo" 454 | }, 455 | "source": [ 456 | "Logistic Regression" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "metadata": { 462 | "id": "1JeAOwzpUv0V" 463 | }, 464 | "source": [ 465 | "model = LogisticRegression()" 466 | ], 467 | "execution_count": null, 468 | "outputs": [] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "metadata": { 473 | "colab": { 474 | "base_uri": "https://localhost:8080/" 475 | }, 476 | "id": "gWGRHWAPVI_z", 477 | "outputId": "1c5e15dd-0e07-4871-c4fa-b908ee400b55" 478 | }, 479 | "source": [ 480 | "# training the Logistic Regression model with the training data\n", 481 | "model.fit(X_train_features, Y_train)" 482 | ], 483 | "execution_count": null, 484 | 
"outputs": [ 485 | { 486 | "output_type": "execute_result", 487 | "data": { 488 | "text/plain": [ 489 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 490 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", 491 | " multi_class='auto', n_jobs=None, penalty='l2',\n", 492 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", 493 | " warm_start=False)" 494 | ] 495 | }, 496 | "metadata": {}, 497 | "execution_count": 18 498 | } 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": { 504 | "id": "wZ01fa8dVeL5" 505 | }, 506 | "source": [ 507 | "Evaluating the trained model" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "metadata": { 513 | "id": "ExiF2kKxVYtC" 514 | }, 515 | "source": [ 516 | "# prediction on training data\n", 517 | "\n", 518 | "prediction_on_training_data = model.predict(X_train_features)\n", 519 | "accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data)" 520 | ], 521 | "execution_count": null, 522 | "outputs": [] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "metadata": { 527 | "colab": { 528 | "base_uri": "https://localhost:8080/" 529 | }, 530 | "id": "o7t4DI5UWCkB", 531 | "outputId": "49fafbb0-0e7f-40c7-9ab7-4aea165731ee" 532 | }, 533 | "source": [ 534 | "print('Accuracy on training data : ', accuracy_on_training_data)" 535 | ], 536 | "execution_count": null, 537 | "outputs": [ 538 | { 539 | "output_type": "stream", 540 | "name": "stdout", 541 | "text": [ 542 | "Accuracy on training data : 0.9670181736594121\n" 543 | ] 544 | } 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "metadata": { 550 | "id": "cTin5rXTWKg3" 551 | }, 552 | "source": [ 553 | "# prediction on test data\n", 554 | "\n", 555 | "prediction_on_test_data = model.predict(X_test_features)\n", 556 | "accuracy_on_test_data = accuracy_score(Y_test, prediction_on_test_data)" 557 | ], 558 | "execution_count": null, 559 | "outputs": [] 560 | }, 561 | { 562 | "cell_type": 
"code", 563 | "metadata": { 564 | "colab": { 565 | "base_uri": "https://localhost:8080/" 566 | }, 567 | "id": "4gvoMK4OWnJY", 568 | "outputId": "7bf56da4-1987-4828-ea00-95c30fb083d1" 569 | }, 570 | "source": [ 571 | "print('Accuracy on test data : ', accuracy_on_test_data)" 572 | ], 573 | "execution_count": null, 574 | "outputs": [ 575 | { 576 | "output_type": "stream", 577 | "name": "stdout", 578 | "text": [ 579 | "Accuracy on test data : 0.9659192825112107\n" 580 | ] 581 | } 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": { 587 | "id": "bXdOKxYAXaHC" 588 | }, 589 | "source": [ 590 | "Building a Predictive System" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "metadata": { 596 | "colab": { 597 | "base_uri": "https://localhost:8080/" 598 | }, 599 | "id": "h60z1__mWql6", 600 | "outputId": "3aac53f3-13f2-4afb-e9f2-75d337cbcd44" 601 | }, 602 | "source": [ 603 | "input_mail = [\"I've been searching for the right words to thank you for this breather. I promise i wont take your help for granted and will fulfil my promise. 
You have been wonderful and a blessing at all times\"]\n", 604 | "\n", 605 | "# convert text to feature vectors\n", 606 | "input_data_features = feature_extraction.transform(input_mail)\n", 607 | "\n", 608 | "# making prediction\n", 609 | "\n", 610 | "prediction = model.predict(input_data_features)\n", 611 | "print(prediction)\n", 612 | "\n", 613 | "\n", 614 | "if (prediction[0]==1):\n", 615 | " print('Ham mail')\n", 616 | "\n", 617 | "else:\n", 618 | " print('Spam mail')" 619 | ], 620 | "execution_count": null, 621 | "outputs": [ 622 | { 623 | "output_type": "stream", 624 | "name": "stdout", 625 | "text": [ 626 | "[1]\n", 627 | "Ham mail\n" 628 | ] 629 | } 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "metadata": { 635 | "id": "v_LqbM_ZYwS1" 636 | }, 637 | "source": [], 638 | "execution_count": null, 639 | "outputs": [] 640 | } 641 | ] 642 | } --------------------------------------------------------------------------------