├── 0. datasets link.txt
├── 2.1. Google_Colaboratory_Basics.ipynb
├── 2.2. Python_Basics.ipynb
├── 2.3. Basic_Data_Types_in_Python.ipynb
├── 2.4. List_Tuple_Set_Dictionary.ipynb
├── 2.5. Operators_in_Python.ipynb
├── 2.6. if_else_statement_in_Python.ipynb
├── 2.7. Loops_in_Python.ipynb
├── 2.8. Functions_in_Python.ipynb
├── 3.1.Complete_Numpy_Tutorial_in_Python.ipynb
├── 3.2. Complete_Pandas_Tutorial_in_Python.ipynb
├── 3.3. Matplotlib_Tutorial_in_Python.ipynb
├── 3.4. Seaborn_Tutorial_in_Python.ipynb
├── 4.10. Text_Data_Pre_Processing_Use_Case.ipynb
├── 4.2. Importing_Datasets_through_Kaggle_API.ipynb
├── 4.3. Handling_Missing_Values.ipynb
├── 4.4. Data_Standardization.ipynb
├── 4.5. Label_Encoding.ipynb
├── 4.6. Train_Test_Split.ipynb
├── 4.7. Handling_imbalanced_Dataset.ipynb
├── 4.8. Feature_extraction_of_Text_data_using_Tf_idf_Vectorizer.ipynb
├── 4.9. Numerical_Dataset_Pre_Processing_Use_Case.ipynb
├── ML Use Case 1. Rock_vs_Mine_Prediction.ipynb
├── ML Use Case 3. Spam_Mail_Prediction_using_Machine_Learning.ipynb
└── ML Use case 2. Diabetes_Prediction.ipynb
/0. datasets link.txt:
--------------------------------------------------------------------------------
1 | All Datasets link: https://drive.google.com/drive/folders/1NEs0rpFelfzSWAJ6y832EDpW9ImQH4QJ?usp=sharing
2 |
--------------------------------------------------------------------------------
/2.1. Google_Colaboratory_Basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "5vIKHkjTlaOT"
21 | },
22 | "source": [
23 | "System Specifications"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "metadata": {
29 | "colab": {
30 | "base_uri": "https://localhost:8080/"
31 | },
32 | "id": "TyiPSq1olCpS",
33 | "outputId": "98abecda-ce70-4bc1-aec6-b6138433c2b7"
34 | },
35 | "source": [
36 | "!cat /proc/cpuinfo"
37 | ],
38 | "execution_count": null,
39 | "outputs": [
40 | {
41 | "output_type": "stream",
42 | "text": [
43 | "processor\t: 0\n",
44 | "vendor_id\t: GenuineIntel\n",
45 | "cpu family\t: 6\n",
46 | "model\t\t: 63\n",
47 | "model name\t: Intel(R) Xeon(R) CPU @ 2.30GHz\n",
48 | "stepping\t: 0\n",
49 | "microcode\t: 0x1\n",
50 | "cpu MHz\t\t: 2299.998\n",
51 | "cache size\t: 46080 KB\n",
52 | "physical id\t: 0\n",
53 | "siblings\t: 2\n",
54 | "core id\t\t: 0\n",
55 | "cpu cores\t: 1\n",
56 | "apicid\t\t: 0\n",
57 | "initial apicid\t: 0\n",
58 | "fpu\t\t: yes\n",
59 | "fpu_exception\t: yes\n",
60 | "cpuid level\t: 13\n",
61 | "wp\t\t: yes\n",
62 | "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities\n",
63 | "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs\n",
64 | "bogomips\t: 4599.99\n",
65 | "clflush size\t: 64\n",
66 | "cache_alignment\t: 64\n",
67 | "address sizes\t: 46 bits physical, 48 bits virtual\n",
68 | "power management:\n",
69 | "\n",
70 | "processor\t: 1\n",
71 | "vendor_id\t: GenuineIntel\n",
72 | "cpu family\t: 6\n",
73 | "model\t\t: 63\n",
74 | "model name\t: Intel(R) Xeon(R) CPU @ 2.30GHz\n",
75 | "stepping\t: 0\n",
76 | "microcode\t: 0x1\n",
77 | "cpu MHz\t\t: 2299.998\n",
78 | "cache size\t: 46080 KB\n",
79 | "physical id\t: 0\n",
80 | "siblings\t: 2\n",
81 | "core id\t\t: 0\n",
82 | "cpu cores\t: 1\n",
83 | "apicid\t\t: 1\n",
84 | "initial apicid\t: 1\n",
85 | "fpu\t\t: yes\n",
86 | "fpu_exception\t: yes\n",
87 | "cpuid level\t: 13\n",
88 | "wp\t\t: yes\n",
89 | "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid xsaveopt arat md_clear arch_capabilities\n",
90 | "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs\n",
91 | "bogomips\t: 4599.99\n",
92 | "clflush size\t: 64\n",
93 | "cache_alignment\t: 64\n",
94 | "address sizes\t: 46 bits physical, 48 bits virtual\n",
95 | "power management:\n",
96 | "\n"
97 | ],
98 | "name": "stdout"
99 | }
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "colab": {
106 | "base_uri": "https://localhost:8080/"
107 | },
108 | "id": "0Zun3WHplom1",
109 | "outputId": "bfb6719c-9520-4db4-ae2f-59a71f0f0f8e"
110 | },
111 | "source": [
112 | "!cat /proc/meminfo"
113 | ],
114 | "execution_count": null,
115 | "outputs": [
116 | {
117 | "output_type": "stream",
118 | "text": [
119 | "MemTotal: 13305332 kB\n",
120 | "MemFree: 10739068 kB\n",
121 | "MemAvailable: 12518448 kB\n",
122 | "Buffers: 83580 kB\n",
123 | "Cached: 1841060 kB\n",
124 | "SwapCached: 0 kB\n",
125 | "Active: 984448 kB\n",
126 | "Inactive: 1350192 kB\n",
127 | "Active(anon): 382636 kB\n",
128 | "Inactive(anon): 416 kB\n",
129 | "Active(file): 601812 kB\n",
130 | "Inactive(file): 1349776 kB\n",
131 | "Unevictable: 0 kB\n",
132 | "Mlocked: 0 kB\n",
133 | "SwapTotal: 0 kB\n",
134 | "SwapFree: 0 kB\n",
135 | "Dirty: 760 kB\n",
136 | "Writeback: 0 kB\n",
137 | "AnonPages: 410028 kB\n",
138 | "Mapped: 226724 kB\n",
139 | "Shmem: 1144 kB\n",
140 | "KReclaimable: 138524 kB\n",
141 | "Slab: 182956 kB\n",
142 | "SReclaimable: 138524 kB\n",
143 | "SUnreclaim: 44432 kB\n",
144 | "KernelStack: 4320 kB\n",
145 | "PageTables: 5524 kB\n",
146 | "NFS_Unstable: 0 kB\n",
147 | "Bounce: 0 kB\n",
148 | "WritebackTmp: 0 kB\n",
149 | "CommitLimit: 6652664 kB\n",
150 | "Committed_AS: 3161348 kB\n",
151 | "VmallocTotal: 34359738367 kB\n",
152 | "VmallocUsed: 6668 kB\n",
153 | "VmallocChunk: 0 kB\n",
154 | "Percpu: 1400 kB\n",
155 | "AnonHugePages: 0 kB\n",
156 | "ShmemHugePages: 0 kB\n",
157 | "ShmemPmdMapped: 0 kB\n",
158 | "FileHugePages: 0 kB\n",
159 | "FilePmdMapped: 0 kB\n",
160 | "HugePages_Total: 0\n",
161 | "HugePages_Free: 0\n",
162 | "HugePages_Rsvd: 0\n",
163 | "HugePages_Surp: 0\n",
164 | "Hugepagesize: 2048 kB\n",
165 | "Hugetlb: 0 kB\n",
166 | "DirectMap4k: 89288 kB\n",
167 | "DirectMap2M: 5152768 kB\n",
168 | "DirectMap1G: 10485760 kB\n"
169 | ],
170 | "name": "stdout"
171 | }
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {
177 | "id": "dTDRst8jmOLk"
178 | },
179 | "source": [
180 | "Installing Libraries"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "metadata": {
186 | "colab": {
187 | "base_uri": "https://localhost:8080/"
188 | },
189 | "id": "X1CUZFiolw_0",
190 | "outputId": "8a60e585-2168-4a6b-a0c1-772eac5826f7"
191 | },
192 | "source": [
193 | "!pip install pandas"
194 | ],
195 | "execution_count": null,
196 | "outputs": [
197 | {
198 | "output_type": "stream",
199 | "text": [
200 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (1.1.5)\n",
201 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas) (2018.9)\n",
202 | "Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas) (1.19.5)\n",
203 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2.8.1)\n",
204 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n"
205 | ],
206 | "name": "stdout"
207 | }
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "metadata": {
213 | "id": "l0l4wa_amYId"
214 | },
215 | "source": [
216 | "import pandas as pd"
217 | ],
218 | "execution_count": null,
219 | "outputs": []
220 | },
221 | {
222 | "cell_type": "code",
223 | "metadata": {
224 | "id": "vxc28ALHmenl"
225 | },
226 | "source": [
227 | "df = pd.read_csv('/content/BostonHousing.csv')"
228 | ],
229 | "execution_count": null,
230 | "outputs": []
231 | },
232 | {
233 | "cell_type": "code",
234 | "metadata": {
235 | "colab": {
236 | "base_uri": "https://localhost:8080/",
237 | "height": 196
238 | },
239 | "id": "A2ohNqWimrI7",
240 | "outputId": "5df5156d-40a5-49cb-ee63-a8755d4c376b"
241 | },
242 | "source": [
243 | "df.head()"
244 | ],
245 | "execution_count": null,
246 | "outputs": [
247 | {
248 | "output_type": "execute_result",
249 | "data": {
250 | "text/html": [
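251 | "<div>\n",
252 | "<style scoped>\n",
253 | "    .dataframe tbody tr th:only-of-type {\n",
254 | "        vertical-align: middle;\n",
255 | "    }\n",
256 | "\n",
257 | "    .dataframe tbody tr th {\n",
258 | "        vertical-align: top;\n",
259 | "    }\n",
260 | "\n",
261 | "    .dataframe thead th {\n",
262 | "        text-align: right;\n",
263 | "    }\n",
264 | "</style>\n",
265 | "<table border=\"1\" class=\"dataframe\">\n",
266 | "  <thead>\n",
267 | "    <tr style=\"text-align: right;\">\n",
268 | "      <th></th>\n",
269 | "      <th>crim</th>\n",
270 | "      <th>zn</th>\n",
271 | "      <th>indus</th>\n",
272 | "      <th>chas</th>\n",
273 | "      <th>nox</th>\n",
274 | "      <th>rm</th>\n",
275 | "      <th>age</th>\n",
276 | "      <th>dis</th>\n",
277 | "      <th>rad</th>\n",
278 | "      <th>tax</th>\n",
279 | "      <th>ptratio</th>\n",
280 | "      <th>b</th>\n",
281 | "      <th>lstat</th>\n",
282 | "      <th>price</th>\n",
283 | "    </tr>\n",
284 | "  </thead>\n",
285 | "  <tbody>\n",
286 | "    <tr>\n",
287 | "      <th>0</th>\n",
288 | "      <td>0.00632</td>\n",
289 | "      <td>18.0</td>\n",
290 | "      <td>2.31</td>\n",
291 | "      <td>0</td>\n",
292 | "      <td>0.538</td>\n",
293 | "      <td>6.575</td>\n",
294 | "      <td>65.2</td>\n",
295 | "      <td>4.0900</td>\n",
296 | "      <td>1</td>\n",
297 | "      <td>296</td>\n",
298 | "      <td>15.3</td>\n",
299 | "      <td>396.90</td>\n",
300 | "      <td>4.98</td>\n",
301 | "      <td>24.0</td>\n",
302 | "    </tr>\n",
303 | "    <tr>\n",
304 | "      <th>1</th>\n",
305 | "      <td>0.02731</td>\n",
306 | "      <td>0.0</td>\n",
307 | "      <td>7.07</td>\n",
308 | "      <td>0</td>\n",
309 | "      <td>0.469</td>\n",
310 | "      <td>6.421</td>\n",
311 | "      <td>78.9</td>\n",
312 | "      <td>4.9671</td>\n",
313 | "      <td>2</td>\n",
314 | "      <td>242</td>\n",
315 | "      <td>17.8</td>\n",
316 | "      <td>396.90</td>\n",
317 | "      <td>9.14</td>\n",
318 | "      <td>21.6</td>\n",
319 | "    </tr>\n",
320 | "    <tr>\n",
321 | "      <th>2</th>\n",
322 | "      <td>0.02729</td>\n",
323 | "      <td>0.0</td>\n",
324 | "      <td>7.07</td>\n",
325 | "      <td>0</td>\n",
326 | "      <td>0.469</td>\n",
327 | "      <td>7.185</td>\n",
328 | "      <td>61.1</td>\n",
329 | "      <td>4.9671</td>\n",
330 | "      <td>2</td>\n",
331 | "      <td>242</td>\n",
332 | "      <td>17.8</td>\n",
333 | "      <td>392.83</td>\n",
334 | "      <td>4.03</td>\n",
335 | "      <td>34.7</td>\n",
336 | "    </tr>\n",
337 | "    <tr>\n",
338 | "      <th>3</th>\n",
339 | "      <td>0.03237</td>\n",
340 | "      <td>0.0</td>\n",
341 | "      <td>2.18</td>\n",
342 | "      <td>0</td>\n",
343 | "      <td>0.458</td>\n",
344 | "      <td>6.998</td>\n",
345 | "      <td>45.8</td>\n",
346 | "      <td>6.0622</td>\n",
347 | "      <td>3</td>\n",
348 | "      <td>222</td>\n",
349 | "      <td>18.7</td>\n",
350 | "      <td>394.63</td>\n",
351 | "      <td>2.94</td>\n",
352 | "      <td>33.4</td>\n",
353 | "    </tr>\n",
354 | "    <tr>\n",
355 | "      <th>4</th>\n",
356 | "      <td>0.06905</td>\n",
357 | "      <td>0.0</td>\n",
358 | "      <td>2.18</td>\n",
359 | "      <td>0</td>\n",
360 | "      <td>0.458</td>\n",
361 | "      <td>7.147</td>\n",
362 | "      <td>54.2</td>\n",
363 | "      <td>6.0622</td>\n",
364 | "      <td>3</td>\n",
365 | "      <td>222</td>\n",
366 | "      <td>18.7</td>\n",
367 | "      <td>396.90</td>\n",
368 | "      <td>5.33</td>\n",
369 | "      <td>36.2</td>\n",
370 | "    </tr>\n",
371 | "  </tbody>\n",
372 | "</table>\n",
373 | "</div>"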
374 | ],
375 | "text/plain": [
376 | " crim zn indus chas nox ... tax ptratio b lstat price\n",
377 | "0 0.00632 18.0 2.31 0 0.538 ... 296 15.3 396.90 4.98 24.0\n",
378 | "1 0.02731 0.0 7.07 0 0.469 ... 242 17.8 396.90 9.14 21.6\n",
379 | "2 0.02729 0.0 7.07 0 0.469 ... 242 17.8 392.83 4.03 34.7\n",
380 | "3 0.03237 0.0 2.18 0 0.458 ... 222 18.7 394.63 2.94 33.4\n",
381 | "4 0.06905 0.0 2.18 0 0.458 ... 222 18.7 396.90 5.33 36.2\n",
382 | "\n",
383 | "[5 rows x 14 columns]"
384 | ]
385 | },
386 | "metadata": {
387 | "tags": []
388 | },
389 | "execution_count": 6
390 | }
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "metadata": {
396 | "colab": {
397 | "base_uri": "https://localhost:8080/"
398 | },
399 | "id": "wN1PzbbKmu4i",
400 | "outputId": "963c27ce-c37c-4315-f122-3af59a3a414b"
401 | },
402 | "source": [
403 | "print('Machine Learning')"
404 | ],
405 | "execution_count": null,
406 | "outputs": [
407 | {
408 | "output_type": "stream",
409 | "text": [
410 | "Machine Learning\n"
411 | ],
412 | "name": "stdout"
413 | }
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "metadata": {
419 | "colab": {
420 | "base_uri": "https://localhost:8080/"
421 | },
422 | "id": "j6NenuZfm2if",
423 | "outputId": "585f53e5-faa6-405b-b75b-c22bbebb13e2"
424 | },
425 | "source": [
426 | "!ls"
427 | ],
428 | "execution_count": null,
429 | "outputs": [
430 | {
431 | "output_type": "stream",
432 | "text": [
433 | "BostonHousing.csv sample_data\n"
434 | ],
435 | "name": "stdout"
436 | }
437 | ]
438 | }
439 | ]
440 | }
--------------------------------------------------------------------------------
/2.2. Python_Basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": [
8 | "-WvGy_yEMZqy",
9 | "VoF-mwOrNd47",
10 | "TF5JGYWwOCZ9",
11 | "0Z9JChvaPpsm"
12 | ]
13 | },
14 | "kernelspec": {
15 | "name": "python3",
16 | "display_name": "Python 3"
17 | }
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "Osmh3yTEEv9q"
24 | },
25 | "source": [
26 | "Programming Languages used for Machine Learning:\n",
27 | "\n",
28 | "\n",
29 | "1. Python\n",
30 | "2. R\n",
31 | "\n"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "-WvGy_yEMZqy"
38 | },
39 | "source": [
40 | "# print function"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "metadata": {
46 | "id": "GtMDf70aAf6y",
47 | "colab": {
48 | "base_uri": "https://localhost:8080/"
49 | },
50 | "outputId": "2490e348-2413-4793-94fc-6b59a3d16b6d"
51 | },
52 | "source": [
53 | "print(\"Machine Learning\")"
54 | ],
55 | "execution_count": null,
56 | "outputs": [
57 | {
58 | "output_type": "stream",
59 | "text": [
60 | "Machine Learning\n"
61 | ],
62 | "name": "stdout"
63 | }
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "colab": {
70 | "base_uri": "https://localhost:8080/"
71 | },
72 | "id": "jUmCKDnfMwQI",
73 | "outputId": "a4fdd49f-7662-4fed-87f6-9a447485b8c3"
74 | },
75 | "source": [
76 | "print(\"Machine Learning\" + \" Projects\")"
77 | ],
78 | "execution_count": null,
79 | "outputs": [
80 | {
81 | "output_type": "stream",
82 | "text": [
83 | "Machine Learning Projects\n"
84 | ],
85 | "name": "stdout"
86 | }
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "colab": {
93 | "base_uri": "https://localhost:8080/"
94 | },
95 | "id": "cALbaqqGNBHZ",
96 | "outputId": "dd9ff49b-cc05-419d-8b5b-4fe0c2389de3"
97 | },
98 | "source": [
99 | "print(8)"
100 | ],
101 | "execution_count": null,
102 | "outputs": [
103 | {
104 | "output_type": "stream",
105 | "text": [
106 | "8\n"
107 | ],
108 | "name": "stdout"
109 | }
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "metadata": {
115 | "colab": {
116 | "base_uri": "https://localhost:8080/"
117 | },
118 | "id": "VIIKdqyhNJct",
119 | "outputId": "363e9cc6-6a7a-4c14-ffde-3550abd9cb64"
120 | },
121 | "source": [
122 | "print(8+3)"
123 | ],
124 | "execution_count": null,
125 | "outputs": [
126 | {
127 | "output_type": "stream",
128 | "text": [
129 | "11\n"
130 | ],
131 | "name": "stdout"
132 | }
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {
138 | "id": "VoF-mwOrNd47"
139 | },
140 | "source": [
141 | "# Basic Data Types:\n",
142 | "\n",
143 | "\n",
144 | "1. int\n",
145 | "2. float\n",
146 | "3. str\n",
147 | "\n",
148 | "\n",
149 | "\n"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "metadata": {
155 | "colab": {
156 | "base_uri": "https://localhost:8080/"
157 | },
158 | "id": "c2C2CWT2NpAD",
159 | "outputId": "693ca04f-08c3-485f-dfe4-b81611cc7d12"
160 | },
161 | "source": [
162 | "type(8)"
163 | ],
164 | "execution_count": null,
165 | "outputs": [
166 | {
167 | "output_type": "execute_result",
168 | "data": {
169 | "text/plain": [
170 | "int"
171 | ]
172 | },
173 | "metadata": {
174 | "tags": []
175 | },
176 | "execution_count": 10
177 | }
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "metadata": {
183 | "colab": {
184 | "base_uri": "https://localhost:8080/"
185 | },
186 | "id": "ruZzAZODN171",
187 | "outputId": "94a876c3-3bcd-4bc1-85a2-5b10be2d9646"
188 | },
189 | "source": [
190 | "type(5.3)"
191 | ],
192 | "execution_count": null,
193 | "outputs": [
194 | {
195 | "output_type": "execute_result",
196 | "data": {
197 | "text/plain": [
198 | "float"
199 | ]
200 | },
201 | "metadata": {
202 | "tags": []
203 | },
204 | "execution_count": 11
205 | }
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "metadata": {
211 | "colab": {
212 | "base_uri": "https://localhost:8080/"
213 | },
214 | "id": "UM8_vIwfN4dn",
215 | "outputId": "a58c3967-088c-4b38-c280-dce8831cc6aa"
216 | },
217 | "source": [
218 | "type(\"english\")"
219 | ],
220 | "execution_count": null,
221 | "outputs": [
222 | {
223 | "output_type": "execute_result",
224 | "data": {
225 | "text/plain": [
226 | "str"
227 | ]
228 | },
229 | "metadata": {
230 | "tags": []
231 | },
232 | "execution_count": 12
233 | }
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {
239 | "id": "TF5JGYWwOCZ9"
240 | },
241 | "source": [
242 | "# Constants & Variables"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "metadata": {
248 | "colab": {
249 | "base_uri": "https://localhost:8080/"
250 | },
251 | "id": "ln8ikHIZN8mZ",
252 | "outputId": "79f115f1-fe00-4124-c487-7f24919d75e7"
253 | },
254 | "source": [
255 | "marvel_super_hero = \"Iron Man\"\n",
256 | "print(marvel_super_hero)"
257 | ],
258 | "execution_count": null,
259 | "outputs": [
260 | {
261 | "output_type": "stream",
262 | "text": [
263 | "Iron Man\n"
264 | ],
265 | "name": "stdout"
266 | }
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "metadata": {
272 | "colab": {
273 | "base_uri": "https://localhost:8080/"
274 | },
275 | "id": "2MT4Km5gOecW",
276 | "outputId": "586a1876-c3a0-4fbb-c05b-2f7ac065fc9e"
277 | },
278 | "source": [
279 | "marvel_super_hero = \"Captain America\"\n",
280 | "print(marvel_super_hero)"
281 | ],
282 | "execution_count": null,
283 | "outputs": [
284 | {
285 | "output_type": "stream",
286 | "text": [
287 | "Captain America\n"
288 | ],
289 | "name": "stdout"
290 | }
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "metadata": {
296 | "colab": {
297 | "base_uri": "https://localhost:8080/"
298 | },
299 | "id": "26ZG7J36O1Gm",
300 | "outputId": "4a1473ac-f3ec-4b72-fce0-584e3bd2c5d6"
301 | },
302 | "source": [
303 | "hero1 , hero2 , hero3 = \"Iron Man\" , \"Captain America\" , \"Bat Man\"\n",
304 | "print(hero1)\n",
305 | "print(hero2)\n",
306 | "print(hero3)"
307 | ],
308 | "execution_count": null,
309 | "outputs": [
310 | {
311 | "output_type": "stream",
312 | "text": [
313 | "Iron Man\n",
314 | "Captain America\n",
315 | "Bat Man\n"
316 | ],
317 | "name": "stdout"
318 | }
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "metadata": {
324 | "colab": {
325 | "base_uri": "https://localhost:8080/"
326 | },
327 | "id": "Y9nqEMNPPM26",
328 | "outputId": "283f2b35-9761-4baa-fd9b-75842416d2a3"
329 | },
330 | "source": [
331 | "x = y = z = 23\n",
332 | "print(x)\n",
333 | "print(y)\n",
334 | "print(z)"
335 | ],
336 | "execution_count": null,
337 | "outputs": [
338 | {
339 | "output_type": "stream",
340 | "text": [
341 | "23\n",
342 | "23\n",
343 | "23\n"
344 | ],
345 | "name": "stdout"
346 | }
347 | ]
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {
352 | "id": "0Z9JChvaPpsm"
353 | },
354 | "source": [
355 | "# input Function"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "metadata": {
361 | "colab": {
362 | "base_uri": "https://localhost:8080/"
363 | },
364 | "id": "roZJU-IXPrvk",
365 | "outputId": "44179667-ddfc-4d42-bf5b-606397aea2ee"
366 | },
367 | "source": [
368 | "number_1 = int(input(\"Enter the first number : \"))\n",
369 | "number_2 = int(input(\"Enter the second number : \"))\n",
370 | "\n",
371 | "sum = number_1 + number_2\n",
372 | "print(sum)"
373 | ],
374 | "execution_count": null,
375 | "outputs": [
376 | {
377 | "output_type": "stream",
378 | "text": [
379 | "Enter the first number : 23\n",
380 | "Enter the second number : 23\n",
381 | "46\n"
382 | ],
383 | "name": "stdout"
384 | }
385 | ]
386 | },
387 | {
388 | "cell_type": "code",
389 | "metadata": {
390 | "colab": {
391 | "base_uri": "https://localhost:8080/"
392 | },
393 | "id": "HKcz9StaP-PF",
394 | "outputId": "001eeff8-e36b-4286-a12a-e26e54ab7ed9"
395 | },
396 | "source": [
397 | "# changing the data type in python:\n",
398 | "num = 5\n",
399 | "print(float(num))\n"
400 | ],
401 | "execution_count": null,
402 | "outputs": [
403 | {
404 | "output_type": "stream",
405 | "text": [
406 | "5.0\n"
407 | ],
408 | "name": "stdout"
409 | }
410 | ]
411 | }
412 | ]
413 | }
--------------------------------------------------------------------------------
/2.3. Basic_Data_Types_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "apWclQaRyr9i"
21 | },
22 | "source": [
23 | "Basic Data Types in Python:\n",
24 | "1. Integer\n",
25 | "2. Floating Point\n",
26 | "3. Complex\n",
27 | "4. Boolean\n",
28 | "5. String"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "metadata": {
34 | "colab": {
35 | "base_uri": "https://localhost:8080/"
36 | },
37 | "id": "uQkDMpadyPlK",
38 | "outputId": "c17f5897-256e-46cc-c418-6b92928ce0b1"
39 | },
40 | "source": [
41 | "# integers\n",
42 | "a = 8\n",
43 | "print(a)"
44 | ],
45 | "execution_count": null,
46 | "outputs": [
47 | {
48 | "output_type": "stream",
49 | "text": [
50 | "8\n"
51 | ],
52 | "name": "stdout"
53 | }
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "metadata": {
59 | "colab": {
60 | "base_uri": "https://localhost:8080/"
61 | },
62 | "id": "ckHsQXyk0eel",
63 | "outputId": "f7aa3afe-16f9-4e72-a669-8d0fb0524b62"
64 | },
65 | "source": [
66 | "type(a)"
67 | ],
68 | "execution_count": null,
69 | "outputs": [
70 | {
71 | "output_type": "execute_result",
72 | "data": {
73 | "text/plain": [
74 | "int"
75 | ]
76 | },
77 | "metadata": {
78 | "tags": []
79 | },
80 | "execution_count": 2
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "colab": {
88 | "base_uri": "https://localhost:8080/"
89 | },
90 | "id": "xVOI4yFx0mhC",
91 | "outputId": "db02e23c-dd5f-4e71-80ba-3a8148a6cd74"
92 | },
93 | "source": [
94 | "# floating point\n",
95 | "b = 2.3\n",
96 | "print(b)\n",
97 | "type(b)"
98 | ],
99 | "execution_count": null,
100 | "outputs": [
101 | {
102 | "output_type": "stream",
103 | "text": [
104 | "2.3\n"
105 | ],
106 | "name": "stdout"
107 | },
108 | {
109 | "output_type": "execute_result",
110 | "data": {
111 | "text/plain": [
112 | "float"
113 | ]
114 | },
115 | "metadata": {
116 | "tags": []
117 | },
118 | "execution_count": 3
119 | }
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "metadata": {
125 | "colab": {
126 | "base_uri": "https://localhost:8080/"
127 | },
128 | "id": "WzWhjFIE0vkS",
129 | "outputId": "73623a3d-f2b4-4003-a7c9-4bdaa7d9a1ac"
130 | },
131 | "source": [
132 | "# complex numbers\n",
133 | "c = 1 + 3j\n",
134 | "print(c)\n",
135 | "type(c)"
136 | ],
137 | "execution_count": null,
138 | "outputs": [
139 | {
140 | "output_type": "stream",
141 | "text": [
142 | "(1+3j)\n"
143 | ],
144 | "name": "stdout"
145 | },
146 | {
147 | "output_type": "execute_result",
148 | "data": {
149 | "text/plain": [
150 | "complex"
151 | ]
152 | },
153 | "metadata": {
154 | "tags": []
155 | },
156 | "execution_count": 4
157 | }
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {
163 | "id": "71h0zZTz1Cl7"
164 | },
165 | "source": [
166 | "Conversion of one data type to another"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "metadata": {
172 | "colab": {
173 | "base_uri": "https://localhost:8080/"
174 | },
175 | "id": "gx311AYb09AG",
176 | "outputId": "247fa75c-199c-4f28-cd39-280d974a8fef"
177 | },
178 | "source": [
179 | "# int to float\n",
180 | "x = 10\n",
181 | "print(x)\n",
182 | "type(x)"
183 | ],
184 | "execution_count": null,
185 | "outputs": [
186 | {
187 | "output_type": "stream",
188 | "text": [
189 | "10\n"
190 | ],
191 | "name": "stdout"
192 | },
193 | {
194 | "output_type": "execute_result",
195 | "data": {
196 | "text/plain": [
197 | "int"
198 | ]
199 | },
200 | "metadata": {
201 | "tags": []
202 | },
203 | "execution_count": 5
204 | }
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "metadata": {
210 | "colab": {
211 | "base_uri": "https://localhost:8080/"
212 | },
213 | "id": "w_XTYMqg1MWk",
214 | "outputId": "016aba8f-9d9e-43d9-8a5e-f9f0c07d144c"
215 | },
216 | "source": [
217 | "y = float(x)\n",
218 | "print(y)\n",
219 | "type(y)"
220 | ],
221 | "execution_count": null,
222 | "outputs": [
223 | {
224 | "output_type": "stream",
225 | "text": [
226 | "10.0\n"
227 | ],
228 | "name": "stdout"
229 | },
230 | {
231 | "output_type": "execute_result",
232 | "data": {
233 | "text/plain": [
234 | "float"
235 | ]
236 | },
237 | "metadata": {
238 | "tags": []
239 | },
240 | "execution_count": 6
241 | }
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "metadata": {
247 | "colab": {
248 | "base_uri": "https://localhost:8080/"
249 | },
250 | "id": "hE3F0HUx1XnD",
251 | "outputId": "3483c6b6-c42c-47a8-ad5f-b0b2daf04d15"
252 | },
253 | "source": [
254 | "# float to int\n",
255 | "x = 5.88\n",
256 | "print(x)\n",
257 | "type(x)"
258 | ],
259 | "execution_count": null,
260 | "outputs": [
261 | {
262 | "output_type": "stream",
263 | "text": [
264 | "5.88\n"
265 | ],
266 | "name": "stdout"
267 | },
268 | {
269 | "output_type": "execute_result",
270 | "data": {
271 | "text/plain": [
272 | "float"
273 | ]
274 | },
275 | "metadata": {
276 | "tags": []
277 | },
278 | "execution_count": 7
279 | }
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "metadata": {
285 | "colab": {
286 | "base_uri": "https://localhost:8080/"
287 | },
288 | "id": "7DQnV6kO1jG1",
289 | "outputId": "88458029-b5de-4291-d443-20f5cce74ff9"
290 | },
291 | "source": [
292 | "y = int(x)\n",
293 | "print(y)\n",
294 | "type(y)"
295 | ],
296 | "execution_count": null,
297 | "outputs": [
298 | {
299 | "output_type": "stream",
300 | "text": [
301 | "5\n"
302 | ],
303 | "name": "stdout"
304 | },
305 | {
306 | "output_type": "execute_result",
307 | "data": {
308 | "text/plain": [
309 | "int"
310 | ]
311 | },
312 | "metadata": {
313 | "tags": []
314 | },
315 | "execution_count": 8
316 | }
317 | ]
318 | },
319 | {
320 | "cell_type": "markdown",
321 | "metadata": {
322 | "id": "XkbofyMg103l"
323 | },
324 | "source": [
325 | "Boolean"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {
331 | "id": "-ZIfAmQ9121L"
332 | },
333 | "source": [
334 | "1. True\n",
335 | "2. False"
336 | ]
337 | },
338 | {
339 | "cell_type": "code",
340 | "metadata": {
341 | "colab": {
342 | "base_uri": "https://localhost:8080/"
343 | },
344 | "id": "t_qqKUR91qEX",
345 | "outputId": "da4017f7-65b3-4832-f182-78032f939863"
346 | },
347 | "source": [
348 | "a = True\n",
349 | "print(a)\n",
350 | "type(a)"
351 | ],
352 | "execution_count": null,
353 | "outputs": [
354 | {
355 | "output_type": "stream",
356 | "text": [
357 | "True\n"
358 | ],
359 | "name": "stdout"
360 | },
361 | {
362 | "output_type": "execute_result",
363 | "data": {
364 | "text/plain": [
365 | "bool"
366 | ]
367 | },
368 | "metadata": {
369 | "tags": []
370 | },
371 | "execution_count": 9
372 | }
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "metadata": {
378 | "colab": {
379 | "base_uri": "https://localhost:8080/"
380 | },
381 | "id": "43oxnDeO2BLR",
382 | "outputId": "0914b00f-efcf-4ab0-e357-420e5a9c91f0"
383 | },
384 | "source": [
385 | "b = False\n",
386 | "print(b)\n",
387 | "type(b)"
388 | ],
389 | "execution_count": null,
390 | "outputs": [
391 | {
392 | "output_type": "stream",
393 | "text": [
394 | "False\n"
395 | ],
396 | "name": "stdout"
397 | },
398 | {
399 | "output_type": "execute_result",
400 | "data": {
401 | "text/plain": [
402 | "bool"
403 | ]
404 | },
405 | "metadata": {
406 | "tags": []
407 | },
408 | "execution_count": 10
409 | }
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "metadata": {
415 | "colab": {
416 | "base_uri": "https://localhost:8080/"
417 | },
418 | "id": "Sgjm0EoQ2GzH",
419 | "outputId": "cc029bff-4376-4ac6-d161-76a82352ea71"
420 | },
421 | "source": [
422 | "a = 7 < 3\n",
423 | "print(a)\n",
424 | "type(a)"
425 | ],
426 | "execution_count": null,
427 | "outputs": [
428 | {
429 | "output_type": "stream",
430 | "text": [
431 | "False\n"
432 | ],
433 | "name": "stdout"
434 | },
435 | {
436 | "output_type": "execute_result",
437 | "data": {
438 | "text/plain": [
439 | "bool"
440 | ]
441 | },
442 | "metadata": {
443 | "tags": []
444 | },
445 | "execution_count": 12
446 | }
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "metadata": {
452 | "colab": {
453 | "base_uri": "https://localhost:8080/"
454 | },
455 | "id": "dUfCMr5h2Vly",
456 | "outputId": "676f19ce-6b97-4ab5-d872-1c868a15d668"
457 | },
458 | "source": [
459 | "a = 7 > 3\n",
460 | "print(a)\n",
461 | "type(a)"
462 | ],
463 | "execution_count": null,
464 | "outputs": [
465 | {
466 | "output_type": "stream",
467 | "text": [
468 | "True\n"
469 | ],
470 | "name": "stdout"
471 | },
472 | {
473 | "output_type": "execute_result",
474 | "data": {
475 | "text/plain": [
476 | "bool"
477 | ]
478 | },
479 | "metadata": {
480 | "tags": []
481 | },
482 | "execution_count": 13
483 | }
484 | ]
485 | },
486 | {
487 | "cell_type": "markdown",
488 | "metadata": {
489 | "id": "duVUENIZ2jwq"
490 | },
491 | "source": [
492 | "String"
493 | ]
494 | },
495 | {
496 | "cell_type": "code",
497 | "metadata": {
498 | "colab": {
499 | "base_uri": "https://localhost:8080/"
500 | },
501 | "id": "e_LEA7R52fUz",
502 | "outputId": "ccb4be4a-8b78-4395-9bd4-9e16322e5c5e"
503 | },
504 | "source": [
505 | "print(\"Machine Learning\")"
506 | ],
507 | "execution_count": null,
508 | "outputs": [
509 | {
510 | "output_type": "stream",
511 | "text": [
512 | "Machine Learning\n"
513 | ],
514 | "name": "stdout"
515 | }
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "metadata": {
521 | "colab": {
522 | "base_uri": "https://localhost:8080/"
523 | },
524 | "id": "CSs8QHe32oP1",
525 | "outputId": "7ec108ea-42cd-49dd-d1bf-b17038632c60"
526 | },
527 | "source": [
528 | "print('Machine Learning')"
529 | ],
530 | "execution_count": null,
531 | "outputs": [
532 | {
533 | "output_type": "stream",
534 | "text": [
535 | "Machine Learning\n"
536 | ],
537 | "name": "stdout"
538 | }
539 | ]
540 | },
541 | {
542 | "cell_type": "code",
543 | "metadata": {
544 | "colab": {
545 | "base_uri": "https://localhost:8080/"
546 | },
547 | "id": "H3ye_W-52tJL",
548 | "outputId": "957d63a3-289d-4c9c-92a4-8c6d309fb868"
549 | },
550 | "source": [
551 | "my_string = \"Machine Learning\"\n",
552 | "print(my_string)\n",
553 | "type(my_string)"
554 | ],
555 | "execution_count": null,
556 | "outputs": [
557 | {
558 | "output_type": "stream",
559 | "text": [
560 | "Machine Learning\n"
561 | ],
562 | "name": "stdout"
563 | },
564 | {
565 | "output_type": "execute_result",
566 | "data": {
567 | "text/plain": [
568 | "str"
569 | ]
570 | },
571 | "metadata": {
572 | "tags": []
573 | },
574 | "execution_count": 16
575 | }
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "metadata": {
581 | "colab": {
582 | "base_uri": "https://localhost:8080/"
583 | },
584 | "id": "mZ9rRFBv29c6",
585 | "outputId": "53d15686-897e-4f86-94ac-971585df85b2"
586 | },
587 | "source": [
588 | "print(\"Hello\"*5)"
589 | ],
590 | "execution_count": null,
591 | "outputs": [
592 | {
593 | "output_type": "stream",
594 | "text": [
595 | "HelloHelloHelloHelloHello\n"
596 | ],
597 | "name": "stdout"
598 | }
599 | ]
600 | },
601 | {
602 | "cell_type": "markdown",
603 | "metadata": {
604 | "id": "8uj-eOPM3LEX"
605 | },
606 | "source": [
607 | "Slicing"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "metadata": {
613 | "id": "mKF6ngiV3FiZ"
614 | },
615 | "source": [
616 | "my_string = \"Programming\""
617 | ],
618 | "execution_count": null,
619 | "outputs": []
620 | },
621 | {
622 | "cell_type": "code",
623 | "metadata": {
624 | "colab": {
625 | "base_uri": "https://localhost:8080/"
626 | },
627 | "id": "Rj636qLe3QUi",
628 | "outputId": "dd0f7061-7411-42ff-b879-7185e1502775"
629 | },
630 | "source": [
631 | "print(my_string[1:5]) # characters at index 1 through 4 are sliced (the end index 5 is excluded)"
632 | ],
633 | "execution_count": null,
634 | "outputs": [
635 | {
636 | "output_type": "stream",
637 | "text": [
638 | "rogr\n"
639 | ],
640 | "name": "stdout"
641 | }
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "metadata": {
647 | "colab": {
648 | "base_uri": "https://localhost:8080/"
649 | },
650 | "id": "SeLPwiOx3kri",
651 | "outputId": "7e6e534b-98aa-49d8-d7b5-bca91f4cf2cb"
652 | },
653 | "source": [
654 | "# step\n",
655 | "print(my_string[0:10:2])"
656 | ],
657 | "execution_count": null,
658 | "outputs": [
659 | {
660 | "output_type": "stream",
661 | "text": [
662 | "Pormi\n"
663 | ],
664 | "name": "stdout"
665 | }
666 | ]
667 | },
668 | {
669 | "cell_type": "markdown",
670 | "metadata": {
671 | "id": "NxwxzlVV4HgP"
672 | },
673 | "source": [
674 | "String Concatenation"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "metadata": {
680 | "colab": {
681 | "base_uri": "https://localhost:8080/"
682 | },
683 | "id": "TYqgwM5U390H",
684 | "outputId": "f1b883f4-3f34-4d8e-8b15-e44ffcfbd645"
685 | },
686 | "source": [
687 | "word_1 = 'Machine '\n",
688 | "word_2 = 'Learning'\n",
689 | "\n",
690 | "print(word_1+word_2)"
691 | ],
692 | "execution_count": null,
693 | "outputs": [
694 | {
695 | "output_type": "stream",
696 | "text": [
697 | "Machine Learning\n"
698 | ],
699 | "name": "stdout"
700 | }
701 | ]
702 | }
703 | ]
704 | }
--------------------------------------------------------------------------------
/2.4. List_Tuple_Set_Dictionary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "M6FYZhxWa-2t"
19 | },
20 | "source": [
21 | "Types of Objects in Python:\n",
22 | "1. Immutable Objects\n",
23 | "2. Mutable Objects"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "jtGu0nbsbDOW"
30 | },
31 | "source": [
32 | "Immutable Objects:\n",
33 | "1. int\n",
34 | "2. float\n",
35 | "3. string\n",
36 | "4. bool\n",
37 | "5. tuple"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "ekg3pl-kbFQR"
44 | },
45 | "source": [
46 | "Mutable Objects:\n",
47 | "1. List\n",
48 | "2. Set \n",
49 | "3. Dictionary"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {
55 | "id": "yLbT5WmhfD1C"
56 | },
57 | "source": [
58 | "List"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "metadata": {
64 | "colab": {
65 | "base_uri": "https://localhost:8080/"
66 | },
67 | "id": "UMGYv02qa8yY",
68 | "outputId": "ab9a8c00-da19-43c0-9f40-8909b5f36116"
69 | },
70 | "source": [
71 | "# a list is written with its elements enclosed in square brackets\n",
72 | "my_list = [1,2,3,4,5]\n",
73 | "print(my_list)\n",
74 | "type(my_list)"
75 | ],
76 | "execution_count": null,
77 | "outputs": [
78 | {
79 | "output_type": "stream",
80 | "text": [
81 | "[1, 2, 3, 4, 5]\n"
82 | ],
83 | "name": "stdout"
84 | },
85 | {
86 | "output_type": "execute_result",
87 | "data": {
88 | "text/plain": [
89 | "list"
90 | ]
91 | },
92 | "metadata": {
93 | "tags": []
94 | },
95 | "execution_count": 1
96 | }
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "metadata": {
102 | "colab": {
103 | "base_uri": "https://localhost:8080/"
104 | },
105 | "id": "vNOsdYcOfZrM",
106 | "outputId": "57f8576e-16c0-4d2b-aea3-dfb1aa90c459"
107 | },
108 | "source": [
109 | "# lists can have multiple data types\n",
110 | "my_list = [2, 3, 1.8, 'English', True]\n",
111 | "print(my_list)"
112 | ],
113 | "execution_count": null,
114 | "outputs": [
115 | {
116 | "output_type": "stream",
117 | "text": [
118 | "[2, 3, 1.8, 'English', True]\n"
119 | ],
120 | "name": "stdout"
121 | }
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {
127 | "id": "kR0CJMjefx3n"
128 | },
129 | "source": [
130 | "Lists are Mutable --> Changeable"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "metadata": {
136 | "colab": {
137 | "base_uri": "https://localhost:8080/"
138 | },
139 | "id": "cDqAI-_tfvrD",
140 | "outputId": "2fb2c7bf-ee32-4324-da7e-2160337af74c"
141 | },
142 | "source": [
143 | "# add elements to a list\n",
144 | "my_list = [2, 3, 1.8, 'English', True]\n",
145 | "my_list.append(6)\n",
146 | "print(my_list)"
147 | ],
148 | "execution_count": null,
149 | "outputs": [
150 | {
151 | "output_type": "stream",
152 | "text": [
153 | "[2, 3, 1.8, 'English', True, 6]\n"
154 | ],
155 | "name": "stdout"
156 | }
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "metadata": {
162 | "colab": {
163 | "base_uri": "https://localhost:8080/"
164 | },
165 | "id": "HGY-f8TxgEuB",
166 | "outputId": "9b8e0118-f59e-45d4-b5f8-e2640ccdeb31"
167 | },
168 | "source": [
169 | "# print elements of a list using their index\n",
170 | "print(my_list[0])\n",
171 | "print(my_list[2])"
172 | ],
173 | "execution_count": null,
174 | "outputs": [
175 | {
176 | "output_type": "stream",
177 | "text": [
178 | "2\n",
179 | "1.8\n"
180 | ],
181 | "name": "stdout"
182 | }
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "metadata": {
188 | "colab": {
189 | "base_uri": "https://localhost:8080/"
190 | },
191 | "id": "SRJrejNegWyM",
192 | "outputId": "27e590f9-2120-4889-8847-3d16bc65b532"
193 | },
194 | "source": [
195 | "# lists allow duplicate values\n",
196 | "list_1 = [1,2,3,4,5,12,2,3]\n",
197 | "print(list_1)"
198 | ],
199 | "execution_count": null,
200 | "outputs": [
201 | {
202 | "output_type": "stream",
203 | "text": [
204 | "[1, 2, 3, 4, 5, 12, 2, 3]\n"
205 | ],
206 | "name": "stdout"
207 | }
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "metadata": {
213 | "colab": {
214 | "base_uri": "https://localhost:8080/"
215 | },
216 | "id": "OhSx_qZkgk3R",
217 | "outputId": "361672c3-066f-403e-e1b3-e7fc9705f50a"
218 | },
219 | "source": [
220 | "print(len(list_1))"
221 | ],
222 | "execution_count": null,
223 | "outputs": [
224 | {
225 | "output_type": "stream",
226 | "text": [
227 | "8\n"
228 | ],
229 | "name": "stdout"
230 | }
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "metadata": {
236 | "colab": {
237 | "base_uri": "https://localhost:8080/"
238 | },
239 | "id": "KoBSERrlgvn4",
240 | "outputId": "3ee49f45-da7b-4725-f266-e08db56c8f0f"
241 | },
242 | "source": [
243 | "# initializing an empty list\n",
244 | "list_2 = []\n",
245 | "print(list_2)"
246 | ],
247 | "execution_count": null,
248 | "outputs": [
249 | {
250 | "output_type": "stream",
251 | "text": [
252 | "[]\n"
253 | ],
254 | "name": "stdout"
255 | }
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "metadata": {
261 | "colab": {
262 | "base_uri": "https://localhost:8080/"
263 | },
264 | "id": "-PJaFQltg4YM",
265 | "outputId": "5e84ec9a-b23f-4423-884c-6dfc5f6ba47a"
266 | },
267 | "source": [
268 | "list_2.append(5)\n",
269 | "print(list_2)"
270 | ],
271 | "execution_count": null,
272 | "outputs": [
273 | {
274 | "output_type": "stream",
275 | "text": [
276 | "[5]\n"
277 | ],
278 | "name": "stdout"
279 | }
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "metadata": {
285 | "colab": {
286 | "base_uri": "https://localhost:8080/"
287 | },
288 | "id": "nU3FsBd6g-B2",
289 | "outputId": "5696a372-ca3b-4ee3-8b3c-6b775037588f"
290 | },
291 | "source": [
292 | "# delete an item in a list\n",
293 | "list_2 = [2, 3, 1.8, 'English', True, 6]\n",
294 | "print(list_2)\n",
295 | "\n",
296 | "del list_2[2]\n",
297 | "print(list_2)"
298 | ],
299 | "execution_count": null,
300 | "outputs": [
301 | {
302 | "output_type": "stream",
303 | "text": [
304 | "[2, 3, 1.8, 'English', True, 6]\n",
305 | "[2, 3, 'English', True, 6]\n"
306 | ],
307 | "name": "stdout"
308 | }
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "metadata": {
314 | "colab": {
315 | "base_uri": "https://localhost:8080/"
316 | },
317 | "id": "XbQAGCA0hX3Q",
318 | "outputId": "68e91d96-56ad-4c7e-b7e2-653c247eceff"
319 | },
320 | "source": [
321 | "# join two lists\n",
322 | "list_3 = [1,2,3,4,5]\n",
323 | "list_4 = [6,7,8,9,10]\n",
324 | "\n",
325 | "list_5 = list_3 + list_4\n",
326 | "print(list_5)"
327 | ],
328 | "execution_count": null,
329 | "outputs": [
330 | {
331 | "output_type": "stream",
332 | "text": [
333 | "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
334 | ],
335 | "name": "stdout"
336 | }
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "metadata": {
342 | "id": "XoQULhPEhsSk"
343 | },
344 | "source": [
345 | "Tuple"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "metadata": {
351 | "colab": {
352 | "base_uri": "https://localhost:8080/"
353 | },
354 | "id": "VSTos6r1hoAo",
355 | "outputId": "b5c78c19-51bb-4732-cdef-7aeb0ff0683b"
356 | },
357 | "source": [
358 | "tuple_1 = (2,3,4,5)\n",
359 | "print(tuple_1)\n",
360 | "type(tuple_1)"
361 | ],
362 | "execution_count": null,
363 | "outputs": [
364 | {
365 | "output_type": "stream",
366 | "text": [
367 | "(2, 3, 4, 5)\n"
368 | ],
369 | "name": "stdout"
370 | },
371 | {
372 | "output_type": "execute_result",
373 | "data": {
374 | "text/plain": [
375 | "tuple"
376 | ]
377 | },
378 | "metadata": {
379 | "tags": []
380 | },
381 | "execution_count": 11
382 | }
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "metadata": {
388 | "colab": {
389 | "base_uri": "https://localhost:8080/"
390 | },
391 | "id": "Tdrh8JsAh37C",
392 | "outputId": "a0a52f85-7058-4001-d799-b8a6fc298153"
393 | },
394 | "source": [
395 | "# tuple allows multiple data types\n",
396 | "tuple_2 = (1,2,3.5, 'Machine Learning', False)\n",
397 | "print(tuple_2)"
398 | ],
399 | "execution_count": null,
400 | "outputs": [
401 | {
402 | "output_type": "stream",
403 | "text": [
404 | "(1, 2, 3.5, 'Machine Learning', False)\n"
405 | ],
406 | "name": "stdout"
407 | }
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "metadata": {
413 | "colab": {
414 | "base_uri": "https://localhost:8080/"
415 | },
416 | "id": "tHxHt22fiIAg",
417 | "outputId": "c09d925e-9289-4cd5-9fff-52b47d8e75ef"
418 | },
419 | "source": [
420 | "# converting a list to a tuple\n",
421 | "\n",
422 | "my_list = [3,4,5,6]\n",
423 | "print(my_list)\n",
424 | "\n",
425 | "my_tuple = tuple(my_list)\n",
426 | "print(my_tuple)"
427 | ],
428 | "execution_count": null,
429 | "outputs": [
430 | {
431 | "output_type": "stream",
432 | "text": [
433 | "[3, 4, 5, 6]\n",
434 | "(3, 4, 5, 6)\n"
435 | ],
436 | "name": "stdout"
437 | }
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "metadata": {
443 | "colab": {
444 | "base_uri": "https://localhost:8080/"
445 | },
446 | "id": "9QRTalV1iay1",
447 | "outputId": "9f983fcd-54f8-42f4-93fb-b468e78b39df"
448 | },
449 | "source": [
450 | "print(my_tuple[0])\n",
451 | "print(my_tuple[1])"
452 | ],
453 | "execution_count": null,
454 | "outputs": [
455 | {
456 | "output_type": "stream",
457 | "text": [
458 | "3\n",
459 | "4\n"
460 | ],
461 | "name": "stdout"
462 | }
463 | ]
464 | },
465 | {
466 | "cell_type": "markdown",
467 | "metadata": {
468 | "id": "nzLtSOHCirRc"
469 | },
470 | "source": [
471 | "Tuples are immutable --> Unchangeable"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "metadata": {
477 | "colab": {
478 | "base_uri": "https://localhost:8080/",
479 | "height": 171
480 | },
481 | "id": "OXIstbesipDG",
482 | "outputId": "39957510-c58e-4568-b548-1921f85435b1"
483 | },
484 | "source": [
485 | "my_tuple.append(6)"
486 | ],
487 | "execution_count": null,
488 | "outputs": [
489 | {
490 | "output_type": "error",
491 | "ename": "AttributeError",
492 | "evalue": "ignored",
493 | "traceback": [
494 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
495 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
496 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmy_tuple\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
497 | "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'append'"
498 | ]
499 | }
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "metadata": {
505 | "colab": {
506 | "base_uri": "https://localhost:8080/"
507 | },
508 | "id": "AlOVmz6Sizsu",
509 | "outputId": "8426c1ba-c637-413f-bf99-9a4f6423d99e"
510 | },
511 | "source": [
512 | "print(len(my_tuple))"
513 | ],
514 | "execution_count": null,
515 | "outputs": [
516 | {
517 | "output_type": "stream",
518 | "text": [
519 | "4\n"
520 | ],
521 | "name": "stdout"
522 | }
523 | ]
524 | },
525 | {
526 | "cell_type": "markdown",
527 | "metadata": {
528 | "id": "ORc0vJDHjAwS"
529 | },
530 | "source": [
531 | "Set"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "metadata": {
537 | "colab": {
538 | "base_uri": "https://localhost:8080/"
539 | },
540 | "id": "RjaxKroji9mw",
541 | "outputId": "97cfe499-855c-46a8-dc3f-15d6d17ffd9b"
542 | },
543 | "source": [
544 | "# set --> Curly brackets\n",
545 | "my_set = {1,2,3,4,5}\n",
546 | "print(my_set)\n",
547 | "type(my_set)"
548 | ],
549 | "execution_count": null,
550 | "outputs": [
551 | {
552 | "output_type": "stream",
553 | "text": [
554 | "{1, 2, 3, 4, 5}\n"
555 | ],
556 | "name": "stdout"
557 | },
558 | {
559 | "output_type": "execute_result",
560 | "data": {
561 | "text/plain": [
562 | "set"
563 | ]
564 | },
565 | "metadata": {
566 | "tags": []
567 | },
568 | "execution_count": 17
569 | }
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "metadata": {
575 | "colab": {
576 | "base_uri": "https://localhost:8080/",
577 | "height": 171
578 | },
579 | "id": "8ngbZxf0jMXk",
580 | "outputId": "f928e779-a7ee-453b-fbe3-9af839c25356"
581 | },
582 | "source": [
583 | "print(my_set[0])"
584 | ],
585 | "execution_count": null,
586 | "outputs": [
587 | {
588 | "output_type": "error",
589 | "ename": "TypeError",
590 | "evalue": "ignored",
591 | "traceback": [
592 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
593 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
594 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmy_set\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
595 | "\u001b[0;31mTypeError\u001b[0m: 'set' object does not support indexing"
596 | ]
597 | }
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "metadata": {
603 | "colab": {
604 | "base_uri": "https://localhost:8080/"
605 | },
606 | "id": "65IgJYA5jVzr",
607 | "outputId": "77a0b6cc-72d2-495e-f4cc-a1cf60175622"
608 | },
609 | "source": [
610 | "# convert a list to a set\n",
611 | "list_5 = [4,5,6,7,8]\n",
612 | "\n",
613 | "x = set(list_5)\n",
614 | "print(x)"
615 | ],
616 | "execution_count": null,
617 | "outputs": [
618 | {
619 | "output_type": "stream",
620 | "text": [
621 | "{4, 5, 6, 7, 8}\n"
622 | ],
623 | "name": "stdout"
624 | }
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "metadata": {
630 | "colab": {
631 | "base_uri": "https://localhost:8080/"
632 | },
633 | "id": "_0D3WDuZjrI_",
634 | "outputId": "a56d3ed5-0c3c-4993-f46b-a7314862e915"
635 | },
636 | "source": [
637 | "# set does not allow duplicate values\n",
638 | "set_3 = {1,2,3,4,5,1,2,3}\n",
639 | "print(set_3)"
640 | ],
641 | "execution_count": null,
642 | "outputs": [
643 | {
644 | "output_type": "stream",
645 | "text": [
646 | "{1, 2, 3, 4, 5}\n"
647 | ],
648 | "name": "stdout"
649 | }
650 | ]
651 | },
652 | {
653 | "cell_type": "markdown",
654 | "metadata": {
655 | "id": "W6i0Eo53j7Sx"
656 | },
657 | "source": [
658 | "Dictionary"
659 | ]
660 | },
661 | {
662 | "cell_type": "markdown",
663 | "metadata": {
664 | "id": "8zHFSdZdj-o9"
665 | },
666 | "source": [
667 | "Key-Value Pair"
668 | ]
669 | },
670 | {
671 | "cell_type": "code",
672 | "metadata": {
673 | "colab": {
674 | "base_uri": "https://localhost:8080/"
675 | },
676 | "id": "RZJjR0oYj3iK",
677 | "outputId": "928c40b2-ad74-4f8f-d941-aa9afff89b7d"
678 | },
679 | "source": [
680 | "my_dictionary = {'name':'David','age':30,'country':'India'}\n",
681 | "print(my_dictionary)\n",
682 | "type(my_dictionary)"
683 | ],
684 | "execution_count": null,
685 | "outputs": [
686 | {
687 | "output_type": "stream",
688 | "text": [
689 | "{'name': 'David', 'age': 30, 'country': 'India'}\n"
690 | ],
691 | "name": "stdout"
692 | },
693 | {
694 | "output_type": "execute_result",
695 | "data": {
696 | "text/plain": [
697 | "dict"
698 | ]
699 | },
700 | "metadata": {
701 | "tags": []
702 | },
703 | "execution_count": 21
704 | }
705 | ]
706 | },
707 | {
708 | "cell_type": "code",
709 | "metadata": {
710 | "colab": {
711 | "base_uri": "https://localhost:8080/"
712 | },
713 | "id": "gOCf8w7OkVjE",
714 | "outputId": "f781a022-30aa-4022-a996-fee5b54df4b0"
715 | },
716 | "source": [
717 | "print(my_dictionary['name'])\n",
718 | "print(my_dictionary['age'])\n",
719 | "print(my_dictionary['country'])"
720 | ],
721 | "execution_count": null,
722 | "outputs": [
723 | {
724 | "output_type": "stream",
725 | "text": [
726 | "David\n",
727 | "30\n",
728 | "India\n"
729 | ],
730 | "name": "stdout"
731 | }
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "metadata": {
737 | "colab": {
738 | "base_uri": "https://localhost:8080/"
739 | },
740 | "id": "8AOHTq6bkoq2",
741 | "outputId": "8954d823-76bf-4bdc-b7bf-50e668bc19e5"
742 | },
743 | "source": [
744 | "# dictionary does not allow duplicate keys (a repeated key keeps only its last value)\n",
745 | "dictionary_2 = {'name':'David','age':30,'country':'India','name':'David','age':30,'country':'India'}\n",
746 | "print(dictionary_2)"
747 | ],
748 | "execution_count": null,
749 | "outputs": [
750 | {
751 | "output_type": "stream",
752 | "text": [
753 | "{'name': 'David', 'age': 30, 'country': 'India'}\n"
754 | ],
755 | "name": "stdout"
756 | }
757 | ]
758 | },
759 | {
760 | "cell_type": "code",
761 | "metadata": {
762 | "id": "zwQtG_KYk6q9"
763 | },
764 | "source": [],
765 | "execution_count": null,
766 | "outputs": []
767 | }
768 | ]
769 | }
--------------------------------------------------------------------------------
/2.5. Operators_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "z8iVDs11-ogJ"
19 | },
20 | "source": [
21 | "Operators in Python:\n",
22 | "1. Arithmetic Operators\n",
23 | "2. Assignment Operators\n",
24 | "3. Comparison Operators\n",
25 | "4. Logical Operators\n",
26 | "5. Identity Operators\n",
27 | "6. Membership Operators"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {
33 | "id": "5NTu_QYw_212"
34 | },
35 | "source": [
36 | "1. Arithmetic Operators"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "colab": {
43 | "base_uri": "https://localhost:8080/"
44 | },
45 | "id": "CXYtGkkZ59n1",
46 | "outputId": "ed373db5-6740-40fa-c477-d0f2f67fddc3"
47 | },
48 | "source": [
49 | "num_1 = 20\n",
50 | "num_2 = 10\n",
51 | "\n",
52 | "# addition\n",
53 | "sum = num_1 + num_2\n",
54 | "print('sum = ',sum)\n",
55 | "\n",
56 | "# subtraction\n",
57 | "diff = num_1 - num_2\n",
58 | "print('difference = ',diff)\n",
59 | "\n",
60 | "# multiplication\n",
61 | "pro = num_1 * num_2\n",
62 | "print('product = ',pro)\n",
63 | "\n",
64 | "# division\n",
65 | "quo = num_1 / num_2\n",
66 | "print('quotient = ',quo)\n",
67 | "\n",
68 | "# exponent\n",
69 | "exp = num_1**num_2 # 20 raised to the power 10\n",
70 | "print('exponent = ',exp)\n",
71 | "\n",
72 | "# modulus\n",
73 | "mod = num_1 % num_2\n",
74 | "print('remainder = ',mod)"
75 | ],
76 | "execution_count": null,
77 | "outputs": [
78 | {
79 | "output_type": "stream",
80 | "text": [
81 | "sum = 30\n",
82 | "difference = 10\n",
83 | "product = 200\n",
84 | "quotient = 2.0\n",
85 | "exponent = 10240000000000\n",
86 | "remainder = 0\n"
87 | ],
88 | "name": "stdout"
89 | }
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {
95 | "id": "E12rvExzBIh7"
96 | },
97 | "source": [
98 | "2. Assignment Operators"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {
104 | "id": "K-LS4tHQBiPZ"
105 | },
106 | "source": [
107 | "+=\n",
108 | "\n",
109 | "-=\n",
110 | "\n",
111 | "*=\n",
112 | "\n",
113 | "**=\n",
114 | "\n",
115 | "/=\n",
116 | "\n",
117 | "%="
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "metadata": {
123 | "colab": {
124 | "base_uri": "https://localhost:8080/"
125 | },
126 | "id": "YwWiUzaNBDaD",
127 | "outputId": "a187ca00-043d-466f-a074-f182ccc4496e"
128 | },
129 | "source": [
130 | "a = 5\n",
131 | "print(a)"
132 | ],
133 | "execution_count": null,
134 | "outputs": [
135 | {
136 | "output_type": "stream",
137 | "text": [
138 | "5\n"
139 | ],
140 | "name": "stdout"
141 | }
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "metadata": {
147 | "colab": {
148 | "base_uri": "https://localhost:8080/"
149 | },
150 | "id": "LxJoSCRVBPrv",
151 | "outputId": "cdcc508a-8abf-476b-f952-a708de062360"
152 | },
153 | "source": [
154 | "a = 5\n",
155 | "a += 5 # a = a + 5\n",
156 | "print(a)"
157 | ],
158 | "execution_count": null,
159 | "outputs": [
160 | {
161 | "output_type": "stream",
162 | "text": [
163 | "10\n"
164 | ],
165 | "name": "stdout"
166 | }
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "metadata": {
172 | "colab": {
173 | "base_uri": "https://localhost:8080/"
174 | },
175 | "id": "F5hy5OvoBY0t",
176 | "outputId": "c1b988e4-bee0-4faa-c76f-491cffc4ee0e"
177 | },
178 | "source": [
179 | "b = 5\n",
180 | "b -= 2 # b = b-2\n",
181 | "print(b)"
182 | ],
183 | "execution_count": null,
184 | "outputs": [
185 | {
186 | "output_type": "stream",
187 | "text": [
188 | "3\n"
189 | ],
190 | "name": "stdout"
191 | }
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {
197 | "id": "0B6StO-HBv1E"
198 | },
199 | "source": [
200 | "3. Comparison Operators"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "metadata": {
206 | "colab": {
207 | "base_uri": "https://localhost:8080/"
208 | },
209 | "id": "9L2aTaKUBftz",
210 | "outputId": "b70b186f-7cc5-4887-aa81-418e1e1cac59"
211 | },
212 | "source": [
213 | "a = 5\n",
214 | "b = 10\n",
215 | "\n",
216 | "print(a == b) # equal to\n",
217 | "print(a != b) # not equal to\n",
218 | "print(a > b) # greater than\n",
219 | "print(a < b)\n",
220 | "print(a <= b)\n",
221 | "print(a >= b) "
222 | ],
223 | "execution_count": null,
224 | "outputs": [
225 | {
226 | "output_type": "stream",
227 | "text": [
228 | "False\n",
229 | "True\n",
230 | "False\n",
231 | "True\n",
232 | "True\n",
233 | "False\n"
234 | ],
235 | "name": "stdout"
236 | }
237 | ]
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "5vG_Gp-DCc-u"
243 | },
244 | "source": [
245 | "4. Logical Operators"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {
251 | "id": "0k1QjwN7Cfx5"
252 | },
253 | "source": [
254 | "and\n",
255 | "\n",
256 | "or\n",
257 | "\n",
258 | "not"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "metadata": {
264 | "colab": {
265 | "base_uri": "https://localhost:8080/"
266 | },
267 | "id": "XKVEPY3UCVqy",
268 | "outputId": "48152426-7541-4483-a272-85090c9c20f3"
269 | },
270 | "source": [
271 | "a = 10\n",
272 | "\n",
273 | "print( a>20 and a>5)\n",
274 | "print( a>20 or a>5)\n",
275 | "print( not( a>8 and a>5))"
276 | ],
277 | "execution_count": null,
278 | "outputs": [
279 | {
280 | "output_type": "stream",
281 | "text": [
282 | "False\n",
283 | "True\n",
284 | "False\n"
285 | ],
286 | "name": "stdout"
287 | }
288 | ]
289 | },
290 | {
291 | "cell_type": "markdown",
292 | "metadata": {
293 | "id": "xzOpIj7eDKrz"
294 | },
295 | "source": [
296 | "5. Identity Operators (check whether two names refer to the same object, not merely whether their values are equal):\n",
297 | "\n",
298 | "is \n",
299 | "\n",
300 | "is not"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "metadata": {
306 | "colab": {
307 | "base_uri": "https://localhost:8080/"
308 | },
309 | "id": "005EmRZMDERy",
310 | "outputId": "7c75b240-62d6-4aaa-be75-51da8afc2795"
311 | },
312 | "source": [
313 | "x = 5\n",
314 | "y = 5\n",
315 | "\n",
316 | "print(x is y)"
317 | ],
318 | "execution_count": null,
319 | "outputs": [
320 | {
321 | "output_type": "stream",
322 | "text": [
323 | "True\n"
324 | ],
325 | "name": "stdout"
326 | }
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "metadata": {
332 | "colab": {
333 | "base_uri": "https://localhost:8080/"
334 | },
335 | "id": "ZQy2V1ViDT5x",
336 | "outputId": "3782b1dc-8dda-4598-9d45-b3f9e859a6f2"
337 | },
338 | "source": [
339 | "x = 5\n",
340 | "y = 10\n",
341 | "\n",
342 | "print(x is y)"
343 | ],
344 | "execution_count": null,
345 | "outputs": [
346 | {
347 | "output_type": "stream",
348 | "text": [
349 | "False\n"
350 | ],
351 | "name": "stdout"
352 | }
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "metadata": {
358 | "colab": {
359 | "base_uri": "https://localhost:8080/"
360 | },
361 | "id": "bSZFIncfDYVc",
362 | "outputId": "c301afff-7c5d-45e6-eac8-ed31719e8804"
363 | },
364 | "source": [
365 | "x = 5\n",
366 | "y = 5\n",
367 | "\n",
368 | "print(x is not y)"
369 | ],
370 | "execution_count": null,
371 | "outputs": [
372 | {
373 | "output_type": "stream",
374 | "text": [
375 | "False\n"
376 | ],
377 | "name": "stdout"
378 | }
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "metadata": {
384 | "colab": {
385 | "base_uri": "https://localhost:8080/"
386 | },
387 | "id": "uVAX_AejDcsX",
388 | "outputId": "abbad84c-bc89-44e0-fff6-b21b7539743a"
389 | },
390 | "source": [
391 | "x = 5\n",
392 | "y = 10\n",
393 | "\n",
394 | "print(x is not y)"
395 | ],
396 | "execution_count": null,
397 | "outputs": [
398 | {
399 | "output_type": "stream",
400 | "text": [
401 | "True\n"
402 | ],
403 | "name": "stdout"
404 | }
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {
410 | "id": "mvIH9bSEDlSw"
411 | },
412 | "source": [
413 | "6. Membership Operator:\n",
414 | "\n",
415 | "in \n",
416 | "\n",
417 | "not in"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "metadata": {
423 | "colab": {
424 | "base_uri": "https://localhost:8080/"
425 | },
426 | "id": "IkUTVRTmDfic",
427 | "outputId": "afc0d8cd-4e9e-4516-e23e-569752a176cb"
428 | },
429 | "source": [
430 | "a = 5\n",
431 | "b =10\n",
432 | "\n",
433 | "c = [1,2,3,4,5]\n",
434 | "print( a in c)\n",
435 | "print( b in c)"
436 | ],
437 | "execution_count": null,
438 | "outputs": [
439 | {
440 | "output_type": "stream",
441 | "text": [
442 | "True\n",
443 | "False\n"
444 | ],
445 | "name": "stdout"
446 | }
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "metadata": {
452 | "colab": {
453 | "base_uri": "https://localhost:8080/"
454 | },
455 | "id": "LOYpdrbnD4Fb",
456 | "outputId": "782c2ff9-cc82-4797-c9af-1020a7bf037d"
457 | },
458 | "source": [
459 | "a = 5\n",
460 | "b =10\n",
461 | "\n",
462 | "c = [1,2,3,4,5]\n",
463 | "print( a not in c)\n",
464 | "print( b not in c)"
465 | ],
466 | "execution_count": null,
467 | "outputs": [
468 | {
469 | "output_type": "stream",
470 | "text": [
471 | "False\n",
472 | "True\n"
473 | ],
474 | "name": "stdout"
475 | }
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "metadata": {
481 | "id": "qkbtedVdEATX"
482 | },
483 | "source": [],
484 | "execution_count": null,
485 | "outputs": []
486 | }
487 | ]
488 | }
--------------------------------------------------------------------------------
/2.6. if_else_statement_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "9yPP5tJoRu3j"
19 | },
20 | "source": [
21 | "simple if else statement"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "metadata": {
27 | "colab": {
28 | "base_uri": "https://localhost:8080/"
29 | },
30 | "id": "aGRxtmdPQgBb",
31 | "outputId": "f90436e0-dd24-40a4-f3ba-644ee4a6ca49"
32 | },
33 | "source": [
34 | "a = 30\n",
35 | "b = 50\n",
36 | "\n",
37 | "if (a>b):\n",
38 | " print('a is the greatest number')\n",
39 | "else:\n",
40 | " print('b is the greatest number')"
41 | ],
42 | "execution_count": null,
43 | "outputs": [
44 | {
45 | "output_type": "stream",
46 | "text": [
47 | "b is the greatest number\n"
48 | ],
49 | "name": "stdout"
50 | }
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "metadata": {
56 | "colab": {
57 | "base_uri": "https://localhost:8080/"
58 | },
59 | "id": "nUFrXZovSGIZ",
60 | "outputId": "6ecee5ab-551c-4cf4-cc2d-0d9c541ebd7f"
61 | },
62 | "source": [
63 | "a = int(input('Enter the first number : '))\n",
64 | "b = int(input('Enter the second number : '))\n",
65 | "\n",
66 | "if (a>b):\n",
67 | " print('First number is the greatest')\n",
68 | "else:\n",
69 | " print('Second number is the greatest')"
70 | ],
71 | "execution_count": null,
72 | "outputs": [
73 | {
74 | "output_type": "stream",
75 | "text": [
76 | "Enter the first number : 16\n",
77 | "Enter the second number : 20\n",
78 | "Second number is the greatest\n"
79 | ],
80 | "name": "stdout"
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {
87 | "id": "GJixx9xSSoo-"
88 | },
89 | "source": [
90 | "if elif else statement"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "metadata": {
96 | "colab": {
97 | "base_uri": "https://localhost:8080/"
98 | },
99 | "id": "y_LUudPwSX6I",
100 | "outputId": "d4e25b8f-cba9-405a-b34e-d003b14e277a"
101 | },
102 | "source": [
103 | "a = 15\n",
104 | "b = 25\n",
105 | "c = 30\n",
106 | "\n",
107 | "if (b<a>c):\n",
108 | " print('a is the greatest number')\n",
109 | "elif (a<b>c):\n",
110 | " print('b is the greatest number')\n",
111 | "else:\n",
112 | " print('c is the greatest number')\n"
113 | ],
114 | "execution_count": null,
115 | "outputs": [
116 | {
117 | "output_type": "stream",
118 | "text": [
119 | "c is the greatest number\n"
120 | ],
121 | "name": "stdout"
122 | }
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {
128 | "id": "cr5QqpJBTe2N"
129 | },
130 | "source": [
131 | "nested if statement"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "metadata": {
137 | "colab": {
138 | "base_uri": "https://localhost:8080/"
139 | },
140 | "id": "YXooMdY1Tb8Z",
141 | "outputId": "ea061bc4-ec14-4383-d342-abbf1d1419aa"
142 | },
143 | "source": [
144 | "a = 20\n",
145 | "b = 40\n",
146 | "c = 60\n",
147 | "\n",
148 | "if (a>b):\n",
149 | " if (a>c):\n",
150 | " print('a is the greatest number')\n",
151 | " else:\n",
152 | " print('c is the greatest number')\n",
153 | "else:\n",
154 | " if (b>c):\n",
155 | " print('b is the greatest number')\n",
156 | " else:\n",
157 | " print('c is the greatest number') "
158 | ],
159 | "execution_count": null,
160 | "outputs": [
161 | {
162 | "output_type": "stream",
163 | "text": [
164 | "c is the greatest number\n"
165 | ],
166 | "name": "stdout"
167 | }
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "metadata": {
173 | "id": "d1jIWFJQUaa_"
174 | },
175 | "source": [],
176 | "execution_count": null,
177 | "outputs": []
178 | }
179 | ]
180 | }
--------------------------------------------------------------------------------
/2.7. Loops_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | }
12 | },
13 | "cells": [
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "id": "l169EevJ4PYv"
18 | },
19 | "source": [
20 | "For loop"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "metadata": {
26 | "colab": {
27 | "base_uri": "https://localhost:8080/"
28 | },
29 | "id": "uy9loXNN3yon",
30 | "outputId": "2754a351-7c82-42db-f525-70b101937bfd"
31 | },
32 | "source": [
33 | "laptop1 = int(input('Enter the price of the laptop : '))\n",
34 | "laptop2 = int(input('Enter the price of the laptop : '))\n",
35 | "laptop3 = int(input('Enter the price of the laptop : '))\n",
36 | "laptop4 = int(input('Enter the price of the laptop : '))\n",
37 | "laptop5 = int(input('Enter the price of the laptop : '))"
38 | ],
39 | "execution_count": null,
40 | "outputs": [
41 | {
42 | "output_type": "stream",
43 | "text": [
44 | "Enter the price of the laptop : 20000\n",
45 | "Enter the price of the laptop : 30000\n",
46 | "Enter the price of the laptop : 40000\n",
47 | "Enter the price of the laptop : 50000\n",
48 | "Enter the price of the laptop : 60000\n"
49 | ],
50 | "name": "stdout"
51 | }
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "metadata": {
57 | "colab": {
58 | "base_uri": "https://localhost:8080/"
59 | },
60 | "id": "W0YAxl-e4XfP",
61 | "outputId": "56f97315-444b-4618-b15c-5f177c795bd7"
62 | },
63 | "source": [
64 | "for i in range(7): # 0,1,2,3,4,5,6\n",
65 | " laptop_price = int(input('Enter the price of the laptop : '))"
66 | ],
67 | "execution_count": null,
68 | "outputs": [
69 | {
70 | "output_type": "stream",
71 | "text": [
72 | "Enter the price of the laptop : 6\n",
73 | "Enter the price of the laptop : 7\n",
74 | "Enter the price of the laptop : 8\n",
75 | "Enter the price of the laptop : 9\n",
76 | "Enter the price of the laptop : 2\n",
77 | "Enter the price of the laptop : 3\n",
78 | "Enter the price of the laptop : 4\n"
79 | ],
80 | "name": "stdout"
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "colab": {
88 | "base_uri": "https://localhost:8080/"
89 | },
90 | "id": "-9bvL0U_5QnH",
91 | "outputId": "89a12700-fe97-4870-8ebe-5ad59de4c9ed"
92 | },
93 | "source": [
94 | "numbers = [50, 100, 150, 200]\n",
95 | "\n",
96 | "print(numbers[0])\n",
97 | "print(numbers[1])\n",
98 | "print(numbers[2])\n",
99 | "print(numbers[3])"
100 | ],
101 | "execution_count": null,
102 | "outputs": [
103 | {
104 | "output_type": "stream",
105 | "text": [
106 | "50\n",
107 | "100\n",
108 | "150\n",
109 | "200\n"
110 | ],
111 | "name": "stdout"
112 | }
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "metadata": {
118 | "colab": {
119 | "base_uri": "https://localhost:8080/"
120 | },
121 | "id": "I2OmCKt_6J3g",
122 | "outputId": "9d248bca-1edd-4605-a195-4b5c7c9fee63"
123 | },
124 | "source": [
125 | "numbers = [50, 100, 150, 200]\n",
126 | "\n",
127 | "for i in numbers:\n",
128 | " print(i)"
129 | ],
130 | "execution_count": null,
131 | "outputs": [
132 | {
133 | "output_type": "stream",
134 | "text": [
135 | "50\n",
136 | "100\n",
137 | "150\n",
138 | "200\n"
139 | ],
140 | "name": "stdout"
141 | }
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {
147 | "id": "LZw0F_Ga6aeC"
148 | },
149 | "source": [
150 | "While Loop"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {
156 | "id": "mjmPzJiV6src"
157 | },
158 | "source": [
159 | "while condition:\n",
160 | "\n",
161 | " statement"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "metadata": {
167 | "colab": {
168 | "base_uri": "https://localhost:8080/"
169 | },
170 | "id": "7UCze9SW6RO6",
171 | "outputId": "f8ae520b-14bd-47aa-fd6d-7870824a84c5"
172 | },
173 | "source": [
174 | "i = 0\n",
175 | "\n",
176 | "while i<10:\n",
177 | " print(i)\n",
178 | " i += 1 "
179 | ],
180 | "execution_count": null,
181 | "outputs": [
182 | {
183 | "output_type": "stream",
184 | "text": [
185 | "0\n",
186 | "1\n",
187 | "2\n",
188 | "3\n",
189 | "4\n",
190 | "5\n",
191 | "6\n",
192 | "7\n",
193 | "8\n",
194 | "9\n"
195 | ],
196 | "name": "stdout"
197 | }
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "5aYvGqVj7Lxw"
204 | },
205 | "source": [
206 | "i = 5\n",
207 | "\n",
208 | "while i<3:\n",
209 | " print(i)\n",
210 | " i +=1"
211 | ],
212 | "execution_count": null,
213 | "outputs": []
214 | }
215 | ]
216 | }
--------------------------------------------------------------------------------
/2.8. Functions_in_Python.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | }
12 | },
13 | "cells": [
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {
17 | "id": "P6doAW5l8ba2"
18 | },
19 | "source": [
20 | "Functions: \n",
21 | "\n",
22 | "A function is a block of code that can be reused in a program."
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {
28 | "id": "3YuBEtBR86X8"
29 | },
30 | "source": [
31 | "Factorial of a Number"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "id": "Z8ySi-x58_8w"
38 | },
39 | "source": [
40 | "Factorial of a number is the product of all the positive integers less than or equal to the given number"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {
46 | "id": "3tUJEQyC9LLh"
47 | },
48 | "source": [
49 | "Factorial of 5 = 5 x 4 x 3 x 2 x 1 = 120"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "metadata": {
55 | "colab": {
56 | "base_uri": "https://localhost:8080/"
57 | },
58 | "id": "Z3F1xOof8LbY",
59 | "outputId": "76a7da9c-11c8-4732-f49d-25a164242fe9"
60 | },
61 | "source": [
62 | "number = int(input(' Enter a number to find its factorial : '))\n",
63 | "\n",
64 | "factorial = 1\n",
65 | "\n",
66 | "if number == 0:\n",
67 | " print(' The Factorial of 0 is 1')\n",
68 | "\n",
69 | "else:\n",
70 | " for i in range(1, number+1): \n",
71 | " factorial = factorial*i\n",
72 | " print('The factorial of ',number,' is',factorial) "
73 | ],
74 | "execution_count": null,
75 | "outputs": [
76 | {
77 | "output_type": "stream",
78 | "text": [
79 | " Enter a number to find its factorial : 10\n",
80 | "The factorial of 10 is 3628800\n"
81 | ],
82 | "name": "stdout"
83 | }
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "3Rv9GbOh-7XI"
90 | },
91 | "source": [
92 | "Factorial Function"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "metadata": {
98 | "id": "pZH6ilR8-uQh"
99 | },
100 | "source": [
101 | "def factorial_value(num):\n",
102 | "\n",
103 | " factorial = 1\n",
104 | "\n",
105 | " if num == 0:\n",
106 | " return factorial\n",
107 | "\n",
108 | " else:\n",
109 | " for i in range(1, num+1):\n",
110 | " factorial = factorial*i\n",
111 | " return factorial"
112 | ],
113 | "execution_count": null,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "metadata": {
119 | "colab": {
120 | "base_uri": "https://localhost:8080/"
121 | },
122 | "id": "Nwf8f6su_ige",
123 | "outputId": "983dcfbf-e5fd-4770-918a-795fccc59eb2"
124 | },
125 | "source": [
126 | "print(factorial_value(5))"
127 | ],
128 | "execution_count": null,
129 | "outputs": [
130 | {
131 | "output_type": "stream",
132 | "text": [
133 | "120\n"
134 | ],
135 | "name": "stdout"
136 | }
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "metadata": {
142 | "colab": {
143 | "base_uri": "https://localhost:8080/"
144 | },
145 | "id": "YXEVZyna_oKL",
146 | "outputId": "e9513dbd-4314-4d9c-d719-4eccb05867cd"
147 | },
148 | "source": [
149 | "print(factorial_value(10))"
150 | ],
151 | "execution_count": null,
152 | "outputs": [
153 | {
154 | "output_type": "stream",
155 | "text": [
156 | "3628800\n"
157 | ],
158 | "name": "stdout"
159 | }
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "metadata": {
165 | "colab": {
166 | "base_uri": "https://localhost:8080/"
167 | },
168 | "id": "Gk0kwebb_y3U",
169 | "outputId": "f7532445-7844-4ffb-cf00-ee1086416724"
170 | },
171 | "source": [
172 | "print(factorial_value(6))"
173 | ],
174 | "execution_count": null,
175 | "outputs": [
176 | {
177 | "output_type": "stream",
178 | "text": [
179 | "720\n"
180 | ],
181 | "name": "stdout"
182 | }
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "metadata": {
188 | "id": "n9R58coF_8Nr"
189 | },
190 | "source": [],
191 | "execution_count": null,
192 | "outputs": []
193 | }
194 | ]
195 | }
--------------------------------------------------------------------------------
/4.2. Importing_Datasets_through_Kaggle_API.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "VY1GEpb4HiNh"
21 | },
22 | "source": [
23 | "API - Application Programming Interface"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "metadata": {
29 | "colab": {
30 | "base_uri": "https://localhost:8080/"
31 | },
32 | "id": "B63zSIF-HhJr",
33 | "outputId": "fc28310c-56da-485e-e95e-67eeaa1ebc23"
34 | },
35 | "source": [
36 | "# installing the Kaggle library\n",
37 | "!pip install kaggle"
38 | ],
39 | "execution_count": null,
40 | "outputs": [
41 | {
42 | "output_type": "stream",
43 | "text": [
44 | "Requirement already satisfied: kaggle in /usr/local/lib/python3.7/dist-packages (1.5.10)\n",
45 | "Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2020.12.5)\n",
46 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0)\n",
47 | "Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3)\n",
48 | "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0)\n",
49 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.41.1)\n",
50 | "Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.0.1)\n",
51 | "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.1)\n",
52 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)\n",
53 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4)\n",
54 | "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3)\n"
55 | ],
56 | "name": "stdout"
57 | }
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {
63 | "id": "DvywBbPPMR7p"
64 | },
65 | "source": [
66 | "Upload your kaggle.json file"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "metadata": {
72 | "id": "zzVfbVRJMOs5"
73 | },
74 | "source": [
75 | "# configuring the path of Kaggle.json file\n",
76 | "!mkdir -p ~/.kaggle\n",
77 | "!cp kaggle.json ~/.kaggle/\n",
78 | "!chmod 600 ~/.kaggle/kaggle.json"
79 | ],
80 | "execution_count": null,
81 | "outputs": []
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {
86 | "id": "aYMRBfj6NV0F"
87 | },
88 | "source": [
89 | "Importing the Earthquake Dataset"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "metadata": {
95 | "colab": {
96 | "base_uri": "https://localhost:8080/"
97 | },
98 | "id": "MSWENgDtNUHG",
99 | "outputId": "13d67e37-db51-4070-ba35-3ca0fba750a5"
100 | },
101 | "source": [
102 | "# API to fetch the dataset from Kaggle\n",
103 | "!kaggle competitions download -c LANL-Earthquake-Prediction"
104 | ],
105 | "execution_count": null,
106 | "outputs": [
107 | {
108 | "output_type": "stream",
109 | "text": [
110 | "Warning: Looks like you're using an outdated API Version, please consider updating (server 1.5.12 / client 1.5.4)\n",
111 | "Downloading seg_010eab.csv to /content\n",
112 | " 0% 0.00/316k [00:00, ?B/s]\n",
113 | "100% 316k/316k [00:00<00:00, 43.6MB/s]\n",
114 | "Downloading seg_004ee5.csv to /content\n",
115 | " 0% 0.00/324k [00:00, ?B/s]\n",
116 | "100% 324k/324k [00:00<00:00, 5.17MB/s]\n",
117 | "Downloading seg_007a37.csv to /content\n",
118 | " 0% 0.00/325k [00:00, ?B/s]\n",
119 | "100% 325k/325k [00:00<00:00, 105MB/s]\n",
120 | "Downloading seg_004f1f.csv to /content\n",
121 | " 0% 0.00/324k [00:00, ?B/s]\n",
122 | "100% 324k/324k [00:00<00:00, 105MB/s]\n",
123 | "Downloading seg_0042cc.csv to /content\n",
124 | " 0% 0.00/319k [00:00, ?B/s]\n",
125 | "100% 319k/319k [00:00<00:00, 102MB/s]\n",
126 | "Downloading seg_004cd2.csv to /content\n",
127 | " 0% 0.00/315k [00:00, ?B/s]\n",
128 | "100% 315k/315k [00:00<00:00, 97.9MB/s]\n",
129 | "Downloading seg_003339.csv to /content\n",
130 | " 0% 0.00/310k [00:00, ?B/s]\n",
131 | "100% 310k/310k [00:00<00:00, 100MB/s]\n",
132 | "Downloading seg_006e4a.csv to /content\n",
133 | " 0% 0.00/325k [00:00, ?B/s]\n",
134 | "100% 325k/325k [00:00<00:00, 106MB/s]\n",
135 | "Downloading seg_0012b5.csv to /content\n",
136 | " 0% 0.00/321k [00:00, ?B/s]\n",
137 | "100% 321k/321k [00:00<00:00, 159MB/s]\n",
138 | "Downloading seg_00a37e.csv to /content\n",
139 | " 0% 0.00/332k [00:00, ?B/s]\n",
140 | "100% 332k/332k [00:00<00:00, 102MB/s]\n",
141 | "Downloading seg_00030f.csv to /content\n",
142 | " 0% 0.00/321k [00:00, ?B/s]\n",
143 | "100% 321k/321k [00:00<00:00, 96.3MB/s]\n",
144 | "Downloading seg_00184e.csv to /content\n",
145 | " 0% 0.00/320k [00:00, ?B/s]\n",
146 | "100% 320k/320k [00:00<00:00, 99.3MB/s]\n",
147 | "Downloading seg_00cc91.csv to /content\n",
148 | " 0% 0.00/329k [00:00, ?B/s]\n",
149 | "100% 329k/329k [00:00<00:00, 74.6MB/s]\n",
150 | "Downloading seg_0125d9.csv to /content\n",
151 | " 0% 0.00/320k [00:00, ?B/s]\n",
152 | "100% 320k/320k [00:00<00:00, 93.9MB/s]\n",
153 | "Downloading seg_004314.csv to /content\n",
154 | " 0% 0.00/360k [00:00, ?B/s]\n",
155 | "100% 360k/360k [00:00<00:00, 119MB/s]\n",
156 | "Downloading seg_00f3b9.csv to /content\n",
157 | " 0% 0.00/336k [00:00, ?B/s]\n",
158 | "100% 336k/336k [00:00<00:00, 107MB/s]\n",
159 | "Downloading seg_00be11.csv to /content\n",
160 | " 0% 0.00/330k [00:00, ?B/s]\n",
161 | "100% 330k/330k [00:00<00:00, 101MB/s]\n",
162 | "Downloading seg_00e5f7.csv to /content\n",
163 | " 0% 0.00/319k [00:00, ?B/s]\n",
164 | "100% 319k/319k [00:00<00:00, 89.9MB/s]\n",
165 | "Downloading seg_00648a.csv to /content\n",
166 | " 0% 0.00/329k [00:00, ?B/s]\n",
167 | "100% 329k/329k [00:00<00:00, 45.2MB/s]\n",
168 | "Downloading seg_00c35b.csv to /content\n",
169 | " 0% 0.00/312k [00:00, ?B/s]\n",
170 | "100% 312k/312k [00:00<00:00, 134MB/s]\n",
171 | "Downloading sample_submission.csv to /content\n",
172 | " 0% 0.00/33.3k [00:00, ?B/s]\n",
173 | "100% 33.3k/33.3k [00:00<00:00, 25.8MB/s]\n",
174 | "Downloading train.csv.zip to /content\n",
175 | "100% 2.02G/2.03G [00:31<00:00, 50.4MB/s]\n",
176 | "100% 2.03G/2.03G [00:31<00:00, 68.3MB/s]\n"
177 | ],
178 | "name": "stdout"
179 | }
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "metadata": {
185 | "colab": {
186 | "base_uri": "https://localhost:8080/"
187 | },
188 | "id": "AxFD302lNj30",
189 | "outputId": "339a7424-6840-40b4-f757-e33c3097147a"
190 | },
191 | "source": [
192 | "# extracting the compressed Dataset\n",
193 | "from zipfile import ZipFile\n",
194 | "dataset = '/content/train.csv.zip'\n",
195 | "\n",
196 | "with ZipFile(dataset,'r') as zip:\n",
197 | " zip.extractall()\n",
198 | " print('The dataset is extracted')"
199 | ],
200 | "execution_count": null,
201 | "outputs": [
202 | {
203 | "output_type": "stream",
204 | "text": [
205 | "The dataset is extracted\n"
206 | ],
207 | "name": "stdout"
208 | }
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "metadata": {
214 | "id": "cu_dydlPOUbJ"
215 | },
216 | "source": [],
217 | "execution_count": null,
218 | "outputs": []
219 | }
220 | ]
221 | }
--------------------------------------------------------------------------------
/4.6. Train_Test_Split.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "LnPbntVRnfvV"
19 | },
20 | "source": [
21 | "Importing the Dependencies"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "metadata": {
27 | "id": "-71UtHzNVWjB"
28 | },
29 | "source": [
30 | "import numpy as np\n",
31 | "import pandas as pd\n",
32 | "from sklearn.preprocessing import StandardScaler\n",
33 | "from sklearn.model_selection import train_test_split\n",
34 | "from sklearn import svm\n",
35 | "from sklearn.metrics import accuracy_score"
36 | ],
37 | "execution_count": null,
38 | "outputs": []
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "bmfOfG8joBBy"
44 | },
45 | "source": [
46 | "Data Collection and Analysis\n",
47 | "\n",
48 | "PIMA Diabetes Dataset"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "metadata": {
54 | "id": "Xpw6Mj_pn_TL"
55 | },
56 | "source": [
57 | "# loading the diabetes dataset to a pandas DataFrame\n",
58 | "diabetes_dataset = pd.read_csv('/content/diabetes.csv') "
59 | ],
60 | "execution_count": null,
61 | "outputs": []
62 | },
63 | {
64 | "cell_type": "code",
65 | "metadata": {
66 | "colab": {
67 | "base_uri": "https://localhost:8080/",
68 | "height": 198
69 | },
70 | "id": "-tjO09ncovoh",
71 | "outputId": "b61e6e8d-50ea-417e-8096-d6626a69f073"
72 | },
73 | "source": [
74 | "# printing the first 5 rows of the dataset\n",
75 | "diabetes_dataset.head()"
76 | ],
77 | "execution_count": null,
78 | "outputs": [
79 | {
80 | "output_type": "execute_result",
81 | "data": {
82 | "text/html": [
83 | "\n",
84 | "\n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " | \n",
101 | " Pregnancies | \n",
102 | " Glucose | \n",
103 | " BloodPressure | \n",
104 | " SkinThickness | \n",
105 | " Insulin | \n",
106 | " BMI | \n",
107 | " DiabetesPedigreeFunction | \n",
108 | " Age | \n",
109 | " Outcome | \n",
110 | "
\n",
111 | " \n",
112 | " \n",
113 | " \n",
114 | " 0 | \n",
115 | " 6 | \n",
116 | " 148 | \n",
117 | " 72 | \n",
118 | " 35 | \n",
119 | " 0 | \n",
120 | " 33.6 | \n",
121 | " 0.627 | \n",
122 | " 50 | \n",
123 | " 1 | \n",
124 | "
\n",
125 | " \n",
126 | " 1 | \n",
127 | " 1 | \n",
128 | " 85 | \n",
129 | " 66 | \n",
130 | " 29 | \n",
131 | " 0 | \n",
132 | " 26.6 | \n",
133 | " 0.351 | \n",
134 | " 31 | \n",
135 | " 0 | \n",
136 | "
\n",
137 | " \n",
138 | " 2 | \n",
139 | " 8 | \n",
140 | " 183 | \n",
141 | " 64 | \n",
142 | " 0 | \n",
143 | " 0 | \n",
144 | " 23.3 | \n",
145 | " 0.672 | \n",
146 | " 32 | \n",
147 | " 1 | \n",
148 | "
\n",
149 | " \n",
150 | " 3 | \n",
151 | " 1 | \n",
152 | " 89 | \n",
153 | " 66 | \n",
154 | " 23 | \n",
155 | " 94 | \n",
156 | " 28.1 | \n",
157 | " 0.167 | \n",
158 | " 21 | \n",
159 | " 0 | \n",
160 | "
\n",
161 | " \n",
162 | " 4 | \n",
163 | " 0 | \n",
164 | " 137 | \n",
165 | " 40 | \n",
166 | " 35 | \n",
167 | " 168 | \n",
168 | " 43.1 | \n",
169 | " 2.288 | \n",
170 | " 33 | \n",
171 | " 1 | \n",
172 | "
\n",
173 | " \n",
174 | "
\n",
175 | "
"
176 | ],
177 | "text/plain": [
178 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
179 | "0 6 148 72 ... 0.627 50 1\n",
180 | "1 1 85 66 ... 0.351 31 0\n",
181 | "2 8 183 64 ... 0.672 32 1\n",
182 | "3 1 89 66 ... 0.167 21 0\n",
183 | "4 0 137 40 ... 2.288 33 1\n",
184 | "\n",
185 | "[5 rows x 9 columns]"
186 | ]
187 | },
188 | "metadata": {
189 | "tags": []
190 | },
191 | "execution_count": 3
192 | }
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "metadata": {
198 | "colab": {
199 | "base_uri": "https://localhost:8080/"
200 | },
201 | "id": "lynParo6pEMB",
202 | "outputId": "265e8eec-578c-4ab8-f0af-8d7889e183d5"
203 | },
204 | "source": [
205 | "# number of rows and Columns in this dataset\n",
206 | "diabetes_dataset.shape"
207 | ],
208 | "execution_count": null,
209 | "outputs": [
210 | {
211 | "output_type": "execute_result",
212 | "data": {
213 | "text/plain": [
214 | "(768, 9)"
215 | ]
216 | },
217 | "metadata": {
218 | "tags": []
219 | },
220 | "execution_count": 4
221 | }
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "colab": {
228 | "base_uri": "https://localhost:8080/",
229 | "height": 288
230 | },
231 | "id": "3NDJOlrEpmoL",
232 | "outputId": "fbe0a905-1de9-44d7-9901-840578263c0f"
233 | },
234 | "source": [
235 | "# getting the statistical measures of the data\n",
236 | "diabetes_dataset.describe()"
237 | ],
238 | "execution_count": null,
239 | "outputs": [
240 | {
241 | "output_type": "execute_result",
242 | "data": {
243 | "text/html": [
244 | "\n",
245 | "\n",
258 | "
\n",
259 | " \n",
260 | " \n",
261 | " | \n",
262 | " Pregnancies | \n",
263 | " Glucose | \n",
264 | " BloodPressure | \n",
265 | " SkinThickness | \n",
266 | " Insulin | \n",
267 | " BMI | \n",
268 | " DiabetesPedigreeFunction | \n",
269 | " Age | \n",
270 | " Outcome | \n",
271 | "
\n",
272 | " \n",
273 | " \n",
274 | " \n",
275 | " count | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | " 768.000000 | \n",
283 | " 768.000000 | \n",
284 | " 768.000000 | \n",
285 | "
\n",
286 | " \n",
287 | " mean | \n",
288 | " 3.845052 | \n",
289 | " 120.894531 | \n",
290 | " 69.105469 | \n",
291 | " 20.536458 | \n",
292 | " 79.799479 | \n",
293 | " 31.992578 | \n",
294 | " 0.471876 | \n",
295 | " 33.240885 | \n",
296 | " 0.348958 | \n",
297 | "
\n",
298 | " \n",
299 | " std | \n",
300 | " 3.369578 | \n",
301 | " 31.972618 | \n",
302 | " 19.355807 | \n",
303 | " 15.952218 | \n",
304 | " 115.244002 | \n",
305 | " 7.884160 | \n",
306 | " 0.331329 | \n",
307 | " 11.760232 | \n",
308 | " 0.476951 | \n",
309 | "
\n",
310 | " \n",
311 | " min | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.000000 | \n",
316 | " 0.000000 | \n",
317 | " 0.000000 | \n",
318 | " 0.078000 | \n",
319 | " 21.000000 | \n",
320 | " 0.000000 | \n",
321 | "
\n",
322 | " \n",
323 | " 25% | \n",
324 | " 1.000000 | \n",
325 | " 99.000000 | \n",
326 | " 62.000000 | \n",
327 | " 0.000000 | \n",
328 | " 0.000000 | \n",
329 | " 27.300000 | \n",
330 | " 0.243750 | \n",
331 | " 24.000000 | \n",
332 | " 0.000000 | \n",
333 | "
\n",
334 | " \n",
335 | " 50% | \n",
336 | " 3.000000 | \n",
337 | " 117.000000 | \n",
338 | " 72.000000 | \n",
339 | " 23.000000 | \n",
340 | " 30.500000 | \n",
341 | " 32.000000 | \n",
342 | " 0.372500 | \n",
343 | " 29.000000 | \n",
344 | " 0.000000 | \n",
345 | "
\n",
346 | " \n",
347 | " 75% | \n",
348 | " 6.000000 | \n",
349 | " 140.250000 | \n",
350 | " 80.000000 | \n",
351 | " 32.000000 | \n",
352 | " 127.250000 | \n",
353 | " 36.600000 | \n",
354 | " 0.626250 | \n",
355 | " 41.000000 | \n",
356 | " 1.000000 | \n",
357 | "
\n",
358 | " \n",
359 | " max | \n",
360 | " 17.000000 | \n",
361 | " 199.000000 | \n",
362 | " 122.000000 | \n",
363 | " 99.000000 | \n",
364 | " 846.000000 | \n",
365 | " 67.100000 | \n",
366 | " 2.420000 | \n",
367 | " 81.000000 | \n",
368 | " 1.000000 | \n",
369 | "
\n",
370 | " \n",
371 | "
\n",
372 | "
"
373 | ],
374 | "text/plain": [
375 | " Pregnancies Glucose ... Age Outcome\n",
376 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
377 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
378 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
379 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
380 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
381 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
382 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
383 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
384 | "\n",
385 | "[8 rows x 9 columns]"
386 | ]
387 | },
388 | "metadata": {
389 | "tags": []
390 | },
391 | "execution_count": 5
392 | }
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "metadata": {
398 | "colab": {
399 | "base_uri": "https://localhost:8080/"
400 | },
401 | "id": "LrpHzaGpp5dQ",
402 | "outputId": "cdf3f133-5fdc-4a59-e202-267e358bc831"
403 | },
404 | "source": [
405 | "diabetes_dataset['Outcome'].value_counts()"
406 | ],
407 | "execution_count": null,
408 | "outputs": [
409 | {
410 | "output_type": "execute_result",
411 | "data": {
412 | "text/plain": [
413 | "0 500\n",
414 | "1 268\n",
415 | "Name: Outcome, dtype: int64"
416 | ]
417 | },
418 | "metadata": {
419 | "tags": []
420 | },
421 | "execution_count": 6
422 | }
423 | ]
424 | },
425 | {
426 | "cell_type": "markdown",
427 | "metadata": {
428 | "id": "cB1qRaNcqeh5"
429 | },
430 | "source": [
431 | "0 --> Non-Diabetic\n",
432 | "\n",
433 | "1 --> Diabetic"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "metadata": {
439 | "colab": {
440 | "base_uri": "https://localhost:8080/",
441 | "height": 138
442 | },
443 | "id": "I6MWR0k_qSCK",
444 | "outputId": "710f530f-638b-4b27-a818-dc1d063435f8"
445 | },
446 | "source": [
447 | "diabetes_dataset.groupby('Outcome').mean()"
448 | ],
449 | "execution_count": null,
450 | "outputs": [
451 | {
452 | "output_type": "execute_result",
453 | "data": {
454 | "text/html": [
455 | "\n",
456 | "\n",
469 | "
\n",
470 | " \n",
471 | " \n",
472 | " | \n",
473 | " Pregnancies | \n",
474 | " Glucose | \n",
475 | " BloodPressure | \n",
476 | " SkinThickness | \n",
477 | " Insulin | \n",
478 | " BMI | \n",
479 | " DiabetesPedigreeFunction | \n",
480 | " Age | \n",
481 | "
\n",
482 | " \n",
483 | " Outcome | \n",
484 | " | \n",
485 | " | \n",
486 | " | \n",
487 | " | \n",
488 | " | \n",
489 | " | \n",
490 | " | \n",
491 | " | \n",
492 | "
\n",
493 | " \n",
494 | " \n",
495 | " \n",
496 | " 0 | \n",
497 | " 3.298000 | \n",
498 | " 109.980000 | \n",
499 | " 68.184000 | \n",
500 | " 19.664000 | \n",
501 | " 68.792000 | \n",
502 | " 30.304200 | \n",
503 | " 0.429734 | \n",
504 | " 31.190000 | \n",
505 | "
\n",
506 | " \n",
507 | " 1 | \n",
508 | " 4.865672 | \n",
509 | " 141.257463 | \n",
510 | " 70.824627 | \n",
511 | " 22.164179 | \n",
512 | " 100.335821 | \n",
513 | " 35.142537 | \n",
514 | " 0.550500 | \n",
515 | " 37.067164 | \n",
516 | "
\n",
517 | " \n",
518 | "
\n",
519 | "
"
520 | ],
521 | "text/plain": [
522 | " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n",
523 | "Outcome ... \n",
524 | "0 3.298000 109.980000 ... 0.429734 31.190000\n",
525 | "1 4.865672 141.257463 ... 0.550500 37.067164\n",
526 | "\n",
527 | "[2 rows x 8 columns]"
528 | ]
529 | },
530 | "metadata": {
531 | "tags": []
532 | },
533 | "execution_count": 7
534 | }
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "metadata": {
540 | "id": "RoDW7l9mqqHZ"
541 | },
542 | "source": [
543 | "# separating the data and labels\n",
544 | "X = diabetes_dataset.drop(columns = 'Outcome', axis=1)\n",
545 | "Y = diabetes_dataset['Outcome']"
546 | ],
547 | "execution_count": null,
548 | "outputs": []
549 | },
550 | {
551 | "cell_type": "code",
552 | "metadata": {
553 | "colab": {
554 | "base_uri": "https://localhost:8080/"
555 | },
556 | "id": "3eiRW9M9raMm",
557 | "outputId": "9e149494-2c46-4a1f-dd2b-f2c90e00dd69"
558 | },
559 | "source": [
560 | "print(X)"
561 | ],
562 | "execution_count": null,
563 | "outputs": [
564 | {
565 | "output_type": "stream",
566 | "text": [
567 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
568 | "0 6 148 72 ... 33.6 0.627 50\n",
569 | "1 1 85 66 ... 26.6 0.351 31\n",
570 | "2 8 183 64 ... 23.3 0.672 32\n",
571 | "3 1 89 66 ... 28.1 0.167 21\n",
572 | "4 0 137 40 ... 43.1 2.288 33\n",
573 | ".. ... ... ... ... ... ... ...\n",
574 | "763 10 101 76 ... 32.9 0.171 63\n",
575 | "764 2 122 70 ... 36.8 0.340 27\n",
576 | "765 5 121 72 ... 26.2 0.245 30\n",
577 | "766 1 126 60 ... 30.1 0.349 47\n",
578 | "767 1 93 70 ... 30.4 0.315 23\n",
579 | "\n",
580 | "[768 rows x 8 columns]\n"
581 | ],
582 | "name": "stdout"
583 | }
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "metadata": {
589 | "colab": {
590 | "base_uri": "https://localhost:8080/"
591 | },
592 | "id": "AoxgTJAMrcCl",
593 | "outputId": "874e0649-124b-4f96-e899-b36d2b5b2680"
594 | },
595 | "source": [
596 | "print(Y)"
597 | ],
598 | "execution_count": null,
599 | "outputs": [
600 | {
601 | "output_type": "stream",
602 | "text": [
603 | "0 1\n",
604 | "1 0\n",
605 | "2 1\n",
606 | "3 0\n",
607 | "4 1\n",
608 | " ..\n",
609 | "763 0\n",
610 | "764 0\n",
611 | "765 0\n",
612 | "766 1\n",
613 | "767 0\n",
614 | "Name: Outcome, Length: 768, dtype: int64\n"
615 | ],
616 | "name": "stdout"
617 | }
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "metadata": {
623 | "id": "umAbo_kqrlzI"
624 | },
625 | "source": [
626 | "Data Standardization"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "metadata": {
632 | "id": "njfM5X60rgnc"
633 | },
634 | "source": [
635 | "scaler = StandardScaler()"
636 | ],
637 | "execution_count": null,
638 | "outputs": []
639 | },
640 | {
641 | "cell_type": "code",
642 | "metadata": {
643 | "colab": {
644 | "base_uri": "https://localhost:8080/"
645 | },
646 | "id": "g0ai5ARbr53p",
647 | "outputId": "b9816a11-a5a6-4c93-844d-1caec72b1c0b"
648 | },
649 | "source": [
650 | "scaler.fit(X)"
651 | ],
652 | "execution_count": null,
653 | "outputs": [
654 | {
655 | "output_type": "execute_result",
656 | "data": {
657 | "text/plain": [
658 | "StandardScaler(copy=True, with_mean=True, with_std=True)"
659 | ]
660 | },
661 | "metadata": {
662 | "tags": []
663 | },
664 | "execution_count": 12
665 | }
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "metadata": {
671 | "id": "FHxNwPuZr-kD"
672 | },
673 | "source": [
674 | "standardized_data = scaler.transform(X)"
675 | ],
676 | "execution_count": null,
677 | "outputs": []
678 | },
679 | {
680 | "cell_type": "code",
681 | "metadata": {
682 | "colab": {
683 | "base_uri": "https://localhost:8080/"
684 | },
685 | "id": "fjMwZ5x6sPUJ",
686 | "outputId": "2f0d65d3-01cd-48ed-f009-e998fd3f1174"
687 | },
688 | "source": [
689 | "print(standardized_data)"
690 | ],
691 | "execution_count": null,
692 | "outputs": [
693 | {
694 | "output_type": "stream",
695 | "text": [
696 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
697 | " 1.4259954 ]\n",
698 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
699 | " -0.19067191]\n",
700 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
701 | " -0.10558415]\n",
702 | " ...\n",
703 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
704 | " -0.27575966]\n",
705 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
706 | " 1.17073215]\n",
707 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
708 | " -0.87137393]]\n"
709 | ],
710 | "name": "stdout"
711 | }
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "metadata": {
717 | "id": "ZxWSl4SGsRjE"
718 | },
719 | "source": [
720 | "X = standardized_data\n",
721 | "Y = diabetes_dataset['Outcome']"
722 | ],
723 | "execution_count": null,
724 | "outputs": []
725 | },
726 | {
727 | "cell_type": "code",
728 | "metadata": {
729 | "colab": {
730 | "base_uri": "https://localhost:8080/"
731 | },
732 | "id": "lhJF_7QjsjmP",
733 | "outputId": "b6abea49-bd15-46d0-ef64-e7f6f8e008cc"
734 | },
735 | "source": [
736 | "print(X)\n",
737 | "print(Y)"
738 | ],
739 | "execution_count": null,
740 | "outputs": [
741 | {
742 | "output_type": "stream",
743 | "text": [
744 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
745 | " 1.4259954 ]\n",
746 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
747 | " -0.19067191]\n",
748 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
749 | " -0.10558415]\n",
750 | " ...\n",
751 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
752 | " -0.27575966]\n",
753 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
754 | " 1.17073215]\n",
755 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
756 | " -0.87137393]]\n",
757 | "0 1\n",
758 | "1 0\n",
759 | "2 1\n",
760 | "3 0\n",
761 | "4 1\n",
762 | " ..\n",
763 | "763 0\n",
764 | "764 0\n",
765 | "765 0\n",
766 | "766 1\n",
767 | "767 0\n",
768 | "Name: Outcome, Length: 768, dtype: int64\n"
769 | ],
770 | "name": "stdout"
771 | }
772 | ]
773 | },
774 | {
775 | "cell_type": "markdown",
776 | "metadata": {
777 | "id": "A7CNR7qr2mYr"
778 | },
779 | "source": [
780 | "SPLITTING THE DATA INTO TRAINING DATA & TESTING DATA"
781 | ]
782 | },
783 | {
784 | "cell_type": "code",
785 | "metadata": {
786 | "id": "LqQB0FZg2rPE"
787 | },
788 | "source": [
789 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)"
790 | ],
791 | "execution_count": null,
792 | "outputs": []
793 | },
794 | {
795 | "cell_type": "code",
796 | "metadata": {
797 | "colab": {
798 | "base_uri": "https://localhost:8080/"
799 | },
800 | "id": "89adFns13NtL",
801 | "outputId": "9fe4ecf5-869a-4a16-d836-a500497fa497"
802 | },
803 | "source": [
804 | "print(X.shape, X_train.shape, X_test.shape)"
805 | ],
806 | "execution_count": null,
807 | "outputs": [
808 | {
809 | "output_type": "stream",
810 | "text": [
811 | "(768, 8) (614, 8) (154, 8)\n"
812 | ],
813 | "name": "stdout"
814 | }
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "metadata": {
820 | "id": "qSCgPmrF3S_2"
821 | },
822 | "source": [],
823 | "execution_count": null,
824 | "outputs": []
825 | }
826 | ]
827 | }
--------------------------------------------------------------------------------
/4.8. Feature_extraction_of_Text_data_using_Tf_idf_Vectorizer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | }
13 | },
14 | "cells": [
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {
18 | "id": "FaIBmnXCknPl"
19 | },
20 | "source": [
21 | "About the Dataset:\n",
22 | "\n",
23 | "1. id: unique id for a news article\n",
24 | "2. title: the title of a news article\n",
25 | "3. author: author of the news article\n",
26 | "4. text: the text of the article; could be incomplete\n",
27 | "5. label: a label that marks whether the news article is real or fake:\n",
28 | " 1: Fake news\n",
29 | " 0: Real news\n",
30 | "\n",
31 | "\n",
32 | "\n"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {
38 | "id": "k399dHafvL5N"
39 | },
40 | "source": [
41 | "Importing the Dependencies"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "metadata": {
47 | "id": "-fetC5yqkPVe"
48 | },
49 | "source": [
50 | "import numpy as np\n",
51 | "import pandas as pd\n",
52 | "import re\n",
53 | "from nltk.corpus import stopwords\n",
54 | "from nltk.stem.porter import PorterStemmer\n",
55 | "from sklearn.feature_extraction.text import TfidfVectorizer"
56 | ],
57 | "execution_count": null,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "colab": {
64 | "base_uri": "https://localhost:8080/"
65 | },
66 | "id": "1AC1YpmGwIDw",
67 | "outputId": "98e60824-d963-4486-bf45-4c9a13bb3f33"
68 | },
69 | "source": [
70 | "import nltk\n",
71 | "nltk.download('stopwords')"
72 | ],
73 | "execution_count": null,
74 | "outputs": [
75 | {
76 | "output_type": "stream",
77 | "text": [
78 | "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
79 | "[nltk_data] Unzipping corpora/stopwords.zip.\n"
80 | ],
81 | "name": "stdout"
82 | },
83 | {
84 | "output_type": "execute_result",
85 | "data": {
86 | "text/plain": [
87 | "True"
88 | ]
89 | },
90 | "metadata": {
91 | "tags": []
92 | },
93 | "execution_count": 2
94 | }
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "metadata": {
100 | "colab": {
101 | "base_uri": "https://localhost:8080/"
102 | },
103 | "id": "dxIOt3DowpUR",
104 | "outputId": "96f9e591-c086-4958-f15d-bf6d9cf16fcb"
105 | },
106 | "source": [
107 | "# printing the stopwords in English\n",
108 | "print(stopwords.words('english'))"
109 | ],
110 | "execution_count": null,
111 | "outputs": [
112 | {
113 | "output_type": "stream",
114 | "text": [
115 | "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n"
116 | ],
117 | "name": "stdout"
118 | }
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {
124 | "id": "NjeGd1CLw_6R"
125 | },
126 | "source": [
127 | "Data Pre-processing"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "metadata": {
133 | "id": "nCGcpu_1wzLw"
134 | },
135 | "source": [
136 | "# loading the dataset to a pandas DataFrame\n",
137 | "news_dataset = pd.read_csv('/content/train.csv')"
138 | ],
139 | "execution_count": null,
140 | "outputs": []
141 | },
142 | {
143 | "cell_type": "code",
144 | "metadata": {
145 | "colab": {
146 | "base_uri": "https://localhost:8080/"
147 | },
148 | "id": "aRgmbYSbxV4-",
149 | "outputId": "dfa04f84-b122-45bb-e561-967c49dd9213"
150 | },
151 | "source": [
152 | "news_dataset.shape"
153 | ],
154 | "execution_count": null,
155 | "outputs": [
156 | {
157 | "output_type": "execute_result",
158 | "data": {
159 | "text/plain": [
160 | "(20800, 5)"
161 | ]
162 | },
163 | "metadata": {
164 | "tags": []
165 | },
166 | "execution_count": 5
167 | }
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "metadata": {
173 | "colab": {
174 | "base_uri": "https://localhost:8080/",
175 | "height": 196
176 | },
177 | "id": "jjJ1eB6RxZaS",
178 | "outputId": "e37737d1-94ca-479d-b59a-982c8d1fd666"
179 | },
180 | "source": [
181 | "# print the first 5 rows of the dataframe\n",
182 | "news_dataset.head()"
183 | ],
184 | "execution_count": null,
185 | "outputs": [
186 | {
187 | "output_type": "execute_result",
188 | "data": {
189 | "text/html": [
190 | "\n",
191 | "\n",
204 | "
\n",
205 | " \n",
206 | " \n",
207 | " | \n",
208 | " id | \n",
209 | " title | \n",
210 | " author | \n",
211 | " text | \n",
212 | " label | \n",
213 | "
\n",
214 | " \n",
215 | " \n",
216 | " \n",
217 | " 0 | \n",
218 | " 0 | \n",
219 | " House Dem Aide: We Didn’t Even See Comey’s Let... | \n",
220 | " Darrell Lucus | \n",
221 | " House Dem Aide: We Didn’t Even See Comey’s Let... | \n",
222 | " 1 | \n",
223 | "
\n",
224 | " \n",
225 | " 1 | \n",
226 | " 1 | \n",
227 | " FLYNN: Hillary Clinton, Big Woman on Campus - ... | \n",
228 | " Daniel J. Flynn | \n",
229 | " Ever get the feeling your life circles the rou... | \n",
230 | " 0 | \n",
231 | "
\n",
232 | " \n",
233 | " 2 | \n",
234 | " 2 | \n",
235 | " Why the Truth Might Get You Fired | \n",
236 | " Consortiumnews.com | \n",
237 | " Why the Truth Might Get You Fired October 29, ... | \n",
238 | " 1 | \n",
239 | "
\n",
240 | " \n",
241 | " 3 | \n",
242 | " 3 | \n",
243 | " 15 Civilians Killed In Single US Airstrike Hav... | \n",
244 | " Jessica Purkiss | \n",
245 | " Videos 15 Civilians Killed In Single US Airstr... | \n",
246 | " 1 | \n",
247 | "
\n",
248 | " \n",
249 | " 4 | \n",
250 | " 4 | \n",
251 | " Iranian woman jailed for fictional unpublished... | \n",
252 | " Howard Portnoy | \n",
253 | " Print \\nAn Iranian woman has been sentenced to... | \n",
254 | " 1 | \n",
255 | "
\n",
256 | " \n",
257 | "
\n",
258 | "
"
259 | ],
260 | "text/plain": [
261 | " id ... label\n",
262 | "0 0 ... 1\n",
263 | "1 1 ... 0\n",
264 | "2 2 ... 1\n",
265 | "3 3 ... 1\n",
266 | "4 4 ... 1\n",
267 | "\n",
268 | "[5 rows x 5 columns]"
269 | ]
270 | },
271 | "metadata": {
272 | "tags": []
273 | },
274 | "execution_count": 6
275 | }
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "metadata": {
281 | "colab": {
282 | "base_uri": "https://localhost:8080/"
283 | },
284 | "id": "QYkDi4SwxlKi",
285 | "outputId": "85e27df0-e210-4d0b-c8df-167199c5e108"
286 | },
287 | "source": [
288 | "# counting the number of missing values in the dataset\n",
289 | "news_dataset.isnull().sum()"
290 | ],
291 | "execution_count": null,
292 | "outputs": [
293 | {
294 | "output_type": "execute_result",
295 | "data": {
296 | "text/plain": [
297 | "id 0\n",
298 | "title 558\n",
299 | "author 1957\n",
300 | "text 39\n",
301 | "label 0\n",
302 | "dtype: int64"
303 | ]
304 | },
305 | "metadata": {
306 | "tags": []
307 | },
308 | "execution_count": 7
309 | }
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "metadata": {
315 | "id": "Mc04lQrhx57m"
316 | },
317 | "source": [
318 | "# replacing the null values with empty string\n",
319 | "news_dataset = news_dataset.fillna('')"
320 | ],
321 | "execution_count": null,
322 | "outputs": []
323 | },
324 | {
325 | "cell_type": "code",
326 | "metadata": {
327 | "id": "H7TZgHszygxj"
328 | },
329 | "source": [
330 | "# merging the author name and news title\n",
331 | "news_dataset['content'] = news_dataset['author']+' '+news_dataset['title']"
332 | ],
333 | "execution_count": null,
334 | "outputs": []
335 | },
336 | {
337 | "cell_type": "code",
338 | "metadata": {
339 | "colab": {
340 | "base_uri": "https://localhost:8080/"
341 | },
342 | "id": "cbF6GBBpzBey",
343 | "outputId": "52ea9eee-2c59-4831-c676-8b667b3f6496"
344 | },
345 | "source": [
346 | "print(news_dataset['content'])"
347 | ],
348 | "execution_count": null,
349 | "outputs": [
350 | {
351 | "output_type": "stream",
352 | "text": [
353 | "0 Darrell Lucus House Dem Aide: We Didn’t Even S...\n",
354 | "1 Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo...\n",
355 | "2 Consortiumnews.com Why the Truth Might Get You...\n",
356 | "3 Jessica Purkiss 15 Civilians Killed In Single ...\n",
357 | "4 Howard Portnoy Iranian woman jailed for fictio...\n",
358 | " ... \n",
359 | "20795 Jerome Hudson Rapper T.I.: Trump a ’Poster Chi...\n",
360 | "20796 Benjamin Hoffman N.F.L. Playoffs: Schedule, Ma...\n",
361 | "20797 Michael J. de la Merced and Rachel Abrams Macy...\n",
362 | "20798 Alex Ansary NATO, Russia To Hold Parallel Exer...\n",
363 | "20799 David Swanson What Keeps the F-35 Alive\n",
364 | "Name: content, Length: 20800, dtype: object\n"
365 | ],
366 | "name": "stdout"
367 | }
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "metadata": {
373 | "id": "LfBtAvLtzEo6"
374 | },
375 | "source": [
376 | "# separating the data & label\n",
377 | "X = news_dataset.drop(columns='label', axis=1)\n",
378 | "Y = news_dataset['label']"
379 | ],
380 | "execution_count": null,
381 | "outputs": []
382 | },
383 | {
384 | "cell_type": "code",
385 | "metadata": {
386 | "colab": {
387 | "base_uri": "https://localhost:8080/"
388 | },
389 | "id": "oHPBr540zl1h",
390 | "outputId": "5940cef9-760c-4030-ae66-7ff51787ec2e"
391 | },
392 | "source": [
393 | "print(X)\n",
394 | "print(Y)"
395 | ],
396 | "execution_count": null,
397 | "outputs": [
398 | {
399 | "output_type": "stream",
400 | "text": [
401 | " id ... content\n",
402 | "0 0 ... Darrell Lucus House Dem Aide: We Didn’t Even S...\n",
403 | "1 1 ... Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo...\n",
404 | "2 2 ... Consortiumnews.com Why the Truth Might Get You...\n",
405 | "3 3 ... Jessica Purkiss 15 Civilians Killed In Single ...\n",
406 | "4 4 ... Howard Portnoy Iranian woman jailed for fictio...\n",
407 | "... ... ... ...\n",
408 | "20795 20795 ... Jerome Hudson Rapper T.I.: Trump a ’Poster Chi...\n",
409 | "20796 20796 ... Benjamin Hoffman N.F.L. Playoffs: Schedule, Ma...\n",
410 | "20797 20797 ... Michael J. de la Merced and Rachel Abrams Macy...\n",
411 | "20798 20798 ... Alex Ansary NATO, Russia To Hold Parallel Exer...\n",
412 | "20799 20799 ... David Swanson What Keeps the F-35 Alive\n",
413 | "\n",
414 | "[20800 rows x 5 columns]\n",
415 | "0 1\n",
416 | "1 0\n",
417 | "2 1\n",
418 | "3 1\n",
419 | "4 1\n",
420 | " ..\n",
421 | "20795 0\n",
422 | "20796 0\n",
423 | "20797 0\n",
424 | "20798 1\n",
425 | "20799 1\n",
426 | "Name: label, Length: 20800, dtype: int64\n"
427 | ],
428 | "name": "stdout"
429 | }
430 | ]
431 | },
432 | {
433 | "cell_type": "markdown",
434 | "metadata": {
435 | "id": "0NwFcpqcz37a"
436 | },
437 | "source": [
438 | "Stemming:\n",
439 | "\n",
440 | "Stemming is the process of reducing a word to its root form (its stem).\n",
441 | "\n",
442 | "Example:\n",
443 | "acting, acted, acts --> act"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "metadata": {
449 | "id": "Ga_DaZxhzoWM"
450 | },
451 | "source": [
452 | "port_stem = PorterStemmer()"
453 | ],
454 | "execution_count": null,
455 | "outputs": []
456 | },
457 | {
458 | "cell_type": "code",
459 | "metadata": {
460 | "id": "zY-n0dCh0e-y"
461 | },
462 | "source": [
463 | "# Load the stop words once into a set; calling stopwords.words('english') for every word is slow.\n",
464 | "STOP_WORDS = frozenset(stopwords.words('english'))\n",
465 | "\n",
466 | "def stemming(content):\n",
467 | "    \"\"\"Return `content` lowercased, with non-letters and English stop words removed and each remaining word Porter-stemmed.\"\"\"\n",
468 | "    words = re.sub('[^a-zA-Z]', ' ', content).lower().split()\n",
469 | "    stemmed_words = [port_stem.stem(word) for word in words if word not in STOP_WORDS]\n",
470 | "    return ' '.join(stemmed_words)"
471 | ],
472 | "execution_count": null,
473 | "outputs": []
474 | },
475 | {
476 | "cell_type": "code",
477 | "metadata": {
478 | "id": "MBUIk4c94yTL"
479 | },
480 | "source": [
481 | "news_dataset['content'] = news_dataset['content'].apply(stemming)"
482 | ],
483 | "execution_count": null,
484 | "outputs": []
485 | },
486 | {
487 | "cell_type": "code",
488 | "metadata": {
489 | "colab": {
490 | "base_uri": "https://localhost:8080/"
491 | },
492 | "id": "xmwK-zyO5Stg",
493 | "outputId": "58ca7971-cb4c-44de-aa95-d49f85c3a748"
494 | },
495 | "source": [
496 | "print(news_dataset['content'])"
497 | ],
498 | "execution_count": null,
499 | "outputs": [
500 | {
501 | "output_type": "stream",
502 | "text": [
503 | "0 darrel lucu hous dem aid even see comey letter...\n",
504 | "1 daniel j flynn flynn hillari clinton big woman...\n",
505 | "2 consortiumnew com truth might get fire\n",
506 | "3 jessica purkiss civilian kill singl us airstri...\n",
507 | "4 howard portnoy iranian woman jail fiction unpu...\n",
508 | " ... \n",
509 | "20795 jerom hudson rapper trump poster child white s...\n",
510 | "20796 benjamin hoffman n f l playoff schedul matchup...\n",
511 | "20797 michael j de la merc rachel abram maci said re...\n",
512 | "20798 alex ansari nato russia hold parallel exercis ...\n",
513 | "20799 david swanson keep f aliv\n",
514 | "Name: content, Length: 20800, dtype: object\n"
515 | ],
516 | "name": "stdout"
517 | }
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "metadata": {
523 | "id": "5ZIidnta5k5h"
524 | },
525 | "source": [
526 | "#separating the data and label\n",
527 | "X = news_dataset['content'].values\n",
528 | "Y = news_dataset['label'].values"
529 | ],
530 | "execution_count": null,
531 | "outputs": []
532 | },
533 | {
534 | "cell_type": "code",
535 | "metadata": {
536 | "colab": {
537 | "base_uri": "https://localhost:8080/"
538 | },
539 | "id": "3nA_SBZX6BeH",
540 | "outputId": "c14ccd15-cc79-4ecc-ef24-f33fdd4d3a52"
541 | },
542 | "source": [
543 | "print(X)"
544 | ],
545 | "execution_count": null,
546 | "outputs": [
547 | {
548 | "output_type": "stream",
549 | "text": [
550 | "['darrel lucu hous dem aid even see comey letter jason chaffetz tweet'\n",
551 | " 'daniel j flynn flynn hillari clinton big woman campu breitbart'\n",
552 | " 'consortiumnew com truth might get fire' ...\n",
553 | " 'michael j de la merc rachel abram maci said receiv takeov approach hudson bay new york time'\n",
554 | " 'alex ansari nato russia hold parallel exercis balkan'\n",
555 | " 'david swanson keep f aliv']\n"
556 | ],
557 | "name": "stdout"
558 | }
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "metadata": {
564 | "colab": {
565 | "base_uri": "https://localhost:8080/"
566 | },
567 | "id": "NgkFGXkg6HS4",
568 | "outputId": "57984eb9-1d8c-4090-abfe-7dc7f3358a71"
569 | },
570 | "source": [
571 | "print(Y)"
572 | ],
573 | "execution_count": null,
574 | "outputs": [
575 | {
576 | "output_type": "stream",
577 | "text": [
578 | "[1 0 1 ... 0 1 1]\n"
579 | ],
580 | "name": "stdout"
581 | }
582 | ]
583 | },
584 | {
585 | "cell_type": "code",
586 | "metadata": {
587 | "colab": {
588 | "base_uri": "https://localhost:8080/"
589 | },
590 | "id": "Iu2ZEBkL6QTm",
591 | "outputId": "a16d34f4-7e56-4458-8e77-3546fcff7124"
592 | },
593 | "source": [
594 | "Y.shape"
595 | ],
596 | "execution_count": null,
597 | "outputs": [
598 | {
599 | "output_type": "execute_result",
600 | "data": {
601 | "text/plain": [
602 | "(20800,)"
603 | ]
604 | },
605 | "metadata": {
606 | "tags": []
607 | },
608 | "execution_count": 20
609 | }
610 | ]
611 | },
612 | {
613 | "cell_type": "markdown",
614 | "metadata": {
615 | "id": "-UloskM52Fc4"
616 | },
617 | "source": [
618 | "Tf-Idf"
619 | ]
620 | },
621 | {
622 | "cell_type": "code",
623 | "metadata": {
624 | "id": "8cESLGsJxxDA"
625 | },
626 | "source": [
627 | "# convert the textual data to Feature Vectors\n",
628 | "vectorizer = TfidfVectorizer()"
629 | ],
630 | "execution_count": null,
631 | "outputs": []
632 | },
633 | {
634 | "cell_type": "code",
635 | "metadata": {
636 | "id": "Q_LP0Hwr2SfR"
637 | },
638 | "source": [
639 | "# Fit and transform in one step. Reading from the DataFrame column (not from X) keeps this\n",
640 | "# cell safe to re-run after X has already been replaced by the sparse TF-IDF matrix.\n",
641 | "X = vectorizer.fit_transform(news_dataset['content'].values)"
642 | ],
643 | "execution_count": null,
644 | "outputs": []
645 | },
646 | {
647 | "cell_type": "code",
648 | "metadata": {
649 | "colab": {
650 | "base_uri": "https://localhost:8080/"
651 | },
652 | "id": "5qW7RTiy2jUH",
653 | "outputId": "2f62b77c-56d4-4abd-c011-125c60be753f"
654 | },
655 | "source": [
656 | "print(X)"
657 | ],
658 | "execution_count": null,
659 | "outputs": [
660 | {
661 | "output_type": "stream",
662 | "text": [
663 | " (0, 15686)\t0.28485063562728646\n",
664 | " (0, 13473)\t0.2565896679337957\n",
665 | " (0, 8909)\t0.3635963806326075\n",
666 | " (0, 8630)\t0.29212514087043684\n",
667 | " (0, 7692)\t0.24785219520671603\n",
668 | " (0, 7005)\t0.21874169089359144\n",
669 | " (0, 4973)\t0.233316966909351\n",
670 | " (0, 3792)\t0.2705332480845492\n",
671 | " (0, 3600)\t0.3598939188262559\n",
672 | " (0, 2959)\t0.2468450128533713\n",
673 | " (0, 2483)\t0.3676519686797209\n",
674 | " (0, 267)\t0.27010124977708766\n",
675 | " (1, 16799)\t0.30071745655510157\n",
676 | " (1, 6816)\t0.1904660198296849\n",
677 | " (1, 5503)\t0.7143299355715573\n",
678 | " (1, 3568)\t0.26373768806048464\n",
679 | " (1, 2813)\t0.19094574062359204\n",
680 | " (1, 2223)\t0.3827320386859759\n",
681 | " (1, 1894)\t0.15521974226349364\n",
682 | " (1, 1497)\t0.2939891562094648\n",
683 | " (2, 15611)\t0.41544962664721613\n",
684 | " (2, 9620)\t0.49351492943649944\n",
685 | " (2, 5968)\t0.3474613386728292\n",
686 | " (2, 5389)\t0.3866530551182615\n",
687 | " (2, 3103)\t0.46097489583229645\n",
688 | " :\t:\n",
689 | " (20797, 13122)\t0.2482526352197606\n",
690 | " (20797, 12344)\t0.27263457663336677\n",
691 | " (20797, 12138)\t0.24778257724396507\n",
692 | " (20797, 10306)\t0.08038079000566466\n",
693 | " (20797, 9588)\t0.174553480255222\n",
694 | " (20797, 9518)\t0.2954204003420313\n",
695 | " (20797, 8988)\t0.36160868928090795\n",
696 | " (20797, 8364)\t0.22322585870464118\n",
697 | " (20797, 7042)\t0.21799048897828688\n",
698 | " (20797, 3643)\t0.21155500613623743\n",
699 | " (20797, 1287)\t0.33538056804139865\n",
700 | " (20797, 699)\t0.30685846079762347\n",
701 | " (20797, 43)\t0.29710241860700626\n",
702 | " (20798, 13046)\t0.22363267488270608\n",
703 | " (20798, 11052)\t0.4460515589182236\n",
704 | " (20798, 10177)\t0.3192496370187028\n",
705 | " (20798, 6889)\t0.32496285694299426\n",
706 | " (20798, 5032)\t0.4083701450239529\n",
707 | " (20798, 1125)\t0.4460515589182236\n",
708 | " (20798, 588)\t0.3112141524638974\n",
709 | " (20798, 350)\t0.28446937819072576\n",
710 | " (20799, 14852)\t0.5677577267055112\n",
711 | " (20799, 8036)\t0.45983893273780013\n",
712 | " (20799, 3623)\t0.37927626273066584\n",
713 | " (20799, 377)\t0.5677577267055112\n"
714 | ],
715 | "name": "stdout"
716 | }
717 | ]
718 | }
719 | ]
720 | }
--------------------------------------------------------------------------------
/4.9. Numerical_Dataset_Pre_Processing_Use_Case.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "hu0h4VviKzRu"
21 | },
22 | "source": [
23 | "Importing the Dependencies"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "metadata": {
29 | "id": "R9KKo3H1HzTT"
30 | },
31 | "source": [
32 | "import numpy as np\n",
33 | "import pandas as pd\n",
34 | "from sklearn.preprocessing import StandardScaler\n",
35 | "from sklearn.model_selection import train_test_split"
36 | ],
37 | "execution_count": null,
38 | "outputs": []
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {
43 | "id": "na-dUNZPLMgV"
44 | },
45 | "source": [
46 | "Data Collection & Pre-Processing"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "metadata": {
52 | "id": "iiOFXfCcLKeW"
53 | },
54 | "source": [
55 | "# loading the data from csv file to a pandas dataframe\n",
56 | "diabetes_data = pd.read_csv('/content/diabetes.csv')"
57 | ],
58 | "execution_count": null,
59 | "outputs": []
60 | },
61 | {
62 | "cell_type": "code",
63 | "metadata": {
64 | "colab": {
65 | "base_uri": "https://localhost:8080/",
66 | "height": 196
67 | },
68 | "id": "UsdxwqGwLfFK",
69 | "outputId": "cbd7646d-0290-4581-a293-3393dcea51f4"
70 | },
71 | "source": [
72 | "# first 5 rows of the dataframe\n",
73 | "diabetes_data.head()"
74 | ],
75 | "execution_count": null,
76 | "outputs": [
77 | {
78 | "output_type": "execute_result",
79 | "data": {
80 | "text/html": [
81 | "\n",
82 | "\n",
95 | "
\n",
96 | " \n",
97 | " \n",
98 | " | \n",
99 | " Pregnancies | \n",
100 | " Glucose | \n",
101 | " BloodPressure | \n",
102 | " SkinThickness | \n",
103 | " Insulin | \n",
104 | " BMI | \n",
105 | " DiabetesPedigreeFunction | \n",
106 | " Age | \n",
107 | " Outcome | \n",
108 | "
\n",
109 | " \n",
110 | " \n",
111 | " \n",
112 | " 0 | \n",
113 | " 6 | \n",
114 | " 148 | \n",
115 | " 72 | \n",
116 | " 35 | \n",
117 | " 0 | \n",
118 | " 33.6 | \n",
119 | " 0.627 | \n",
120 | " 50 | \n",
121 | " 1 | \n",
122 | "
\n",
123 | " \n",
124 | " 1 | \n",
125 | " 1 | \n",
126 | " 85 | \n",
127 | " 66 | \n",
128 | " 29 | \n",
129 | " 0 | \n",
130 | " 26.6 | \n",
131 | " 0.351 | \n",
132 | " 31 | \n",
133 | " 0 | \n",
134 | "
\n",
135 | " \n",
136 | " 2 | \n",
137 | " 8 | \n",
138 | " 183 | \n",
139 | " 64 | \n",
140 | " 0 | \n",
141 | " 0 | \n",
142 | " 23.3 | \n",
143 | " 0.672 | \n",
144 | " 32 | \n",
145 | " 1 | \n",
146 | "
\n",
147 | " \n",
148 | " 3 | \n",
149 | " 1 | \n",
150 | " 89 | \n",
151 | " 66 | \n",
152 | " 23 | \n",
153 | " 94 | \n",
154 | " 28.1 | \n",
155 | " 0.167 | \n",
156 | " 21 | \n",
157 | " 0 | \n",
158 | "
\n",
159 | " \n",
160 | " 4 | \n",
161 | " 0 | \n",
162 | " 137 | \n",
163 | " 40 | \n",
164 | " 35 | \n",
165 | " 168 | \n",
166 | " 43.1 | \n",
167 | " 2.288 | \n",
168 | " 33 | \n",
169 | " 1 | \n",
170 | "
\n",
171 | " \n",
172 | "
\n",
173 | "
"
174 | ],
175 | "text/plain": [
176 | " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n",
177 | "0 6 148 72 ... 0.627 50 1\n",
178 | "1 1 85 66 ... 0.351 31 0\n",
179 | "2 8 183 64 ... 0.672 32 1\n",
180 | "3 1 89 66 ... 0.167 21 0\n",
181 | "4 0 137 40 ... 2.288 33 1\n",
182 | "\n",
183 | "[5 rows x 9 columns]"
184 | ]
185 | },
186 | "metadata": {
187 | "tags": []
188 | },
189 | "execution_count": 3
190 | }
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "metadata": {
196 | "colab": {
197 | "base_uri": "https://localhost:8080/"
198 | },
199 | "id": "8SZEYDmELlsN",
200 | "outputId": "9ba2d65d-ad25-4c96-9a25-203c92621729"
201 | },
202 | "source": [
203 | "# number of rows & columns\n",
204 | "diabetes_data.shape"
205 | ],
206 | "execution_count": null,
207 | "outputs": [
208 | {
209 | "output_type": "execute_result",
210 | "data": {
211 | "text/plain": [
212 | "(768, 9)"
213 | ]
214 | },
215 | "metadata": {
216 | "tags": []
217 | },
218 | "execution_count": 4
219 | }
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "metadata": {
225 | "colab": {
226 | "base_uri": "https://localhost:8080/",
227 | "height": 286
228 | },
229 | "id": "jR6mgreIL-JL",
230 | "outputId": "3cd34a55-8679-490f-fc2c-7ce50f8133f7"
231 | },
232 | "source": [
233 | "diabetes_data.describe()"
234 | ],
235 | "execution_count": null,
236 | "outputs": [
237 | {
238 | "output_type": "execute_result",
239 | "data": {
240 | "text/html": [
241 | "\n",
242 | "\n",
255 | "
\n",
256 | " \n",
257 | " \n",
258 | " | \n",
259 | " Pregnancies | \n",
260 | " Glucose | \n",
261 | " BloodPressure | \n",
262 | " SkinThickness | \n",
263 | " Insulin | \n",
264 | " BMI | \n",
265 | " DiabetesPedigreeFunction | \n",
266 | " Age | \n",
267 | " Outcome | \n",
268 | "
\n",
269 | " \n",
270 | " \n",
271 | " \n",
272 | " count | \n",
273 | " 768.000000 | \n",
274 | " 768.000000 | \n",
275 | " 768.000000 | \n",
276 | " 768.000000 | \n",
277 | " 768.000000 | \n",
278 | " 768.000000 | \n",
279 | " 768.000000 | \n",
280 | " 768.000000 | \n",
281 | " 768.000000 | \n",
282 | "
\n",
283 | " \n",
284 | " mean | \n",
285 | " 3.845052 | \n",
286 | " 120.894531 | \n",
287 | " 69.105469 | \n",
288 | " 20.536458 | \n",
289 | " 79.799479 | \n",
290 | " 31.992578 | \n",
291 | " 0.471876 | \n",
292 | " 33.240885 | \n",
293 | " 0.348958 | \n",
294 | "
\n",
295 | " \n",
296 | " std | \n",
297 | " 3.369578 | \n",
298 | " 31.972618 | \n",
299 | " 19.355807 | \n",
300 | " 15.952218 | \n",
301 | " 115.244002 | \n",
302 | " 7.884160 | \n",
303 | " 0.331329 | \n",
304 | " 11.760232 | \n",
305 | " 0.476951 | \n",
306 | "
\n",
307 | " \n",
308 | " min | \n",
309 | " 0.000000 | \n",
310 | " 0.000000 | \n",
311 | " 0.000000 | \n",
312 | " 0.000000 | \n",
313 | " 0.000000 | \n",
314 | " 0.000000 | \n",
315 | " 0.078000 | \n",
316 | " 21.000000 | \n",
317 | " 0.000000 | \n",
318 | "
\n",
319 | " \n",
320 | " 25% | \n",
321 | " 1.000000 | \n",
322 | " 99.000000 | \n",
323 | " 62.000000 | \n",
324 | " 0.000000 | \n",
325 | " 0.000000 | \n",
326 | " 27.300000 | \n",
327 | " 0.243750 | \n",
328 | " 24.000000 | \n",
329 | " 0.000000 | \n",
330 | "
\n",
331 | " \n",
332 | " 50% | \n",
333 | " 3.000000 | \n",
334 | " 117.000000 | \n",
335 | " 72.000000 | \n",
336 | " 23.000000 | \n",
337 | " 30.500000 | \n",
338 | " 32.000000 | \n",
339 | " 0.372500 | \n",
340 | " 29.000000 | \n",
341 | " 0.000000 | \n",
342 | "
\n",
343 | " \n",
344 | " 75% | \n",
345 | " 6.000000 | \n",
346 | " 140.250000 | \n",
347 | " 80.000000 | \n",
348 | " 32.000000 | \n",
349 | " 127.250000 | \n",
350 | " 36.600000 | \n",
351 | " 0.626250 | \n",
352 | " 41.000000 | \n",
353 | " 1.000000 | \n",
354 | "
\n",
355 | " \n",
356 | " max | \n",
357 | " 17.000000 | \n",
358 | " 199.000000 | \n",
359 | " 122.000000 | \n",
360 | " 99.000000 | \n",
361 | " 846.000000 | \n",
362 | " 67.100000 | \n",
363 | " 2.420000 | \n",
364 | " 81.000000 | \n",
365 | " 1.000000 | \n",
366 | "
\n",
367 | " \n",
368 | "
\n",
369 | "
"
370 | ],
371 | "text/plain": [
372 | " Pregnancies Glucose ... Age Outcome\n",
373 | "count 768.000000 768.000000 ... 768.000000 768.000000\n",
374 | "mean 3.845052 120.894531 ... 33.240885 0.348958\n",
375 | "std 3.369578 31.972618 ... 11.760232 0.476951\n",
376 | "min 0.000000 0.000000 ... 21.000000 0.000000\n",
377 | "25% 1.000000 99.000000 ... 24.000000 0.000000\n",
378 | "50% 3.000000 117.000000 ... 29.000000 0.000000\n",
379 | "75% 6.000000 140.250000 ... 41.000000 1.000000\n",
380 | "max 17.000000 199.000000 ... 81.000000 1.000000\n",
381 | "\n",
382 | "[8 rows x 9 columns]"
383 | ]
384 | },
385 | "metadata": {
386 | "tags": []
387 | },
388 | "execution_count": 5
389 | }
390 | ]
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {
395 | "id": "9goF-f7bMXLe"
396 | },
397 | "source": [
398 | "Separating Features and Target"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "metadata": {
404 | "id": "we2FkbpkMI1b"
405 | },
406 | "source": [
407 | "X = diabetes_data.drop(columns='Outcome', axis =1)\n",
408 | "Y = diabetes_data['Outcome']"
409 | ],
410 | "execution_count": null,
411 | "outputs": []
412 | },
413 | {
414 | "cell_type": "code",
415 | "metadata": {
416 | "colab": {
417 | "base_uri": "https://localhost:8080/"
418 | },
419 | "id": "kv-xFACDMo_t",
420 | "outputId": "208f9f4e-c615-47e2-8887-28d759d17e8d"
421 | },
422 | "source": [
423 | "print(X)"
424 | ],
425 | "execution_count": null,
426 | "outputs": [
427 | {
428 | "output_type": "stream",
429 | "text": [
430 | " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n",
431 | "0 6 148 72 ... 33.6 0.627 50\n",
432 | "1 1 85 66 ... 26.6 0.351 31\n",
433 | "2 8 183 64 ... 23.3 0.672 32\n",
434 | "3 1 89 66 ... 28.1 0.167 21\n",
435 | "4 0 137 40 ... 43.1 2.288 33\n",
436 | ".. ... ... ... ... ... ... ...\n",
437 | "763 10 101 76 ... 32.9 0.171 63\n",
438 | "764 2 122 70 ... 36.8 0.340 27\n",
439 | "765 5 121 72 ... 26.2 0.245 30\n",
440 | "766 1 126 60 ... 30.1 0.349 47\n",
441 | "767 1 93 70 ... 30.4 0.315 23\n",
442 | "\n",
443 | "[768 rows x 8 columns]\n"
444 | ],
445 | "name": "stdout"
446 | }
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "metadata": {
452 | "colab": {
453 | "base_uri": "https://localhost:8080/"
454 | },
455 | "id": "rv6wGnC6MqYu",
456 | "outputId": "f27ffa9f-db2f-4341-ae46-3447f0a1b1e8"
457 | },
458 | "source": [
459 | "print(Y)"
460 | ],
461 | "execution_count": null,
462 | "outputs": [
463 | {
464 | "output_type": "stream",
465 | "text": [
466 | "0 1\n",
467 | "1 0\n",
468 | "2 1\n",
469 | "3 0\n",
470 | "4 1\n",
471 | " ..\n",
472 | "763 0\n",
473 | "764 0\n",
474 | "765 0\n",
475 | "766 1\n",
476 | "767 0\n",
477 | "Name: Outcome, Length: 768, dtype: int64\n"
478 | ],
479 | "name": "stdout"
480 | }
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {
486 | "id": "7lHxwsgFMvPp"
487 | },
488 | "source": [
489 | "0 --> Non-Diabetic\n",
490 | "\n",
491 | "1 --> Diabetic"
492 | ]
493 | },
494 | {
495 | "cell_type": "markdown",
496 | "metadata": {
497 | "id": "LCSwJAQrNIC5"
498 | },
499 | "source": [
500 | "Data Standardization"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "metadata": {
506 | "id": "u4_i4PvqMs2N"
507 | },
508 | "source": [
509 | "scaler = StandardScaler()"
510 | ],
511 | "execution_count": null,
512 | "outputs": []
513 | },
514 | {
515 | "cell_type": "code",
516 | "metadata": {
517 | "id": "FjqSqB1VNdBC"
518 | },
519 | "source": [
520 | "standardized_data = scaler.fit_transform(X)"
521 | ],
522 | "execution_count": null,
523 | "outputs": []
524 | },
525 | {
526 | "cell_type": "code",
527 | "metadata": {
528 | "colab": {
529 | "base_uri": "https://localhost:8080/"
530 | },
531 | "id": "EWDTbdqWNlIR",
532 | "outputId": "bffc90af-1a62-4322-8c46-8a291f42c600"
533 | },
534 | "source": [
535 | "print(standardized_data)"
536 | ],
537 | "execution_count": null,
538 | "outputs": [
539 | {
540 | "output_type": "stream",
541 | "text": [
542 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
543 | " 1.4259954 ]\n",
544 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
545 | " -0.19067191]\n",
546 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
547 | " -0.10558415]\n",
548 | " ...\n",
549 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
550 | " -0.27575966]\n",
551 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
552 | " 1.17073215]\n",
553 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
554 | " -0.87137393]]\n"
555 | ],
556 | "name": "stdout"
557 | }
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "metadata": {
563 | "id": "_Ne5Lr4PNy-9"
564 | },
565 | "source": [
566 | "X = standardized_data"
567 | ],
568 | "execution_count": null,
569 | "outputs": []
570 | },
571 | {
572 | "cell_type": "code",
573 | "metadata": {
574 | "colab": {
575 | "base_uri": "https://localhost:8080/"
576 | },
577 | "id": "md1uJwDON5Bz",
578 | "outputId": "62b14f3e-402a-404d-91d3-68c5e2eb56b2"
579 | },
580 | "source": [
581 | "print(X)"
582 | ],
583 | "execution_count": null,
584 | "outputs": [
585 | {
586 | "output_type": "stream",
587 | "text": [
588 | "[[ 0.63994726 0.84832379 0.14964075 ... 0.20401277 0.46849198\n",
589 | " 1.4259954 ]\n",
590 | " [-0.84488505 -1.12339636 -0.16054575 ... -0.68442195 -0.36506078\n",
591 | " -0.19067191]\n",
592 | " [ 1.23388019 1.94372388 -0.26394125 ... -1.10325546 0.60439732\n",
593 | " -0.10558415]\n",
594 | " ...\n",
595 | " [ 0.3429808 0.00330087 0.14964075 ... -0.73518964 -0.68519336\n",
596 | " -0.27575966]\n",
597 | " [-0.84488505 0.1597866 -0.47073225 ... -0.24020459 -0.37110101\n",
598 | " 1.17073215]\n",
599 | " [-0.84488505 -0.8730192 0.04624525 ... -0.20212881 -0.47378505\n",
600 | " -0.87137393]]\n"
601 | ],
602 | "name": "stdout"
603 | }
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "metadata": {
609 | "colab": {
610 | "base_uri": "https://localhost:8080/"
611 | },
612 | "id": "7gaS5qPkN6Fg",
613 | "outputId": "22d2bee5-f5d3-4043-c37c-33ca1823f532"
614 | },
615 | "source": [
616 | "print(Y)"
617 | ],
618 | "execution_count": null,
619 | "outputs": [
620 | {
621 | "output_type": "stream",
622 | "text": [
623 | "0 1\n",
624 | "1 0\n",
625 | "2 1\n",
626 | "3 0\n",
627 | "4 1\n",
628 | " ..\n",
629 | "763 0\n",
630 | "764 0\n",
631 | "765 0\n",
632 | "766 1\n",
633 | "767 0\n",
634 | "Name: Outcome, Length: 768, dtype: int64\n"
635 | ],
636 | "name": "stdout"
637 | }
638 | ]
639 | },
640 | {
641 | "cell_type": "markdown",
642 | "metadata": {
643 | "id": "8lZAGBiGOAYt"
644 | },
645 | "source": [
646 | "Splitting the dataset into Training data & Testing Data"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "metadata": {
652 | "id": "MVtlCAEqN7tq"
653 | },
654 | "source": [
655 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)"
656 | ],
657 | "execution_count": null,
658 | "outputs": []
659 | },
660 | {
661 | "cell_type": "code",
662 | "metadata": {
663 | "colab": {
664 | "base_uri": "https://localhost:8080/"
665 | },
666 | "id": "zLr8D8JTOj3a",
667 | "outputId": "907bbe92-5629-4403-b0b9-555dc6641688"
668 | },
669 | "source": [
670 | "print(X.shape, X_train.shape, X_test.shape)"
671 | ],
672 | "execution_count": null,
673 | "outputs": [
674 | {
675 | "output_type": "stream",
676 | "text": [
677 | "(768, 8) (614, 8) (154, 8)\n"
678 | ],
679 | "name": "stdout"
680 | }
681 | ]
682 | },
683 | {
684 | "cell_type": "code",
685 | "metadata": {
686 | "id": "LP8FiNo2OqLc"
687 | },
688 | "source": [],
689 | "execution_count": null,
690 | "outputs": []
691 | }
692 | ]
693 | }
--------------------------------------------------------------------------------
/ML Use Case 3. Spam_Mail_Prediction_using_Machine_Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "collapsed_sections": []
8 | },
9 | "kernelspec": {
10 | "name": "python3",
11 | "display_name": "Python 3"
12 | },
13 | "language_info": {
14 | "name": "python"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "kqB21QOgMg-G"
22 | },
23 | "source": [
24 | "Importing the Dependencies"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "metadata": {
30 | "id": "rALI06-oHusw"
31 | },
32 | "source": [
33 | "import numpy as np\n",
34 | "import pandas as pd\n",
35 | "from sklearn.model_selection import train_test_split\n",
36 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
37 | "from sklearn.linear_model import LogisticRegression\n",
38 | "from sklearn.metrics import accuracy_score"
39 | ],
40 | "execution_count": null,
41 | "outputs": []
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {
46 | "id": "YyKe9o2ONeFv"
47 | },
48 | "source": [
49 | "Data Collection & Pre-Processing"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "metadata": {
55 | "id": "CpStHH8KNcYB"
56 | },
57 | "source": [
58 | "# loading the data from csv file to a pandas Dataframe\n",
59 | "raw_mail_data = pd.read_csv('/content/mail_data.csv')"
60 | ],
61 | "execution_count": null,
62 | "outputs": []
63 | },
64 | {
65 | "cell_type": "code",
66 | "metadata": {
67 | "colab": {
68 | "base_uri": "https://localhost:8080/"
69 | },
70 | "id": "pdn-7VE2NxsZ",
71 | "outputId": "28c19d96-23a2-43c0-86ad-5c1aee7f1b58"
72 | },
73 | "source": [
74 | "print(raw_mail_data)"
75 | ],
76 | "execution_count": null,
77 | "outputs": [
78 | {
79 | "output_type": "stream",
80 | "name": "stdout",
81 | "text": [
82 | " Category Message\n",
83 | "0 ham Go until jurong point, crazy.. Available only ...\n",
84 | "1 ham Ok lar... Joking wif u oni...\n",
85 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
86 | "3 ham U dun say so early hor... U c already then say...\n",
87 | "4 ham Nah I don't think he goes to usf, he lives aro...\n",
88 | "... ... ...\n",
89 | "5567 spam This is the 2nd time we have tried 2 contact u...\n",
90 | "5568 ham Will ü b going to esplanade fr home?\n",
91 | "5569 ham Pity, * was in mood for that. So...any other s...\n",
92 | "5570 ham The guy did some bitching but I acted like i'd...\n",
93 | "5571 ham Rofl. Its true to its name\n",
94 | "\n",
95 | "[5572 rows x 2 columns]\n"
96 | ]
97 | }
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "metadata": {
103 | "id": "yhakjIE1N011"
104 | },
105 | "source": [
106 | "# replace the null values with an empty string\n",
107 | "mail_data = raw_mail_data.fillna('')"
108 | ],
109 | "execution_count": null,
110 | "outputs": []
111 | },
112 | {
113 | "cell_type": "code",
114 | "metadata": {
115 | "colab": {
116 | "base_uri": "https://localhost:8080/",
117 | "height": 202
118 | },
119 | "id": "SJey6H-SOWeK",
120 | "outputId": "af1b0dfd-2ff9-4af9-cfcd-d0c177dd6ab9"
121 | },
122 | "source": [
123 | "# printing the first 5 rows of the dataframe\n",
124 | "mail_data.head()"
125 | ],
126 | "execution_count": null,
127 | "outputs": [
128 | {
129 | "output_type": "execute_result",
130 | "data": {
131 | "text/html": [
132 | "\n",
133 | "\n",
146 | "
\n",
147 | " \n",
148 | " \n",
149 | " | \n",
150 | " Category | \n",
151 | " Message | \n",
152 | "
\n",
153 | " \n",
154 | " \n",
155 | " \n",
156 | " 0 | \n",
157 | " ham | \n",
158 | " Go until jurong point, crazy.. Available only ... | \n",
159 | "
\n",
160 | " \n",
161 | " 1 | \n",
162 | " ham | \n",
163 | " Ok lar... Joking wif u oni... | \n",
164 | "
\n",
165 | " \n",
166 | " 2 | \n",
167 | " spam | \n",
168 | " Free entry in 2 a wkly comp to win FA Cup fina... | \n",
169 | "
\n",
170 | " \n",
171 | " 3 | \n",
172 | " ham | \n",
173 | " U dun say so early hor... U c already then say... | \n",
174 | "
\n",
175 | " \n",
176 | " 4 | \n",
177 | " ham | \n",
178 | " Nah I don't think he goes to usf, he lives aro... | \n",
179 | "
\n",
180 | " \n",
181 | "
\n",
182 | "
"
183 | ],
184 | "text/plain": [
185 | " Category Message\n",
186 | "0 ham Go until jurong point, crazy.. Available only ...\n",
187 | "1 ham Ok lar... Joking wif u oni...\n",
188 | "2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
189 | "3 ham U dun say so early hor... U c already then say...\n",
190 | "4 ham Nah I don't think he goes to usf, he lives aro..."
191 | ]
192 | },
193 | "metadata": {},
194 | "execution_count": 5
195 | }
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "metadata": {
201 | "colab": {
202 | "base_uri": "https://localhost:8080/"
203 | },
204 | "id": "IbK82N2gOdar",
205 | "outputId": "4d1840a1-22b5-468f-d4d0-a4528ef4313c"
206 | },
207 | "source": [
208 | "# checking the number of rows and columns in the dataframe\n",
209 | "mail_data.shape"
210 | ],
211 | "execution_count": null,
212 | "outputs": [
213 | {
214 | "output_type": "execute_result",
215 | "data": {
216 | "text/plain": [
217 | "(5572, 2)"
218 | ]
219 | },
220 | "metadata": {},
221 | "execution_count": 6
222 | }
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {
228 | "id": "vhR4U3ATPBdk"
229 | },
230 | "source": [
231 | "Label Encoding"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "metadata": {
237 | "id": "9EW7QSgeOt4p"
238 | },
239 | "source": [
240 | "# encode the Category labels in place: spam -> 0, ham -> 1\n",
241 | "\n",
242 | "mail_data.loc[mail_data['Category'] == 'spam', 'Category'] = 0\n",
243 | "mail_data.loc[mail_data['Category'] == 'ham', 'Category'] = 1"
244 | ],
245 | "execution_count": null,
246 | "outputs": []
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {
251 | "id": "uxZK1fWwPwII"
252 | },
253 | "source": [
254 | "spam - 0\n",
255 | "\n",
256 | "ham - 1"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "metadata": {
262 | "id": "t8Rt-FaNPtPE"
263 | },
264 | "source": [
265 | "# separating the data as texts and label\n",
266 | "\n",
267 | "X = mail_data['Message']\n",
268 | "\n",
269 | "Y = mail_data['Category']"
270 | ],
271 | "execution_count": null,
272 | "outputs": []
273 | },
274 | {
275 | "cell_type": "code",
276 | "metadata": {
277 | "colab": {
278 | "base_uri": "https://localhost:8080/"
279 | },
280 | "id": "QnQeUBGtQPP7",
281 | "outputId": "a2640f4b-2a1d-4742-9742-3ecbb6017668"
282 | },
283 | "source": [
284 | "print(X)"
285 | ],
286 | "execution_count": null,
287 | "outputs": [
288 | {
289 | "output_type": "stream",
290 | "name": "stdout",
291 | "text": [
292 | "0 Go until jurong point, crazy.. Available only ...\n",
293 | "1 Ok lar... Joking wif u oni...\n",
294 | "2 Free entry in 2 a wkly comp to win FA Cup fina...\n",
295 | "3 U dun say so early hor... U c already then say...\n",
296 | "4 Nah I don't think he goes to usf, he lives aro...\n",
297 | " ... \n",
298 | "5567 This is the 2nd time we have tried 2 contact u...\n",
299 | "5568 Will ü b going to esplanade fr home?\n",
300 | "5569 Pity, * was in mood for that. So...any other s...\n",
301 | "5570 The guy did some bitching but I acted like i'd...\n",
302 | "5571 Rofl. Its true to its name\n",
303 | "Name: Message, Length: 5572, dtype: object\n"
304 | ]
305 | }
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "metadata": {
311 | "colab": {
312 | "base_uri": "https://localhost:8080/"
313 | },
314 | "id": "cuWDNy5KQQjY",
315 | "outputId": "1a0a109b-d63a-4cf0-fe4e-b486f1d3d623"
316 | },
317 | "source": [
318 | "print(Y)"
319 | ],
320 | "execution_count": null,
321 | "outputs": [
322 | {
323 | "output_type": "stream",
324 | "name": "stdout",
325 | "text": [
326 | "0 1\n",
327 | "1 1\n",
328 | "2 0\n",
329 | "3 1\n",
330 | "4 1\n",
331 | " ..\n",
332 | "5567 0\n",
333 | "5568 1\n",
334 | "5569 1\n",
335 | "5570 1\n",
336 | "5571 1\n",
337 | "Name: Category, Length: 5572, dtype: object\n"
338 | ]
339 | }
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {
345 | "id": "jvHyqdH8QZPH"
346 | },
347 | "source": [
348 | "Splitting the data into training data & test data"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "metadata": {
354 | "id": "RO2GmbSNQSQH"
355 | },
356 | "source": [
357 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=3)"
358 | ],
359 | "execution_count": null,
360 | "outputs": []
361 | },
362 | {
363 | "cell_type": "code",
364 | "metadata": {
365 | "colab": {
366 | "base_uri": "https://localhost:8080/"
367 | },
368 | "id": "tS2c7A4NRa46",
369 | "outputId": "5d44247f-65d0-457d-8a94-0fd8b45a3b72"
370 | },
371 | "source": [
372 | "print(X.shape)\n",
373 | "print(X_train.shape)\n",
374 | "print(X_test.shape)"
375 | ],
376 | "execution_count": null,
377 | "outputs": [
378 | {
379 | "output_type": "stream",
380 | "name": "stdout",
381 | "text": [
382 | "(5572,)\n",
383 | "(4457,)\n",
384 | "(1115,)\n"
385 | ]
386 | }
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {
392 | "id": "wYQpiACGSBYM"
393 | },
394 | "source": [
395 | "Feature Extraction"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "metadata": {
401 | "id": "nLs847nSRibm"
402 | },
403 | "source": [
404 | "# transform the text data to feature vectors that can be used as input to the Logistic regression\n",
405 | "# NOTE: lowercase takes a boolean; the string 'True' is rejected by the parameter validation in recent scikit-learn releases\n",
406 | "feature_extraction = TfidfVectorizer(min_df=1, stop_words='english', lowercase=True)\n",
407 | "# learn the vocabulary and IDF weights from the training messages only, then apply them to the test messages\n",
408 | "X_train_features = feature_extraction.fit_transform(X_train)\n",
409 | "X_test_features = feature_extraction.transform(X_test)\n",
410 | "\n",
411 | "# the labels are stored with dtype object, so convert Y_train and Y_test values to integers\n",
412 | "\n",
413 | "Y_train = Y_train.astype('int')\n",
414 | "Y_test = Y_test.astype('int')"
415 | ],
416 | "execution_count": null,
417 | "outputs": []
418 | },
419 | {
420 | "cell_type": "code",
421 | "metadata": {
422 | "id": "dBMAcw9RUkUY"
423 | },
424 | "source": [
425 | "print(X_train)"
426 | ],
427 | "execution_count": null,
428 | "outputs": []
429 | },
430 | {
431 | "cell_type": "code",
432 | "metadata": {
433 | "id": "1NFuGogZUpt0"
434 | },
435 | "source": [
436 | "print(X_train_features)"
437 | ],
438 | "execution_count": null,
439 | "outputs": []
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {
444 | "id": "q86FvELbU_SV"
445 | },
446 | "source": [
447 | "Training the Model"
448 | ]
449 | },
450 | {
451 | "cell_type": "markdown",
452 | "metadata": {
453 | "id": "hV6BAIZQVBbo"
454 | },
455 | "source": [
456 | "Logistic Regression"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "metadata": {
462 | "id": "1JeAOwzpUv0V"
463 | },
464 | "source": [
465 | "model = LogisticRegression()"
466 | ],
467 | "execution_count": null,
468 | "outputs": []
469 | },
470 | {
471 | "cell_type": "code",
472 | "metadata": {
473 | "colab": {
474 | "base_uri": "https://localhost:8080/"
475 | },
476 | "id": "gWGRHWAPVI_z",
477 | "outputId": "1c5e15dd-0e07-4871-c4fa-b908ee400b55"
478 | },
479 | "source": [
480 | "# training the Logistic Regression model with the training data\n",
481 | "model.fit(X_train_features, Y_train)"
482 | ],
483 | "execution_count": null,
484 | "outputs": [
485 | {
486 | "output_type": "execute_result",
487 | "data": {
488 | "text/plain": [
489 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
490 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
491 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
492 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
493 | " warm_start=False)"
494 | ]
495 | },
496 | "metadata": {},
497 | "execution_count": 18
498 | }
499 | ]
500 | },
501 | {
502 | "cell_type": "markdown",
503 | "metadata": {
504 | "id": "wZ01fa8dVeL5"
505 | },
506 | "source": [
507 | "Evaluating the trained model"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "metadata": {
513 | "id": "ExiF2kKxVYtC"
514 | },
515 | "source": [
516 | "# prediction on training data\n",
517 | "\n",
518 | "prediction_on_training_data = model.predict(X_train_features)\n",
519 | "accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data)"
520 | ],
521 | "execution_count": null,
522 | "outputs": []
523 | },
524 | {
525 | "cell_type": "code",
526 | "metadata": {
527 | "colab": {
528 | "base_uri": "https://localhost:8080/"
529 | },
530 | "id": "o7t4DI5UWCkB",
531 | "outputId": "49fafbb0-0e7f-40c7-9ab7-4aea165731ee"
532 | },
533 | "source": [
534 | "print('Accuracy on training data : ', accuracy_on_training_data)"
535 | ],
536 | "execution_count": null,
537 | "outputs": [
538 | {
539 | "output_type": "stream",
540 | "name": "stdout",
541 | "text": [
542 | "Accuracy on training data : 0.9670181736594121\n"
543 | ]
544 | }
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "metadata": {
550 | "id": "cTin5rXTWKg3"
551 | },
552 | "source": [
553 | "# prediction on test data\n",
554 | "\n",
555 | "prediction_on_test_data = model.predict(X_test_features)\n",
556 | "accuracy_on_test_data = accuracy_score(Y_test, prediction_on_test_data)"
557 | ],
558 | "execution_count": null,
559 | "outputs": []
560 | },
561 | {
562 | "cell_type": "code",
563 | "metadata": {
564 | "colab": {
565 | "base_uri": "https://localhost:8080/"
566 | },
567 | "id": "4gvoMK4OWnJY",
568 | "outputId": "7bf56da4-1987-4828-ea00-95c30fb083d1"
569 | },
570 | "source": [
571 | "print('Accuracy on test data : ', accuracy_on_test_data)"
572 | ],
573 | "execution_count": null,
574 | "outputs": [
575 | {
576 | "output_type": "stream",
577 | "name": "stdout",
578 | "text": [
579 | "Accuracy on test data : 0.9659192825112107\n"
580 | ]
581 | }
582 | ]
583 | },
584 | {
585 | "cell_type": "markdown",
586 | "metadata": {
587 | "id": "bXdOKxYAXaHC"
588 | },
589 | "source": [
590 | "Building a Predictive System"
591 | ]
592 | },
593 | {
594 | "cell_type": "code",
595 | "metadata": {
596 | "colab": {
597 | "base_uri": "https://localhost:8080/"
598 | },
599 | "id": "h60z1__mWql6",
600 | "outputId": "3aac53f3-13f2-4afb-e9f2-75d337cbcd44"
601 | },
602 | "source": [
603 | "input_mail = [\"I've been searching for the right words to thank you for this breather. I promise i wont take your help for granted and will fulfil my promise. You have been wonderful and a blessing at all times\"]\n",
604 | "\n",
605 | "# convert text to feature vectors\n",
606 | "input_data_features = feature_extraction.transform(input_mail)\n",
607 | "\n",
608 | "# making prediction\n",
609 | "\n",
610 | "prediction = model.predict(input_data_features)\n",
611 | "print(prediction)\n",
612 | "\n",
613 | "\n",
614 | "if (prediction[0]==1):\n",
615 | " print('Ham mail')\n",
616 | "\n",
617 | "else:\n",
618 | " print('Spam mail')"
619 | ],
620 | "execution_count": null,
621 | "outputs": [
622 | {
623 | "output_type": "stream",
624 | "name": "stdout",
625 | "text": [
626 | "[1]\n",
627 | "Ham mail\n"
628 | ]
629 | }
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "metadata": {
635 | "id": "v_LqbM_ZYwS1"
636 | },
637 | "source": [],
638 | "execution_count": null,
639 | "outputs": []
640 | }
641 | ]
642 | }
--------------------------------------------------------------------------------