├── AUTOML source code.ipynb
├── Anomaly Detection .ipynb
├── Auto regressor Time series.ipynb
├── Blackadam.ipynb
├── Cricket score clustering(KMeans Clustering).ipynb
├── Decision_Tree.ipynb
├── Digit recognizer using KNN ( Kaggle comp ).ipynb
├── ETL scriptrfm.ipynb
├── Ensemble learning.ipynb
├── Hyper parameter tuning.ipynb
├── KNN algorithm.ipynb
├── LICENSE
├── LP1.py
├── Logistic regression.ipynb
├── MovieRecommedation.ipynb
├── Multivariate LR project.ipynb
├── Ridge regression - Jigsaw kaggle competition.ipynb
├── Salary prediction ( Linear Regression ).ipynb
├── Salary_Data.csv
├── Sample scores.csv
├── Time series analysis.ipynb
├── Untitled4.ipynb
├── Weather prediction (NaiveBayes algorithm ).ipynb
├── adboost algorithm.ipynb
├── airline-passenger-traffic(1).csv
├── amex-prediction.ipynb
├── brain-stroke-prediction-with-less-visualizations.ipynb
├── car data.csv
├── food-demand.ipynb
├── hm-recommender.ipynb
├── overfitting-vs-underfitting-simple-explanation.ipynb
├── price-elasticity.ipynb
└── walmart-sales-advanced-analysis-and-prediction.ipynb
/AUTOML source code.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "eight-refrigerator",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": []
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "future-morrison",
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "data": {
19 | "application/vnd.jupyter.widget-view+json": {
20 | "model_id": "",
21 | "version_major": 2,
22 | "version_minor": 0
23 | },
24 | "text/plain": [
25 | "Optimization Progress: 0%| | 0/300 [00:00<?, ?pipeline/s]"
26 | ]
27 | },
28 | "metadata": {},
29 | "output_type": "display_data"
30 | },
31 | {
32 | "name": "stdout",
33 | "output_type": "stream",
34 | "text": [
35 | "\n",
36 | "Generation 1 - Current best internal CV score: -12.183902491076243\n",
37 | "\n",
38 | "Generation 2 - Current best internal CV score: -12.183902491076243\n",
39 | "\n",
40 | "Generation 3 - Current best internal CV score: -12.064332520818585\n",
41 | "\n",
42 | "Generation 4 - Current best internal CV score: -12.064332520818585\n",
43 | "\n",
44 | "Generation 5 - Current best internal CV score: -12.064332520818585\n",
45 | "\n",
46 | "Best pipeline: XGBRegressor(RobustScaler(input_matrix), learning_rate=0.1, max_depth=9, min_child_weight=14, n_estimators=100, n_jobs=1, objective=reg:squarederror, subsample=0.7000000000000001, verbosity=0)\n",
47 | "-11.070680162041242\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "from tpot import TPOTRegressor\n",
53 | "from sklearn.datasets import load_boston\n",
54 | "from sklearn.model_selection import train_test_split\n",
55 | "\n",
56 | "housing = load_boston()\n",
57 | "X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target,\n",
58 | " train_size=0.75, test_size=0.25, random_state=42)\n",
59 | "\n",
60 | "tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2,random_state=42)\n",
61 | "tpot.fit(X_train, y_train)\n",
62 | "print(tpot.score(X_test, y_test))\n",
63 | "tpot.export('tpot_boston_pipeline.py')"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "id": "alike-mambo",
70 | "metadata": {},
71 | "outputs": [],
72 | "source": []
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 1,
77 | "id": "peaceful-satin",
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "data": {
82 | "application/vnd.jupyter.widget-view+json": {
83 | "model_id": "",
84 | "version_major": 2,
85 | "version_minor": 0
86 | },
87 | "text/plain": [
88 | "Optimization Progress: 0%| | 0/300 [00:00<?, ?pipeline/s]"
89 | ]
90 | },
91 | "metadata": {},
92 | "output_type": "display_data"
93 | },
94 | {
95 | "name": "stdout",
96 | "output_type": "stream",
97 | "text": [
98 | "\n",
99 | "Generation 1 - Current best internal CV score: 0.9643728486851163\n",
100 | "\n",
101 | "Generation 2 - Current best internal CV score: 0.9725375189315709\n",
102 | "\n",
103 | "Generation 3 - Current best internal CV score: 0.9747597411537934\n",
104 | "\n",
105 | "Generation 4 - Current best internal CV score: 0.9777199504337052\n",
106 | "\n",
107 | "Generation 5 - Current best internal CV score: 0.9777199504337052\n",
108 | "\n",
109 | "Best pipeline: DecisionTreeClassifier(LinearSVC(PolynomialFeatures(input_matrix, degree=2, include_bias=False, interaction_only=False), C=20.0, dual=True, loss=squared_hinge, penalty=l2, tol=1e-05), criterion=entropy, max_depth=6, min_samples_leaf=13, min_samples_split=5)\n",
110 | "0.9888888888888889\n"
111 | ]
112 | }
113 | ],
114 | "source": [
115 | "from tpot import TPOTClassifier\n",
116 | "from sklearn.datasets import load_digits\n",
117 | "from sklearn.model_selection import train_test_split\n",
118 | "\n",
119 | "digits = load_digits()\n",
120 | "X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,\n",
121 | " train_size=0.75, test_size=0.25, random_state=42)\n",
122 | "\n",
123 | "tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2)\n",
124 | "tpot.fit(X_train, y_train)\n",
125 | "print(tpot.score(X_test, y_test))\n",
126 | "tpot.export('tpot_digits_pipeline.py')"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "id": "exceptional-moment",
133 | "metadata": {},
134 | "outputs": [],
135 | "source": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "id": "floral-street",
141 | "metadata": {},
142 | "outputs": [],
143 | "source": []
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": null,
148 | "id": "intermediate-scratch",
149 | "metadata": {},
150 | "outputs": [],
151 | "source": []
152 | }
153 | ],
154 | "metadata": {
155 | "kernelspec": {
156 | "display_name": "Python 3",
157 | "language": "python",
158 | "name": "python3"
159 | },
160 | "language_info": {
161 | "codemirror_mode": {
162 | "name": "ipython",
163 | "version": 3
164 | },
165 | "file_extension": ".py",
166 | "mimetype": "text/x-python",
167 | "name": "python",
168 | "nbconvert_exporter": "python",
169 | "pygments_lexer": "ipython3",
170 | "version": "3.8.8"
171 | }
172 | },
173 | "nbformat": 4,
174 | "nbformat_minor": 5
175 | }
176 |
--------------------------------------------------------------------------------
/Anomaly Detection .ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "id": "classical-banner",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import numpy as np\n",
11 | "import pandas as pd\n",
12 | "from sklearn.ensemble import IsolationForest"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 3,
18 | "id": "final-channels",
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
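24 | "<div>\n",
25 | "<style scoped>\n",
26 | "    .dataframe tbody tr th:only-of-type {\n",
27 | "        vertical-align: middle;\n",
28 | "    }\n",
29 | "\n",
30 | "    .dataframe tbody tr th {\n",
31 | "        vertical-align: top;\n",
32 | "    }\n",
33 | "\n",
34 | "    .dataframe thead th {\n",
35 | "        text-align: right;\n",
36 | "    }\n",
37 | "</style>\n",
38 | "<table border=\"1\" class=\"dataframe\">\n",
39 | "  <thead>\n",
40 | "    <tr style=\"text-align: right;\">\n",
41 | "      <th></th>\n",
42 | "      <th>Overs</th>\n",
43 | "      <th>Scores</th>\n",
44 | "    </tr>\n",
45 | "  </thead>\n",
46 | "  <tbody>\n",
47 | "    <tr>\n",
48 | "      <th>0</th>\n",
49 | "      <td>1</td>\n",
50 | "      <td>15</td>\n",
51 | "    </tr>\n",
52 | "    <tr>\n",
53 | "      <th>1</th>\n",
54 | "      <td>2</td>\n",
55 | "      <td>10</td>\n",
56 | "    </tr>\n",
57 | "    <tr>\n",
58 | "      <th>2</th>\n",
59 | "      <td>3</td>\n",
60 | "      <td>17</td>\n",
61 | "    </tr>\n",
62 | "    <tr>\n",
63 | "      <th>3</th>\n",
64 | "      <td>4</td>\n",
65 | "      <td>10</td>\n",
66 | "    </tr>\n",
67 | "    <tr>\n",
68 | "      <th>4</th>\n",
69 | "      <td>5</td>\n",
70 | "      <td>12</td>\n",
71 | "    </tr>\n",
72 | "    <tr>\n",
73 | "      <th>5</th>\n",
74 | "      <td>6</td>\n",
75 | "      <td>20</td>\n",
76 | "    </tr>\n",
77 | "    <tr>\n",
78 | "      <th>6</th>\n",
79 | "      <td>7</td>\n",
80 | "      <td>100</td>\n",
81 | "    </tr>\n",
82 | "    <tr>\n",
83 | "      <th>7</th>\n",
84 | "      <td>8</td>\n",
85 | "      <td>7</td>\n",
86 | "    </tr>\n",
87 | "    <tr>\n",
88 | "      <th>8</th>\n",
89 | "      <td>9</td>\n",
90 | "      <td>8</td>\n",
91 | "    </tr>\n",
92 | "    <tr>\n",
93 | "      <th>9</th>\n",
94 | "      <td>10</td>\n",
95 | "      <td>11</td>\n",
96 | "    </tr>\n",
97 | "    <tr>\n",
98 | "      <th>10</th>\n",
99 | "      <td>11</td>\n",
100 | "      <td>100</td>\n",
101 | "    </tr>\n",
102 | "    <tr>\n",
103 | "      <th>11</th>\n",
104 | "      <td>12</td>\n",
105 | "      <td>14</td>\n",
106 | "    </tr>\n",
107 | "    <tr>\n",
108 | "      <th>12</th>\n",
109 | "      <td>13</td>\n",
110 | "      <td>3</td>\n",
111 | "    </tr>\n",
112 | "    <tr>\n",
113 | "      <th>13</th>\n",
114 | "      <td>14</td>\n",
115 | "      <td>100</td>\n",
116 | "    </tr>\n",
117 | "    <tr>\n",
118 | "      <th>14</th>\n",
119 | "      <td>15</td>\n",
120 | "      <td>11</td>\n",
121 | "    </tr>\n",
122 | "    <tr>\n",
123 | "      <th>15</th>\n",
124 | "      <td>16</td>\n",
125 | "      <td>13</td>\n",
126 | "    </tr>\n",
127 | "    <tr>\n",
128 | "      <th>16</th>\n",
129 | "      <td>17</td>\n",
130 | "      <td>100</td>\n",
131 | "    </tr>\n",
132 | "    <tr>\n",
133 | "      <th>17</th>\n",
134 | "      <td>18</td>\n",
135 | "      <td>16</td>\n",
136 | "    </tr>\n",
137 | "    <tr>\n",
138 | "      <th>18</th>\n",
139 | "      <td>19</td>\n",
140 | "      <td>26</td>\n",
141 | "    </tr>\n",
142 | "    <tr>\n",
143 | "      <th>19</th>\n",
144 | "      <td>20</td>\n",
145 | "      <td>30</td>\n",
146 | "    </tr>\n",
147 | "  </tbody>\n",
148 | "</table>\n",
149 | "</div>"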
150 | ],
151 | "text/plain": [
152 | " Overs Scores\n",
153 | "0 1 15\n",
154 | "1 2 10\n",
155 | "2 3 17\n",
156 | "3 4 10\n",
157 | "4 5 12\n",
158 | "5 6 20\n",
159 | "6 7 100\n",
160 | "7 8 7\n",
161 | "8 9 8\n",
162 | "9 10 11\n",
163 | "10 11 100\n",
164 | "11 12 14\n",
165 | "12 13 3\n",
166 | "13 14 100\n",
167 | "14 15 11\n",
168 | "15 16 13\n",
169 | "16 17 100\n",
170 | "17 18 16\n",
171 | "18 19 26\n",
172 | "19 20 30"
173 | ]
174 | },
175 | "execution_count": 3,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "df = pd.read_csv('Sample scores.csv')\n",
182 | "df"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 4,
188 | "id": "prerequisite-invasion",
189 | "metadata": {},
190 | "outputs": [
191 | {
192 | "data": {
193 | "text/plain": [
194 | "IsolationForest(contamination=0.2, n_estimators=1000)"
195 | ]
196 | },
197 | "execution_count": 4,
198 | "metadata": {},
199 | "output_type": "execute_result"
200 | }
201 | ],
202 | "source": [
203 | "model=IsolationForest(n_estimators=1000,max_samples='auto',contamination=float(0.2),max_features=1.0)\n",
204 | "model.fit(df[['Scores']])"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 5,
210 | "id": "planned-wilson",
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "data": {
215 | "text/html": [
216 | "<div>\n",
217 | "<style scoped>\n",
218 | "    .dataframe tbody tr th:only-of-type {\n",
219 | "        vertical-align: middle;\n",
220 | "    }\n",
221 | "\n",
222 | "    .dataframe tbody tr th {\n",
223 | "        vertical-align: top;\n",
224 | "    }\n",
225 | "\n",
226 | "    .dataframe thead th {\n",
227 | "        text-align: right;\n",
228 | "    }\n",
229 | "</style>\n",
230 | "<table border=\"1\" class=\"dataframe\">\n",
231 | "  <thead>\n",
232 | "    <tr style=\"text-align: right;\">\n",
233 | "      <th></th>\n",
234 | "      <th>Overs</th>\n",
235 | "      <th>Scores</th>\n",
236 | "      <th>scores</th>\n",
237 | "      <th>anomaly</th>\n",
238 | "    </tr>\n",
239 | "  </thead>\n",
240 | "  <tbody>\n",
241 | "    <tr>\n",
242 | "      <th>0</th>\n",
243 | "      <td>1</td>\n",
244 | "      <td>15</td>\n",
245 | "      <td>0.217062</td>\n",
246 | "      <td>1</td>\n",
247 | "    </tr>\n",
248 | "    <tr>\n",
249 | "      <th>1</th>\n",
250 | "      <td>2</td>\n",
251 | "      <td>10</td>\n",
252 | "      <td>0.238335</td>\n",
253 | "      <td>1</td>\n",
254 | "    </tr>\n",
255 | "    <tr>\n",
256 | "      <th>2</th>\n",
257 | "      <td>3</td>\n",
258 | "      <td>17</td>\n",
259 | "      <td>0.182980</td>\n",
260 | "      <td>1</td>\n",
261 | "    </tr>\n",
262 | "    <tr>\n",
263 | "      <th>3</th>\n",
264 | "      <td>4</td>\n",
265 | "      <td>10</td>\n",
266 | "      <td>0.238335</td>\n",
267 | "      <td>1</td>\n",
268 | "    </tr>\n",
269 | "    <tr>\n",
270 | "      <th>4</th>\n",
271 | "      <td>5</td>\n",
272 | "      <td>12</td>\n",
273 | "      <td>0.234626</td>\n",
274 | "      <td>1</td>\n",
275 | "    </tr>\n",
276 | "    <tr>\n",
277 | "      <th>5</th>\n",
278 | "      <td>6</td>\n",
279 | "      <td>20</td>\n",
280 | "      <td>0.109175</td>\n",
281 | "      <td>1</td>\n",
282 | "    </tr>\n",
283 | "    <tr>\n",
284 | "      <th>6</th>\n",
285 | "      <td>7</td>\n",
286 | "      <td>100</td>\n",
287 | "      <td>-0.021292</td>\n",
288 | "      <td>-1</td>\n",
289 | "    </tr>\n",
290 | "    <tr>\n",
291 | "      <th>7</th>\n",
292 | "      <td>8</td>\n",
293 | "      <td>7</td>\n",
294 | "      <td>0.144497</td>\n",
295 | "      <td>1</td>\n",
296 | "    </tr>\n",
297 | "    <tr>\n",
298 | "      <th>8</th>\n",
299 | "      <td>9</td>\n",
300 | "      <td>8</td>\n",
301 | "      <td>0.177666</td>\n",
302 | "      <td>1</td>\n",
303 | "    </tr>\n",
304 | "    <tr>\n",
305 | "      <th>9</th>\n",
306 | "      <td>10</td>\n",
307 | "      <td>11</td>\n",
308 | "      <td>0.248936</td>\n",
309 | "      <td>1</td>\n",
310 | "    </tr>\n",
311 | "    <tr>\n",
312 | "      <th>10</th>\n",
313 | "      <td>11</td>\n",
314 | "      <td>100</td>\n",
315 | "      <td>-0.021292</td>\n",
316 | "      <td>-1</td>\n",
317 | "    </tr>\n",
318 | "    <tr>\n",
319 | "      <th>11</th>\n",
320 | "      <td>12</td>\n",
321 | "      <td>14</td>\n",
322 | "      <td>0.223662</td>\n",
323 | "      <td>1</td>\n",
324 | "    </tr>\n",
325 | "    <tr>\n",
326 | "      <th>12</th>\n",
327 | "      <td>13</td>\n",
328 | "      <td>3</td>\n",
329 | "      <td>0.005323</td>\n",
330 | "      <td>1</td>\n",
331 | "    </tr>\n",
332 | "    <tr>\n",
333 | "      <th>13</th>\n",
334 | "      <td>14</td>\n",
335 | "      <td>100</td>\n",
336 | "      <td>-0.021292</td>\n",
337 | "      <td>-1</td>\n",
338 | "    </tr>\n",
339 | "    <tr>\n",
340 | "      <th>14</th>\n",
341 | "      <td>15</td>\n",
342 | "      <td>11</td>\n",
343 | "      <td>0.248936</td>\n",
344 | "      <td>1</td>\n",
345 | "    </tr>\n",
346 | "    <tr>\n",
347 | "      <th>15</th>\n",
348 | "      <td>16</td>\n",
349 | "      <td>13</td>\n",
350 | "      <td>0.229416</td>\n",
351 | "      <td>1</td>\n",
352 | "    </tr>\n",
353 | "    <tr>\n",
354 | "      <th>16</th>\n",
355 | "      <td>17</td>\n",
356 | "      <td>100</td>\n",
357 | "      <td>-0.021292</td>\n",
358 | "      <td>-1</td>\n",
359 | "    </tr>\n",
360 | "    <tr>\n",
361 | "      <th>17</th>\n",
362 | "      <td>18</td>\n",
363 | "      <td>16</td>\n",
364 | "      <td>0.206693</td>\n",
365 | "      <td>1</td>\n",
366 | "    </tr>\n",
367 | "    <tr>\n",
368 | "      <th>18</th>\n",
369 | "      <td>19</td>\n",
370 | "      <td>26</td>\n",
371 | "      <td>0.058269</td>\n",
372 | "      <td>1</td>\n",
373 | "    </tr>\n",
374 | "    <tr>\n",
375 | "      <th>19</th>\n",
376 | "      <td>20</td>\n",
377 | "      <td>30</td>\n",
378 | "      <td>0.008441</td>\n",
379 | "      <td>1</td>\n",
380 | "    </tr>\n",
381 | "  </tbody>\n",
382 | "</table>\n",
383 | "</div>"
384 | ],
385 | "text/plain": [
386 | " Overs Scores scores anomaly\n",
387 | "0 1 15 0.217062 1\n",
388 | "1 2 10 0.238335 1\n",
389 | "2 3 17 0.182980 1\n",
390 | "3 4 10 0.238335 1\n",
391 | "4 5 12 0.234626 1\n",
392 | "5 6 20 0.109175 1\n",
393 | "6 7 100 -0.021292 -1\n",
394 | "7 8 7 0.144497 1\n",
395 | "8 9 8 0.177666 1\n",
396 | "9 10 11 0.248936 1\n",
397 | "10 11 100 -0.021292 -1\n",
398 | "11 12 14 0.223662 1\n",
399 | "12 13 3 0.005323 1\n",
400 | "13 14 100 -0.021292 -1\n",
401 | "14 15 11 0.248936 1\n",
402 | "15 16 13 0.229416 1\n",
403 | "16 17 100 -0.021292 -1\n",
404 | "17 18 16 0.206693 1\n",
405 | "18 19 26 0.058269 1\n",
406 | "19 20 30 0.008441 1"
407 | ]
408 | },
409 | "execution_count": 5,
410 | "metadata": {},
411 | "output_type": "execute_result"
412 | }
413 | ],
414 | "source": [
415 | "df['scores']=model.decision_function(df[['Scores']])\n",
416 | "df['anomaly']=model.predict(df[['Scores']])\n",
417 | "df.head(20)"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 6,
423 | "id": "posted-pencil",
424 | "metadata": {},
425 | "outputs": [
426 | {
427 | "data": {
428 | "text/plain": [
429 | "4"
430 | ]
431 | },
432 | "execution_count": 6,
433 | "metadata": {},
434 | "output_type": "execute_result"
435 | }
436 | ],
437 | "source": [
438 | "outliers_counter = len(df[df['Scores'] > 36])\n",
439 | "outliers_counter"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": null,
445 | "id": "developed-potato",
446 | "metadata": {},
447 | "outputs": [],
448 | "source": []
449 | }
450 | ],
451 | "metadata": {
452 | "kernelspec": {
453 | "display_name": "Python 3",
454 | "language": "python",
455 | "name": "python3"
456 | },
457 | "language_info": {
458 | "codemirror_mode": {
459 | "name": "ipython",
460 | "version": 3
461 | },
462 | "file_extension": ".py",
463 | "mimetype": "text/x-python",
464 | "name": "python",
465 | "nbconvert_exporter": "python",
466 | "pygments_lexer": "ipython3",
467 | "version": "3.8.8"
468 | }
469 | },
470 | "nbformat": 4,
471 | "nbformat_minor": 5
472 | }
473 |
--------------------------------------------------------------------------------
/Cricket score clustering(KMeans Clustering).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "sixth-membership",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "from pandas import pandas as pd\n",
11 | "from pandas import DataFrame\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "from sklearn.cluster import KMeans"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "id": "color-shower",
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/html": [
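25 | "<div>\n",
26 | "<style scoped>\n",
27 | "    .dataframe tbody tr th:only-of-type {\n",
28 | "        vertical-align: middle;\n",
29 | "    }\n",
30 | "\n",
31 | "    .dataframe tbody tr th {\n",
32 | "        vertical-align: top;\n",
33 | "    }\n",
34 | "\n",
35 | "    .dataframe thead th {\n",
36 | "        text-align: right;\n",
37 | "    }\n",
38 | "</style>\n",
39 | "<table border=\"1\" class=\"dataframe\">\n",
40 | "  <thead>\n",
41 | "    <tr style=\"text-align: right;\">\n",
42 | "      <th></th>\n",
43 | "      <th>Overs</th>\n",
44 | "      <th>Scores</th>\n",
45 | "    </tr>\n",
46 | "  </thead>\n",
47 | "  <tbody>\n",
48 | "    <tr>\n",
49 | "      <th>0</th>\n",
50 | "      <td>1</td>\n",
51 | "      <td>15</td>\n",
52 | "    </tr>\n",
53 | "    <tr>\n",
54 | "      <th>1</th>\n",
55 | "      <td>2</td>\n",
56 | "      <td>10</td>\n",
57 | "    </tr>\n",
58 | "    <tr>\n",
59 | "      <th>2</th>\n",
60 | "      <td>3</td>\n",
61 | "      <td>17</td>\n",
62 | "    </tr>\n",
63 | "    <tr>\n",
64 | "      <th>3</th>\n",
65 | "      <td>4</td>\n",
66 | "      <td>10</td>\n",
67 | "    </tr>\n",
68 | "    <tr>\n",
69 | "      <th>4</th>\n",
70 | "      <td>5</td>\n",
71 | "      <td>12</td>\n",
72 | "    </tr>\n",
73 | "    <tr>\n",
74 | "      <th>5</th>\n",
75 | "      <td>6</td>\n",
76 | "      <td>20</td>\n",
77 | "    </tr>\n",
78 | "    <tr>\n",
79 | "      <th>6</th>\n",
80 | "      <td>7</td>\n",
81 | "      <td>10</td>\n",
82 | "    </tr>\n",
83 | "    <tr>\n",
84 | "      <th>7</th>\n",
85 | "      <td>8</td>\n",
86 | "      <td>7</td>\n",
87 | "    </tr>\n",
88 | "    <tr>\n",
89 | "      <th>8</th>\n",
90 | "      <td>9</td>\n",
91 | "      <td>8</td>\n",
92 | "    </tr>\n",
93 | "    <tr>\n",
94 | "      <th>9</th>\n",
95 | "      <td>10</td>\n",
96 | "      <td>11</td>\n",
97 | "    </tr>\n",
98 | "    <tr>\n",
99 | "      <th>10</th>\n",
100 | "      <td>11</td>\n",
101 | "      <td>4</td>\n",
102 | "    </tr>\n",
103 | "    <tr>\n",
104 | "      <th>11</th>\n",
105 | "      <td>12</td>\n",
106 | "      <td>14</td>\n",
107 | "    </tr>\n",
108 | "    <tr>\n",
109 | "      <th>12</th>\n",
110 | "      <td>13</td>\n",
111 | "      <td>3</td>\n",
112 | "    </tr>\n",
113 | "    <tr>\n",
114 | "      <th>13</th>\n",
115 | "      <td>14</td>\n",
116 | "      <td>6</td>\n",
117 | "    </tr>\n",
118 | "    <tr>\n",
119 | "      <th>14</th>\n",
120 | "      <td>15</td>\n",
121 | "      <td>11</td>\n",
122 | "    </tr>\n",
123 | "    <tr>\n",
124 | "      <th>15</th>\n",
125 | "      <td>16</td>\n",
126 | "      <td>13</td>\n",
127 | "    </tr>\n",
128 | "    <tr>\n",
129 | "      <th>16</th>\n",
130 | "      <td>17</td>\n",
131 | "      <td>20</td>\n",
132 | "    </tr>\n",
133 | "    <tr>\n",
134 | "      <th>17</th>\n",
135 | "      <td>18</td>\n",
136 | "      <td>16</td>\n",
137 | "    </tr>\n",
138 | "    <tr>\n",
139 | "      <th>18</th>\n",
140 | "      <td>19</td>\n",
141 | "      <td>26</td>\n",
142 | "    </tr>\n",
143 | "    <tr>\n",
144 | "      <th>19</th>\n",
145 | "      <td>20</td>\n",
146 | "      <td>30</td>\n",
147 | "    </tr>\n",
148 | "  </tbody>\n",
149 | "</table>\n",
150 | "</div>"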
151 | ],
152 | "text/plain": [
153 | " Overs Scores\n",
154 | "0 1 15\n",
155 | "1 2 10\n",
156 | "2 3 17\n",
157 | "3 4 10\n",
158 | "4 5 12\n",
159 | "5 6 20\n",
160 | "6 7 10\n",
161 | "7 8 7\n",
162 | "8 9 8\n",
163 | "9 10 11\n",
164 | "10 11 4\n",
165 | "11 12 14\n",
166 | "12 13 3\n",
167 | "13 14 6\n",
168 | "14 15 11\n",
169 | "15 16 13\n",
170 | "16 17 20\n",
171 | "17 18 16\n",
172 | "18 19 26\n",
173 | "19 20 30"
174 | ]
175 | },
176 | "execution_count": 2,
177 | "metadata": {},
178 | "output_type": "execute_result"
179 | }
180 | ],
181 | "source": [
182 | "data = pd.read_csv('Sample scores.csv')\n",
183 | "data"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 3,
189 | "id": "seeing-collaboration",
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8+yak3AAAACXBIWXMAAAsTAAALEwEAmpwYAAATQ0lEQVR4nO3df2xdd3nH8fezNKwWreaWWFnilqV0VSZGRdJ5VRkd6mAjpapoyCZENUEZiMBEJyqxTE2ZaP9AK5ABYtPEFNaKMBUog9StWFnoClLHJiqcpjSFkPWHUq1Ompi1pkXzRpo+++MeU9v1b/vcc32/75d05evvOdfn0fHJx/d+z3NOIjORJJXjl5ouQJLUXga/JBXG4Jekwhj8klQYg1+SCnNa0wXMx5o1a3LDhg1NlyFJK8r+/ft/kpl9U8dXRPBv2LCBoaGhpsuQpBUlIp6YbtypHkkqjMEvSYUx+CWpMAa/JBXG4JekwtTW1RMRpwP3Ab9cbedrmXljRJwHfAV4BbAfeGdm/ryuOiRpJRo8MMyufYc5OjrG+t4edmzZyNbN/cvys+t8x/9/wBsz87XAJuDyiLgE+ATwmcz8deAZ4L011iBJK87ggWF27j3I8OgYCQyPjrFz70EGDwwvy8+vLfiz5WfVt6urRwJvBL5Wje8BttZVgyStRLv2HWbs5KlJY2MnT7Fr3+Fl+fm1zvFHxKqIeBA4AdwDPAaMZubz1SpPAtN+domI7RExFBFDIyMjdZYpSR3l6OjYgsYXqtbgz8xTmbkJOAe4GPiNBbx2d2YOZOZAX99LrjiWpK61vrdnQeML1ZaunswcBb4DvA7ojYjxk8rnAMszaSVJXWLHlo30rF41aaxn9Sp2bNm4LD+/tuCPiL6I6K2e9wB/AByi9Qfgj6rVrgHurKsGSVqJtm7u5+ZtF9Lf20MA/b093LztwmXr6qnzJm3rgD0RsYrWH5ivZuY3IuJHwFci4mPAAeCWGmuQpBVp6+b+ZQv6qWoL/sx8CNg8zfjjtOb7JUkN8MpdSSqMwS9JhTH4JakwBr8kFcbgl6TCGPySVBiDX5IKY/BLUmEMfkkqjMEvSYUx+CWpMAa/JBXG4Jekwhj8klQYg1+SCmPwS1JhDH5JKozBL0mFMfglqTAGvyQVxuCXpMKc1nQBktSNBg8Ms2vfYY6OjrG+t4cdWzaydXN/02UBBr8kLbvBA8Ps3HuQsZOnABgeHWPn3oMAHRH+TvVI0jLbte/wL0J/3NjJU+zad7ihiiYz+CVpmR0dHVvQeLsZ/JK0zNb39ixovN0MfklaZju2bKRn9apJYz2rV7Fjy8aGKprMk7uStMzGT+AW19UTEecCXwTWAgnszszPRsRNwPuAkWrVGzLz7rrqkKQmbN3c3zFBP1Wd7/ifBz6cmQ9ExJnA/oi4p1r2mcz86xq3LUmaQW3Bn5nHgGPV8+ci4hDQmX/+JKkgbTm5GxEbgM3A/dXQtRHxUETcGhFnzfCa7RExFBFDIyMj060iSVqE2oM/Is4Avg5cl5nPAp8Dzgc20fpE8KnpXpeZuzNzIDMH+vr66i5TkopRa/BHxGpaoX9bZu4FyMzjmXkqM18APg9cXGcNkqTJagv+iAjgFuBQZn56wvi6Cau9DXi4rhokSS9VZ1fP64F3Agcj4sFq7Abg6ojYRKvF8wjw/hprkCRNUWdXz3eBmGaRPfuS1CBv2SBJhTH4JakwBr8kFcbgl6TCGPySVBiDX5IKY/BLUmEMfkkqjMEvSYUx+CWpMAa/JBXG4Jekwhj8klQYg1+SCmPwS1JhDH5JKozBL0mFMfglqTAGvyQVxuCXpMIY/JJUGINfkgpj8EtSYQx+SSqMwS9JhTH4JakwBr8kFea0un5wRJwLfBFYCySwOzM/GxFnA7cDG4AjwNsz85m66tDiDB4YZte+wxwdHWN9bw87tmxk6+b+psuS5sXjd3Z1vuN/HvhwZr4auAT4YES8
GrgeuDczLwDurb5XBxk8MMzOvQcZHh0jgeHRMXbuPcjggeGmS5Pm5PE7t9qCPzOPZeYD1fPngENAP3AVsKdabQ+wta4atDi79h1m7OSpSWNjJ0+xa9/hhiqS5s/jd25tmeOPiA3AZuB+YG1mHqsWPUVrKmi612yPiKGIGBoZGWlHmaocHR1b0LjUSTx+51Z78EfEGcDXgesy89mJyzIzac3/v0Rm7s7Mgcwc6Ovrq7tMTbC+t2dB41In8fidW63BHxGraYX+bZm5txo+HhHrquXrgBN11qCF27FlIz2rV00a61m9ih1bNjZUkTR/Hr9zqy34IyKAW4BDmfnpCYvuAq6pnl8D3FlXDVqcrZv7uXnbhfT39hBAf28PN2+70K4IrQgev3OL1mxLDT844lLg34CDwAvV8A205vm/CrwSeIJWO+fTs/2sgYGBHBoaqqVOSepWEbE/MwemjtfWx5+Z3wVihsVvqmu7kqTZeeWuJBXG4Jekwhj8klQYg1+SCmPwS1JhDH5JKozBL0mFMfglqTAGvyQVxuCXpMIY/JJUGINfkgpj8EtSYWq7O2fpBg8Ms2vfYY6OjrG+t4cdWzZ6P3BJHcHgr8HggWF27j34i//weXh0jJ17DwIY/pIa51RPDXbtO/yL0B83dvIUu/YdbqgiSXqRwV+Do6NjCxqXpHYy+GuwvrdnQeOS1E4Gfw12bNlIz+pVk8Z6Vq9ix5aNDVUkSS+aM/gj4s8i4qx2FNMttm7u5+ZtF9Lf20MA/b093LztQk/sSuoI8+nqWQt8PyIeAG4F9mVm1lvWyrd1c79BL6kjzfmOPzP/ErgAuAV4N/BIRPxVRJxfc22S1IjBA8O8/uPf5rzr/5nXf/zbDB4YbrqkZTWvOf7qHf5T1eN54CzgaxHxyRprk6S2G78OZ3h0jOTF63C6KfznM8f/oYjYD3wS+Hfgwsz8U+C3gD+suT5JaqsSrsOZzxz/2cC2zHxi4mBmvhARV9ZTliQ1o4TrcOYzx3/j1NCfsOzQ8pckSc0p4Toc+/glaYISrsPxJm2SNMF4G3Y33123tuCPiFuBK4ETmfmaauwm4H3ASLXaDZl5dx3b97bIkhar26/DqXOq5wvA5dOMfyYzN1WP2kK/29uxJGmxagv+zLwPeLqunz+bEtqxJGmxmji5e21EPBQRt852D6CI2B4RQxExNDIyMtNq0yqhHUuSFqvdwf854HxgE3AM+NRMK2bm7swcyMyBvr6+BW2khHYsSVqstgZ/Zh7PzFOZ+QLweeDiOrZTQjuWJC1WW9s5I2JdZh6rvn0b8HAd2ymhHUuSFqvOds4vA5cBayLiSeBG4LKI2AQkcAR4f13b7/Z2LHU325FVp9qCPzOvnmb4lrq2J3WL8Xbk8c608XZkwPDXsvCWDVKHsR1ZdTP4pQ5jO7LqZvBLHcZ2ZNXN4Jc6jO3Iqpt355Q6jO3IqpvBL3Ug25FVJ4NfUsfxOoZ6GfySOorXMdTPk7uSOorXMdTP4JfUUbyOoX4Gv6SO4nUM9TP4JXUUr2Oonyd3JXUUr2Oon8HfpWyH00rmdQz1Mvi7kO1wkmbjHH8Xsh1O0mwM/i5kO5yk2Rj8Xch2OEmzMfi7kO1wkmbjyd0uZDucpNkY/F3KdrilsR12adx/nc3gl6awHXZp3H+dzzl+aQrbYZfG/df5DH5pCtthl8b91/kMfmkK22GXxv3X+Qx+aQrbYZfG/df5PLkrTWE77NK4/zpfZGbTNcxpYGAgh4aGmi5DklaUiNifmQNTx2t7xx8RtwJXAicy8zXV2NnA7cAG4Ajw9sx8pq4alqLpPuSmt79UK71+qZvVOcf/BeDyKWPXA/dm5gXAvdX3HWe8D3l4dIzkxT7kwQPDRWx/qVZ6/VK3qy34M/M+4Okpw1cBe6rne4CtdW1/KZruQ256+0u10uuXul27u3rWZuax6vlTwNqZVoyI7RExFBFDIyMj7amu0nQfctPbX6qVXr/U7Rpr58zWWeUZ
zyxn5u7MHMjMgb6+vjZW1nwfctPbX6qVXr/U7dod/McjYh1A9fVEm7c/L033ITe9/aVa6fVL3a7dffx3AdcAH6++3tnm7c9L033ITW9/qVZ6/VK3q62PPyK+DFwGrAGOAzcCg8BXgVcCT9Bq55x6Avgl7OOXpIVrex9/Zl49w6I31bVNSdLcvFePJBXG4Jekwhj8klQYg1+SCmPwS1JhvB+/upJ3B5VmZvCr64zfHXT8RnHjdwcFDH8Jp3rUhbw7qDQ7g19dx7uDSrMz+NV1vDuoNDuDX13Hu4NKs/PkrrqOdweVZmfwqytt3dxv0EszMPjVkezDl+pj8Kvj2Icv1cuTu+o49uFL9TL41XHsw5fqZfCr49iHL9XL4FfHsQ9fqpcnd9Vx7MOX6mXwqyPZh98s22m7m8EvaRLbabufc/ySJrGdtvsZ/JImsZ22+xn8kiaxnbb7GfySJrGdtvt5clfSJLbTdr9Ggj8ijgDPAaeA5zNzoIk6JE3Pdtru1uQ7/t/LzJ80uH1JKpJz/JJUmKaCP4FvRcT+iNg+3QoRsT0ihiJiaGRkpM3lSVL3air4L83Mi4C3AB+MiDdMXSEzd2fmQGYO9PX1tb9CSepSjQR/Zg5XX08AdwAXN1GHJJWo7cEfES+PiDPHnwNvBh5udx2SVKomunrWAndExPj2v5SZ/9JAHZJUpLYHf2Y+Dry23duV2snbGquTeeWutMy8rbE6nX380jLztsbqdAa/tMy8rbE6ncEvLTNva6xOZ/BLy8zbGqvTeXJXWmbe1lidzuCXatD0bY1tJ9VsDH6py9hOqrk4xy91GdtJNReDX+oytpNqLga/1GVsJ9VcDH6py9hOqrl4clfqMraTai4Gv9SFmm4nVWdzqkeSCmPwS1JhDH5JKozBL0mFMfglqTCRmU3XMKeIGAGeaLqOGawBftJ0EbOwvqWxvqWxvqVbSo2/lpl9UwdXRPB3sogYysyBpuuYifUtjfUtjfUtXR01OtUjSYUx+CWpMAb/0u1uuoA5WN/SWN/SWN/SLXuNzvFLUmF8xy9JhTH4JakwBv88RMS5EfGdiPhRRPwwIj40zTqXRcRPI+LB6vHRNtd4JCIOVtsemmZ5RMTfRMSjEfFQRFzUxto2TtgvD0bEsxFx3ZR12rr/IuLWiDgREQ9PGDs7Iu6JiEeqr2fN8NprqnUeiYhr2ljfroj4cfX7uyMiemd47azHQo313RQRwxN+h1fM8NrLI+JwdSxe38b6bp9Q25GIeHCG17Zj/02bKW07BjPTxxwPYB1wUfX8TOA/gVdPWecy4BsN1ngEWDPL8iuAbwIBXALc31Cdq4CnaF1Y0tj+A94AXAQ8PGHsk8D11fPrgU9M87qzgcerr2dVz89qU31vBk6rnn9iuvrmcyzUWN9NwJ/P4/f/GPAq4GXAD6b+W6qrvinLPwV8tMH9N22mtOsY9B3/PGTmscx8oHr+HHAIWGk3O78K+GK2fA/ojYh1DdTxJuCxzGz0SuzMvA94esrwVcCe6vkeYOs0L90C3JOZT2fmM8A9wOXtqC8zv5WZz1fffg84Z7m3O18z7L/5uBh4NDMfz8yfA1+htd+X1Wz1RUQAbwe+vNzbna9ZMqUtx6DBv0ARsQHYDNw/zeLXRcQPIuKbEfGb7a2MBL4VEfsjYvs0y/uB/5rw/ZM088frHcz8D67J/QewNjOPVc+fAtZOs06n7Mf30PoEN525joU6XVtNRd06wzRFJ+y/3wWOZ+YjMyxv6/6bkiltOQYN/gWIiDOArwPXZeazUxY/QGv64rXA3wKDbS7v0sy8CHgL8MGIeEObtz+niHgZ8Fbgn6ZZ3PT+myRbn6k7stc5Ij4CPA/cNsMqTR0LnwPOBzYBx2hNp3Siq5n93X7b9t9smVLnMWjwz1NErKb1C7otM/dOXZ6Zz2bmz6rndwOrI2JNu+rLzOHq6wngDlofqScaBs6d8P051Vg7vQV4IDOPT13Q9P6rHB+f/qq+nphm
nUb3Y0S8G7gS+OMqGF5iHsdCLTLzeGaeyswXgM/PsN2m999pwDbg9pnWadf+myFT2nIMGvzzUM0J3gIcysxPz7DOr1brEREX09q3/92m+l4eEWeOP6d1EvDhKavdBbwrWi4BfjrhI2W7zPhOq8n9N8FdwHiHxDXAndOssw94c0ScVU1lvLkaq11EXA78BfDWzPyfGdaZz7FQV30Tzxm9bYbtfh+4ICLOqz4BvoPWfm+X3wd+nJlPTrewXftvlkxpzzFY55nrbnkAl9L6yPUQ8GD1uAL4APCBap1rgR/S6lL4HvA7bazvVdV2f1DV8JFqfGJ9AfwdrY6Kg8BAm/fhy2kF+a9MGGts/9H6A3QMOElrjvS9wCuAe4FHgH8Fzq7WHQD+YcJr3wM8Wj3+pI31PUprbnf8GPz7at31wN2zHQttqu8fq2PrIVoBtm5qfdX3V9DqYnmsnfVV418YP+YmrNvE/pspU9pyDHrLBkkqjFM9klQYg1+SCmPwS1JhDH5JKozBL0mFMfglqTAGvyQVxuCXFiEifru6Gdnp1dWeP4yI1zRdlzQfXsAlLVJEfAw4HegBnszMmxsuSZoXg19apOpeM98H/pfWLSZONVySNC9O9UiL9wrgDFr/g9LpDdcizZvv+KVFioi7aP0PUufRuiHZtQ2XJM3LaU0XIK1EEfEu4GRmfikiVgH/ERFvzMxvN12bNBff8UtSYZzjl6TCGPySVBiDX5IKY/BLUmEMfkkqjMEvSYUx+CWpMP8Pfv4FmII56ugAAAAASUVORK5CYII=\n",
195 | "text/plain": [
196 | "<Figure size 432x288 with 1 Axes>"
197 | ]
198 | },
199 | "metadata": {
200 | "needs_background": "light"
201 | },
202 | "output_type": "display_data"
203 | }
204 | ],
205 | "source": [
206 | "plt.scatter(data['Overs'],data['Scores'])\n",
207 | "plt.xlabel('x')\n",
208 | "plt.ylabel('y')\n",
209 | "plt.show()"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 4,
215 | "id": "confused-stuff",
216 | "metadata": {},
217 | "outputs": [
218 | {
219 | "data": {
220 | "text/html": [
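221 | "<div>\n",
222 | "<style scoped>\n",
223 | "    .dataframe tbody tr th:only-of-type {\n",
224 | "        vertical-align: middle;\n",
225 | "    }\n",
226 | "\n",
227 | "    .dataframe tbody tr th {\n",
228 | "        vertical-align: top;\n",
229 | "    }\n",
230 | "\n",
231 | "    .dataframe thead th {\n",
232 | "        text-align: right;\n",
233 | "    }\n",
234 | "</style>\n",
235 | "<table border=\"1\" class=\"dataframe\">\n",
236 | "  <thead>\n",
237 | "    <tr style=\"text-align: right;\">\n",
238 | "      <th></th>\n",
239 | "      <th>Scores</th>\n",
240 | "      <th>Overs</th>\n",
241 | "    </tr>\n",
242 | "  </thead>\n",
243 | "  <tbody>\n",
244 | "    <tr>\n",
245 | "      <th>0</th>\n",
246 | "      <td>15</td>\n",
247 | "      <td>1</td>\n",
248 | "    </tr>\n",
249 | "    <tr>\n",
250 | "      <th>1</th>\n",
251 | "      <td>10</td>\n",
252 | "      <td>2</td>\n",
253 | "    </tr>\n",
254 | "    <tr>\n",
255 | "      <th>2</th>\n",
256 | "      <td>17</td>\n",
257 | "      <td>3</td>\n",
258 | "    </tr>\n",
259 | "    <tr>\n",
260 | "      <th>3</th>\n",
261 | "      <td>10</td>\n",
262 | "      <td>4</td>\n",
263 | "    </tr>\n",
264 | "    <tr>\n",
265 | "      <th>4</th>\n",
266 | "      <td>12</td>\n",
267 | "      <td>5</td>\n",
268 | "    </tr>\n",
269 | "    <tr>\n",
270 | "      <th>5</th>\n",
271 | "      <td>20</td>\n",
272 | "      <td>6</td>\n",
273 | "    </tr>\n",
274 | "    <tr>\n",
275 | "      <th>6</th>\n",
276 | "      <td>10</td>\n",
277 | "      <td>7</td>\n",
278 | "    </tr>\n",
279 | "    <tr>\n",
280 | "      <th>7</th>\n",
281 | "      <td>7</td>\n",
282 | "      <td>8</td>\n",
283 | "    </tr>\n",
284 | "    <tr>\n",
285 | "      <th>8</th>\n",
286 | "      <td>8</td>\n",
287 | "      <td>9</td>\n",
288 | "    </tr>\n",
289 | "    <tr>\n",
290 | "      <th>9</th>\n",
291 | "      <td>11</td>\n",
292 | "      <td>10</td>\n",
293 | "    </tr>\n",
294 | "    <tr>\n",
295 | "      <th>10</th>\n",
296 | "      <td>4</td>\n",
297 | "      <td>11</td>\n",
298 | "    </tr>\n",
299 | "    <tr>\n",
300 | "      <th>11</th>\n",
301 | "      <td>14</td>\n",
302 | "      <td>12</td>\n",
303 | "    </tr>\n",
304 | "    <tr>\n",
305 | "      <th>12</th>\n",
306 | "      <td>3</td>\n",
307 | "      <td>13</td>\n",
308 | "    </tr>\n",
309 | "    <tr>\n",
310 | "      <th>13</th>\n",
311 | "      <td>6</td>\n",
312 | "      <td>14</td>\n",
313 | "    </tr>\n",
314 | "    <tr>\n",
315 | "      <th>14</th>\n",
316 | "      <td>11</td>\n",
317 | "      <td>15</td>\n",
318 | "    </tr>\n",
319 | "    <tr>\n",
320 | "      <th>15</th>\n",
321 | "      <td>13</td>\n",
322 | "      <td>16</td>\n",
323 | "    </tr>\n",
324 | "    <tr>\n",
325 | "      <th>16</th>\n",
326 | "      <td>20</td>\n",
327 | "      <td>17</td>\n",
328 | "    </tr>\n",
329 | "    <tr>\n",
330 | "      <th>17</th>\n",
331 | "      <td>16</td>\n",
332 | "      <td>18</td>\n",
333 | "    </tr>\n",
334 | "    <tr>\n",
335 | "      <th>18</th>\n",
336 | "      <td>26</td>\n",
337 | "      <td>19</td>\n",
338 | "    </tr>\n",
339 | "    <tr>\n",
340 | "      <th>19</th>\n",
341 | "      <td>30</td>\n",
342 | "      <td>20</td>\n",
343 | "    </tr>\n",
344 | "  </tbody>\n",
345 | "</table>\n",
346 | "</div>"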
347 | ],
348 | "text/plain": [
349 | " Scores Overs\n",
350 | "0 15 1\n",
351 | "1 10 2\n",
352 | "2 17 3\n",
353 | "3 10 4\n",
354 | "4 12 5\n",
355 | "5 20 6\n",
356 | "6 10 7\n",
357 | "7 7 8\n",
358 | "8 8 9\n",
359 | "9 11 10\n",
360 | "10 4 11\n",
361 | "11 14 12\n",
362 | "12 3 13\n",
363 | "13 6 14\n",
364 | "14 11 15\n",
365 | "15 13 16\n",
366 | "16 20 17\n",
367 | "17 16 18\n",
368 | "18 26 19\n",
369 | "19 30 20"
370 | ]
371 | },
372 | "execution_count": 4,
373 | "metadata": {},
374 | "output_type": "execute_result"
375 | }
376 | ],
377 | "source": [
378 | "df = DataFrame(data,columns=['Scores','Overs'])\n",
379 | "df"
380 | ]
381 | },
382 | {
383 | "cell_type": "code",
384 | "execution_count": 5,
385 | "id": "dependent-gibson",
386 | "metadata": {},
387 | "outputs": [],
388 | "source": [
389 | "kmeans = KMeans(n_clusters=3).fit(df)"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 6,
395 | "id": "closing-jacksonville",
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | "[[13.42857143 4. ]\n",
403 | " [23. 18.5 ]\n",
404 | " [ 8.55555556 12. ]]\n"
405 | ]
406 | }
407 | ],
408 | "source": [
409 | "centroids = kmeans.cluster_centers_\n",
410 | "print(centroids)"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 7,
416 | "id": "handy-proportion",
417 | "metadata": {},
418 | "outputs": [
419 | {
420 | "data": {
421 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8+yak3AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhoUlEQVR4nO3deZhcZZn38e9dW+9Ntg6EEAiQYIQoARtkiRJQAUEFHEkAF3QY4ygjMiLCoO/AMA7zzjCAvi5oGJCgAkFAQSYqDMsAKpqORJZkQoIEWULS2Xtfqu73j1OddNJVne6kq05Vn9/nuvrq6uepqnPnUPz69HOe8xxzd0REJDpiYRcgIiLFpeAXEYkYBb+ISMQo+EVEIkbBLyISMYmwCxiKCRMm+NSpU8MuQ0SkrCxdunSDuzfs2l4WwT916lSamprCLkNEpKyY2au52jXUIyISMQp+EZGIUfCLiESMgl9EJGLK4uSuiMhIaOnq4u4XnuPnK1fg7pw5/W18/J1HMqayKuzSiqpgwW9mlcCTQEV2O/e6+9VmdjBwNzAeWAp80t27C1WHiAjA5o4OPnL3j9nY0U5nby8Ar2zZzI+eW8YD532CfWtrQ66weAo51NMFnOLuRwKzgNPN7Djg34Cb3H0asBm4qIA1iIgAcP1vn2J9W+v20AfoSqfZ2NHOPz/5eIiVFV/Bgt8Drdkfk9kvB04B7s22LwTOLlQNIiJ9Hli5gp5MZkB72p1H/ryannQ6hKrCUdCTu2YWN7NlwHrgEeBlYIu79/3KfR2YnOe1882sycyampubC1mmiIxy7r7Tkf6uMu50K/hHhrun3X0WcABwLDBjGK9d4O6N7t7Y0DDgimMRkSEzM6aPG5+3f7/aOqqTySJWFK6iTOd09y3A48DxwBgz6zupfADwRjFqEJFou/yE91CZGDifpSqR4CvHn4iZhVBVOAoW/GbWYGZjso+rgA8AKwh+AXws+7QLgQcKVYOISJ/3HXIo15x0CrWp1Pav6mSSy094D2fNODzs8oqqkPP4JwELzSxO8AvmHnd/yMyWA3eb2TeAZ4FbC1iDiMh2c494B2fPOJxlb63F3Zm13yQqcvwVMNoV7F/s7s8BR+Vo/zPBeL+ISNGl4nGOnXxA2GWESks2iIhEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxCj4RUQiRsEvIhIxCn4RkYhR8IuIRIyCX0QkYhT8IiIRo+AXEYkYBb+ISMQo+EVEIkbBLyISMQp+EZGIUfCLiESMgl9EJGIU/CIiERO9uwyLyKiwtqWFX65+ibaebo7Z/wDePfkAzCzsssqCgl9Eys7NS37P//vD7wDoTqepSiY5eMxYfnzOuexTWRlydaVPQz0iUlae+ssavrPkGbrSabrSaRxo7+nhpY0buOzhX4ZdXllQ8ItIWflB0x/o6O0d0N6TyfD0a6/S3N4WQlXlRcEvImXllS1b8vZVxOO8sW1b8YopUwp+ESkrB9TX5+3rTqeZVFtXxGrKk4JfRMrK/KOPoSqRHNCeiMVo3H8y+9bWhlBVeVHwi0hZed8hh/LJd86iMpEgEQsirCaZ5ID6em467cyQqysPms4pImXnytnvZe4RM3lw5f/S0t3FcZOncPLBh2z/RSCDK1jwm9kU4A5gX8CBBe7+LTO7Bvgs0Jx96lXuvrhQdYjI6HTI2HFcetwJYZdRlgp5xN8LXObufzSzOmCpmT2S7bvJ3f+jgNsWEZE8Chb87r4WWJt93GJmK4DJhdqeiIgMTVEGxMxsKnAU8Pts09+Z2XNmdpuZjc3zmvlm1mRmTc3NzbmeIiIie6DgwW9mtcB9wKXuvg24GTgUmEXwF8ENuV7n7gvcvdHdGxsaGgpdpohIZBQ0+M0sSRD6P3H3+wHcfZ27p909A9wCHFvIGkREZGcFC34L1ke9FVjh7jf2a5/U72nnAC8UqgYRERmokLN6TgQ+CTxvZsuybVcB55vZLIIp
nmuAzxWwBhER2UUhZ/U8DeS6K4Lm7IuIhEiXuYmIRIyCX0QkYhT8IiIRo+AXEYkYBb+ISMQo+EVEIkbBLyISMQp+EZGIUfCLiESMgl9EJGIU/CIiEaPgFxGJGAW/iEjEKPhFRCJGwS8iEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxCj4RUQiRsEvIhIxCn4RkYhR8IuIRIyCX0QkYhJhFyDFs/4vzTxyx/+w4Y1NvO2Yacw570QqqyvCLktGia2dnfzsf5ezatNGDh4zlo++/XDGVVWHXZbkYO5emDc2mwLcAewLOLDA3b9lZuOARcBUYA0w1903D/ZejY2N3tTUVJA6o+KhHzzMzX9/O55xerp7qaypJFWZ4IYnrmXqEVPCLk/K3DOvv8bf/OJnZNzp7O2lMpHAgO+e8RHmTD047PIiy8yWunvjru2FHOrpBS5z98OB44CLzexw4ErgUXefDjya/VkKaM2Lr/H9Ly+ku7OHnu5eADrbOmnZ1MrXzryOQv3yl2ho6+7ms7/4Ge09PXT2Zj9fvb109PZy8eIH2dLZEXKFsquCBb+7r3X3P2YftwArgMnAWcDC7NMWAmcXqgYJPPjdX20P/P7coWVTK88/tSKEqmS0WLz6JfIdOjjwwP/q81VqinJy18ymAkcBvwf2dfe12a63CIaCcr1mvpk1mVlTc3NzMcoctV5/6U0y6Uze/nWvav/Knntj2zbae3py9nX29vLq1i3FLUh2q+DBb2a1wH3Ape6+rX+fB2MMOQ8W3H2Buze6e2NDQ0OhyxzVDn7nQSSS8Zx97s4Bh+1f5IpkNDl47FhqksmcfdWJJIeNn1DkimR3Chr8ZpYkCP2fuPv92eZ1ZjYp2z8JWF/IGgTOuvh04omBwR+LGRMPnMCMY6eFUJWMFqcfOp1kLPeBRSxmfOiwGUWuSHanYMFvZgbcCqxw9xv7dT0IXJh9fCHwQKFqkMD+h+7HFT+6hIqqFJU1FcRiRlVdJQ1TJnDd4q8R/KcS2TMViQR3nPMxxlRUUpNMEsOoSSapS1Vw+1l/RW0qFXaJsotCTuecDTwFPA/0DTBfRTDOfw9wIPAqwXTOTYO9l6Zzjoy2rW08ee8zbF63lUOPPIjG02cRj+c+UhMZrq7eXv77zy+zZusWDqiv57RDp1GZyD0EJMWRbzpnwYJ/JCn4RUSGL4x5/CIiUoK0ZIOISClqaYFFi2DVKpg+HebNg7q6EXlrBb+ISKl5+mk44wzIZKCtDWpq4MtfhsWLYfbsvX57DfWIiJSSlpYg9FtagtCH4Htfe2vrXm9CwS8iUkoWLQqO9HPJZIL+vaTgFxEpJatW7TjS31VbG6xevdebGFLwm9mhZlaRfTzHzC4xszF7vXUREdnZ9OnBmH4uNTUwbe+vtB/qEf99QNrMpgELgCnAnXu9dRER2dm8eRDLE82xWNC/l4Ya/Bl37wXOAb7t7pcDk/Z66yIisrO6umD2Tl3djiP/mpod7bW1e72JoU7n7DGz8wnW1vlwtk3XYouIFMLs2fDmm8GJ3NWrg+GdefNGJPRh6MH/GeBvgX9x91fM7GDgRyNSgYiIDFRbCxddVJC3HlLwu/tyM7uCYGE13P0V4N8KUpGIiBTUkILfzD4M/AeQAg42s1nAte7+kQLWVja6u3p4+v7fs+TXy6iuq+T9nziJGcdO03LHIlKShjrUcw1wLPAEgLsvM7NDClRTWdn01ma+dMLX2bphGx2tnVjM+PUPn+Ckc4/nK7d9QeEvIiVnqLN6etx96y5t+W/iGiH/fuF3aH59Ix2tnQB4xulq7+LJe3/HY3c+HXJ1IiIDDTX4XzSzC4C4mU03s28Dvy1gXWVh87otPPfkCtK96QF9nW1d3HvjL0KoSkRkcEMN/i8CRwBdBBdubQUuLVBNZWPTW1tIVuQfLdv45qA3FhMRCcVux/jNLA78l7ufDHyt8CWVj30PaqCnqzdv/5QZk4tYjYjI0Oz2iN/d
00DGzPYpQj1lpXZMDSfNPZ5U5cBr2SqqKzj/Hz4aQlUiIoMb6qyeVuB5M3sE2L5snLtfUpCqysiXbp7Pxjc3s/x3K8mkM8QTcdK9GS68di6Npx4ZdnkiJc/dWbGhmea2NqaPH8/+dfVhlzTqDTX4789+yS4qqyv490f+kdXLXuH5J1dQUV3BCWc1MqZBfyCJ7M7KjRv4/EMPsL69jbgZ3ek0J045iG+efia1qVTY5Y1a5u5De6JZCjgs++NKd+8pWFW7aGxs9KampmJtTkSKYEtnB3MW3sq2rq6d2lPxOO+atD8/+ejckCobPcxsqbs37to+1PX45wCrgO8C3wNeMrP3jmSBIhIti158ge4cU6G702mefWstL23cEEJV0TDUoZ4bgFPdfSWAmR0G3AW8q1CFicjo9szrf6EznXtWXMyMP617i8PGTyhyVdEw1Hn8yb7QB3D3l9CyzCKyF8ZXVZNvQZOYGWMqKotaT5QMNfibzOw/s7ddnGNmtwAadBeRPTZv5juoTOQedHB33nvQ1OIWFCFDDf7PA8uBS7Jfy7NtIiJ7pHHSZM6ZcTjViR2DBzEzKhMJvnnamVTk+aUge29Is3rMrAbozF7M1Xc1b4W7txe4PmD4s3o2r9vCwqsX8fhdv6G7q4cZ757GRdd9nJknzihglSIyXO7OE6++wsI/Pcu61lbeue9+/M1RjUwfPz7s0kaFfLN6hhr8zwDvd/fW7M+1wMPufsIgr7kN+BCw3t1nZtuuAT4LNGefdpW7L97d9ocT/Ns2tjD/yMvY0ryNdM+OGQMVVSn+8d6vcOwHjxrS+4iIlLu9ms4JVPaFPkD2cfVuXnM7cHqO9pvcfVb2a7ehP1z33vgLtm1s2Sn0Abo6uvnm3/6AoV63ICIyWg01+NvM7Oi+H8ysEegY7AXu/iRQ9OUpH7vz6bwLp7VsauW1lW8WuSIRkdIy1LMnlwI/NbO+1JwEzNvDbf6dmX2KYFbQZe6+OdeTzGw+MB/gwAMPHPKb51obv997DtovIhIFgx7xm9kxZrafuy8BZgCLgB7gV8Are7C9m4FDgVnAWoILw3Jy9wXu3ujujQ0NDUPewPEfaSSejOfsS1YkOfDtWipZRKJtd0M9PwC6s4+PB64iWLZhM7BguBtz93Xunnb3DHALwX18R9R5V5xNZU3FgHvdVlSnmH/9J4nHc/9SEBGJit0Ff9zd+8bp5wEL3P0+d/8/wLThbszMJvX78RzgheG+x+5MPLCBb//uOo48+QgSyTiJVIKJB03gK7d+gdM+ffJIb04iynteIrP1n8ls/jyZ1h/gGd1tTcrH7sb442aWcPde4H1kx9yH8lozuwuYA0wws9eBq4E5ZjYLcGAN8Lk9K3twU942mev/+2o6Wjvo7uyhfnzdgL8ARPZUpnUBtH6HYNQzDV2/wdu+D2Nvx1K6B4OUvt0F/13A/5jZBoJZPE8BmNk0gvvu5uXu5+dovnVPitxTVbVVVNVWFXOTMsp5z/Js6Hf2a+0EB9/8OZj4G4LrG0VK16DB7+7/YmaPEsziedh3TIKPEdyAXSRSvP0n7Djttasu6H4GKk4sZkkiw7bb6Zzu/kyOtpcKU45IiUu/CWTydDpk1hezGpE9MtQLuEQEIPkOIN8tAR0S04tZjcgeUfCLDINVXwA5x/DjED8YS84sek0iw6XgFxkGi++Hjfk+WC1YDVAJVgWJadjYW8Iub8R472t458N49xKCy25kNNGC1yLDZBXHw8RnoOt/ILMREjMgeeSomDLsmTZ8y6XBSWpLAg5WDWO+g6W0su1ooeAX2QNmKaj8QNhljDjf8qUg9OkG78o2tuGbPwMTfoXF9wu1PhkZGuoREQC89y/Q/XtyTlf13uxUVhkNFPwiEuh9ESzfIEA3dC8pajlSOAp+EQnY2MH7YxOKU4cUnIJfRAKpY4CK3H1WhVXnWoVFypGCv4zpNpIykszi2NhvB9NTSfbrqILKD0Mq7y22pcxoVk+ZSafT
3HvDL7jvpofYvG4rYybW81eXfohzv/IR4okyWxyspQUWLYJVq2D6dJg3D+rqwq4q0ix1DExYjLfdAd1/hPgErPrjkDpxVExXlYCVw1FjY2OjNzU1hV1GSfjGeTfxzENNdLXvmHlRUZ3imNOP4up7vxJiZcP09NNwxhmQyUBbG9TUQCwGixfD7NlhVycyKpjZUndv3LVdQz1l5OU/rRkQ+gBd7d0s+dUyVv3xzyFVNkwtLUHot7QEoQ/B97721tZw6xMZ5RT8ZeS3Dy6hp6s3Z19PZze/+fkfilzRHlq0KDjSzyWTCfpFpGAU/GXEMw55hubcy+hk76pVO470d9XWBqtXF7cekYhR8JeRd5/5LpKVuZcErqhOcdyHBgzllabp04Mx/VxqamDasG/nLCLDoOAvI29rPJRZJx9Bqmrn8E9VpZg5++3MOLZMAnPevOBEbi6xWNAvIgWj4C8z19x/OR/7+w9RXV9FPBGjuq6Kcy45g2sf+Gr5TLerqwtm79TV7Tjyr6nZ0V5bO+y39HQzmZZvk9n0GTJbrsC7nx3hosuPZ1rJtP2YzKaLyGz+It75uJZYFkDTOctWJpOhq72LiuoKYvmOnktda2twInf16mB4Z968PQv97mX45k+Dp4EuguOZCqi+gFj9FSNcdHnw9Jv4xnMh0wp0BI1WDclGbOz3sbxr8shokm86p/7rl6lYLEZVbVXYZeyd2lq46KK9egv3NL7lC+Dt/VozQAe034lXvh9LvWuvtlGOfMsVwb0C+t8f2Nuhewnefg9Wc0FotUn4yvRQUSSrewl4R57OzkguJezpDdDzLLlvCt8B7XcUuyQpMQp+KW+ZDYN0OqTfKlopJSOzOXv3rHz9m4pXi5QkBb+Ut+SM7Nh+LilIzSpmNaUhPnmQfUKwzyTSFPxS1iwxDZIz2Wk1ye2dcaz6E0WvKWwWq4bquUBljt5KrObiYpckJUYnd3fRsrmVZx99Hnc46n0zqR9X2NUiX13+GqufXcOYifXMOnlmSa6w2dXRxR//+3k6WjuZeeLbmHhgQ9gl7cTG3oxv/lvoeREsBhiQwMZ8G4vvH3Z5obC6K/DMZuh8GCwOxIK/Auq/jlW8O+zyJGQK/ix3545/+in3/PvPSSSD3dLb08vcr57Np64+d8TnyLdsbuWac65nZdNqYvEYhpGsSHDN/Zczc/bbR3Rbe+Pxu3/DjfO/TyxmuDu9PWlmn/NuLv/hF0imBhlHLiKL7YONvwvvWQm9yyE2DlInYIONc49yZklszA14+g3obgKrhNRsLJbnimmJlILN4zez24APAevdfWa2bRywCJgKrAHmuvvm3b1XMebx//K2R/nel35IZ1vXTu2V1RV84Vuf4YMXvW9Et3fZnKtZ/sxL9HbvvOhaZW0lP1zxTSZMHj+i29sTy595ia++/58GrAZaUZXiAxfO4Uvf+2xIlYnIUISxLPPtwOm7tF0JPOru04FHsz+XhB9fe++A0AfobO/iR9f+dES39ery11i5ZPWA0AdI96R58Hu/HtHt7ak7r7uf7o7uAe1dHd08fPvjtG1rz/EqESl1BQt+d38S2HXe2FnAwuzjhcDZhdr+cPT29NL8Wv5pgRve2ERPd8+Ibe/lZWuIJXLv+p6uHl787coR29beWLX0z/kWAyWRSvDm6ghOlRQZBYo9q2dfd1+bffwWsG++J5rZfDNrMrOm5ubmghYVT8RJ5Vn1EiBVkdw+7j8SxkzcByP3OQMzK4lhHoB9GvKf2O7t7mWfhvoiViMiIyW06ZwenFzIe4LB3Re4e6O7NzY0FHYWiZlx6qfnkEgNDPdkKsEHLjxpRE/uHnnyESQrc594TFWl+PDnTx2xbe2Nj15yJpU1FQPaLWYccuRUJk6ZEEJVIrK3ih3868xsEkD2+/oibz+vi667gMnT9tsp6CprKth/2n78zb9+fES3FY/Hueb+y6mqrSRZEfwCMDMqqiv4yBdOY+aJpXGBzQcuPIlZp7yDytod
88ErqlPUj6vlH358SYiVicjeKOjqnGY2FXio36ye64GN7v5/zexKYJy7f3V371Os1Tm7u3p48qe/47E7n8LdOeWC93DS3BNIVRRmWuCGNzfx0M2/5oXfrGTC5HF8+POnccQJbyvItvZUJpNhyS+f5Ze3PUbblnbefebRnP7Xp1A7RtMCRUpdvlk9hZzOeRcwB5gArAOuBn4O3AMcCLxKMJ1ztwuHaFlmEZHhK/qyzO5+fp6ukZ0QLyIiw6K1ekREIkbBLyISMQp+EZGIUfCLiESMVueUkuKZLXj7PdD1BFg1Vv0xqPgAZqW3XLVIuVLwS8nw3r/gG8/N3jg9WDDPe5ogeReM/c9IL7MsMpI01CMlw7deDr6VvtAPGtuh+1m8fVFodYmMNgp+KQmeXhfcQYtMjt5OaP9xsUsSGbUU/FIaMlthsKEc31K0UkRGOwW/lIbEFPBcR/t9/UcUrxaRUU7BLyXBrAqqzwOqcvRWYrUXF7skkVFLs3qkYDy9AbqfBhxSJ2LxiYM+3+oux70FOh7MDvtY8Nr6b2Cpo4tRskgkKPhlxLk73nI9tN8Blv2IeS9e/XGs7sq8N7UxS2D7XIfXXgo9z4JVQeo4zPLfHU1Ehk/BLyPO2++B9p8A3eD9btbecTcen4LVfGLQ11t8IsRPK2yRIhGmMX4ZeW3fAzoGtnsHtN1MIW/+IyK7p+CXEeWehsza/E/IbAS68/eLSMEp+GWExcAGuy1jBaClF0TCpOCXEWVmUHUukOuEbAqqPoqZPnYiYdL/gTLirPZSSEwHq+7XWg2JQ7C6y8Iqq+x5pg3PtIZdhowCmtUjI85i1TD+p9D1KN7xEOBY1ZlQ8X6tsLkHvHsZvu1a6F0R/Jw4DKv/OpY6JuTKpFwp+KUgzBJQeRpWqWmZe8N7nsc3fQro3NHYuwLfdBGM+yGWeldotUn50lCPSAnzbf/GTqG/XSfe8q/FLkdGCQW/SIlyd+hpyv+Enhdw19RYGT4Fv0hJy728hcjeUPCLlCgzg9R7yBv+qWO0jpHsEQW/SAmz+iuy02L7h78FN6KvuyqssqTMKfhFSpglDsXG3wcV7ye4KC4JqTnYuHuw5NvDLk/KVCjTOc1sDdACpIFed28Mow6RcmCJQ7Cx3w27DBlFwpzHf7K7bwhx+yIikaShHhGRiAkr+B142MyWmtn8XE8ws/lm1mRmTc3NzUUuT0Rk9Aor+Ge7+9HAB4GLzey9uz7B3Re4e6O7NzY0NBS/QhGRUSqU4Hf3N7Lf1wM/A44Now4RkSgqevCbWY2Z1fU9Bk4FXih2HSIQ3DHMu57A2+7AOx/DvTfskkQKLoxZPfsCPzOzvu3f6e6/CqEOiTjvWYVv/jR4O3gPWBKogHG3YcnDi1tMSwssWgSrVsH06TBvHtTVFbcGiQwrhxtfNzY2elPTIItViQyTeze+/j3gmwd22j7YxCcxqypOMU8/DWecAZkMtLVBTQ3EYrB4McyeXZwaZFQys6W5rpPSdE6Jps6HyXvTd++Bzl8Wp46WliD0W1qC0Ifge197q+64JSNPwS+R5L2rwdvy9LbjPSuLU8iiRcGRfi6ZTNAvMsIU/BJJFt8P8g7lVEJ8/+IUsmrVjiP9XbW1werVxalDIkXBL9FUeUZwGWEeVvXh4tQxfXowpp9LTQ1Mm1acOiRSFPwSSRarx8Z8E6gkWPWS7PdK2Oc/sNi4gm3bM9vw9HrcM8HsnVie/w1jsaBfZITpZusSWVZ5MjQ8grffA+mXIX4IVj03GAYqAO99Gd/6j9CzDIhBrB6v/TKxxYvzz+qprS1ILRJtCn6JNIvvi9V9seDb8fRafONc8Fa2jzFlmmHbP5E56gpib74ZnMhdvToY3pk3T6EvBaPgFykCb70FvIOBJxY6ofVGfOJc7KKLwihNIkhj/CLF0PUYkG85iAz0ripmNRJxCn6RYrDB/rjOoD++pZgU/CLFUHkWO2YP7cLqITG9qOVItCn4RYrAai6EWAOQ
3KWnEtvnG2QXLRQpCgW/SBFYrB6bcD9Unw82BqiE1HHYuIVYxUlhlycRo4FFkSKx2Fis/utQ//WwS5GI0xG/iEjEKPhFRCJGwS8iEjEKfhGRiFHwi4hETFncc9fMmoE2YEPYtZSgCWi/7Er7ZCDtk4GisE8OcveGXRvLIvgBzKwp102Do077ZSDtk4G0TwaK8j7RUI+ISMQo+EVEIqacgn9B2AWUKO2XgbRPBtI+GSiy+6RsxvhFRGRklNMRv4iIjAAFv4hIxJRF8JvZ6Wa20sxWm9mVYddTCsxsjZk9b2bLzKwp7HrCYGa3mdl6M3uhX9s4M3vEzFZlv48Ns8Yw5Nkv15jZG9nPyzIzOyPMGovJzKaY2eNmttzMXjSzL2XbI/tZKfngN7M48F3gg8DhwPlmdni4VZWMk919VlTnIgO3A6fv0nYl8Ki7Twcezf4cNbczcL8A3JT9vMxy98VFrilMvcBl7n44cBxwcTZDIvtZKfngB44FVrv7n929G7gbOCvkmqQEuPuTwKZdms8CFmYfLwTOLmZNpSDPfoksd1/r7n/MPm4BVgCTifBnpRyCfzLwWr+fX8+2RZ0DD5vZUjObH3YxJWRfd1+bffwWsG+YxZSYvzOz57JDQZEZ1ujPzKYCRwG/J8KflXIIfslttrsfTTAEdrGZvTfsgkqNB3OVNV85cDNwKDALWAvcEGo1ITCzWuA+4FJ339a/L2qflXII/jeAKf1+PiDbFmnu/kb2+3rgZwRDYgLrzGwSQPb7+pDrKQnuvs7d0+6eAW4hYp8XM0sShP5P3P3+bHNkPyvlEPxLgOlmdrCZpYDzgAdDrilUZlZjZnV9j4FTgRcGf1VkPAhcmH18IfBAiLWUjL6AyzqHCH1ezMyAW4EV7n5jv67IflbK4srd7NSzbwJx4DZ3/5dwKwqXmR1CcJQPkADujOI+MbO7gDkEy+uuA64Gfg7cAxwIvArMdfdInejMs1/mEAzzOLAG+Fy/8e1RzcxmA08BzwOZbPNVBOP8kfyslEXwi4jIyCmHoR4RERlBCn4RkYhR8IuIRIyCX0QkYhT8IiIRo+AXyTKzA8zsgexqjS+b2bey146IjCoKfhG2X+RzP/Dz7GqNhwG1wF5dH2FmiREoT2REKfhFAqcAne7+QwB3TwN/D/y1mf3BzI7oe6KZPWFmjdkrqG/L9j9rZmdl+z9tZg+a2WPAo2Y2ycyezK6D/4KZvSeMf6BIHwW/SOAIYGn/huxCXn8B/guYC9uXPpjk7k3A14DH3P1Y4GTg+uwSGgBHAx9z95OAC4Bfu/ss4EhgWcH/NSKDUPCL7N4TwMeyj+cC92YfnwpcaWbLss+pJLj8H+CRfpf/LwE+Y2bXAO/IrgkvEhoFv0hgOfCu/g1mVk8Q5EuAjWb2TmAesKjvKcBf9bur1YHuviLb19b3Ptkbo7yXYFXZ283sU4X9p4gMTsEvEngUqO4L5ewtP28Abnf3doKw/yqwj7s/l33Nr4EvZk8MY2ZH5XpjMzsIWOfutwD/STAMJBIaBb8I22/EcQ5wrpmtAl4COglWcYRgeOc8gtUc+/wzkASeM7MXsz/nMgf4k5k9S/AXw7dG/B8gMgxanVNEJGJ0xC8iEjEKfhGRiFHwi4hEjIJfRCRiFPwiIhGj4BcRiRgFv4hIxPx/GFp8h8IzitIAAAAASUVORK5CYII=\n",
422 | "text/plain": [
423 | ""
424 | ]
425 | },
426 | "metadata": {
427 | "needs_background": "light"
428 | },
429 | "output_type": "display_data"
430 | }
431 | ],
432 | "source": [
433 | "plt.scatter(df['Overs'], df['Scores'],c= kmeans.labels_.astype(float), s=50, alpha=1)\n",
434 | "plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50)\n",
435 | "plt.xlabel('Overs')\n",
436 | "plt.ylabel('Scores')\n",
437 | "plt.show()"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": null,
443 | "id": "attractive-national",
444 | "metadata": {},
445 | "outputs": [],
446 | "source": []
447 | }
448 | ],
449 | "metadata": {
450 | "kernelspec": {
451 | "display_name": "Python 3",
452 | "language": "python",
453 | "name": "python3"
454 | },
455 | "language_info": {
456 | "codemirror_mode": {
457 | "name": "ipython",
458 | "version": 3
459 | },
460 | "file_extension": ".py",
461 | "mimetype": "text/x-python",
462 | "name": "python",
463 | "nbconvert_exporter": "python",
464 | "pygments_lexer": "ipython3",
465 | "version": "3.8.8"
466 | }
467 | },
468 | "nbformat": 4,
469 | "nbformat_minor": 5
470 | }
471 |
--------------------------------------------------------------------------------
/Decision_Tree.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Decision Tree.ipynb",
7 | "provenance": [],
8 | "authorship_tag": "ABX9TyOROAalO6i5bSeT2NjVmuLB",
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | }
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "
"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {
34 | "colab": {
35 | "base_uri": "https://localhost:8080/"
36 | },
37 | "id": "ex889e5uNZHJ",
38 | "outputId": "e3215fb7-e1db-40af-83de-f76ed01bca7f"
39 | },
40 | "outputs": [
41 | {
42 | "output_type": "execute_result",
43 | "data": {
44 | "text/plain": [
45 | "0 203\n",
46 | "1 96\n",
47 | "Name: DEATH_EVENT, dtype: int64"
48 | ]
49 | },
50 | "metadata": {},
51 | "execution_count": 3
52 | }
53 | ],
54 | "source": [
55 | "# Libraries used by this notebook\n",
56 | "import numpy as np\n",
57 | "import matplotlib.pyplot as plt\n",
58 | "import pandas as pd\n",
59 | "\n",
60 | "# Load the heart-failure dataset; the relative path expects the CSV in the working directory\n",
61 | "data_set = pd.read_csv('heart_failure_clinical_records_dataset.csv')\n",
62 | "\n",
63 | "# Class balance of the target: number of rows per DEATH_EVENT value\n",
64 | "data_set.DEATH_EVENT.value_counts()\n",
65 | "\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "source": [
71 | "# Split the frame into predictors (X) and the DEATH_EVENT target (y)\n",
72 | "X = data_set.drop(columns=[\"DEATH_EVENT\"])\n",
73 | "y = data_set.DEATH_EVENT"
74 | ],
75 | "metadata": {
76 | "id": "azj_jpjyO80e"
77 | },
78 | "execution_count": 5,
79 | "outputs": []
80 | },
81 | {
82 | "cell_type": "code",
83 | "source": [
84 | "# Splitting the dataset into training and test set. \n",
85 | "from sklearn.model_selection import train_test_split \n",
86 | "x_train, x_test, y_train, y_test= train_test_split(X, y, test_size= 0.25, random_state=0) "
87 | ],
88 | "metadata": {
89 | "id": "rxi68tNYO9OE"
90 | },
91 | "execution_count": 6,
92 | "outputs": []
93 | },
94 | {
95 | "cell_type": "code",
96 | "source": [
97 | ""
98 | ],
99 | "metadata": {
100 | "id": "DhHNpvlNP2sV"
101 | },
102 | "execution_count": 3,
103 | "outputs": []
104 | },
105 | {
106 | "cell_type": "code",
107 | "source": [
108 | "# Train an entropy-criterion decision tree on the training split\n",
109 | "from sklearn.tree import DecisionTreeClassifier\n",
110 | "classifier = DecisionTreeClassifier(random_state=0, criterion='entropy')\n",
111 | "classifier.fit(X=x_train, y=y_train)"
112 | ],
113 | "metadata": {
114 | "colab": {
115 | "base_uri": "https://localhost:8080/"
116 | },
117 | "id": "wRd1hpbkPiK8",
118 | "outputId": "0bb6e6b3-717c-48f7-ff64-8f98c5905488"
119 | },
120 | "execution_count": 10,
121 | "outputs": [
122 | {
123 | "output_type": "execute_result",
124 | "data": {
125 | "text/plain": [
126 | "DecisionTreeClassifier(criterion='entropy', random_state=0)"
127 | ]
128 | },
129 | "metadata": {},
130 | "execution_count": 10
131 | }
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "source": [
137 | "y_pred= classifier.predict(x_test)"
138 | ],
139 | "metadata": {
140 | "id": "GuUiw3NgPoz0"
141 | },
142 | "execution_count": 11,
143 | "outputs": []
144 | },
145 | {
146 | "cell_type": "code",
147 | "source": [
148 | "print(y_pred)"
149 | ],
150 | "metadata": {
151 | "colab": {
152 | "base_uri": "https://localhost:8080/"
153 | },
154 | "id": "ndKQQVzFPwed",
155 | "outputId": "bffd3833-a348-46ff-818d-a5c5462f09d4"
156 | },
157 | "execution_count": 16,
158 | "outputs": [
159 | {
160 | "output_type": "stream",
161 | "name": "stdout",
162 | "text": [
163 | "[0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0\n",
164 | " 1 0 1 0 1 0 0 1 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1\n",
165 | " 0]\n"
166 | ]
167 | }
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "source": [
173 | ""
174 | ],
175 | "metadata": {
176 | "id": "OM2JwcQ7Q9B7"
177 | },
178 | "execution_count": 9,
179 | "outputs": []
180 | },
181 | {
182 | "cell_type": "code",
183 | "source": [
184 | "from sklearn.metrics import confusion_matrix \n",
185 | "cm= confusion_matrix(y_test, y_pred) "
186 | ],
187 | "metadata": {
188 | "id": "YbVRNx-DPzAp"
189 | },
190 | "execution_count": 12,
191 | "outputs": []
192 | },
193 | {
194 | "cell_type": "code",
195 | "source": [
196 | "print(cm)"
197 | ],
198 | "metadata": {
199 | "colab": {
200 | "base_uri": "https://localhost:8080/"
201 | },
202 | "id": "sHFHRZimQJst",
203 | "outputId": "f8b62630-d901-439c-9245-eff9ae2a2271"
204 | },
205 | "execution_count": 13,
206 | "outputs": [
207 | {
208 | "output_type": "stream",
209 | "name": "stdout",
210 | "text": [
211 | "[[40 8]\n",
212 | " [ 6 21]]\n"
213 | ]
214 | }
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "source": [
220 | "from sklearn.metrics import accuracy_score\n",
221 | "accuracy_score(y_test, y_pred)"
222 | ],
223 | "metadata": {
224 | "colab": {
225 | "base_uri": "https://localhost:8080/"
226 | },
227 | "id": "FlvctoxSQLQO",
228 | "outputId": "3f730a8a-f038-4436-d044-011561a957e6"
229 | },
230 | "execution_count": 14,
231 | "outputs": [
232 | {
233 | "output_type": "execute_result",
234 | "data": {
235 | "text/plain": [
236 | "0.8133333333333334"
237 | ]
238 | },
239 | "metadata": {},
240 | "execution_count": 14
241 | }
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "source": [
247 | ""
248 | ],
249 | "metadata": {
250 | "id": "kHL4j9jAQxPv"
251 | },
252 | "execution_count": null,
253 | "outputs": []
254 | }
255 | ]
256 | }
--------------------------------------------------------------------------------
/Ensemble learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 80,
6 | "id": "proof-secretariat",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 81,
16 | "id": "frank-league",
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " Pregnancies | \n",
41 | " Glucose | \n",
42 | " BloodPressure | \n",
43 | " SkinThickness | \n",
44 | " Insulin | \n",
45 | " BMI | \n",
46 | " DiabetesPedigreeFunction | \n",
47 | " Age | \n",
48 | " Outcome | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " 6 | \n",
55 | " 148 | \n",
56 | " 72 | \n",
57 | " 35 | \n",
58 | " 0 | \n",
59 | " 33.6 | \n",
60 | " 0.627 | \n",
61 | " 50 | \n",
62 | " 1 | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " 1 | \n",
67 | " 85 | \n",
68 | " 66 | \n",
69 | " 29 | \n",
70 | " 0 | \n",
71 | " 26.6 | \n",
72 | " 0.351 | \n",
73 | " 31 | \n",
74 | " 0 | \n",
75 | "
\n",
76 | " \n",
77 | " 2 | \n",
78 | " 8 | \n",
79 | " 183 | \n",
80 | " 64 | \n",
81 | " 0 | \n",
82 | " 0 | \n",
83 | " 23.3 | \n",
84 | " 0.672 | \n",
85 | " 32 | \n",
86 | " 1 | \n",
87 | "
\n",
88 | " \n",
89 | " 3 | \n",
90 | " 1 | \n",
91 | " 89 | \n",
92 | " 66 | \n",
93 | " 23 | \n",
94 | " 94 | \n",
95 | " 28.1 | \n",
96 | " 0.167 | \n",
97 | " 21 | \n",
98 | " 0 | \n",
99 | "
\n",
100 | " \n",
101 | " 4 | \n",
102 | " 0 | \n",
103 | " 137 | \n",
104 | " 40 | \n",
105 | " 35 | \n",
106 | " 168 | \n",
107 | " 43.1 | \n",
108 | " 2.288 | \n",
109 | " 33 | \n",
110 | " 1 | \n",
111 | "
\n",
112 | " \n",
113 | " ... | \n",
114 | " ... | \n",
115 | " ... | \n",
116 | " ... | \n",
117 | " ... | \n",
118 | " ... | \n",
119 | " ... | \n",
120 | " ... | \n",
121 | " ... | \n",
122 | " ... | \n",
123 | "
\n",
124 | " \n",
125 | " 763 | \n",
126 | " 10 | \n",
127 | " 101 | \n",
128 | " 76 | \n",
129 | " 48 | \n",
130 | " 180 | \n",
131 | " 32.9 | \n",
132 | " 0.171 | \n",
133 | " 63 | \n",
134 | " 0 | \n",
135 | "
\n",
136 | " \n",
137 | " 764 | \n",
138 | " 2 | \n",
139 | " 122 | \n",
140 | " 70 | \n",
141 | " 27 | \n",
142 | " 0 | \n",
143 | " 36.8 | \n",
144 | " 0.340 | \n",
145 | " 27 | \n",
146 | " 0 | \n",
147 | "
\n",
148 | " \n",
149 | " 765 | \n",
150 | " 5 | \n",
151 | " 121 | \n",
152 | " 72 | \n",
153 | " 23 | \n",
154 | " 112 | \n",
155 | " 26.2 | \n",
156 | " 0.245 | \n",
157 | " 30 | \n",
158 | " 0 | \n",
159 | "
\n",
160 | " \n",
161 | " 766 | \n",
162 | " 1 | \n",
163 | " 126 | \n",
164 | " 60 | \n",
165 | " 0 | \n",
166 | " 0 | \n",
167 | " 30.1 | \n",
168 | " 0.349 | \n",
169 | " 47 | \n",
170 | " 1 | \n",
171 | "
\n",
172 | " \n",
173 | " 767 | \n",
174 | " 1 | \n",
175 | " 93 | \n",
176 | " 70 | \n",
177 | " 31 | \n",
178 | " 0 | \n",
179 | " 30.4 | \n",
180 | " 0.315 | \n",
181 | " 23 | \n",
182 | " 0 | \n",
183 | "
\n",
184 | " \n",
185 | "
\n",
186 | "
768 rows × 9 columns
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
191 | "0 6 148 72 35 0 33.6 \n",
192 | "1 1 85 66 29 0 26.6 \n",
193 | "2 8 183 64 0 0 23.3 \n",
194 | "3 1 89 66 23 94 28.1 \n",
195 | "4 0 137 40 35 168 43.1 \n",
196 | ".. ... ... ... ... ... ... \n",
197 | "763 10 101 76 48 180 32.9 \n",
198 | "764 2 122 70 27 0 36.8 \n",
199 | "765 5 121 72 23 112 26.2 \n",
200 | "766 1 126 60 0 0 30.1 \n",
201 | "767 1 93 70 31 0 30.4 \n",
202 | "\n",
203 | " DiabetesPedigreeFunction Age Outcome \n",
204 | "0 0.627 50 1 \n",
205 | "1 0.351 31 0 \n",
206 | "2 0.672 32 1 \n",
207 | "3 0.167 21 0 \n",
208 | "4 2.288 33 1 \n",
209 | ".. ... ... ... \n",
210 | "763 0.171 63 0 \n",
211 | "764 0.340 27 0 \n",
212 | "765 0.245 30 0 \n",
213 | "766 0.349 47 1 \n",
214 | "767 0.315 23 0 \n",
215 | "\n",
216 | "[768 rows x 9 columns]"
217 | ]
218 | },
219 | "execution_count": 81,
220 | "metadata": {},
221 | "output_type": "execute_result"
222 | }
223 | ],
224 | "source": [
225 | "df=pd.read_csv('diabetes.csv')\n",
226 | "df"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 82,
232 | "id": "electronic-oakland",
233 | "metadata": {},
234 | "outputs": [],
235 | "source": [
236 | "feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']\n",
237 | "X = df.loc[:, feature_cols]  # predictor columns\n",
238 | "y = df['Outcome']  # target column"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 83,
244 | "id": "available-boards",
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "from sklearn.model_selection import train_test_split\n",
249 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)  # fixed seed so the split and the scores below are reproducible"
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "execution_count": 84,
255 | "id": "proud-remainder",
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "from sklearn.naive_bayes import GaussianNB\n",
260 | "from sklearn.tree import DecisionTreeClassifier\n",
261 | "from sklearn.linear_model import LogisticRegression\n",
262 | "from sklearn.metrics import accuracy_score\n",
263 | "from sklearn.metrics import classification_report"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 85,
269 | "id": "burning-karma",
270 | "metadata": {},
271 | "outputs": [],
272 | "source": [
273 | "classifiers=[['Naive Bayes :', GaussianNB()],['LogisticRegression :', LogisticRegression(max_iter = 1000)], ['DecisionTree :',DecisionTreeClassifier()]]"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 86,
279 | "id": "daily-conference",
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "# Start the comparison table with the true test labels in an 'action' column\n",
284 | "predictions_df = pd.DataFrame({'action': y_test})"
285 | ]
286 | },
287 | {
288 | "cell_type": "code",
289 | "execution_count": 87,
290 | "id": "reasonable-ideal",
291 | "metadata": {},
292 | "outputs": [
293 | {
294 | "name": "stdout",
295 | "output_type": "stream",
296 | "text": [
297 | "Naive Bayes : 0.70995670995671\n",
298 | "LogisticRegression : 0.7445887445887446\n",
299 | "DecisionTree : 0.6103896103896104\n"
300 | ]
301 | }
302 | ],
303 | "source": [
304 | "# Fit each base model, store its test-set predictions as a column, and print its accuracy\n",
305 | "for name, classifier in classifiers:\n",
306 | "    classifier.fit(X_train, y_train)\n",
307 | "    predictions = classifier.predict(X_test)\n",
308 | "    predictions_df[name.rstrip(\" :\")] = predictions  # drop the trailing ' :' so the column label has no stray space\n",
309 | "    print(name, accuracy_score(y_test, predictions))"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 95,
315 | "id": "provincial-adventure",
316 | "metadata": {},
317 | "outputs": [
318 | {
319 | "data": {
320 | "text/html": [
321 | "\n",
322 | "\n",
335 | "
\n",
336 | " \n",
337 | " \n",
338 | " | \n",
339 | " action | \n",
340 | " Naive Bayes | \n",
341 | " LogisticRegression | \n",
342 | " DecisionTree | \n",
343 | "
\n",
344 | " \n",
345 | " \n",
346 | " \n",
347 | " 574 | \n",
348 | " 0 | \n",
349 | " 1 | \n",
350 | " 0 | \n",
351 | " 0 | \n",
352 | "
\n",
353 | " \n",
354 | " 539 | \n",
355 | " 1 | \n",
356 | " 1 | \n",
357 | " 1 | \n",
358 | " 1 | \n",
359 | "
\n",
360 | " \n",
361 | " 613 | \n",
362 | " 0 | \n",
363 | " 0 | \n",
364 | " 0 | \n",
365 | " 0 | \n",
366 | "
\n",
367 | " \n",
368 | " 612 | \n",
369 | " 1 | \n",
370 | " 1 | \n",
371 | " 1 | \n",
372 | " 1 | \n",
373 | "
\n",
374 | " \n",
375 | " 122 | \n",
376 | " 0 | \n",
377 | " 0 | \n",
378 | " 0 | \n",
379 | " 0 | \n",
380 | "
\n",
381 | " \n",
382 | "
\n",
383 | "
"
384 | ],
385 | "text/plain": [
386 | " action Naive Bayes LogisticRegression DecisionTree \n",
387 | "574 0 1 0 0\n",
388 | "539 1 1 1 1\n",
389 | "613 0 0 0 0\n",
390 | "612 1 1 1 1\n",
391 | "122 0 0 0 0"
392 | ]
393 | },
394 | "execution_count": 95,
395 | "metadata": {},
396 | "output_type": "execute_result"
397 | }
398 | ],
399 | "source": [
400 | "predictions_df.head()"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": null,
406 | "id": "cutting-grade",
407 | "metadata": {},
408 | "outputs": [],
409 | "source": []
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 96,
414 | "id": "defensive-secretariat",
415 | "metadata": {},
416 | "outputs": [
417 | {
418 | "name": "stdout",
419 | "output_type": "stream",
420 | "text": [
421 | "[0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 1 1 0 0 1 0\n",
422 | " 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0\n",
423 | " 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 0 1 1\n",
424 | " 0 1 1 1 0 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 1\n",
425 | " 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0\n",
426 | " 1 0 0 0 0 0 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 1 1 0 1 0\n",
427 | " 0 1 0 1 0 0 0 0 1]\n"
428 | ]
429 | },
430 | {
431 | "name": "stderr",
432 | "output_type": "stream",
433 | "text": [
434 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
435 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
436 | "\n",
437 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
438 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
439 | "Please also refer to the documentation for alternative solver options:\n",
440 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
441 | " n_iter_i = _check_optimize_result(\n"
442 | ]
443 | }
444 | ],
445 | "source": [
446 | "from sklearn.ensemble import VotingClassifier\n",
447 | "\n",
448 | "# Base estimators for the majority-vote ensemble\n",
449 | "clf1 = GaussianNB()\n",
450 | "clf2 = DecisionTreeClassifier()\n",
451 | "# max_iter=1000 matches the LogisticRegression in `classifiers`; the default iteration cap triggers an lbfgs ConvergenceWarning on this data\n",
452 | "clf3 = LogisticRegression(max_iter=1000)\n",
453 | "eclf1 = VotingClassifier(estimators=[('Gaussian', clf1), ('DecisionTree', clf2), ('Logisitic', clf3)], voting='hard')\n",
454 | "eclf1.fit(X_train, y_train)\n",
455 | "predictions = eclf1.predict(X_test)\n",
456 | "print(predictions)"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": null,
462 | "id": "advisory-synthesis",
463 | "metadata": {},
464 | "outputs": [],
465 | "source": []
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": null,
470 | "id": "stopped-bibliography",
471 | "metadata": {},
472 | "outputs": [],
473 | "source": []
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "id": "subjective-mainstream",
479 | "metadata": {},
480 | "outputs": [],
481 | "source": []
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 90,
486 | "id": "bound-pastor",
487 | "metadata": {},
488 | "outputs": [
489 | {
490 | "name": "stdout",
491 | "output_type": "stream",
492 | "text": [
493 | "Hard Voting Score 0\n"
494 | ]
495 | }
496 | ],
497 | "source": [
498 | "# Hard-voting ensemble over the base models in `classifiers`\n",
499 | "vot_hard = VotingClassifier(estimators=classifiers, voting='hard')\n",
500 | "vot_hard.fit(X_train, y_train)\n",
501 | "y_pred = vot_hard.predict(X_test)\n",
502 | "\n",
503 | "# accuracy_score returns a fraction in [0, 1]; print it as a float ('% d' truncated it to 0)\n",
504 | "score = accuracy_score(y_test, y_pred)\n",
505 | "print(\"Hard Voting Score %.4f\" % score)"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 91,
511 | "id": "separate-compiler",
512 | "metadata": {},
513 | "outputs": [
514 | {
515 | "name": "stdout",
516 | "output_type": "stream",
517 | "text": [
518 | "Soft Voting Score 0\n"
519 | ]
520 | }
521 | ],
522 | "source": [
523 | "# Soft-voting ensemble over the base models in `classifiers`\n",
524 | "vot_soft = VotingClassifier(estimators=classifiers, voting='soft')\n",
525 | "vot_soft.fit(X_train, y_train)\n",
526 | "y_pred = vot_soft.predict(X_test)\n",
527 | "\n",
528 | "score1 = accuracy_score(y_test, y_pred)  # fraction in [0, 1]\n",
529 | "print(\"Soft Voting Score %.4f\" % score1)  # '% d' truncated the fraction to 0"
530 | ]
531 | },
532 | {
533 | "cell_type": "code",
534 | "execution_count": 98,
535 | "id": "african-athletics",
536 | "metadata": {},
537 | "outputs": [
538 | {
539 | "name": "stderr",
540 | "output_type": "stream",
541 | "text": [
542 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
543 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
544 | "\n",
545 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
546 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
547 | "Please also refer to the documentation for alternative solver options:\n",
548 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
549 | " n_iter_i = _check_optimize_result(\n",
550 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
551 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
552 | "\n",
553 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
554 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
555 | "Please also refer to the documentation for alternative solver options:\n",
556 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
557 | " n_iter_i = _check_optimize_result(\n",
558 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
559 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
560 | "\n",
561 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
562 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
563 | "Please also refer to the documentation for alternative solver options:\n",
564 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
565 | " n_iter_i = _check_optimize_result(\n",
566 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
567 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
568 | "\n",
569 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
570 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
571 | "Please also refer to the documentation for alternative solver options:\n",
572 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
573 | " n_iter_i = _check_optimize_result(\n",
574 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
575 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
576 | "\n",
577 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
578 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
579 | "Please also refer to the documentation for alternative solver options:\n",
580 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
581 | " n_iter_i = _check_optimize_result(\n",
582 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
583 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
584 | "\n",
585 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
586 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
587 | "Please also refer to the documentation for alternative solver options:\n",
588 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
589 | " n_iter_i = _check_optimize_result(\n",
590 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
591 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
592 | "\n",
593 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
594 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
595 | "Please also refer to the documentation for alternative solver options:\n",
596 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
597 | " n_iter_i = _check_optimize_result(\n",
598 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
599 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
600 | "\n",
601 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
602 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
603 | "Please also refer to the documentation for alternative solver options:\n",
604 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
605 | " n_iter_i = _check_optimize_result(\n",
606 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
607 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
608 | "\n",
609 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
610 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
611 | "Please also refer to the documentation for alternative solver options:\n",
612 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
613 | " n_iter_i = _check_optimize_result(\n",
614 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
615 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
616 | "\n",
617 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
618 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
619 | "Please also refer to the documentation for alternative solver options:\n",
620 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
621 | " n_iter_i = _check_optimize_result(\n"
622 | ]
623 | },
624 | {
625 | "name": "stdout",
626 | "output_type": "stream",
627 | "text": [
628 | "[0.7597484276729559, 0.6983228511530398, 0.7729210342417889]\n"
629 | ]
630 | }
631 | ],
632 | "source": [
633 | "\n",
634 | "from sklearn.model_selection import cross_val_score\n",
635 | "c = []\n",
636 | "c.append(cross_val_score(clf1,X_train,y_train,scoring='accuracy',cv=10).mean())\n",
637 | "c.append(cross_val_score(clf2,X_train,y_train,scoring='accuracy',cv=10).mean())\n",
638 | "c.append(cross_val_score(clf3,X_train,y_train,scoring='accuracy',cv=10).mean())\n",
639 | "print(c)"
640 | ]
641 | },
642 | {
643 | "cell_type": "code",
644 | "execution_count": null,
645 | "id": "differential-energy",
646 | "metadata": {},
647 | "outputs": [],
648 | "source": []
649 | }
650 | ],
651 | "metadata": {
652 | "kernelspec": {
653 | "display_name": "Python 3",
654 | "language": "python",
655 | "name": "python3"
656 | },
657 | "language_info": {
658 | "codemirror_mode": {
659 | "name": "ipython",
660 | "version": 3
661 | },
662 | "file_extension": ".py",
663 | "mimetype": "text/x-python",
664 | "name": "python",
665 | "nbconvert_exporter": "python",
666 | "pygments_lexer": "ipython3",
667 | "version": "3.8.8"
668 | }
669 | },
670 | "nbformat": 4,
671 | "nbformat_minor": 5
672 | }
673 |
--------------------------------------------------------------------------------
/Hyper parameter tuning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 69,
6 | "id": "97b98548",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "from sklearn.svm import SVC\n",
12 | "from sklearn.metrics import confusion_matrix, classification_report\n",
13 | "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
14 | "import seaborn as sns"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 90,
20 | "id": "84729c58",
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/html": [
26 | "\n",
27 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " | \n",
44 | " fixed acidity | \n",
45 | " volatile acidity | \n",
46 | " citric acid | \n",
47 | " residual sugar | \n",
48 | " chlorides | \n",
49 | " free sulfur dioxide | \n",
50 | " total sulfur dioxide | \n",
51 | " density | \n",
52 | " pH | \n",
53 | " sulphates | \n",
54 | " alcohol | \n",
55 | " quality | \n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " \n",
60 | " 0 | \n",
61 | " 7.4 | \n",
62 | " 0.700 | \n",
63 | " 0.00 | \n",
64 | " 1.9 | \n",
65 | " 0.076 | \n",
66 | " 11.0 | \n",
67 | " 34.0 | \n",
68 | " 0.99780 | \n",
69 | " 3.51 | \n",
70 | " 0.56 | \n",
71 | " 9.4 | \n",
72 | " 5 | \n",
73 | "
\n",
74 | " \n",
75 | " 1 | \n",
76 | " 7.8 | \n",
77 | " 0.880 | \n",
78 | " 0.00 | \n",
79 | " 2.6 | \n",
80 | " 0.098 | \n",
81 | " 25.0 | \n",
82 | " 67.0 | \n",
83 | " 0.99680 | \n",
84 | " 3.20 | \n",
85 | " 0.68 | \n",
86 | " 9.8 | \n",
87 | " 5 | \n",
88 | "
\n",
89 | " \n",
90 | " 2 | \n",
91 | " 7.8 | \n",
92 | " 0.760 | \n",
93 | " 0.04 | \n",
94 | " 2.3 | \n",
95 | " 0.092 | \n",
96 | " 15.0 | \n",
97 | " 54.0 | \n",
98 | " 0.99700 | \n",
99 | " 3.26 | \n",
100 | " 0.65 | \n",
101 | " 9.8 | \n",
102 | " 5 | \n",
103 | "
\n",
104 | " \n",
105 | " 3 | \n",
106 | " 11.2 | \n",
107 | " 0.280 | \n",
108 | " 0.56 | \n",
109 | " 1.9 | \n",
110 | " 0.075 | \n",
111 | " 17.0 | \n",
112 | " 60.0 | \n",
113 | " 0.99800 | \n",
114 | " 3.16 | \n",
115 | " 0.58 | \n",
116 | " 9.8 | \n",
117 | " 6 | \n",
118 | "
\n",
119 | " \n",
120 | " 4 | \n",
121 | " 7.4 | \n",
122 | " 0.700 | \n",
123 | " 0.00 | \n",
124 | " 1.9 | \n",
125 | " 0.076 | \n",
126 | " 11.0 | \n",
127 | " 34.0 | \n",
128 | " 0.99780 | \n",
129 | " 3.51 | \n",
130 | " 0.56 | \n",
131 | " 9.4 | \n",
132 | " 5 | \n",
133 | "
\n",
134 | " \n",
135 | " ... | \n",
136 | " ... | \n",
137 | " ... | \n",
138 | " ... | \n",
139 | " ... | \n",
140 | " ... | \n",
141 | " ... | \n",
142 | " ... | \n",
143 | " ... | \n",
144 | " ... | \n",
145 | " ... | \n",
146 | " ... | \n",
147 | " ... | \n",
148 | "
\n",
149 | " \n",
150 | " 1594 | \n",
151 | " 6.2 | \n",
152 | " 0.600 | \n",
153 | " 0.08 | \n",
154 | " 2.0 | \n",
155 | " 0.090 | \n",
156 | " 32.0 | \n",
157 | " 44.0 | \n",
158 | " 0.99490 | \n",
159 | " 3.45 | \n",
160 | " 0.58 | \n",
161 | " 10.5 | \n",
162 | " 5 | \n",
163 | "
\n",
164 | " \n",
165 | " 1595 | \n",
166 | " 5.9 | \n",
167 | " 0.550 | \n",
168 | " 0.10 | \n",
169 | " 2.2 | \n",
170 | " 0.062 | \n",
171 | " 39.0 | \n",
172 | " 51.0 | \n",
173 | " 0.99512 | \n",
174 | " 3.52 | \n",
175 | " 0.76 | \n",
176 | " 11.2 | \n",
177 | " 6 | \n",
178 | "
\n",
179 | " \n",
180 | " 1596 | \n",
181 | " 6.3 | \n",
182 | " 0.510 | \n",
183 | " 0.13 | \n",
184 | " 2.3 | \n",
185 | " 0.076 | \n",
186 | " 29.0 | \n",
187 | " 40.0 | \n",
188 | " 0.99574 | \n",
189 | " 3.42 | \n",
190 | " 0.75 | \n",
191 | " 11.0 | \n",
192 | " 6 | \n",
193 | "
\n",
194 | " \n",
195 | " 1597 | \n",
196 | " 5.9 | \n",
197 | " 0.645 | \n",
198 | " 0.12 | \n",
199 | " 2.0 | \n",
200 | " 0.075 | \n",
201 | " 32.0 | \n",
202 | " 44.0 | \n",
203 | " 0.99547 | \n",
204 | " 3.57 | \n",
205 | " 0.71 | \n",
206 | " 10.2 | \n",
207 | " 5 | \n",
208 | "
\n",
209 | " \n",
210 | " 1598 | \n",
211 | " 6.0 | \n",
212 | " 0.310 | \n",
213 | " 0.47 | \n",
214 | " 3.6 | \n",
215 | " 0.067 | \n",
216 | " 18.0 | \n",
217 | " 42.0 | \n",
218 | " 0.99549 | \n",
219 | " 3.39 | \n",
220 | " 0.66 | \n",
221 | " 11.0 | \n",
222 | " 6 | \n",
223 | "
\n",
224 | " \n",
225 | "
\n",
226 | "
1599 rows × 12 columns
\n",
227 | "
"
228 | ],
229 | "text/plain": [
230 | " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
231 | "0 7.4 0.700 0.00 1.9 0.076 \n",
232 | "1 7.8 0.880 0.00 2.6 0.098 \n",
233 | "2 7.8 0.760 0.04 2.3 0.092 \n",
234 | "3 11.2 0.280 0.56 1.9 0.075 \n",
235 | "4 7.4 0.700 0.00 1.9 0.076 \n",
236 | "... ... ... ... ... ... \n",
237 | "1594 6.2 0.600 0.08 2.0 0.090 \n",
238 | "1595 5.9 0.550 0.10 2.2 0.062 \n",
239 | "1596 6.3 0.510 0.13 2.3 0.076 \n",
240 | "1597 5.9 0.645 0.12 2.0 0.075 \n",
241 | "1598 6.0 0.310 0.47 3.6 0.067 \n",
242 | "\n",
243 | " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
244 | "0 11.0 34.0 0.99780 3.51 0.56 \n",
245 | "1 25.0 67.0 0.99680 3.20 0.68 \n",
246 | "2 15.0 54.0 0.99700 3.26 0.65 \n",
247 | "3 17.0 60.0 0.99800 3.16 0.58 \n",
248 | "4 11.0 34.0 0.99780 3.51 0.56 \n",
249 | "... ... ... ... ... ... \n",
250 | "1594 32.0 44.0 0.99490 3.45 0.58 \n",
251 | "1595 39.0 51.0 0.99512 3.52 0.76 \n",
252 | "1596 29.0 40.0 0.99574 3.42 0.75 \n",
253 | "1597 32.0 44.0 0.99547 3.57 0.71 \n",
254 | "1598 18.0 42.0 0.99549 3.39 0.66 \n",
255 | "\n",
256 | " alcohol quality \n",
257 | "0 9.4 5 \n",
258 | "1 9.8 5 \n",
259 | "2 9.8 5 \n",
260 | "3 9.8 6 \n",
261 | "4 9.4 5 \n",
262 | "... ... ... \n",
263 | "1594 10.5 5 \n",
264 | "1595 11.2 6 \n",
265 | "1596 11.0 6 \n",
266 | "1597 10.2 5 \n",
267 | "1598 11.0 6 \n",
268 | "\n",
269 | "[1599 rows x 12 columns]"
270 | ]
271 | },
272 | "execution_count": 90,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "wine=pd.read_csv('winequality-red.csv')\n",
279 | "wine"
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": 73,
285 | "id": "9a898701",
286 | "metadata": {},
287 | "outputs": [
288 | {
289 | "data": {
290 | "text/plain": [
291 | "5 681\n",
292 | "6 638\n",
293 | "7 199\n",
294 | "4 53\n",
295 | "8 18\n",
296 | "3 10\n",
297 | "Name: quality, dtype: int64"
298 | ]
299 | },
300 | "execution_count": 73,
301 | "metadata": {},
302 | "output_type": "execute_result"
303 | }
304 | ],
305 | "source": [
306 | "wine['quality'].value_counts()"
307 | ]
308 | },
309 | {
310 | "cell_type": "code",
311 | "execution_count": 74,
312 | "id": "b9dec75b",
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "name": "stdout",
317 | "output_type": "stream",
318 | "text": [
319 | "\n",
320 | "RangeIndex: 1599 entries, 0 to 1598\n",
321 | "Data columns (total 12 columns):\n",
322 | " # Column Non-Null Count Dtype \n",
323 | "--- ------ -------------- ----- \n",
324 | " 0 fixed acidity 1599 non-null float64\n",
325 | " 1 volatile acidity 1599 non-null float64\n",
326 | " 2 citric acid 1599 non-null float64\n",
327 | " 3 residual sugar 1599 non-null float64\n",
328 | " 4 chlorides 1599 non-null float64\n",
329 | " 5 free sulfur dioxide 1599 non-null float64\n",
330 | " 6 total sulfur dioxide 1599 non-null float64\n",
331 | " 7 density 1599 non-null float64\n",
332 | " 8 pH 1599 non-null float64\n",
333 | " 9 sulphates 1599 non-null float64\n",
334 | " 10 alcohol 1599 non-null float64\n",
335 | " 11 quality 1599 non-null int64 \n",
336 | "dtypes: float64(11), int64(1)\n",
337 | "memory usage: 150.0 KB\n"
338 | ]
339 | }
340 | ],
341 | "source": [
342 | "wine.info()"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 92,
348 | "id": "687120f4",
349 | "metadata": {},
350 | "outputs": [],
351 | "source": [
352 | "#Making binary classification for the response variable.\n",
353 | "#Dividing wine into good and bad by setting a cutoff on the quality score (above 6.5 is good)\n",
354 | "bins = (2, 6.5, 8)\n",
355 | "group_names = ['bad', 'good']\n",
356 | "wine['quality'] = pd.cut(wine['quality'], bins = bins, labels = group_names)"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": null,
362 | "id": "3b0d693c",
363 | "metadata": {},
364 | "outputs": [],
365 | "source": []
366 | },
367 | {
368 | "cell_type": "code",
369 | "execution_count": 93,
370 | "id": "24b3198b",
371 | "metadata": {},
372 | "outputs": [],
373 | "source": [
374 | "#Now let's encode our quality variable as integer labels\n",
375 | "label_quality = LabelEncoder()\n",
376 | "#Bad becomes 0 and good becomes 1 \n",
377 | "wine['quality'] = label_quality.fit_transform(wine['quality'])"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 77,
383 | "id": "86f12402",
384 | "metadata": {},
385 | "outputs": [],
386 | "source": []
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 78,
391 | "id": "d677b6b7",
392 | "metadata": {},
393 | "outputs": [
394 | {
395 | "data": {
396 | "text/plain": [
397 | "0 1382\n",
398 | "1 217\n",
399 | "Name: quality, dtype: int64"
400 | ]
401 | },
402 | "execution_count": 78,
403 | "metadata": {},
404 | "output_type": "execute_result"
405 | }
406 | ],
407 | "source": [
408 | "wine['quality'].value_counts()"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 79,
414 | "id": "29aeab91",
415 | "metadata": {},
416 | "outputs": [
417 | {
418 | "name": "stderr",
419 | "output_type": "stream",
420 | "text": [
421 | "c:\\users\\arun\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\seaborn\\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
422 | " warnings.warn(\n"
423 | ]
424 | },
425 | {
426 | "data": {
427 | "text/plain": [
428 | ""
429 | ]
430 | },
431 | "execution_count": 79,
432 | "metadata": {},
433 | "output_type": "execute_result"
434 | },
435 | {
436 | "data": {
437 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAASA0lEQVR4nO3de7BdZ13/8feHhIIo0JYcKybREzWDU1EEzpSOjL/pUIW2KqkOdNpRCCUz8VK8oP74FZ2xDoqjY7UWL3UiDW0c7EUEG7WKnSLWC62clluhImdKIcm05EBCuXQqRr+/P/YT2aTn5DknnL33Sc/7NbMna32fZ6/9ZSbk02ettddOVSFJ0vE8YdINSJJWP8NCktRlWEiSugwLSVKXYSFJ6lo/6QZGYcOGDTU9PT3pNiTppHL33Xd/uqqmFhp7XIbF9PQ0s7Ozk25Dkk4qST6x2JinoSRJXYaFJKnLsJAkdY0sLJLsTnIwyb0LjP1Ckkqyoe0nyZuSzCX5YJLnDc3dnuRj7bV9VP1KkhY3ypXFdcB5xxaTbAZeDHxyqHw+sLW9dgLXtLmnA1cALwDOAq5IctoIe5YkLWBkYVFVdwCHFhi6CngdMPwEw23Anhq4Ezg1yTOBlwC3VdWhqjoM3MYCASRJGq2xXrNIsg04UFUfOGZoI7BvaH9/qy1WX+jYO5PMJpmdn59fwa4lSWMLiyRPAX4J+JVRHL+qdlXVTFXNTE0t+J0SSdIJGufK4luBLcAHkjwAbALuSfINwAFg89DcTa22WF2SNEZj+wZ3VX0I+Pqj+y0wZqrq00n2Aq9JciODi9kPV9WDSd4J/MbQRe0XA68fR7/P/797xvExOsnc/duvnHQL0kSM8tbZG4D3AM9Ksj/JjuNMvxW4H5gD/gT4KYCqOgT8GvDe9npDq0mSxmhkK4uquqQzPj20XcBli8zbDexe0eYkScviN7glSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1DWysEiyO8nBJPcO1X47yb8n+WCSdyQ5dWjs9Unmknw0yUuG6ue12lySy0fVryRpcaNcWVwHnHdM7Tbg2VX1XcB/AK8HSHImcDHwHe09f5RkXZJ1wB8C5wNnApe0uZKkMRpZWFTVHcChY2p/X1VH2u6dwKa2vQ24sar+s6o+DswBZ7XXXFXdX1VfAm5scyVJYzTJaxavBv62bW8E9g2N7W+1xeqPkWRnktkks/Pz8yNoV5LWromERZJfBo4Ab12pY1bVrqqaqaqZqamplTqsJAlYP+4PTPIq4AeBc6uqWvkAsHlo2qZW4zh1SdKYjHVlkeQ84HXAS6vqkaGhvcDFSZ6UZAuwFfg34L3A1iRbkpzC4CL43nH2LEka4coiyQ3AOcCGJPuBKxjc/fQk4LYkAHdW1U9U1YeT3Ax8hMHpqcuq6r/bcV4DvBNYB+yuqg+PqmdJ0sJGFhZVdckC5WuPM/+NwBsXqN8K3LqCrUmSlslvcEuSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpa2RhkWR3koNJ7h2qnZ7ktiQfa3+e1upJ8qYkc0k+mOR5Q+/Z3uZ/LMn2UfUrSVrcKFcW1wHnHVO7HLi9qrYCt7d9gPOBre21E7gGBuECXAG8ADgLuOJowEiSxmdkYVFVdwCHjilvA65v29cDFw7V99TAncCpSZ4JvAS4raoOVdVh4DYeG0CSpBEb9zWLM6rqwbb9EHBG294I7Buat7/VFqs/RpKdSWaTzM7Pz69s15K0xk3sAndVFVAreLxdVTVTVTNTU1MrdVhJEuMPi0+1
00u0Pw+2+gFg89C8Ta22WF2SNEbjDou9wNE7mrYDtwzVX9nuijobeLidrnon8OIkp7UL2y9uNUnSGK0f1YGT3ACcA2xIsp/BXU2/CdycZAfwCeCiNv1W4AJgDngEuBSgqg4l+TXgvW3eG6rq2IvmkqQRG1lYVNUliwydu8DcAi5b5Di7gd0r2JokaZn8BrckqcuwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSuiYSFklem+TDSe5NckOSJyfZkuSuJHNJbkpySpv7pLY/18anJ9GzJK1lYw+LJBuBnwFmqurZwDrgYuC3gKuq6tuAw8CO9pYdwOFWv6rNkySN0aROQ60HvibJeuApwIPAi4C3tfHrgQvb9ra2Txs/N0nG16okaUlhkeT2pdSWoqoOAFcCn2QQEg8DdwOfraojbdp+YGPb3gjsa+890uY/Y4F+diaZTTI7Pz9/Iq1JkhZx3LBo1xJOBzYkOS3J6e01zZf/MV+WJKcxWC1sAb4R+FrgvBM51rCq2lVVM1U1MzU19dUeTpI0ZH1n/MeBn2Pwj/rdwNHTP58D/uAEP/P7gI9X1TxAkrcDLwROTbK+rR42AQfa/APAZmB/O231dOAzJ/jZkqQTcNyVRVVdXVVbgF+sqm+pqi3t9ZyqOtGw+CRwdpKntGsP5wIfAf4BeFmbsx24pW3vbfu08XdVVZ3gZ0uSTkBvZQFAVf1+ku8BpoffU1V7lvuBVXVXkrcB9wBHgPcBu4C/AW5M8uutdm17y7XAnyaZAw4xuHNKkjRGSwqLJH8KfCvwfuC/W7mAZYcFQFVdAVxxTPl+4KwF5j4KvPxEPkeStDKWFBbADHCmp38kaW1a6vcs7gW+YZSNSJJWr6WuLDYAH0nyb8B/Hi1W1UtH0pUkaVVZalj86iibkCStbku9G+ofR92IJGn1WurdUJ9ncPcTwCnAE4EvVtXTRtWYJGn1WOrK4qlHt9sX6bYBZ4+qKUnS6rLsp87WwF8CL1n5diRJq9FST0P9yNDuExh87+LRkXQkSVp1lno31A8NbR8BHmBwKkqStAYs9ZrFpaNuRJK0ei31x482JXlHkoPt9RdJNo26OUnS6rDUC9xvYfCo8G9sr79qNUnSGrDUsJiqqrdU1ZH2ug7w5+gkaY1Yalh8JsmPJVnXXj+Gv1YnSWvGUsPi1cBFwEPAgwx+se5VI+pJkrTKLPXW2TcA26vqMECS04ErGYSIJOlxbqkri+86GhQAVXUIeO5oWpIkrTZLDYsnJDnt6E5bWSx1VSJJOskt9R/83wHek+TP2/7LgTeOpiVJ0mqz1G9w70kyC7yolX6kqj4yurYkSavJkk8ltXBYkYBIcirwZuDZDH4n49XAR4GbgGkGz566qKoOt0eiXw1cADwCvKqq7lmJPiRJS7PsR5SvkKuBv6uqbweeA9wHXA7cXlVbgdvbPsD5wNb22glcM/52JWltG3tYJHk68H+AawGq6ktV9VkGT7G9vk27HriwbW8D9rTf0bgTODXJM8fatCStcZNYWWwB5oG3JHlfkjcn+VrgjKp6sM15CDijbW8E9g29f3+rfYUkO5PMJpmdn58fYfuStPZMIizWA88Drqmq5wJf5MunnIDBr/Hx5d/8XpKq2lVVM1U1MzXlY6skaSVNIiz2A/ur6q62/zYG4fGpo6eX2p8H2/gBYPPQ+ze1miRpTMYeFlX1ELAvybNa6VwGd1ntBba32nbglra9F3hlBs4GHh46XSVJGoNJfQv7p4G3JjkFuB+4lEFw3ZxkB/AJBg8uBLiVwW2zcwxunfVX+yRpzCYSFlX1fmBmgaFzF5hbwGWj7kmStLhJfc9CknQSMSwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVKXYSFJ6jIsJEldhoUkqcuwkCR1GRaS
pC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqSuiYVFknVJ3pfkr9v+liR3JZlLclOSU1r9SW1/ro1PT6pnSVqrJrmy+FngvqH93wKuqqpvAw4DO1p9B3C41a9q8yRJYzSRsEiyCfgB4M1tP8CLgLe1KdcDF7btbW2fNn5umy9JGpNJrSx+D3gd8D9t/xnAZ6vqSNvfD2xs2xuBfQBt/OE2/ysk2ZlkNsns/Pz8CFuXpLVn7GGR5AeBg1V190oet6p2VdVMVc1MTU2t5KElac1bP4HPfCHw0iQXAE8GngZcDZyaZH1bPWwCDrT5B4DNwP4k64GnA58Zf9uStHaNfWVRVa+vqk1VNQ1cDLyrqn4U+AfgZW3aduCWtr237dPG31VVNcaWJWnNW03fs/h/wM8nmWNwTeLaVr8WeEar/zxw+YT6k6Q1axKnof5XVb0beHfbvh84a4E5jwIvH2tjkqSvsJpWFpKkVcqwkCR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSutaP+wOTbAb2AGcABeyqqquTnA7cBEwDDwAXVdXhJAGuBi4AHgFeVVX3jLtvaTX55Bu+c9ItaBX6pl/50MiOPYmVxRHgF6rqTOBs4LIkZwKXA7dX1Vbg9rYPcD6wtb12AteMv2VJWtvGHhZV9eDRlUFVfR64D9gIbAOub9OuBy5s29uAPTVwJ3BqkmeOt2tJWtsmes0iyTTwXOAu4IyqerANPcTgNBUMgmTf0Nv2t9qxx9qZZDbJ7Pz8/OialqQ1aGJhkeTrgL8Afq6qPjc8VlXF4HrGklXVrqqaqaqZqampFexUkjSRsEjyRAZB8daqensrf+ro6aX258FWPwBsHnr7plaTJI3J2MOi3d10LXBfVf3u0NBeYHvb3g7cMlR/ZQbOBh4eOl0lSRqDsd86C7wQeAXwoSTvb7VfAn4TuDnJDuATwEVt7FYGt83OMbh19tKxditJGn9YVNU/A1lk+NwF5hdw2UibkiQdl9/gliR1GRaSpC7DQpLUZVhIkroMC0lSl2EhSeoyLCRJXYaFJKnLsJAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIUnqMiwkSV2GhSSpy7CQJHUZFpKkLsNCktRlWEiSugwLSVLXSRMWSc5L8tEkc0kun3Q/krSWnBRhkWQd8IfA+cCZwCVJzpxsV5K0dpwUYQGcBcxV1f1V9SXgRmDbhHuSpDVj/aQbWKKNwL6h/f3AC4YnJNkJ7Gy7X0jy0TH1thZsAD496SZWg1y5fdIt6LH8+3nUFflqj/DNiw2cLGHRVVW7gF2T7uPxKMlsVc1Mug9pIf79HI+T5TTUAWDz0P6mVpMkjcHJEhbvBbYm2ZLkFOBiYO+Ee5KkNeOkOA1VVUeSvAZ4J7AO2F1VH55wW2uJp/e0mvn3cwxSVZPuQZK0yp0sp6EkSRNkWEiSugwLHZePWdFqlGR3koNJ7p10L2uFYaFF+ZgVrWLXAedNuom1xLDQ8fiYFa1KVXUHcGjSfawlhoWOZ6HHrGycUC+SJsiwkCR1GRY6Hh+zIgkwLHR8PmZFEmBY6Diq6ghw9DEr9wE3+5gVrQZJbgDeAzwryf4kOybd0+Odj/uQJHW5spAkdRkWkqQuw0KS1GVYSJK6DAtJUpdhIU1AkumjT0xNMpPkTW37nCTfM9nupMc6KX5WVXo8q6pZYLbtngN8AfjXiTUkLcCVhbRMSX45yX8k+eckNyT5xSTvTjLTxjckeaBtTyf5pyT3tNdjVg1tNfHXSaaBnwBem+T9Sb43yceTPLHNe9rwvjROriykZUjyfAaPPfluBv//uQe4+zhvOQh8f1U9mmQrcAMws9DEqnogyR8DX6iqK9vnvRv4AeAv2+e+var+a0X+x0jL4MpCWp7vBd5RVY9U
1efoPyvricCfJPkQ8OcMfkRqOd4MXNq2LwXessz3SyvClYW0Mo7w5f/4evJQ/bXAp4DntPFHl3PQqvqXdirrHGBdVfkzopoIVxbS8twBXJjka5I8FfihVn8AeH7bftnQ/KcDD1bV/wCvANZ1jv954KnH1PYAf4arCk2QYSEtQ1XdA9wEfAD4WwaPcQe4EvjJJO8DNgy95Y+A7Uk+AHw78MXOR/wV8MNHL3C32luB0xhc75AmwqfOSl+FJL/K0AXpEX3Gy4BtVfWKUX2G1OM1C2kVS/L7wPnABZPuRWubKwtJUpfXLCRJXYaFJKnLsJAkdRkWkqQuw0KS1PX/ASoTvN+6kVUJAAAAAElFTkSuQmCC\n",
438 | "text/plain": [
439 | ""
440 | ]
441 | },
442 | "metadata": {
443 | "needs_background": "light"
444 | },
445 | "output_type": "display_data"
446 | }
447 | ],
448 | "source": [
449 | "sns.countplot(wine['quality'])"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 80,
455 | "id": "09cc0e42",
456 | "metadata": {},
457 | "outputs": [],
458 | "source": [
459 | "#Now separate the dataset into the response variable and the feature variables\n",
460 | "X = wine.drop('quality', axis = 1)\n",
461 | "y = wine['quality']"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 81,
467 | "id": "065ef448",
468 | "metadata": {},
469 | "outputs": [],
470 | "source": [
471 | "#Train and Test splitting of data \n",
472 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 82,
478 | "id": "45f610f3",
479 | "metadata": {},
480 | "outputs": [],
481 | "source": [
482 | "#Applying Standard scaling to get optimized result\n",
483 | "sc = StandardScaler()"
484 | ]
485 | },
486 | {
487 | "cell_type": "code",
488 | "execution_count": 83,
489 | "id": "bac4bfee",
490 | "metadata": {},
491 | "outputs": [],
492 | "source": [
493 | "X_train = sc.fit_transform(X_train)\n",
494 | "X_test = sc.fit_transform(X_test)"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 95,
500 | "id": "9809c6c7",
501 | "metadata": {},
502 | "outputs": [
503 | {
504 | "data": {
505 | "text/plain": [
506 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
507 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
508 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
509 | " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,\n",
510 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
511 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
512 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
513 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n",
514 | " 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
515 | " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
516 | " 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
517 | " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n",
518 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
519 | " 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
520 | " 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])"
521 | ]
522 | },
523 | "execution_count": 95,
524 | "metadata": {},
525 | "output_type": "execute_result"
526 | }
527 | ],
528 | "source": [
529 | "svc = SVC()\n",
530 | "svc.fit(X_train, y_train)\n",
531 | "pred_svc = svc.predict(X_test)\n",
532 | "pred_svc"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 85,
538 | "id": "89aed9c5",
539 | "metadata": {},
540 | "outputs": [
541 | {
542 | "name": "stdout",
543 | "output_type": "stream",
544 | "text": [
545 | " precision recall f1-score support\n",
546 | "\n",
547 | " 0 0.88 0.98 0.93 273\n",
548 | " 1 0.71 0.26 0.37 47\n",
549 | "\n",
550 | " accuracy 0.88 320\n",
551 | " macro avg 0.80 0.62 0.65 320\n",
552 | "weighted avg 0.86 0.88 0.85 320\n",
553 | "\n"
554 | ]
555 | }
556 | ],
557 | "source": [
558 | "print(classification_report(y_test, pred_svc))"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": 86,
564 | "id": "c4c95e6e",
565 | "metadata": {},
566 | "outputs": [],
567 | "source": [
568 | "#Finding best parameters for our SVC model\n",
569 | "param = {\n",
570 | " 'C': [0.1,0.8,0.9,1,1.1,1.2,1.3,1.4],\n",
571 | " 'kernel':['linear', 'rbf'],\n",
572 | " 'gamma' :[0.1,0.8,0.9,1,1.1,1.2,1.3,1.4]\n",
573 | "}\n",
574 | "grid_svc = GridSearchCV(svc, param_grid=param, scoring='accuracy', cv=10)"
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": 87,
580 | "id": "3a0d4b32",
581 | "metadata": {},
582 | "outputs": [
583 | {
584 | "data": {
585 | "text/plain": [
586 | "GridSearchCV(cv=10, estimator=SVC(),\n",
587 | " param_grid={'C': [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],\n",
588 | " 'gamma': [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],\n",
589 | " 'kernel': ['linear', 'rbf']},\n",
590 | " scoring='accuracy')"
591 | ]
592 | },
593 | "execution_count": 87,
594 | "metadata": {},
595 | "output_type": "execute_result"
596 | }
597 | ],
598 | "source": [
599 | "grid_svc.fit(X_train, y_train)"
600 | ]
601 | },
602 | {
603 | "cell_type": "code",
604 | "execution_count": 88,
605 | "id": "4cd8c8d7",
606 | "metadata": {},
607 | "outputs": [
608 | {
609 | "data": {
610 | "text/plain": [
611 | "{'C': 1.2, 'gamma': 0.9, 'kernel': 'rbf'}"
612 | ]
613 | },
614 | "execution_count": 88,
615 | "metadata": {},
616 | "output_type": "execute_result"
617 | }
618 | ],
619 | "source": [
620 | "#Best parameters for our svc model\n",
621 | "grid_svc.best_params_"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 89,
627 | "id": "932f2eef",
628 | "metadata": {},
629 | "outputs": [
630 | {
631 | "name": "stdout",
632 | "output_type": "stream",
633 | "text": [
634 | " precision recall f1-score support\n",
635 | "\n",
636 | " 0 0.90 0.99 0.94 273\n",
637 | " 1 0.89 0.34 0.49 47\n",
638 | "\n",
639 | " accuracy 0.90 320\n",
640 | " macro avg 0.89 0.67 0.72 320\n",
641 | "weighted avg 0.90 0.90 0.88 320\n",
642 | "\n"
643 | ]
644 | }
645 | ],
646 | "source": [
647 | "#Let's run our SVC again with the best parameters.\n",
648 | "svc2 = SVC(C = 1.2, gamma = 0.9, kernel= 'rbf')\n",
649 | "svc2.fit(X_train, y_train)\n",
650 | "pred_svc2 = svc2.predict(X_test)\n",
651 | "print(classification_report(y_test, pred_svc2))"
652 | ]
653 | },
654 | {
655 | "cell_type": "code",
656 | "execution_count": null,
657 | "id": "cda0622f",
658 | "metadata": {},
659 | "outputs": [],
660 | "source": []
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": null,
665 | "id": "e027a200",
666 | "metadata": {},
667 | "outputs": [],
668 | "source": []
669 | }
670 | ],
671 | "metadata": {
672 | "kernelspec": {
673 | "display_name": "Python 3 (ipykernel)",
674 | "language": "python",
675 | "name": "python3"
676 | },
677 | "language_info": {
678 | "codemirror_mode": {
679 | "name": "ipython",
680 | "version": 3
681 | },
682 | "file_extension": ".py",
683 | "mimetype": "text/x-python",
684 | "name": "python",
685 | "nbconvert_exporter": "python",
686 | "pygments_lexer": "ipython3",
687 | "version": "3.8.8"
688 | }
689 | },
690 | "nbformat": 4,
691 | "nbformat_minor": 5
692 | }
693 |
--------------------------------------------------------------------------------
/KNN algorithm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "id": "romance-interstate",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "#importing libraries\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "from sklearn import datasets\n",
13 | "from sklearn.model_selection import train_test_split \n",
14 | "from sklearn.neighbors import KNeighborsClassifier"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 3,
20 | "id": "knowing-suffering",
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "# load the data\n",
25 | "digits = datasets.load_digits()"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 9,
31 | "id": "unauthorized-release",
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "5"
38 | ]
39 | },
40 | "execution_count": 9,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "digits.target[1700]"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 5,
52 | "id": "spoken-hepatitis",
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "#Features allocation\n",
57 | "Samplefeatures=digits.data\n",
58 | "labels=digits.target"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 10,
64 | "id": "nominated-edmonton",
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | ""
71 | ]
72 | },
73 | "execution_count": 10,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | },
77 | {
78 | "data": {
79 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPUAAAD4CAYAAAA0L6C7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8+yak3AAAACXBIWXMAAAsTAAALEwEAmpwYAAAKoElEQVR4nO3d34tc9RnH8c+nq9JarQtNKDEbsrmQQCk0kSUgKWojlljF5KIXCShECrmpktCCaK82/4CkF0WQqBVMlTbqKmK1gq6t0FqTuG1NVksatmSDNgklmig0RJ9e7AlEWbtnZs6vfXy/ILgzO+z3GfTtmTk7OV9HhADk8ZW2BwBQLaIGkiFqIBmiBpIhaiCZS+r4oUuWLInR0dE6fnSrzpw50+h6p0+fbmytjz/+uLG1zp4929hay5Yta2wtSbr66qsbWWdmZkanTp3yfN+rJerR0VHt37+/jh/dqsnJyUbXm5iYaGytqampxtZ67bXXGltr+/btja0lSePj442sMzY29oXf4+U3kAxRA8kQNZAMUQPJEDWQDFEDyRA1kAxRA8kQNZBMqahtb7T9ru0jtu+reygA/VswattDkn4p6RZJ35a01fa36x4MQH/KHKnXSToSEUcj4pykJyVtqncsAP0qE/VySccuuj1b3PcZtrfb3m97/8mTJ6uaD0CPKjtRFhEPRcRYRIwtXbq0qh8LoEdloj4uacVFt0eK+wB0UJmo35R0je1Vti+TtEXSc/WOBaBfC14kISLO275b0kuShiQ9EhGHap8MQF9KXfkkIl6Q9ELNswCoAJ8oA5IhaiAZogaSIWogGaIGkiFqIBmiBpKpZYeOrDZv3tz2CLXZuXNnY2vdeOONja21bdu2xtbqCo7UQDJEDSRD1EAyRA0kQ9RAMkQNJEPUQDJEDSRD1EAyRA0kU2aHjkdsn7D9dhMDARhMmSP1ryRtrHkOABVZMOqI+IOk/zQwC4AKVPaemm13gG5g2x0gGc5+A8kQNZBMmV9pPSHpT5JW2561/eP6xwLQrzJ7aW1tYhAA1eDlN5AMUQPJEDWQDFEDyRA1kAxRA8kQNZDMot92Z3JysrG1Pvjgg8bWkqRNmzY1tlaTW+GsWbOmsbWGh4cbW6srOFIDyRA1kAxRA8kQNZAMUQPJEDWQDFEDyRA1kAxRA8kQNZBMmWuUrbD9qu3Dtg/Z3tHEYAD6U+az3+cl/SwiDtq+UtIB2y9HxOGaZwPQhzLb7rwXEQeLr89Impa0vO7BAPSnp/fUtkclrZX0xjzfY9sdoANKR237CklPSdoZER9+/vtsuwN0Q6mobV+quaD3RsTT9Y4EYBBlzn5b0sOSpiPigfpHAjCIMkfq9ZLulLTB9lTx54c1zwWgT2W23XldkhuYBUAF+EQZkAxRA8kQNZAMUQPJEDWQDFEDyRA1kAxRA8mwl1aHPfvssynXWrlyZWNrzczMNLZWV3CkBpIhaiAZogaSIWogGaIGkiFqIBmiBpIhaiAZogaSKXPhwa/a/ovtvxbb7uxqYjAA/SnzMdH/StoQEWeLSwW/bvt3EfHnmmcD0IcyFx4MSWeLm5cWf6LOoQD0r+zF/IdsT0k6IenliGDbHaCjSkUdEZ9ExBpJI5LW2f7OPI9h2x2gA3o6+x0RpyW9KmljLdMAGFiZs99LbQ8XX39N0s2S3ql5LgB9KnP2e5mkx2wPae5/Ar+JiOfrHQtAv8qc/f6b5vakBrAI8IkyIBmiBpIhaiAZogaSIWogGaIGkiFqIBmiBpJZ9NvujI+PN7bW1NRUY2tJ0ujoaGNrDQ8PN7bWrl1cZ6NOHKmBZIgaSIaogWSIGkiGqIFkiBpIhqiBZIgaSIaogWSIGkimdNTFBf3fss1FB4EO6+VIvUPSdF2DAKhG2W13RiTdKmlPveMAGFTZI/VuSfdK+vSLHsBeWkA3lNmh4zZJJyLiwP97HHtpAd1Q5ki9XtLttmckPSlpg+3Ha50KQ
N8WjDoi7o+IkYgYlbRF0isRcUftkwHoC7+nBpLp6XJGETEpabKWSQBUgiM1kAxRA8kQNZAMUQPJEDWQDFEDyRA1kMyi33anSRMTE22PUJvdu3e3PQIqwpEaSIaogWSIGkiGqIFkiBpIhqiBZIgaSIaogWSIGkiGqIFkSn1MtLiS6BlJn0g6HxFjdQ4FoH+9fPb7+xFxqrZJAFSCl99AMmWjDkm/t33A9vb5HsC2O0A3lI36exFxraRbJP3E9vWffwDb7gDdUCrqiDhe/POEpGckratzKAD9K7NB3tdtX3nha0k/kPR23YMB6E+Zs9/fkvSM7QuP/3VEvFjrVAD6tmDUEXFU0ncbmAVABfiVFpAMUQPJEDWQDFEDyRA1kAxRA8kQNZAM2+502Pj4eGNr7dq1q7G1brjhhsbW+jLiSA0kQ9RAMkQNJEPUQDJEDSRD1EAyRA0kQ9RAMkQNJEPUQDKlorY9bHuf7XdsT9u+ru7BAPSn7Ge/fyHpxYj4ke3LJF1e40wABrBg1LavknS9pG2SFBHnJJ2rdywA/Srz8nuVpJOSHrX9lu09xfW/P4Ntd4BuKBP1JZKulfRgRKyV9JGk+z7/ILbdAbqhTNSzkmYj4o3i9j7NRQ6ggxaMOiLel3TM9urirpskHa51KgB9K3v2+x5Je4sz30cl3VXfSAAGUSrqiJiSNFbvKACqwCfKgGSIGkiGqIFkiBpIhqiBZIgaSIaogWSIGkiGvbR6MDU11eh6ExMTja21Y8eOxtZqco+wLyOO1EAyRA0kQ9RAMkQNJEPUQDJEDSRD1EAyRA0kQ9RAMgtGbXu17amL/nxoe2cDswHow4IfE42IdyWtkSTbQ5KOS3qm3rEA9KvXl983SfpnRPyrjmEADK7XqLdIemK+b7DtDtANpaMurvl9u6Tfzvd9tt0BuqGXI/Utkg5GxL/rGgbA4HqJequ+4KU3gO4oFXWxde3Nkp6udxwAgyq77c5Hkr5Z8ywAKsAnyoBkiBpIhqiBZIgaSIaogWSIGkiGqIFkiBpIxhFR/Q+1T0rq9a9nLpF0qvJhuiHrc+N5tWdlRMz7N6dqiboftvdHxFjbc9Qh63PjeXUTL7+BZIgaSKZLUT/U9gA1yvrceF4d1Jn31ACq0aUjNYAKEDWQTCeitr3R9ru2j9i+r+15qmB7he1XbR+2fcj2jrZnqpLtIdtv2X6+7VmqZHvY9j7b79ietn1d2zP1qvX31MUGAf/Q3OWSZiW9KWlrRBxudbAB2V4maVlEHLR9paQDkjYv9ud1ge2fShqT9I2IuK3teapi+zFJf4yIPcUVdC+PiNMtj9WTLhyp10k6EhFHI+KcpCclbWp5poFFxHsRcbD4+oykaUnL252qGrZHJN0qaU/bs1TJ9lWSrpf0sCRFxLnFFrTUjaiXSzp20e1ZJfmP/wLbo5LWSnqj5VGqslvSvZI+bXmOqq2SdFLSo8Vbiz3FRTcXlS5EnZrtKyQ9JWlnRHzY9jyDsn2bpBMRcaDtWWpwiaRrJT0YEWslfSRp0Z3j6ULUxyWtuOj2SHHfomf7Us0FvTcislxeeb2k223PaO6t0gbbj7c7UmVmJc1GxIVXVPs0F/mi0oWo35R0je1VxYmJLZKea3mmgdm25t6bTUfEA23PU5WIuD8iRiJiVHP/rl6JiDtaHqsSEfG+pGO2Vxd33SRp0Z3YLHXd7zpFxHnbd0t6SdKQpEci4lDLY1VhvaQ7Jf3d9lRx388j4oX2RkIJ90jaWxxgjkq6q+V5etb6r7QAVKsLL78BVIiogWSIGkiGqIFkiBpIhqiBZIgaSOZ/5tKswAB+iuUAAAAASUVORK5CYII=\n",
80 | "text/plain": [
81 | ""
82 | ]
83 | },
84 | "metadata": {
85 | "needs_background": "light"
86 | },
87 | "output_type": "display_data"
88 | }
89 | ],
90 | "source": [
91 | "plt.imshow(digits.images[1700], cmap=plt.cm.gray_r, interpolation='nearest')"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 17,
97 | "id": "exclusive-groove",
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "# Split into training and test set\n",
102 | "trainimg, testimg,trainlab,testlab = train_test_split(Samplefeatures, labels, test_size = 0.2, random_state=42)"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 18,
108 | "id": "broken-pakistan",
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "knn = KNeighborsClassifier(n_neighbors=7)"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 19,
118 | "id": "surprising-commodity",
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "text/plain": [
124 | "KNeighborsClassifier(n_neighbors=7)"
125 | ]
126 | },
127 | "execution_count": 19,
128 | "metadata": {},
129 | "output_type": "execute_result"
130 | }
131 | ],
132 | "source": [
133 | "knn.fit(trainimg, trainlab)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 34,
139 | "id": "contrary-moore",
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "data": {
144 | "text/plain": [
145 | "360"
146 | ]
147 | },
148 | "execution_count": 34,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "len(testimg)"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 35,
160 | "id": "exterior-wallet",
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "data": {
165 | "text/plain": [
166 | "array([5])"
167 | ]
168 | },
169 | "execution_count": 35,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "knn.predict(testimg[[359]])"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 37,
181 | "id": "hearing-event",
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
186 | "text/plain": [
187 | "5"
188 | ]
189 | },
190 | "execution_count": 37,
191 | "metadata": {},
192 | "output_type": "execute_result"
193 | }
194 | ],
195 | "source": [
196 | "#actual (ground-truth) label of the same test sample\n",
197 | "testlab[359]"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 39,
203 | "id": "critical-replica",
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "name": "stdout",
208 | "output_type": "stream",
209 | "text": [
210 | "0.9888888888888889\n"
211 | ]
212 | }
213 | ],
214 | "source": [
215 | "#accuracy\n",
216 | "print(knn.score(testimg,testlab))"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "id": "surprised-migration",
223 | "metadata": {},
224 | "outputs": [],
225 | "source": []
226 | }
227 | ],
228 | "metadata": {
229 | "kernelspec": {
230 | "display_name": "Python 3",
231 | "language": "python",
232 | "name": "python3"
233 | },
234 | "language_info": {
235 | "codemirror_mode": {
236 | "name": "ipython",
237 | "version": 3
238 | },
239 | "file_extension": ".py",
240 | "mimetype": "text/x-python",
241 | "name": "python",
242 | "nbconvert_exporter": "python",
243 | "pygments_lexer": "ipython3",
244 | "version": "3.8.8"
245 | }
246 | },
247 | "nbformat": 4,
248 | "nbformat_minor": 5
249 | }
250 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 AKpython
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LP1.py:
--------------------------------------------------------------------------------
1 | from pulp import *  # wildcard import: LpVariable, LpProblem, LpMaximize, LpStatus and value used below all come from here
2 | # Decision variables, each declared with a lower and an upper bound.
3 | A = LpVariable("A", 100, 200) # 100 <= A <= 200
4 | B = LpVariable("B", 80, 170) # 80 <= B <= 170
5 | # Create the linear program as a maximization problem.
6 | prob = LpProblem("problem", LpMaximize)
7 |
8 |
9 | # Constraints: in PuLP, `prob += <comparison>` adds that comparison as a constraint.
10 | prob += A + B >=200  # A and B together must total at least 200
11 | prob += A<=200  # repeats the upper bound already declared on A
12 | prob += A>=100  # repeats the lower bound already declared on A
13 | prob += B>=80  # repeats the lower bound already declared on B
14 | prob += B<=170  # repeats the upper bound already declared on B
15 |
16 |
17 | # Objective: `prob += <expression without a comparison>` sets the function to maximize.
18 | prob += 5000*B- 2000*A
19 |
20 |
21 | # Solve the problem; the returned status code is turned into a readable name via LpStatus.
22 | status = prob.solve()
23 | print('printing status of the LP problem: ', LpStatus[status])
24 |
25 |
26 | # Print the solved values; the expected optimum is A = 100, B = 170 (objective 5000*170 - 2000*100 = 650000).
27 | print('Value of model A car: ', value(A))
28 | print('Value of model B car: ', value(B))
29 | print('the optimal solution or say maximum profit: $', value(prob.objective))
--------------------------------------------------------------------------------
/Logistic regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "floral-reality",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "#Importing libraries\n",
11 | "import pandas as pd\n",
12 | "%matplotlib inline\n",
13 | "from sklearn.model_selection import train_test_split"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "id": "undefined-submission",
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/html": [
25 | "\n",
26 | "\n",
39 | "
\n",
40 | " \n",
41 | " \n",
42 | " | \n",
43 | " Pregnancies | \n",
44 | " Glucose | \n",
45 | " BloodPressure | \n",
46 | " SkinThickness | \n",
47 | " Insulin | \n",
48 | " BMI | \n",
49 | " DiabetesPedigreeFunction | \n",
50 | " Age | \n",
51 | " Outcome | \n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " \n",
56 | " 0 | \n",
57 | " 6 | \n",
58 | " 148 | \n",
59 | " 72 | \n",
60 | " 35 | \n",
61 | " 0 | \n",
62 | " 33.6 | \n",
63 | " 0.627 | \n",
64 | " 50 | \n",
65 | " 1 | \n",
66 | "
\n",
67 | " \n",
68 | " 1 | \n",
69 | " 1 | \n",
70 | " 85 | \n",
71 | " 66 | \n",
72 | " 29 | \n",
73 | " 0 | \n",
74 | " 26.6 | \n",
75 | " 0.351 | \n",
76 | " 31 | \n",
77 | " 0 | \n",
78 | "
\n",
79 | " \n",
80 | " 2 | \n",
81 | " 8 | \n",
82 | " 183 | \n",
83 | " 64 | \n",
84 | " 0 | \n",
85 | " 0 | \n",
86 | " 23.3 | \n",
87 | " 0.672 | \n",
88 | " 32 | \n",
89 | " 1 | \n",
90 | "
\n",
91 | " \n",
92 | " 3 | \n",
93 | " 1 | \n",
94 | " 89 | \n",
95 | " 66 | \n",
96 | " 23 | \n",
97 | " 94 | \n",
98 | " 28.1 | \n",
99 | " 0.167 | \n",
100 | " 21 | \n",
101 | " 0 | \n",
102 | "
\n",
103 | " \n",
104 | " 4 | \n",
105 | " 0 | \n",
106 | " 137 | \n",
107 | " 40 | \n",
108 | " 35 | \n",
109 | " 168 | \n",
110 | " 43.1 | \n",
111 | " 2.288 | \n",
112 | " 33 | \n",
113 | " 1 | \n",
114 | "
\n",
115 | " \n",
116 | "
\n",
117 | "
"
118 | ],
119 | "text/plain": [
120 | " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
121 | "0 6 148 72 35 0 33.6 \n",
122 | "1 1 85 66 29 0 26.6 \n",
123 | "2 8 183 64 0 0 23.3 \n",
124 | "3 1 89 66 23 94 28.1 \n",
125 | "4 0 137 40 35 168 43.1 \n",
126 | "\n",
127 | " DiabetesPedigreeFunction Age Outcome \n",
128 | "0 0.627 50 1 \n",
129 | "1 0.351 31 0 \n",
130 | "2 0.672 32 1 \n",
131 | "3 0.167 21 0 \n",
132 | "4 2.288 33 1 "
133 | ]
134 | },
135 | "execution_count": 2,
136 | "metadata": {},
137 | "output_type": "execute_result"
138 | }
139 | ],
140 | "source": [
141 | "df = pd.read_csv(\"diabetes.csv\")\n",
142 | "df.head()"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 3,
148 | "id": "continuing-royal",
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "feature_cols = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']\n",
153 | "X = df[feature_cols] # Features\n",
154 | "y = df.Outcome # Target variable"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "id": "supposed-latter",
161 | "metadata": {},
162 | "outputs": [],
163 | "source": []
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 5,
168 | "id": "administrative-aside",
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "X_train, X_test, y_train, y_test = train_test_split(X,y,train_size=0.7)"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 6,
178 | "id": "under-essence",
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "data": {
183 | "text/plain": [
184 | "450 0\n",
185 | "742 0\n",
186 | "304 0\n",
187 | "667 1\n",
188 | "500 0\n",
189 | " ..\n",
190 | "164 1\n",
191 | "576 0\n",
192 | "315 0\n",
193 | "68 0\n",
194 | "631 0\n",
195 | "Name: Outcome, Length: 231, dtype: int64"
196 | ]
197 | },
198 | "execution_count": 6,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "y_test"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 7,
210 | "id": "fiscal-blake",
211 | "metadata": {},
212 | "outputs": [],
213 | "source": [
214 | "from sklearn.linear_model import LogisticRegression\n",
215 | "model = LogisticRegression()"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 8,
221 | "id": "vulnerable-bradley",
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "name": "stderr",
226 | "output_type": "stream",
227 | "text": [
228 | "c:\\users\\admin\\appdata\\local\\programs\\python\\python38\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:763: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
229 | "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
230 | "\n",
231 | "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
232 | " https://scikit-learn.org/stable/modules/preprocessing.html\n",
233 | "Please also refer to the documentation for alternative solver options:\n",
234 | " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
235 | " n_iter_i = _check_optimize_result(\n"
236 | ]
237 | },
238 | {
239 | "data": {
240 | "text/plain": [
241 | "LogisticRegression()"
242 | ]
243 | },
244 | "execution_count": 8,
245 | "metadata": {},
246 | "output_type": "execute_result"
247 | }
248 | ],
249 | "source": [
250 | "\n",
251 | "model.fit(X_train, y_train)"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 11,
257 | "id": "handmade-highlight",
258 | "metadata": {},
259 | "outputs": [],
260 | "source": [
261 | "y_predicted = model.predict(X_test)#predicted Outcome label for each test row"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 12,
267 | "id": "steady-organization",
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "name": "stdout",
272 | "output_type": "stream",
273 | "text": [
274 | "[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0\n",
275 | " 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1\n",
276 | " 0 1 0 1 1 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 0 0 0 0 1 0 0 0 0\n",
277 | " 1 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 1 1 1 0 1 0 0\n",
278 | " 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0\n",
279 | " 1 1 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0\n",
280 | " 0 1 0 1 0 0 0 0 0]\n"
281 | ]
282 | }
283 | ],
284 | "source": [
285 | "print(y_predicted)"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "id": "excited-madison",
292 | "metadata": {},
293 | "outputs": [],
294 | "source": []
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 115,
299 | "id": "sunrise-feelings",
300 | "metadata": {},
301 | "outputs": [
302 | {
303 | "data": {
304 | "text/plain": [
305 | "0.7965367965367965"
306 | ]
307 | },
308 | "execution_count": 115,
309 | "metadata": {},
310 | "output_type": "execute_result"
311 | }
312 | ],
313 | "source": [
314 | "model.score(X_test,y_test)"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "id": "literary-jesus",
321 | "metadata": {},
322 | "outputs": [],
323 | "source": []
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": null,
328 | "id": "latin-stopping",
329 | "metadata": {},
330 | "outputs": [],
331 | "source": []
332 | }
333 | ],
334 | "metadata": {
335 | "kernelspec": {
336 | "display_name": "Python 3",
337 | "language": "python",
338 | "name": "python3"
339 | },
340 | "language_info": {
341 | "codemirror_mode": {
342 | "name": "ipython",
343 | "version": 3
344 | },
345 | "file_extension": ".py",
346 | "mimetype": "text/x-python",
347 | "name": "python",
348 | "nbconvert_exporter": "python",
349 | "pygments_lexer": "ipython3",
350 | "version": "3.8.8"
351 | }
352 | },
353 | "nbformat": 4,
354 | "nbformat_minor": 5
355 | }
356 |
--------------------------------------------------------------------------------
/Salary prediction ( Linear Regression ).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "id": "lined-aquatic",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import numpy as np # linear algebra\n",
11 | "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 4,
17 | "id": "several-wagon",
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "data": {
22 | "text/html": [
23 | "\n",
24 | "\n",
37 | "
\n",
38 | " \n",
39 | " \n",
40 | " | \n",
41 | " YearsExperience | \n",
42 | " Salary | \n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " \n",
47 | " 0 | \n",
48 | " 1.1 | \n",
49 | " 39343.0 | \n",
50 | "
\n",
51 | " \n",
52 | " 1 | \n",
53 | " 1.3 | \n",
54 | " 46205.0 | \n",
55 | "
\n",
56 | " \n",
57 | " 2 | \n",
58 | " 1.5 | \n",
59 | " 37731.0 | \n",
60 | "
\n",
61 | " \n",
62 | " 3 | \n",
63 | " 2.0 | \n",
64 | " 43525.0 | \n",
65 | "
\n",
66 | " \n",
67 | " 4 | \n",
68 | " 2.2 | \n",
69 | " 39891.0 | \n",
70 | "
\n",
71 | " \n",
72 | " 5 | \n",
73 | " 2.9 | \n",
74 | " 56642.0 | \n",
75 | "
\n",
76 | " \n",
77 | " 6 | \n",
78 | " 3.0 | \n",
79 | " 60150.0 | \n",
80 | "
\n",
81 | " \n",
82 | " 7 | \n",
83 | " 3.2 | \n",
84 | " 54445.0 | \n",
85 | "
\n",
86 | " \n",
87 | " 8 | \n",
88 | " 3.2 | \n",
89 | " 64445.0 | \n",
90 | "
\n",
91 | " \n",
92 | " 9 | \n",
93 | " 3.7 | \n",
94 | " 57189.0 | \n",
95 | "
\n",
96 | " \n",
97 | " 10 | \n",
98 | " 3.9 | \n",
99 | " 63218.0 | \n",
100 | "
\n",
101 | " \n",
102 | " 11 | \n",
103 | " 4.0 | \n",
104 | " 55794.0 | \n",
105 | "
\n",
106 | " \n",
107 | " 12 | \n",
108 | " 4.0 | \n",
109 | " 56957.0 | \n",
110 | "
\n",
111 | " \n",
112 | " 13 | \n",
113 | " 4.1 | \n",
114 | " 57081.0 | \n",
115 | "
\n",
116 | " \n",
117 | " 14 | \n",
118 | " 4.5 | \n",
119 | " 61111.0 | \n",
120 | "
\n",
121 | " \n",
122 | " 15 | \n",
123 | " 4.9 | \n",
124 | " 67938.0 | \n",
125 | "
\n",
126 | " \n",
127 | " 16 | \n",
128 | " 5.1 | \n",
129 | " 66029.0 | \n",
130 | "
\n",
131 | " \n",
132 | " 17 | \n",
133 | " 5.3 | \n",
134 | " 83088.0 | \n",
135 | "
\n",
136 | " \n",
137 | " 18 | \n",
138 | " 5.9 | \n",
139 | " 81363.0 | \n",
140 | "
\n",
141 | " \n",
142 | " 19 | \n",
143 | " 6.0 | \n",
144 | " 93940.0 | \n",
145 | "
\n",
146 | " \n",
147 | " 20 | \n",
148 | " 6.8 | \n",
149 | " 91738.0 | \n",
150 | "
\n",
151 | " \n",
152 | " 21 | \n",
153 | " 7.1 | \n",
154 | " 98273.0 | \n",
155 | "
\n",
156 | " \n",
157 | " 22 | \n",
158 | " 7.9 | \n",
159 | " 101302.0 | \n",
160 | "
\n",
161 | " \n",
162 | " 23 | \n",
163 | " 8.2 | \n",
164 | " 113812.0 | \n",
165 | "
\n",
166 | " \n",
167 | " 24 | \n",
168 | " 8.7 | \n",
169 | " 109431.0 | \n",
170 | "
\n",
171 | " \n",
172 | " 25 | \n",
173 | " 9.0 | \n",
174 | " 105582.0 | \n",
175 | "
\n",
176 | " \n",
177 | " 26 | \n",
178 | " 9.5 | \n",
179 | " 116969.0 | \n",
180 | "
\n",
181 | " \n",
182 | " 27 | \n",
183 | " 9.6 | \n",
184 | " 112635.0 | \n",
185 | "
\n",
186 | " \n",
187 | " 28 | \n",
188 | " 10.3 | \n",
189 | " 122391.0 | \n",
190 | "
\n",
191 | " \n",
192 | " 29 | \n",
193 | " 10.5 | \n",
194 | " 121872.0 | \n",
195 | "
\n",
196 | " \n",
197 | "
\n",
198 | "
"
199 | ],
200 | "text/plain": [
201 | " YearsExperience Salary\n",
202 | "0 1.1 39343.0\n",
203 | "1 1.3 46205.0\n",
204 | "2 1.5 37731.0\n",
205 | "3 2.0 43525.0\n",
206 | "4 2.2 39891.0\n",
207 | "5 2.9 56642.0\n",
208 | "6 3.0 60150.0\n",
209 | "7 3.2 54445.0\n",
210 | "8 3.2 64445.0\n",
211 | "9 3.7 57189.0\n",
212 | "10 3.9 63218.0\n",
213 | "11 4.0 55794.0\n",
214 | "12 4.0 56957.0\n",
215 | "13 4.1 57081.0\n",
216 | "14 4.5 61111.0\n",
217 | "15 4.9 67938.0\n",
218 | "16 5.1 66029.0\n",
219 | "17 5.3 83088.0\n",
220 | "18 5.9 81363.0\n",
221 | "19 6.0 93940.0\n",
222 | "20 6.8 91738.0\n",
223 | "21 7.1 98273.0\n",
224 | "22 7.9 101302.0\n",
225 | "23 8.2 113812.0\n",
226 | "24 8.7 109431.0\n",
227 | "25 9.0 105582.0\n",
228 | "26 9.5 116969.0\n",
229 | "27 9.6 112635.0\n",
230 | "28 10.3 122391.0\n",
231 | "29 10.5 121872.0"
232 | ]
233 | },
234 | "execution_count": 4,
235 | "metadata": {},
236 | "output_type": "execute_result"
237 | }
238 | ],
239 | "source": [
240 | "data = pd.read_csv('Salary_data.csv')\n",
241 | "data"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 5,
247 | "id": "reduced-journalism",
248 | "metadata": {},
249 | "outputs": [],
250 | "source": [
251 | "x = data.YearsExperience.values.reshape(-1,1)\n",
252 | "y = data.Salary.values.reshape(-1,1)"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 6,
258 | "id": "consistent-parent",
259 | "metadata": {},
260 | "outputs": [],
261 | "source": [
262 | "from sklearn.model_selection import train_test_split\n",
263 | "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 0)"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 7,
269 | "id": "handmade-rogers",
270 | "metadata": {},
271 | "outputs": [],
272 | "source": [
273 | "from sklearn.linear_model import LinearRegression"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 9,
279 | "id": "invalid-natural",
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "model= LinearRegression()"
284 | ]
285 | },
286 | {
287 | "cell_type": "code",
288 | "execution_count": 10,
289 | "id": "sealed-composition",
290 | "metadata": {},
291 | "outputs": [
292 | {
293 | "data": {
294 | "text/plain": [
295 | "LinearRegression()"
296 | ]
297 | },
298 | "execution_count": 10,
299 | "metadata": {},
300 | "output_type": "execute_result"
301 | }
302 | ],
303 | "source": [
304 | "model.fit(x,y)"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 12,
310 | "id": "thorough-rabbit",
311 | "metadata": {},
312 | "outputs": [
313 | {
314 | "name": "stdout",
315 | "output_type": "stream",
316 | "text": [
317 | "63592\n"
318 | ]
319 | }
320 | ],
321 | "source": [
322 | "next_salary = model.predict([[4.0]])\n",
323 | "print(int(next_salary))\n"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 13,
329 | "id": "decimal-effectiveness",
330 | "metadata": {},
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/plain": [
335 | "0.941799590058557"
336 | ]
337 | },
338 | "execution_count": 13,
339 | "metadata": {},
340 | "output_type": "execute_result"
341 | }
342 | ],
343 | "source": [
344 | "model.score(X_train,y_train)"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": null,
350 | "id": "billion-portuguese",
351 | "metadata": {},
352 | "outputs": [],
353 | "source": []
354 | }
355 | ],
356 | "metadata": {
357 | "kernelspec": {
358 | "display_name": "Python 3",
359 | "language": "python",
360 | "name": "python3"
361 | },
362 | "language_info": {
363 | "codemirror_mode": {
364 | "name": "ipython",
365 | "version": 3
366 | },
367 | "file_extension": ".py",
368 | "mimetype": "text/x-python",
369 | "name": "python",
370 | "nbconvert_exporter": "python",
371 | "pygments_lexer": "ipython3",
372 | "version": "3.8.8"
373 | }
374 | },
375 | "nbformat": 4,
376 | "nbformat_minor": 5
377 | }
378 |
--------------------------------------------------------------------------------
/Salary_Data.csv:
--------------------------------------------------------------------------------
1 | YearsExperience,Salary
2 | 1.1,39343.00
3 | 1.3,46205.00
4 | 1.5,37731.00
5 | 2.0,43525.00
6 | 2.2,39891.00
7 | 2.9,56642.00
8 | 3.0,60150.00
9 | 3.2,54445.00
10 | 3.2,64445.00
11 | 3.7,57189.00
12 | 3.9,63218.00
13 | 4.0,55794.00
14 | 4.0,56957.00
15 | 4.1,57081.00
16 | 4.5,61111.00
17 | 4.9,67938.00
18 | 5.1,66029.00
19 | 5.3,83088.00
20 | 5.9,81363.00
21 | 6.0,93940.00
22 | 6.8,91738.00
23 | 7.1,98273.00
24 | 7.9,101302.00
25 | 8.2,113812.00
26 | 8.7,109431.00
27 | 9.0,105582.00
28 | 9.5,116969.00
29 | 9.6,112635.00
30 | 10.3,122391.00
31 | 10.5,121872.00
32 |
--------------------------------------------------------------------------------
/Sample scores.csv:
--------------------------------------------------------------------------------
1 | Overs,Scores
2 | 1,15
3 | 2,10
4 | 3,17
5 | 4,10
6 | 5,12
7 | 6,20
8 | 7,100
9 | 8,7
10 | 9,8
11 | 10,11
12 | 11,100
13 | 12,14
14 | 13,3
15 | 14,100
16 | 15,11
17 | 16,13
18 | 17,100
19 | 18,16
20 | 19,26
21 | 20,30
22 |
--------------------------------------------------------------------------------
/Weather prediction (NaiveBayes algorithm ).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "id": "public-horizontal",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 8,
16 | "id": "spread-fairy",
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " Outlook | \n",
41 | " Temp | \n",
42 | " Humidity | \n",
43 | " Windy | \n",
44 | " Play | \n",
45 | "
\n",
46 | " \n",
47 | " \n",
48 | " \n",
49 | " 0 | \n",
50 | " Rainy | \n",
51 | " Hot | \n",
52 | " High | \n",
53 | " f | \n",
54 | " no | \n",
55 | "
\n",
56 | " \n",
57 | " 1 | \n",
58 | " Rainy | \n",
59 | " Hot | \n",
60 | " High | \n",
61 | " t | \n",
62 | " no | \n",
63 | "
\n",
64 | " \n",
65 | " 2 | \n",
66 | " Overcast | \n",
67 | " Hot | \n",
68 | " High | \n",
69 | " f | \n",
70 | " yes | \n",
71 | "
\n",
72 | " \n",
73 | " 3 | \n",
74 | " Sunny | \n",
75 | " Mild | \n",
76 | " High | \n",
77 | " f | \n",
78 | " yes | \n",
79 | "
\n",
80 | " \n",
81 | " 4 | \n",
82 | " Sunny | \n",
83 | " Cool | \n",
84 | " Normal | \n",
85 | " f | \n",
86 | " yes | \n",
87 | "
\n",
88 | " \n",
89 | " 5 | \n",
90 | " Sunny | \n",
91 | " Cool | \n",
92 | " Normal | \n",
93 | " t | \n",
94 | " no | \n",
95 | "
\n",
96 | " \n",
97 | " 6 | \n",
98 | " Overcast | \n",
99 | " Cool | \n",
100 | " Normal | \n",
101 | " t | \n",
102 | " yes | \n",
103 | "
\n",
104 | " \n",
105 | " 7 | \n",
106 | " Rainy | \n",
107 | " Mild | \n",
108 | " High | \n",
109 | " f | \n",
110 | " no | \n",
111 | "
\n",
112 | " \n",
113 | " 8 | \n",
114 | " Rainy | \n",
115 | " Cool | \n",
116 | " Normal | \n",
117 | " f | \n",
118 | " yes | \n",
119 | "
\n",
120 | " \n",
121 | " 9 | \n",
122 | " Sunny | \n",
123 | " Mild | \n",
124 | " Normal | \n",
125 | " f | \n",
126 | " yes | \n",
127 | "
\n",
128 | " \n",
129 | " 10 | \n",
130 | " Rainy | \n",
131 | " Mild | \n",
132 | " Normal | \n",
133 | " t | \n",
134 | " yes | \n",
135 | "
\n",
136 | " \n",
137 | " 11 | \n",
138 | " Overcast | \n",
139 | " Mild | \n",
140 | " High | \n",
141 | " t | \n",
142 | " yes | \n",
143 | "
\n",
144 | " \n",
145 | " 12 | \n",
146 | " Overcast | \n",
147 | " Hot | \n",
148 | " Normal | \n",
149 | " f | \n",
150 | " yes | \n",
151 | "
\n",
152 | " \n",
153 | " 13 | \n",
154 | " Sunny | \n",
155 | " Mild | \n",
156 | " High | \n",
157 | " t | \n",
158 | " no | \n",
159 | "
\n",
160 | " \n",
161 | "
\n",
162 | "
"
163 | ],
164 | "text/plain": [
165 | " Outlook Temp Humidity Windy Play\n",
166 | "0 Rainy Hot High f no\n",
167 | "1 Rainy Hot High t no\n",
168 | "2 Overcast Hot High f yes\n",
169 | "3 Sunny Mild High f yes\n",
170 | "4 Sunny Cool Normal f yes\n",
171 | "5 Sunny Cool Normal t no\n",
172 | "6 Overcast Cool Normal t yes\n",
173 | "7 Rainy Mild High f no\n",
174 | "8 Rainy Cool Normal f yes\n",
175 | "9 Sunny Mild Normal f yes\n",
176 | "10 Rainy Mild Normal t yes\n",
177 | "11 Overcast Mild High t yes\n",
178 | "12 Overcast Hot Normal f yes\n",
179 | "13 Sunny Mild High t no"
180 | ]
181 | },
182 | "execution_count": 8,
183 | "metadata": {},
184 | "output_type": "execute_result"
185 | }
186 | ],
187 | "source": [
188 | "df = pd.read_csv(\"new_dataset.csv\")\n",
189 | "df"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": 1,
195 | "id": "popular-kennedy",
196 | "metadata": {},
197 | "outputs": [],
198 | "source": [
199 | "#NaiveBayes project (Weather Prediction)\n",
200 | "#Required Modules\n",
201 | "import pandas as pd\n",
202 | "from sklearn.preprocessing import LabelEncoder\n",
203 | "from sklearn.naive_bayes import GaussianNB"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 2,
209 | "id": "touched-packet",
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "data": {
214 | "text/html": [
215 | "\n",
216 | "\n",
229 | "
\n",
230 | " \n",
231 | " \n",
232 | " | \n",
233 | " Outlook | \n",
234 | " Temp | \n",
235 | " Humidity | \n",
236 | " Windy | \n",
237 | " Play | \n",
238 | "
\n",
239 | " \n",
240 | " \n",
241 | " \n",
242 | " 0 | \n",
243 | " Rainy | \n",
244 | " Hot | \n",
245 | " High | \n",
246 | " f | \n",
247 | " no | \n",
248 | "
\n",
249 | " \n",
250 | " 1 | \n",
251 | " Rainy | \n",
252 | " Hot | \n",
253 | " High | \n",
254 | " t | \n",
255 | " no | \n",
256 | "
\n",
257 | " \n",
258 | " 2 | \n",
259 | " Overcast | \n",
260 | " Hot | \n",
261 | " High | \n",
262 | " f | \n",
263 | " yes | \n",
264 | "
\n",
265 | " \n",
266 | " 3 | \n",
267 | " Sunny | \n",
268 | " Mild | \n",
269 | " High | \n",
270 | " f | \n",
271 | " yes | \n",
272 | "
\n",
273 | " \n",
274 | " 4 | \n",
275 | " Sunny | \n",
276 | " Cool | \n",
277 | " Normal | \n",
278 | " f | \n",
279 | " yes | \n",
280 | "
\n",
281 | " \n",
282 | " 5 | \n",
283 | " Sunny | \n",
284 | " Cool | \n",
285 | " Normal | \n",
286 | " t | \n",
287 | " no | \n",
288 | "
\n",
289 | " \n",
290 | " 6 | \n",
291 | " Overcast | \n",
292 | " Cool | \n",
293 | " Normal | \n",
294 | " t | \n",
295 | " yes | \n",
296 | "
\n",
297 | " \n",
298 | " 7 | \n",
299 | " Rainy | \n",
300 | " Mild | \n",
301 | " High | \n",
302 | " f | \n",
303 | " no | \n",
304 | "
\n",
305 | " \n",
306 | " 8 | \n",
307 | " Rainy | \n",
308 | " Cool | \n",
309 | " Normal | \n",
310 | " f | \n",
311 | " yes | \n",
312 | "
\n",
313 | " \n",
314 | " 9 | \n",
315 | " Sunny | \n",
316 | " Mild | \n",
317 | " Normal | \n",
318 | " f | \n",
319 | " yes | \n",
320 | "
\n",
321 | " \n",
322 | " 10 | \n",
323 | " Rainy | \n",
324 | " Mild | \n",
325 | " Normal | \n",
326 | " t | \n",
327 | " yes | \n",
328 | "
\n",
329 | " \n",
330 | " 11 | \n",
331 | " Overcast | \n",
332 | " Mild | \n",
333 | " High | \n",
334 | " t | \n",
335 | " yes | \n",
336 | "
\n",
337 | " \n",
338 | " 12 | \n",
339 | " Overcast | \n",
340 | " Hot | \n",
341 | " Normal | \n",
342 | " f | \n",
343 | " yes | \n",
344 | "
\n",
345 | " \n",
346 | " 13 | \n",
347 | " Sunny | \n",
348 | " Mild | \n",
349 | " High | \n",
350 | " t | \n",
351 | " no | \n",
352 | "
\n",
353 | " \n",
354 | "
\n",
355 | "
"
356 | ],
357 | "text/plain": [
358 | " Outlook Temp Humidity Windy Play\n",
359 | "0 Rainy Hot High f no\n",
360 | "1 Rainy Hot High t no\n",
361 | "2 Overcast Hot High f yes\n",
362 | "3 Sunny Mild High f yes\n",
363 | "4 Sunny Cool Normal f yes\n",
364 | "5 Sunny Cool Normal t no\n",
365 | "6 Overcast Cool Normal t yes\n",
366 | "7 Rainy Mild High f no\n",
367 | "8 Rainy Cool Normal f yes\n",
368 | "9 Sunny Mild Normal f yes\n",
369 | "10 Rainy Mild Normal t yes\n",
370 | "11 Overcast Mild High t yes\n",
371 | "12 Overcast Hot Normal f yes\n",
372 | "13 Sunny Mild High t no"
373 | ]
374 | },
375 | "execution_count": 2,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "#Reading CSV files\n",
382 | "df = pd.read_csv(\"new_dataset.csv\")\n",
383 | "df"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 3,
389 | "id": "forbidden-ottawa",
390 | "metadata": {},
391 | "outputs": [],
392 | "source": [
393 | "#Encoding the strings to Numericals\n",
394 | "outlook_at=LabelEncoder()\n",
395 | "Temp_at=LabelEncoder()\n",
396 | "Hum_at=LabelEncoder()\n",
397 | "win_at=LabelEncoder()"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": 5,
403 | "id": "supposed-radar",
404 | "metadata": {},
405 | "outputs": [
406 | {
407 | "data": {
408 | "text/plain": [
409 | "0 no\n",
410 | "1 no\n",
411 | "2 yes\n",
412 | "3 yes\n",
413 | "4 yes\n",
414 | "5 no\n",
415 | "6 yes\n",
416 | "7 no\n",
417 | "8 yes\n",
418 | "9 yes\n",
419 | "10 yes\n",
420 | "11 yes\n",
421 | "12 yes\n",
422 | "13 no\n",
423 | "Name: Play, dtype: object"
424 | ]
425 | },
426 | "execution_count": 5,
427 | "metadata": {},
428 | "output_type": "execute_result"
429 | }
430 | ],
431 | "source": [
432 | "#Drop the target column 'Play' from the inputs and keep it as a separate Series\n",
433 | "inputs=df.drop('Play',axis='columns')\n",
434 | "target=df['Play']\n",
435 | "target"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": 6,
441 | "id": "double-rwanda",
442 | "metadata": {},
443 | "outputs": [
444 | {
445 | "data": {
446 | "text/html": [
447 | "\n",
448 | "\n",
461 | "
\n",
462 | " \n",
463 | " \n",
464 | " | \n",
465 | " Outlook | \n",
466 | " Temp | \n",
467 | " Humidity | \n",
468 | " Windy | \n",
469 | " outlook_n | \n",
470 | " Temp_n | \n",
471 | " Hum_n | \n",
472 | " win_n | \n",
473 | "
\n",
474 | " \n",
475 | " \n",
476 | " \n",
477 | " 0 | \n",
478 | " Rainy | \n",
479 | " Hot | \n",
480 | " High | \n",
481 | " f | \n",
482 | " 1 | \n",
483 | " 1 | \n",
484 | " 0 | \n",
485 | " 0 | \n",
486 | "
\n",
487 | " \n",
488 | " 1 | \n",
489 | " Rainy | \n",
490 | " Hot | \n",
491 | " High | \n",
492 | " t | \n",
493 | " 1 | \n",
494 | " 1 | \n",
495 | " 0 | \n",
496 | " 1 | \n",
497 | "
\n",
498 | " \n",
499 | " 2 | \n",
500 | " Overcast | \n",
501 | " Hot | \n",
502 | " High | \n",
503 | " f | \n",
504 | " 0 | \n",
505 | " 1 | \n",
506 | " 0 | \n",
507 | " 0 | \n",
508 | "
\n",
509 | " \n",
510 | " 3 | \n",
511 | " Sunny | \n",
512 | " Mild | \n",
513 | " High | \n",
514 | " f | \n",
515 | " 2 | \n",
516 | " 2 | \n",
517 | " 0 | \n",
518 | " 0 | \n",
519 | "
\n",
520 | " \n",
521 | " 4 | \n",
522 | " Sunny | \n",
523 | " Cool | \n",
524 | " Normal | \n",
525 | " f | \n",
526 | " 2 | \n",
527 | " 0 | \n",
528 | " 1 | \n",
529 | " 0 | \n",
530 | "
\n",
531 | " \n",
532 | " 5 | \n",
533 | " Sunny | \n",
534 | " Cool | \n",
535 | " Normal | \n",
536 | " t | \n",
537 | " 2 | \n",
538 | " 0 | \n",
539 | " 1 | \n",
540 | " 1 | \n",
541 | "
\n",
542 | " \n",
543 | " 6 | \n",
544 | " Overcast | \n",
545 | " Cool | \n",
546 | " Normal | \n",
547 | " t | \n",
548 | " 0 | \n",
549 | " 0 | \n",
550 | " 1 | \n",
551 | " 1 | \n",
552 | "
\n",
553 | " \n",
554 | " 7 | \n",
555 | " Rainy | \n",
556 | " Mild | \n",
557 | " High | \n",
558 | " f | \n",
559 | " 1 | \n",
560 | " 2 | \n",
561 | " 0 | \n",
562 | " 0 | \n",
563 | "
\n",
564 | " \n",
565 | " 8 | \n",
566 | " Rainy | \n",
567 | " Cool | \n",
568 | " Normal | \n",
569 | " f | \n",
570 | " 1 | \n",
571 | " 0 | \n",
572 | " 1 | \n",
573 | " 0 | \n",
574 | "
\n",
575 | " \n",
576 | " 9 | \n",
577 | " Sunny | \n",
578 | " Mild | \n",
579 | " Normal | \n",
580 | " f | \n",
581 | " 2 | \n",
582 | " 2 | \n",
583 | " 1 | \n",
584 | " 0 | \n",
585 | "
\n",
586 | " \n",
587 | " 10 | \n",
588 | " Rainy | \n",
589 | " Mild | \n",
590 | " Normal | \n",
591 | " t | \n",
592 | " 1 | \n",
593 | " 2 | \n",
594 | " 1 | \n",
595 | " 1 | \n",
596 | "
\n",
597 | " \n",
598 | " 11 | \n",
599 | " Overcast | \n",
600 | " Mild | \n",
601 | " High | \n",
602 | " t | \n",
603 | " 0 | \n",
604 | " 2 | \n",
605 | " 0 | \n",
606 | " 1 | \n",
607 | "
\n",
608 | " \n",
609 | " 12 | \n",
610 | " Overcast | \n",
611 | " Hot | \n",
612 | " Normal | \n",
613 | " f | \n",
614 | " 0 | \n",
615 | " 1 | \n",
616 | " 1 | \n",
617 | " 0 | \n",
618 | "
\n",
619 | " \n",
620 | " 13 | \n",
621 | " Sunny | \n",
622 | " Mild | \n",
623 | " High | \n",
624 | " t | \n",
625 | " 2 | \n",
626 | " 2 | \n",
627 | " 0 | \n",
628 | " 1 | \n",
629 | "
\n",
630 | " \n",
631 | "
\n",
632 | "
"
633 | ],
634 | "text/plain": [
635 | " Outlook Temp Humidity Windy outlook_n Temp_n Hum_n win_n\n",
636 | "0 Rainy Hot High f 1 1 0 0\n",
637 | "1 Rainy Hot High t 1 1 0 1\n",
638 | "2 Overcast Hot High f 0 1 0 0\n",
639 | "3 Sunny Mild High f 2 2 0 0\n",
640 | "4 Sunny Cool Normal f 2 0 1 0\n",
641 | "5 Sunny Cool Normal t 2 0 1 1\n",
642 | "6 Overcast Cool Normal t 0 0 1 1\n",
643 | "7 Rainy Mild High f 1 2 0 0\n",
644 | "8 Rainy Cool Normal f 1 0 1 0\n",
645 | "9 Sunny Mild Normal f 2 2 1 0\n",
646 | "10 Rainy Mild Normal t 1 2 1 1\n",
647 | "11 Overcast Mild High t 0 2 0 1\n",
648 | "12 Overcast Hot Normal f 0 1 1 0\n",
649 | "13 Sunny Mild High t 2 2 0 1"
650 | ]
651 | },
652 | "execution_count": 6,
653 | "metadata": {},
654 | "output_type": "execute_result"
655 | }
656 | ],
657 | "source": [
658 | "#Creating the new dataframe\n",
659 | "inputs['outlook_n']= outlook_at.fit_transform(inputs['Outlook'])\n",
660 | "inputs['Temp_n']= outlook_at.fit_transform(inputs['Temp'])\n",
661 | "inputs['Hum_n']= outlook_at.fit_transform(inputs['Humidity'])\n",
662 | "inputs['win_n']= outlook_at.fit_transform(inputs['Windy'])\n",
663 | "inputs"
664 | ]
665 | },
666 | {
667 | "cell_type": "code",
668 | "execution_count": 7,
669 | "id": "indoor-satellite",
670 | "metadata": {},
671 | "outputs": [
672 | {
673 | "data": {
674 | "text/html": [
675 | "\n",
676 | "\n",
689 | "
\n",
690 | " \n",
691 | " \n",
692 | " | \n",
693 | " outlook_n | \n",
694 | " Temp_n | \n",
695 | " Hum_n | \n",
696 | " win_n | \n",
697 | "
\n",
698 | " \n",
699 | " \n",
700 | " \n",
701 | " 0 | \n",
702 | " 1 | \n",
703 | " 1 | \n",
704 | " 0 | \n",
705 | " 0 | \n",
706 | "
\n",
707 | " \n",
708 | " 1 | \n",
709 | " 1 | \n",
710 | " 1 | \n",
711 | " 0 | \n",
712 | " 1 | \n",
713 | "
\n",
714 | " \n",
715 | " 2 | \n",
716 | " 0 | \n",
717 | " 1 | \n",
718 | " 0 | \n",
719 | " 0 | \n",
720 | "
\n",
721 | " \n",
722 | " 3 | \n",
723 | " 2 | \n",
724 | " 2 | \n",
725 | " 0 | \n",
726 | " 0 | \n",
727 | "
\n",
728 | " \n",
729 | " 4 | \n",
730 | " 2 | \n",
731 | " 0 | \n",
732 | " 1 | \n",
733 | " 0 | \n",
734 | "
\n",
735 | " \n",
736 | " 5 | \n",
737 | " 2 | \n",
738 | " 0 | \n",
739 | " 1 | \n",
740 | " 1 | \n",
741 | "
\n",
742 | " \n",
743 | " 6 | \n",
744 | " 0 | \n",
745 | " 0 | \n",
746 | " 1 | \n",
747 | " 1 | \n",
748 | "
\n",
749 | " \n",
750 | " 7 | \n",
751 | " 1 | \n",
752 | " 2 | \n",
753 | " 0 | \n",
754 | " 0 | \n",
755 | "
\n",
756 | " \n",
757 | " 8 | \n",
758 | " 1 | \n",
759 | " 0 | \n",
760 | " 1 | \n",
761 | " 0 | \n",
762 | "
\n",
763 | " \n",
764 | " 9 | \n",
765 | " 2 | \n",
766 | " 2 | \n",
767 | " 1 | \n",
768 | " 0 | \n",
769 | "
\n",
770 | " \n",
771 | " 10 | \n",
772 | " 1 | \n",
773 | " 2 | \n",
774 | " 1 | \n",
775 | " 1 | \n",
776 | "
\n",
777 | " \n",
778 | " 11 | \n",
779 | " 0 | \n",
780 | " 2 | \n",
781 | " 0 | \n",
782 | " 1 | \n",
783 | "
\n",
784 | " \n",
785 | " 12 | \n",
786 | " 0 | \n",
787 | " 1 | \n",
788 | " 1 | \n",
789 | " 0 | \n",
790 | "
\n",
791 | " \n",
792 | " 13 | \n",
793 | " 2 | \n",
794 | " 2 | \n",
795 | " 0 | \n",
796 | " 1 | \n",
797 | "
\n",
798 | " \n",
799 | "
\n",
800 | "
"
801 | ],
802 | "text/plain": [
803 | " outlook_n Temp_n Hum_n win_n\n",
804 | "0 1 1 0 0\n",
805 | "1 1 1 0 1\n",
806 | "2 0 1 0 0\n",
807 | "3 2 2 0 0\n",
808 | "4 2 0 1 0\n",
809 | "5 2 0 1 1\n",
810 | "6 0 0 1 1\n",
811 | "7 1 2 0 0\n",
812 | "8 1 0 1 0\n",
813 | "9 2 2 1 0\n",
814 | "10 1 2 1 1\n",
815 | "11 0 2 0 1\n",
816 | "12 0 1 1 0\n",
817 | "13 2 2 0 1"
818 | ]
819 | },
820 | "execution_count": 7,
821 | "metadata": {},
822 | "output_type": "execute_result"
823 | }
824 | ],
825 | "source": [
826 | "#Dropping the string values\n",
827 | "inputs_n=inputs.drop(['Outlook','Temp','Humidity','Windy'],axis='columns')\n",
828 | "inputs_n"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": 8,
834 | "id": "functional-rebecca",
835 | "metadata": {},
836 | "outputs": [
837 | {
838 | "data": {
839 | "text/plain": [
840 | "GaussianNB()"
841 | ]
842 | },
843 | "execution_count": 8,
844 | "metadata": {},
845 | "output_type": "execute_result"
846 | }
847 | ],
848 | "source": [
849 | "#Fitting a Gaussian Naive Bayes classifier\n",
850 | "classifier = GaussianNB()\n",
851 | "classifier.fit(inputs_n,target)"
852 | ]
853 | },
854 | {
855 | "cell_type": "code",
856 | "execution_count": 9,
857 | "id": "hybrid-program",
858 | "metadata": {},
859 | "outputs": [
860 | {
861 | "data": {
862 | "text/plain": [
863 | "0.8571428571428571"
864 | ]
865 | },
866 | "execution_count": 9,
867 | "metadata": {},
868 | "output_type": "execute_result"
869 | }
870 | ],
871 | "source": [
872 | "#~85.7% accuracy, measured on the same data the model was trained on\n",
873 | "classifier.score(inputs_n,target)"
874 | ]
875 | },
876 | {
877 | "cell_type": "code",
878 | "execution_count": 10,
879 | "id": "coordinated-sector",
880 | "metadata": {},
881 | "outputs": [
882 | {
883 | "data": {
884 | "text/plain": [
885 | "array(['yes'], dtype='\n",
145 | "\n",
158 | "\n",
159 | " \n",
160 | " \n",
161 | " | \n",
162 | " SepalLengthCm | \n",
163 | " SepalWidthCm | \n",
164 | " PetalLengthCm | \n",
165 | " PetalWidthCm | \n",
166 | "
\n",
167 | " \n",
168 | " \n",
169 | " \n",
170 | " 0 | \n",
171 | " 5.1 | \n",
172 | " 3.5 | \n",
173 | " 1.4 | \n",
174 | " 0.2 | \n",
175 | "
\n",
176 | " \n",
177 | " 1 | \n",
178 | " 4.9 | \n",
179 | " 3.0 | \n",
180 | " 1.4 | \n",
181 | " 0.2 | \n",
182 | "
\n",
183 | " \n",
184 | " 2 | \n",
185 | " 4.7 | \n",
186 | " 3.2 | \n",
187 | " 1.3 | \n",
188 | " 0.2 | \n",
189 | "
\n",
190 | " \n",
191 | " 3 | \n",
192 | " 4.6 | \n",
193 | " 3.1 | \n",
194 | " 1.5 | \n",
195 | " 0.2 | \n",
196 | "
\n",
197 | " \n",
198 | " 4 | \n",
199 | " 5.0 | \n",
200 | " 3.6 | \n",
201 | " 1.4 | \n",
202 | " 0.2 | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | ""
207 | ],
208 | "text/plain": [
209 | " SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm\n",
210 | "0 5.1 3.5 1.4 0.2\n",
211 | "1 4.9 3.0 1.4 0.2\n",
212 | "2 4.7 3.2 1.3 0.2\n",
213 | "3 4.6 3.1 1.5 0.2\n",
214 | "4 5.0 3.6 1.4 0.2"
215 | ]
216 | },
217 | "execution_count": 8,
218 | "metadata": {},
219 | "output_type": "execute_result"
220 | }
221 | ],
222 | "source": [
223 | "X = iris[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]\n",
224 | "\n",
225 | "X.head()"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 9,
231 | "id": "100396f2",
232 | "metadata": {},
233 | "outputs": [
234 | {
235 | "data": {
236 | "text/plain": [
237 | "0 Iris-setosa\n",
238 | "1 Iris-setosa\n",
239 | "2 Iris-setosa\n",
240 | "3 Iris-setosa\n",
241 | "4 Iris-setosa\n",
242 | "Name: Species, dtype: object"
243 | ]
244 | },
245 | "execution_count": 9,
246 | "metadata": {},
247 | "output_type": "execute_result"
248 | }
249 | ],
250 | "source": [
251 | "y = iris['Species']\n",
252 | "\n",
253 | "y.head()"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 10,
259 | "id": "689d29bf",
260 | "metadata": {},
261 | "outputs": [],
262 | "source": [
263 | "from sklearn.preprocessing import LabelEncoder\n",
264 | "\n",
265 | "le=LabelEncoder()\n",
266 | "\n",
267 | "y=le.fit_transform(y)"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 11,
273 | "id": "1e35285c",
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "data": {
278 | "text/plain": [
279 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
280 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
281 | " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
282 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
283 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
284 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
285 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
286 | ]
287 | },
288 | "execution_count": 11,
289 | "metadata": {},
290 | "output_type": "execute_result"
291 | }
292 | ],
293 | "source": [
294 | "y"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 12,
300 | "id": "2e7751ff",
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "# Import train_test_split function\n",
305 | "from sklearn.model_selection import train_test_split\n",
306 | "\n",
307 | "# Split dataset into training set and test set\n",
308 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)"
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": 14,
314 | "id": "2f35d717",
315 | "metadata": {},
316 | "outputs": [],
317 | "source": [
318 | "# Import the AdaBoost classifier\n",
319 | "from sklearn.ensemble import AdaBoostClassifier\n",
320 | "\n",
321 | "\n",
322 | "# Create AdaBoost classifier object\n",
323 | "abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0)\n",
324 | "\n",
325 | "# Train AdaBoost classifier\n",
326 | "model1 = abc.fit(X_train, y_train)\n",
327 | "\n",
328 | "\n",
329 | "#Predict the response for test dataset\n",
330 | "y_pred = model1.predict(X_test)"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 16,
336 | "id": "2c447ea7",
337 | "metadata": {},
338 | "outputs": [
339 | {
340 | "name": "stdout",
341 | "output_type": "stream",
342 | "text": [
343 | "AdaBoost Classifier Model Accuracy: 0.9333333333333333\n"
344 | ]
345 | }
346 | ],
347 | "source": [
348 | "#import scikit-learn metrics module for accuracy calculation\n",
349 | "from sklearn.metrics import accuracy_score\n",
350 | "\n",
351 | "\n",
352 | "# calculate and print model accuracy\n",
353 | "print(\"AdaBoost Classifier Model Accuracy:\", accuracy_score(y_test, y_pred))"
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": 15,
359 | "id": "e8ffad25",
360 | "metadata": {},
361 | "outputs": [
362 | {
363 | "data": {
364 | "text/plain": [
365 | "array([1, 2, 1, 0, 2, 2, 1, 2, 1, 1, 1, 0, 2, 1, 0, 2, 1, 1, 2, 1, 1, 1,\n",
366 | " 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, 1, 2, 2, 1, 2, 2, 0, 2, 2, 1, 1, 1,\n",
367 | " 2])"
368 | ]
369 | },
370 | "execution_count": 15,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | }
374 | ],
375 | "source": [
376 | "y_pred"
377 | ]
378 | },
379 | {
380 | "cell_type": "code",
381 | "execution_count": null,
382 | "id": "fc852e82",
383 | "metadata": {},
384 | "outputs": [],
385 | "source": []
386 | }
387 | ],
388 | "metadata": {
389 | "kernelspec": {
390 | "display_name": "Python 3 (ipykernel)",
391 | "language": "python",
392 | "name": "python3"
393 | },
394 | "language_info": {
395 | "codemirror_mode": {
396 | "name": "ipython",
397 | "version": 3
398 | },
399 | "file_extension": ".py",
400 | "mimetype": "text/x-python",
401 | "name": "python",
402 | "nbconvert_exporter": "python",
403 | "pygments_lexer": "ipython3",
404 | "version": "3.8.8"
405 | }
406 | },
407 | "nbformat": 4,
408 | "nbformat_minor": 5
409 | }
410 |
--------------------------------------------------------------------------------
/airline-passenger-traffic(1).csv:
--------------------------------------------------------------------------------
1 | 1949-01,112
2 | 1949-02,118
3 | 1949-03,132
4 | 1949-04,129
5 | 1949-05,121
6 | 1949-06,135
7 | 1949-07,148
8 | 1949-08,148
9 | 1949-09,136
10 | 1949-10,119
11 | 1949-11,104
12 | 1949-12,118
13 | 1950-01,115
14 | 1950-02,126
15 | 1950-03,141
16 | 1950-04,135
17 | 1950-05,125
18 | 1950-06,149
19 | 1950-07,170
20 | 1950-08,170
21 | 1950-09,158
22 | 1950-10,133
23 | 1950-11,114
24 | 1950-12,140
25 | 1951-01,145
26 | 1951-02,150
27 | 1951-03,178
28 | 1951-04,163
29 | 1951-05,172
30 | 1951-06,
31 | 1951-07,
32 | 1951-08,199
33 | 1951-09,184
34 | 1951-10,162
35 | 1951-11,146
36 | 1951-12,166
37 | 1952-01,171
38 | 1952-02,180
39 | 1952-03,193
40 | 1952-04,181
41 | 1952-05,183
42 | 1952-06,218
43 | 1952-07,230
44 | 1952-08,242
45 | 1952-09,209
46 | 1952-10,191
47 | 1952-11,172
48 | 1952-12,194
49 | 1953-01,196
50 | 1953-02,196
51 | 1953-03,236
52 | 1953-04,235
53 | 1953-05,229
54 | 1953-06,243
55 | 1953-07,264
56 | 1953-08,272
57 | 1953-09,237
58 | 1953-10,211
59 | 1953-11,180
60 | 1953-12,201
61 | 1954-01,204
62 | 1954-02,188
63 | 1954-03,235
64 | 1954-04,227
65 | 1954-05,234
66 | 1954-06,
67 | 1954-07,302
68 | 1954-08,293
69 | 1954-09,259
70 | 1954-10,229
71 | 1954-11,203
72 | 1954-12,229
73 | 1955-01,242
74 | 1955-02,233
75 | 1955-03,267
76 | 1955-04,269
77 | 1955-05,270
78 | 1955-06,315
79 | 1955-07,364
80 | 1955-08,347
81 | 1955-09,312
82 | 1955-10,274
83 | 1955-11,237
84 | 1955-12,278
85 | 1956-01,284
86 | 1956-02,277
87 | 1956-03,317
88 | 1956-04,313
89 | 1956-05,318
90 | 1956-06,374
91 | 1956-07,413
92 | 1956-08,405
93 | 1956-09,355
94 | 1956-10,306
95 | 1956-11,271
96 | 1956-12,306
97 | 1957-01,315
98 | 1957-02,301
99 | 1957-03,356
100 | 1957-04,348
101 | 1957-05,355
102 | 1957-06,422
103 | 1957-07,465
104 | 1957-08,467
105 | 1957-09,404
106 | 1957-10,347
107 | 1957-11,305
108 | 1957-12,336
109 | 1958-01,340
110 | 1958-02,318
111 | 1958-03,362
112 | 1958-04,348
113 | 1958-05,363
114 | 1958-06,435
115 | 1958-07,491
116 | 1958-08,505
117 | 1958-09,404
118 | 1958-10,359
119 | 1958-11,310
120 | 1958-12,337
121 | 1959-01,360
122 | 1959-02,342
123 | 1959-03,406
124 | 1959-04,396
125 | 1959-05,420
126 | 1959-06,472
127 | 1959-07,548
128 | 1959-08,559
129 | 1959-09,463
130 | 1959-10,407
131 | 1959-11,362
132 | 1959-12,405
133 | 1960-01,417
134 | 1960-02,391
135 | 1960-03,
136 | 1960-04,461
137 | 1960-05,472
138 | 1960-06,535
139 | 1960-07,622
140 | 1960-08,606
141 | 1960-09,508
142 | 1960-10,461
143 | 1960-11,390
144 | 1960-12,432
145 |
--------------------------------------------------------------------------------
/car data.csv:
--------------------------------------------------------------------------------
1 | Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
2 | ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
3 | sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
4 | ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
5 | wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
6 | swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0
7 | vitara brezza,2018,9.25,9.83,2071,Diesel,Dealer,Manual,0
8 | ciaz,2015,6.75,8.12,18796,Petrol,Dealer,Manual,0
9 | s cross,2015,6.5,8.61,33429,Diesel,Dealer,Manual,0
10 | ciaz,2016,8.75,8.89,20273,Diesel,Dealer,Manual,0
11 | ciaz,2015,7.45,8.92,42367,Diesel,Dealer,Manual,0
12 | alto 800,2017,2.85,3.6,2135,Petrol,Dealer,Manual,0
13 | ciaz,2015,6.85,10.38,51000,Diesel,Dealer,Manual,0
14 | ciaz,2015,7.5,9.94,15000,Petrol,Dealer,Automatic,0
15 | ertiga,2015,6.1,7.71,26000,Petrol,Dealer,Manual,0
16 | dzire,2009,2.25,7.21,77427,Petrol,Dealer,Manual,0
17 | ertiga,2016,7.75,10.79,43000,Diesel,Dealer,Manual,0
18 | ertiga,2015,7.25,10.79,41678,Diesel,Dealer,Manual,0
19 | ertiga,2016,7.75,10.79,43000,Diesel,Dealer,Manual,0
20 | wagon r,2015,3.25,5.09,35500,CNG,Dealer,Manual,0
21 | sx4,2010,2.65,7.98,41442,Petrol,Dealer,Manual,0
22 | alto k10,2016,2.85,3.95,25000,Petrol,Dealer,Manual,0
23 | ignis,2017,4.9,5.71,2400,Petrol,Dealer,Manual,0
24 | sx4,2011,4.4,8.01,50000,Petrol,Dealer,Automatic,0
25 | alto k10,2014,2.5,3.46,45280,Petrol,Dealer,Manual,0
26 | wagon r,2013,2.9,4.41,56879,Petrol,Dealer,Manual,0
27 | swift,2011,3,4.99,20000,Petrol,Dealer,Manual,0
28 | swift,2013,4.15,5.87,55138,Petrol,Dealer,Manual,0
29 | swift,2017,6,6.49,16200,Petrol,Individual,Manual,0
30 | alto k10,2010,1.95,3.95,44542,Petrol,Dealer,Manual,0
31 | ciaz,2015,7.45,10.38,45000,Diesel,Dealer,Manual,0
32 | ritz,2012,3.1,5.98,51439,Diesel,Dealer,Manual,0
33 | ritz,2011,2.35,4.89,54200,Petrol,Dealer,Manual,0
34 | swift,2014,4.95,7.49,39000,Diesel,Dealer,Manual,0
35 | ertiga,2014,6,9.95,45000,Diesel,Dealer,Manual,0
36 | dzire,2014,5.5,8.06,45000,Diesel,Dealer,Manual,0
37 | sx4,2011,2.95,7.74,49998,CNG,Dealer,Manual,0
38 | dzire,2015,4.65,7.2,48767,Petrol,Dealer,Manual,0
39 | 800,2003,0.35,2.28,127000,Petrol,Individual,Manual,0
40 | alto k10,2016,3,3.76,10079,Petrol,Dealer,Manual,0
41 | sx4,2003,2.25,7.98,62000,Petrol,Dealer,Manual,0
42 | baleno,2016,5.85,7.87,24524,Petrol,Dealer,Automatic,0
43 | alto k10,2014,2.55,3.98,46706,Petrol,Dealer,Manual,0
44 | sx4,2008,1.95,7.15,58000,Petrol,Dealer,Manual,0
45 | dzire,2014,5.5,8.06,45780,Diesel,Dealer,Manual,0
46 | omni,2012,1.25,2.69,50000,Petrol,Dealer,Manual,0
47 | ciaz,2014,7.5,12.04,15000,Petrol,Dealer,Automatic,0
48 | ritz,2013,2.65,4.89,64532,Petrol,Dealer,Manual,0
49 | wagon r,2006,1.05,4.15,65000,Petrol,Dealer,Manual,0
50 | ertiga,2015,5.8,7.71,25870,Petrol,Dealer,Manual,0
51 | ciaz,2017,7.75,9.29,37000,Petrol,Dealer,Automatic,0
52 | fortuner,2012,14.9,30.61,104707,Diesel,Dealer,Automatic,0
53 | fortuner,2015,23,30.61,40000,Diesel,Dealer,Automatic,0
54 | innova,2017,18,19.77,15000,Diesel,Dealer,Automatic,0
55 | fortuner,2013,16,30.61,135000,Diesel,Individual,Automatic,0
56 | innova,2005,2.75,10.21,90000,Petrol,Individual,Manual,0
57 | corolla altis,2009,3.6,15.04,70000,Petrol,Dealer,Automatic,0
58 | etios cross,2015,4.5,7.27,40534,Petrol,Dealer,Manual,0
59 | corolla altis,2010,4.75,18.54,50000,Petrol,Dealer,Manual,0
60 | etios g,2014,4.1,6.8,39485,Petrol,Dealer,Manual,1
61 | fortuner,2014,19.99,35.96,41000,Diesel,Dealer,Automatic,0
62 | corolla altis,2013,6.95,18.61,40001,Petrol,Dealer,Manual,0
63 | etios cross,2015,4.5,7.7,40588,Petrol,Dealer,Manual,0
64 | fortuner,2014,18.75,35.96,78000,Diesel,Dealer,Automatic,0
65 | fortuner,2015,23.5,35.96,47000,Diesel,Dealer,Automatic,0
66 | fortuner,2017,33,36.23,6000,Diesel,Dealer,Automatic,0
67 | etios liva,2014,4.75,6.95,45000,Diesel,Dealer,Manual,0
68 | innova,2017,19.75,23.15,11000,Petrol,Dealer,Automatic,0
69 | fortuner,2010,9.25,20.45,59000,Diesel,Dealer,Manual,0
70 | corolla altis,2011,4.35,13.74,88000,Petrol,Dealer,Manual,0
71 | corolla altis,2016,14.25,20.91,12000,Petrol,Dealer,Manual,0
72 | etios liva,2014,3.95,6.76,71000,Diesel,Dealer,Manual,0
73 | corolla altis,2011,4.5,12.48,45000,Diesel,Dealer,Manual,0
74 | corolla altis,2013,7.45,18.61,56001,Petrol,Dealer,Manual,0
75 | etios liva,2011,2.65,5.71,43000,Petrol,Dealer,Manual,0
76 | etios cross,2014,4.9,8.93,83000,Diesel,Dealer,Manual,0
77 | etios g,2015,3.95,6.8,36000,Petrol,Dealer,Manual,0
78 | corolla altis,2013,5.5,14.68,72000,Petrol,Dealer,Manual,0
79 | corolla,2004,1.5,12.35,135154,Petrol,Dealer,Automatic,0
80 | corolla altis,2010,5.25,22.83,80000,Petrol,Dealer,Automatic,0
81 | fortuner,2012,14.5,30.61,89000,Diesel,Dealer,Automatic,0
82 | corolla altis,2016,14.73,14.89,23000,Diesel,Dealer,Manual,0
83 | etios gd,2015,4.75,7.85,40000,Diesel,Dealer,Manual,0
84 | innova,2017,23,25.39,15000,Diesel,Dealer,Automatic,0
85 | innova,2015,12.5,13.46,38000,Diesel,Dealer,Manual,0
86 | innova,2005,3.49,13.46,197176,Diesel,Dealer,Manual,0
87 | camry,2006,2.5,23.73,142000,Petrol,Individual,Automatic,3
88 | land cruiser,2010,35,92.6,78000,Diesel,Dealer,Manual,0
89 | corolla altis,2012,5.9,13.74,56000,Petrol,Dealer,Manual,0
90 | etios liva,2013,3.45,6.05,47000,Petrol,Dealer,Manual,0
91 | etios g,2014,4.75,6.76,40000,Petrol,Dealer,Manual,0
92 | corolla altis,2009,3.8,18.61,62000,Petrol,Dealer,Manual,0
93 | innova,2014,11.25,16.09,58242,Diesel,Dealer,Manual,0
94 | innova,2005,3.51,13.7,75000,Petrol,Dealer,Manual,0
95 | fortuner,2015,23,30.61,40000,Diesel,Dealer,Automatic,0
96 | corolla altis,2008,4,22.78,89000,Petrol,Dealer,Automatic,0
97 | corolla altis,2012,5.85,18.61,72000,Petrol,Dealer,Manual,0
98 | innova,2016,20.75,25.39,29000,Diesel,Dealer,Automatic,0
99 | corolla altis,2017,17,18.64,8700,Petrol,Dealer,Manual,0
100 | corolla altis,2013,7.05,18.61,45000,Petrol,Dealer,Manual,0
101 | fortuner,2010,9.65,20.45,50024,Diesel,Dealer,Manual,0
102 | Royal Enfield Thunder 500,2016,1.75,1.9,3000,Petrol,Individual,Manual,0
103 | UM Renegade Mojave,2017,1.7,1.82,1400,Petrol,Individual,Manual,0
104 | KTM RC200,2017,1.65,1.78,4000,Petrol,Individual,Manual,0
105 | Bajaj Dominar 400,2017,1.45,1.6,1200,Petrol,Individual,Manual,0
106 | Royal Enfield Classic 350,2017,1.35,1.47,4100,Petrol,Individual,Manual,0
107 | KTM RC390,2015,1.35,2.37,21700,Petrol,Individual,Manual,0
108 | Hyosung GT250R,2014,1.35,3.45,16500,Petrol,Individual,Manual,1
109 | Royal Enfield Thunder 350,2013,1.25,1.5,15000,Petrol,Individual,Manual,0
110 | Royal Enfield Thunder 350,2016,1.2,1.5,18000,Petrol,Individual,Manual,0
111 | Royal Enfield Classic 350,2017,1.2,1.47,11000,Petrol,Individual,Manual,0
112 | KTM RC200,2016,1.2,1.78,6000,Petrol,Individual,Manual,0
113 | Royal Enfield Thunder 350,2016,1.15,1.5,8700,Petrol,Individual,Manual,0
114 | KTM 390 Duke ,2014,1.15,2.4,7000,Petrol,Individual,Manual,0
115 | Mahindra Mojo XT300,2016,1.15,1.4,35000,Petrol,Individual,Manual,0
116 | Royal Enfield Classic 350,2015,1.15,1.47,17000,Petrol,Individual,Manual,0
117 | Royal Enfield Classic 350,2015,1.11,1.47,17500,Petrol,Individual,Manual,0
118 | Royal Enfield Classic 350,2013,1.1,1.47,33000,Petrol,Individual,Manual,0
119 | Royal Enfield Thunder 500,2015,1.1,1.9,14000,Petrol,Individual,Manual,0
120 | Royal Enfield Classic 350,2015,1.1,1.47,26000,Petrol,Individual,Manual,0
121 | Royal Enfield Thunder 500,2013,1.05,1.9,5400,Petrol,Individual,Manual,0
122 | Bajaj Pulsar RS200,2016,1.05,1.26,5700,Petrol,Individual,Manual,0
123 | Royal Enfield Thunder 350,2011,1.05,1.5,6900,Petrol,Individual,Manual,0
124 | Royal Enfield Bullet 350,2016,1.05,1.17,6000,Petrol,Individual,Manual,0
125 | Royal Enfield Classic 350,2013,1,1.47,46500,Petrol,Individual,Manual,0
126 | Royal Enfield Classic 500,2012,0.95,1.75,11500,Petrol,Individual,Manual,0
127 | Royal Enfield Classic 500,2009,0.9,1.75,40000,Petrol,Individual,Manual,0
128 | Bajaj Avenger 220,2017,0.9,0.95,1300,Petrol,Individual,Manual,0
129 | Bajaj Avenger 150,2016,0.75,0.8,7000,Petrol,Individual,Manual,0
130 | Honda CB Hornet 160R,2017,0.8,0.87,3000,Petrol,Individual,Manual,0
131 | Yamaha FZ S V 2.0,2017,0.78,0.84,5000,Petrol,Individual,Manual,0
132 | Honda CB Hornet 160R,2017,0.75,0.87,11000,Petrol,Individual,Manual,0
133 | Yamaha FZ 16,2015,0.75,0.82,18000,Petrol,Individual,Manual,0
134 | Bajaj Avenger 220,2017,0.75,0.95,3500,Petrol,Individual,Manual,0
135 | Bajaj Avenger 220,2016,0.72,0.95,500,Petrol,Individual,Manual,0
136 | TVS Apache RTR 160,2017,0.65,0.81,11800,Petrol,Individual,Manual,0
137 | Bajaj Pulsar 150,2015,0.65,0.74,5000,Petrol,Individual,Manual,0
138 | Honda CBR 150,2014,0.65,1.2,23500,Petrol,Individual,Manual,0
139 | Hero Extreme,2013,0.65,0.787,16000,Petrol,Individual,Manual,0
140 | Honda CB Hornet 160R,2016,0.6,0.87,15000,Petrol,Individual,Manual,0
141 | Bajaj Avenger 220 dtsi,2015,0.6,0.95,16600,Petrol,Individual,Manual,0
142 | Honda CBR 150,2013,0.6,1.2,32000,Petrol,Individual,Manual,0
143 | Bajaj Avenger 150 street,2016,0.6,0.8,20000,Petrol,Individual,Manual,0
144 | Yamaha FZ v 2.0,2015,0.6,0.84,29000,Petrol,Individual,Manual,0
145 | Yamaha FZ v 2.0,2016,0.6,0.84,25000,Petrol,Individual,Manual,0
146 | Bajaj Pulsar NS 200,2014,0.6,0.99,25000,Petrol,Individual,Manual,0
147 | TVS Apache RTR 160,2012,0.6,0.81,19000,Petrol,Individual,Manual,0
148 | Hero Extreme,2014,0.55,0.787,15000,Petrol,Individual,Manual,0
149 | Yamaha FZ S V 2.0,2015,0.55,0.84,58000,Petrol,Individual,Manual,0
150 | Bajaj Pulsar 220 F,2010,0.52,0.94,45000,Petrol,Individual,Manual,0
151 | Bajaj Pulsar 220 F,2016,0.51,0.94,24000,Petrol,Individual,Manual,0
152 | TVS Apache RTR 180,2011,0.5,0.826,6000,Petrol,Individual,Manual,0
153 | Hero Passion X pro,2016,0.5,0.55,31000,Petrol,Individual,Manual,0
154 | Bajaj Pulsar NS 200,2012,0.5,0.99,13000,Petrol,Individual,Manual,0
155 | Bajaj Pulsar NS 200,2013,0.5,0.99,45000,Petrol,Individual,Manual,0
156 | Yamaha Fazer ,2014,0.5,0.88,8000,Petrol,Individual,Manual,0
157 | Honda Activa 4G,2017,0.48,0.51,4300,Petrol,Individual,Automatic,0
158 | TVS Sport ,2017,0.48,0.52,15000,Petrol,Individual,Manual,0
159 | Yamaha FZ S V 2.0,2015,0.48,0.84,23000,Petrol,Individual,Manual,0
160 | Honda Dream Yuga ,2017,0.48,0.54,8600,Petrol,Individual,Manual,0
161 | Honda Activa 4G,2017,0.45,0.51,4000,Petrol,Individual,Automatic,0
162 | Bajaj Avenger Street 220,2011,0.45,0.95,24000,Petrol,Individual,Manual,0
163 | TVS Apache RTR 180,2014,0.45,0.826,23000,Petrol,Individual,Manual,0
164 | Bajaj Pulsar NS 200,2012,0.45,0.99,14500,Petrol,Individual,Manual,0
165 | Bajaj Avenger 220 dtsi,2010,0.45,0.95,27000,Petrol,Individual,Manual,0
166 | Hero Splender iSmart,2016,0.45,0.54,14000,Petrol,Individual,Manual,0
167 | Activa 3g,2016,0.45,0.54,500,Petrol,Individual,Automatic,0
168 | Hero Passion Pro,2016,0.45,0.55,1000,Petrol,Individual,Manual,0
169 | TVS Apache RTR 160,2014,0.42,0.81,42000,Petrol,Individual,Manual,0
170 | Honda CB Trigger,2013,0.42,0.73,12000,Petrol,Individual,Manual,0
171 | Hero Splender iSmart,2015,0.4,0.54,14000,Petrol,Individual,Manual,0
172 | Yamaha FZ S ,2012,0.4,0.83,5500,Petrol,Individual,Manual,0
173 | Hero Passion Pro,2015,0.4,0.55,6700,Petrol,Individual,Manual,0
174 | Bajaj Pulsar 135 LS,2014,0.4,0.64,13700,Petrol,Individual,Manual,0
175 | Activa 4g,2017,0.4,0.51,1300,Petrol,Individual,Automatic,0
176 | Honda CB Unicorn,2015,0.38,0.72,38600,Petrol,Individual,Manual,0
177 | Hero Honda CBZ extreme,2011,0.38,0.787,75000,Petrol,Individual,Manual,0
178 | Honda Karizma,2011,0.35,1.05,30000,Petrol,Individual,Manual,0
179 | Honda Activa 125,2016,0.35,0.57,24000,Petrol,Individual,Automatic,0
180 | TVS Jupyter,2014,0.35,0.52,19000,Petrol,Individual,Automatic,0
181 | Honda Karizma,2010,0.31,1.05,213000,Petrol,Individual,Manual,0
182 | Hero Honda Passion Pro,2012,0.3,0.51,60000,Petrol,Individual,Manual,0
183 | Hero Splender Plus,2016,0.3,0.48,50000,Petrol,Individual,Manual,0
184 | Honda CB Shine,2013,0.3,0.58,30000,Petrol,Individual,Manual,0
185 | Bajaj Discover 100,2013,0.27,0.47,21000,Petrol,Individual,Manual,0
186 | Bajaj Pulsar 150,2008,0.25,0.75,26000,Petrol,Individual,Manual,1
187 | Suzuki Access 125,2008,0.25,0.58,1900,Petrol,Individual,Automatic,0
188 | TVS Wego,2010,0.25,0.52,22000,Petrol,Individual,Automatic,0
189 | Honda CB twister,2013,0.25,0.51,32000,Petrol,Individual,Manual,0
190 | Hero Glamour,2013,0.25,0.57,18000,Petrol,Individual,Manual,0
191 | Hero Super Splendor,2005,0.2,0.57,55000,Petrol,Individual,Manual,0
192 | Bajaj Pulsar 150,2008,0.2,0.75,60000,Petrol,Individual,Manual,0
193 | Bajaj Discover 125,2012,0.2,0.57,25000,Petrol,Individual,Manual,1
194 | Hero Hunk,2007,0.2,0.75,49000,Petrol,Individual,Manual,1
195 | Hero Ignitor Disc,2013,0.2,0.65,24000,Petrol,Individual,Manual,1
196 | Hero CBZ Xtreme,2008,0.2,0.787,50000,Petrol,Individual,Manual,0
197 | Bajaj ct 100,2015,0.18,0.32,35000,Petrol,Individual,Manual,0
198 | Activa 3g,2008,0.17,0.52,500000,Petrol,Individual,Automatic,0
199 | Honda CB twister,2010,0.16,0.51,33000,Petrol,Individual,Manual,0
200 | Bajaj Discover 125,2011,0.15,0.57,35000,Petrol,Individual,Manual,1
201 | Honda CB Shine,2007,0.12,0.58,53000,Petrol,Individual,Manual,0
202 | Bajaj Pulsar 150,2006,0.1,0.75,92233,Petrol,Individual,Manual,0
203 | i20,2010,3.25,6.79,58000,Diesel,Dealer,Manual,1
204 | grand i10,2015,4.4,5.7,28200,Petrol,Dealer,Manual,0
205 | i10,2011,2.95,4.6,53460,Petrol,Dealer,Manual,0
206 | eon,2015,2.75,4.43,28282,Petrol,Dealer,Manual,0
207 | grand i10,2016,5.25,5.7,3493,Petrol,Dealer,Manual,1
208 | xcent,2017,5.75,7.13,12479,Petrol,Dealer,Manual,0
209 | grand i10,2015,5.15,5.7,34797,Petrol,Dealer,Automatic,0
210 | i20,2017,7.9,8.1,3435,Petrol,Dealer,Manual,0
211 | grand i10,2015,4.85,5.7,21125,Diesel,Dealer,Manual,0
212 | i10,2012,3.1,4.6,35775,Petrol,Dealer,Manual,0
213 | elantra,2015,11.75,14.79,43535,Diesel,Dealer,Manual,0
214 | creta,2016,11.25,13.6,22671,Petrol,Dealer,Manual,0
215 | i20,2011,2.9,6.79,31604,Petrol,Dealer,Manual,0
216 | grand i10,2017,5.25,5.7,20114,Petrol,Dealer,Manual,0
217 | verna,2012,4.5,9.4,36100,Petrol,Dealer,Manual,0
218 | eon,2016,2.9,4.43,12500,Petrol,Dealer,Manual,0
219 | eon,2016,3.15,4.43,15000,Petrol,Dealer,Manual,0
220 | verna,2014,6.45,9.4,45078,Petrol,Dealer,Manual,0
221 | verna,2012,4.5,9.4,36000,Petrol,Dealer,Manual,0
222 | eon,2017,3.5,4.43,38488,Petrol,Dealer,Manual,0
223 | i20,2013,4.5,6.79,32000,Petrol,Dealer,Automatic,0
224 | i20,2014,6,7.6,77632,Diesel,Dealer,Manual,0
225 | verna,2015,8.25,9.4,61381,Diesel,Dealer,Manual,0
226 | verna,2013,5.11,9.4,36198,Petrol,Dealer,Automatic,0
227 | i10,2011,2.7,4.6,22517,Petrol,Dealer,Manual,0
228 | grand i10,2015,5.25,5.7,24678,Petrol,Dealer,Manual,0
229 | i10,2011,2.55,4.43,57000,Petrol,Dealer,Manual,0
230 | verna,2012,4.95,9.4,60000,Diesel,Dealer,Manual,0
231 | i20,2012,3.1,6.79,52132,Diesel,Dealer,Manual,0
232 | verna,2013,6.15,9.4,45000,Diesel,Dealer,Manual,0
233 | verna,2017,9.25,9.4,15001,Petrol,Dealer,Manual,0
234 | elantra,2015,11.45,14.79,12900,Petrol,Dealer,Automatic,0
235 | grand i10,2013,3.9,5.7,53000,Diesel,Dealer,Manual,0
236 | grand i10,2015,5.5,5.7,4492,Petrol,Dealer,Manual,0
237 | verna,2017,9.1,9.4,15141,Petrol,Dealer,Manual,0
238 | eon,2016,3.1,4.43,11849,Petrol,Dealer,Manual,0
239 | creta,2015,11.25,13.6,68000,Diesel,Dealer,Manual,0
240 | verna,2013,4.8,9.4,60241,Petrol,Dealer,Manual,0
241 | eon,2012,2,4.43,23709,Petrol,Dealer,Manual,0
242 | verna,2012,5.35,9.4,32322,Diesel,Dealer,Manual,0
243 | xcent,2015,4.75,7.13,35866,Petrol,Dealer,Manual,1
244 | xcent,2014,4.4,7.13,34000,Petrol,Dealer,Manual,0
245 | i20,2016,6.25,7.6,7000,Petrol,Dealer,Manual,0
246 | verna,2013,5.95,9.4,49000,Diesel,Dealer,Manual,0
247 | verna,2012,5.2,9.4,71000,Diesel,Dealer,Manual,0
248 | i20,2012,3.75,6.79,35000,Petrol,Dealer,Manual,0
249 | verna,2015,5.95,9.4,36000,Petrol,Dealer,Manual,0
250 | i10,2013,4,4.6,30000,Petrol,Dealer,Manual,0
251 | i20,2016,5.25,7.6,17000,Petrol,Dealer,Manual,0
252 | creta,2016,12.9,13.6,35934,Diesel,Dealer,Manual,0
253 | city,2013,5,9.9,56701,Petrol,Dealer,Manual,0
254 | brio,2015,5.4,6.82,31427,Petrol,Dealer,Automatic,0
255 | city,2014,7.2,9.9,48000,Diesel,Dealer,Manual,0
256 | city,2013,5.25,9.9,54242,Petrol,Dealer,Manual,0
257 | brio,2012,3,5.35,53675,Petrol,Dealer,Manual,0
258 | city,2016,10.25,13.6,49562,Petrol,Dealer,Manual,0
259 | city,2015,8.5,13.6,40324,Petrol,Dealer,Manual,0
260 | city,2015,8.4,13.6,25000,Petrol,Dealer,Manual,0
261 | amaze,2014,3.9,7,36054,Petrol,Dealer,Manual,0
262 | city,2016,9.15,13.6,29223,Petrol,Dealer,Manual,0
263 | brio,2016,5.5,5.97,5600,Petrol,Dealer,Manual,0
264 | amaze,2015,4,5.8,40023,Petrol,Dealer,Manual,0
265 | jazz,2016,6.6,7.7,16002,Petrol,Dealer,Manual,0
266 | amaze,2015,4,7,40026,Petrol,Dealer,Manual,0
267 | jazz,2017,6.5,8.7,21200,Petrol,Dealer,Manual,0
268 | amaze,2014,3.65,7,35000,Petrol,Dealer,Manual,0
269 | city,2016,8.35,9.4,19434,Diesel,Dealer,Manual,0
270 | brio,2017,4.8,5.8,19000,Petrol,Dealer,Manual,0
271 | city,2015,6.7,10,18828,Petrol,Dealer,Manual,0
272 | city,2011,4.1,10,69341,Petrol,Dealer,Manual,0
273 | city,2009,3,10,69562,Petrol,Dealer,Manual,0
274 | city,2015,7.5,10,27600,Petrol,Dealer,Manual,0
275 | jazz,2010,2.25,7.5,61203,Petrol,Dealer,Manual,0
276 | brio,2014,5.3,6.8,16500,Petrol,Dealer,Manual,0
277 | city,2016,10.9,13.6,30753,Petrol,Dealer,Automatic,0
278 | city,2015,8.65,13.6,24800,Petrol,Dealer,Manual,0
279 | city,2015,9.7,13.6,21780,Petrol,Dealer,Manual,0
280 | jazz,2016,6,8.4,4000,Petrol,Dealer,Manual,0
281 | city,2014,6.25,13.6,40126,Petrol,Dealer,Manual,0
282 | brio,2015,5.25,5.9,14465,Petrol,Dealer,Manual,0
283 | city,2006,2.1,7.6,50456,Petrol,Dealer,Manual,0
284 | city,2014,8.25,14,63000,Diesel,Dealer,Manual,0
285 | city,2016,8.99,11.8,9010,Petrol,Dealer,Manual,0
286 | brio,2013,3.5,5.9,9800,Petrol,Dealer,Manual,0
287 | jazz,2016,7.4,8.5,15059,Petrol,Dealer,Automatic,0
288 | jazz,2016,5.65,7.9,28569,Petrol,Dealer,Manual,0
289 | amaze,2015,5.75,7.5,44000,Petrol,Dealer,Automatic,0
290 | city,2015,8.4,13.6,34000,Petrol,Dealer,Manual,0
291 | city,2016,10.11,13.6,10980,Petrol,Dealer,Manual,0
292 | amaze,2014,4.5,6.4,19000,Petrol,Dealer,Manual,0
293 | brio,2015,5.4,6.1,31427,Petrol,Dealer,Manual,0
294 | jazz,2016,6.4,8.4,12000,Petrol,Dealer,Manual,0
295 | city,2010,3.25,9.9,38000,Petrol,Dealer,Manual,0
296 | amaze,2014,3.75,6.8,33019,Petrol,Dealer,Manual,0
297 | city,2015,8.55,13.09,60076,Diesel,Dealer,Manual,0
298 | city,2016,9.5,11.6,33988,Diesel,Dealer,Manual,0
299 | brio,2015,4,5.9,60000,Petrol,Dealer,Manual,0
300 | city,2009,3.35,11,87934,Petrol,Dealer,Manual,0
301 | city,2017,11.5,12.5,9000,Diesel,Dealer,Manual,0
302 | brio,2016,5.3,5.9,5464,Petrol,Dealer,Manual,0
303 |
--------------------------------------------------------------------------------
/hm-recommender.ipynb:
--------------------------------------------------------------------------------
1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-04-13T11:59:47.200163Z","iopub.execute_input":"2022-04-13T11:59:47.200668Z","iopub.status.idle":"2022-04-13T11:59:47.230047Z","shell.execute_reply.started":"2022-04-13T11:59:47.200543Z","shell.execute_reply":"2022-04-13T11:59:47.229075Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pandas as pd\nimport os\nimport 
warnings\nwarnings.filterwarnings(\"ignore\")","metadata":{"execution":{"iopub.status.busy":"2022-04-13T11:59:47.231711Z","iopub.execute_input":"2022-04-13T11:59:47.232622Z","iopub.status.idle":"2022-04-13T11:59:47.236528Z","shell.execute_reply.started":"2022-04-13T11:59:47.232583Z","shell.execute_reply":"2022-04-13T11:59:47.235708Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"customers=pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/customers.csv')\ncustomers.describe()","metadata":{"execution":{"iopub.status.busy":"2022-04-13T11:59:47.237707Z","iopub.execute_input":"2022-04-13T11:59:47.237947Z","iopub.status.idle":"2022-04-13T11:59:53.367629Z","shell.execute_reply.started":"2022-04-13T11:59:47.237919Z","shell.execute_reply":"2022-04-13T11:59:53.366776Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"transactions_train=pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv')\ntransactions_train['article_id']","metadata":{"execution":{"iopub.status.busy":"2022-04-13T11:59:53.369787Z","iopub.execute_input":"2022-04-13T11:59:53.370012Z","iopub.status.idle":"2022-04-13T12:01:02.469793Z","shell.execute_reply.started":"2022-04-13T11:59:53.369963Z","shell.execute_reply":"2022-04-13T12:01:02.468622Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"transactionsdata=pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv',dtype={'article_id': 
str})","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:02.471617Z","iopub.execute_input":"2022-04-13T12:01:02.471928Z","iopub.status.idle":"2022-04-13T12:01:45.90164Z","shell.execute_reply.started":"2022-04-13T12:01:02.471887Z","shell.execute_reply":"2022-04-13T12:01:45.90082Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"transactionsdata","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:45.903367Z","iopub.execute_input":"2022-04-13T12:01:45.904106Z","iopub.status.idle":"2022-04-13T12:01:45.926837Z","shell.execute_reply.started":"2022-04-13T12:01:45.904041Z","shell.execute_reply":"2022-04-13T12:01:45.925833Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data=transactionsdata.drop(['t_dat', 'price','sales_channel_id'], axis=1)","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:45.928363Z","iopub.execute_input":"2022-04-13T12:01:45.928975Z","iopub.status.idle":"2022-04-13T12:01:46.653996Z","shell.execute_reply.started":"2022-04-13T12:01:45.928925Z","shell.execute_reply":"2022-04-13T12:01:46.653202Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:46.655546Z","iopub.execute_input":"2022-04-13T12:01:46.655839Z","iopub.status.idle":"2022-04-13T12:01:46.668097Z","shell.execute_reply.started":"2022-04-13T12:01:46.655802Z","shell.execute_reply":"2022-04-13T12:01:46.667195Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import random","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:46.669817Z","iopub.execute_input":"2022-04-13T12:01:46.670243Z","iopub.status.idle":"2022-04-13T12:01:46.678833Z","shell.execute_reply.started":"2022-04-13T12:01:46.670201Z","shell.execute_reply":"2022-04-13T12:01:46.678284Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"top_12_items = 
transactionsdata.groupby('article_id')['customer_id'].nunique().sort_values(ascending=False).head(12).index.tolist()\ntop_12_items","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:01:46.680816Z","iopub.execute_input":"2022-04-13T12:01:46.681228Z","iopub.status.idle":"2022-04-13T12:02:13.344562Z","shell.execute_reply.started":"2022-04-13T12:01:46.681191Z","shell.execute_reply":"2022-04-13T12:02:13.34203Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"ss=pd.read_csv('../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv')","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:44:10.531825Z","iopub.execute_input":"2022-04-13T12:44:10.532274Z","iopub.status.idle":"2022-04-13T12:44:10.547507Z","shell.execute_reply.started":"2022-04-13T12:44:10.532239Z","shell.execute_reply":"2022-04-13T12:44:10.546212Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"ss","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:44:10.95877Z","iopub.execute_input":"2022-04-13T12:44:10.959718Z","iopub.status.idle":"2022-04-13T12:44:10.977886Z","shell.execute_reply.started":"2022-04-13T12:44:10.959665Z","shell.execute_reply":"2022-04-13T12:44:10.976699Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"ss['prediction'] = ' 
'.join(top_12_items)\n","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:03:19.844987Z","iopub.execute_input":"2022-04-13T12:03:19.845289Z","iopub.status.idle":"2022-04-13T12:03:19.862627Z","shell.execute_reply.started":"2022-04-13T12:03:19.845256Z","shell.execute_reply":"2022-04-13T12:03:19.861594Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"ss.to_csv('submission.csv', index=False)\nss.shape","metadata":{"execution":{"iopub.status.busy":"2022-04-13T12:43:55.805007Z","iopub.execute_input":"2022-04-13T12:43:55.805734Z","iopub.status.idle":"2022-04-13T12:43:55.818214Z","shell.execute_reply.started":"2022-04-13T12:43:55.805686Z","shell.execute_reply":"2022-04-13T12:43:55.817226Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/overfitting-vs-underfitting-simple-explanation.ipynb:
--------------------------------------------------------------------------------
1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"* > 1- Underfits, when the training loss is much larger than the testing loss.\n* > 2- Overfits, when the training loss is much smaller than the testing loss.\n* > 3- Performs very well when the training loss and the testing loss are very close.","metadata":{}},{"cell_type":"markdown","source":"# Packages","metadata":{}},{"cell_type":"code","source":"from keras.preprocessing.image import ImageDataGenerator\nfrom keras.models import Sequential,load_model\nfrom keras.layers import Conv2D,MaxPooling2D,SpatialDropout2D,Flatten,Dropout,Dense\nfrom keras.preprocessing import image\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport cv2\nimport numpy as np\nimport os\n","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:43:46.152269Z","iopub.execute_input":"2022-07-27T13:43:46.152679Z","iopub.status.idle":"2022-07-27T13:43:46.158540Z","shell.execute_reply.started":"2022-07-27T13:43:46.152647Z","shell.execute_reply":"2022-07-27T13:43:46.157406Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"markdown","source":"# Data scaling ","metadata":{}},{"cell_type":"code","source":"#Normalization\ntrain=ImageDataGenerator(rescale=1/255)\ntest=ImageDataGenerator(rescale=1/255)","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:44:04.950608Z","iopub.execute_input":"2022-07-27T13:44:04.951018Z","iopub.status.idle":"2022-07-27T13:44:04.956784Z","shell.execute_reply.started":"2022-07-27T13:44:04.950986Z","shell.execute_reply":"2022-07-27T13:44:04.955450Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"markdown","source":"# Data 
preparation","metadata":{}},{"cell_type":"code","source":"traindataset=train.flow_from_directory('../input/pizza-dataset/pizza_not_pizza/Train',\n target_size=(224,224),\n batch_size=16,\n class_mode='binary')\ntestdataset=train.flow_from_directory('../input/pizza-dataset/pizza_not_pizza/Test',\n target_size=(224,224),\n batch_size=16,\n class_mode='binary')","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:44:51.279725Z","iopub.execute_input":"2022-07-27T13:44:51.280120Z","iopub.status.idle":"2022-07-27T13:44:51.602476Z","shell.execute_reply.started":"2022-07-27T13:44:51.280091Z","shell.execute_reply":"2022-07-27T13:44:51.601562Z"},"trusted":true},"execution_count":32,"outputs":[{"name":"stdout","text":"Found 1474 images belonging to 2 classes.\nFound 492 images belonging to 2 classes.\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# CNN model","metadata":{}},{"cell_type":"code","source":"model=Sequential()\nmodel.add(Conv2D(32,(3,3),activation='relu',input_shape=(224,224,3)))#filters #Kernalsize #RELU\nmodel.add(MaxPooling2D() )\nmodel.add(Conv2D(32,(3,3),activation='relu'))\nmodel.add(MaxPooling2D() )\nmodel.add(Conv2D(32,(3,3),activation='relu'))\nmodel.add(MaxPooling2D() 
)\nmodel.add(Flatten())\nmodel.add(Dense(100,activation='relu'))\nmodel.add(Dense(1,activation='sigmoid'))","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:45:23.765242Z","iopub.execute_input":"2022-07-27T13:45:23.765623Z","iopub.status.idle":"2022-07-27T13:45:23.842837Z","shell.execute_reply.started":"2022-07-27T13:45:23.765591Z","shell.execute_reply":"2022-07-27T13:45:23.841917Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"\nmodel.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:45:33.305982Z","iopub.execute_input":"2022-07-27T13:45:33.306375Z","iopub.status.idle":"2022-07-27T13:45:33.317316Z","shell.execute_reply.started":"2022-07-27T13:45:33.306344Z","shell.execute_reply":"2022-07-27T13:45:33.316450Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"markdown","source":"# Training","metadata":{}},{"cell_type":"code","source":"model_saved=model.fit_generator(\n traindataset,\n epochs=7)","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:45:43.757598Z","iopub.execute_input":"2022-07-27T13:45:43.757992Z","iopub.status.idle":"2022-07-27T13:48:41.975555Z","shell.execute_reply.started":"2022-07-27T13:45:43.757958Z","shell.execute_reply":"2022-07-27T13:48:41.974458Z"},"trusted":true},"execution_count":35,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.7/site-packages/keras/engine/training.py:1972: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. 
Please use `Model.fit`, which supports generators.\n warnings.warn('`Model.fit_generator` is deprecated and '\n","output_type":"stream"},{"name":"stdout","text":"Epoch 1/7\n93/93 [==============================] - 28s 288ms/step - loss: 0.7196 - accuracy: 0.5543\nEpoch 2/7\n93/93 [==============================] - 25s 270ms/step - loss: 0.6220 - accuracy: 0.6682\nEpoch 3/7\n93/93 [==============================] - 25s 265ms/step - loss: 0.5642 - accuracy: 0.7103\nEpoch 4/7\n93/93 [==============================] - 25s 269ms/step - loss: 0.4936 - accuracy: 0.7673\nEpoch 5/7\n93/93 [==============================] - 25s 264ms/step - loss: 0.4625 - accuracy: 0.7788\nEpoch 6/7\n93/93 [==============================] - 25s 269ms/step - loss: 0.3812 - accuracy: 0.8304\nEpoch 7/7\n93/93 [==============================] - 25s 267ms/step - loss: 0.3009 - accuracy: 0.8752\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Evaluation","metadata":{}},{"cell_type":"code","source":"prediction=model.predict(testdataset)","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:53:43.872718Z","iopub.execute_input":"2022-07-27T13:53:43.873065Z","iopub.status.idle":"2022-07-27T13:53:48.207364Z","shell.execute_reply.started":"2022-07-27T13:53:43.873038Z","shell.execute_reply":"2022-07-27T13:53:48.206515Z"},"trusted":true},"execution_count":48,"outputs":[]},{"cell_type":"code","source":"result=prediction[0]\nprint(result)","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:49:51.927990Z","iopub.execute_input":"2022-07-27T13:49:51.928901Z","iopub.status.idle":"2022-07-27T13:49:51.936127Z","shell.execute_reply.started":"2022-07-27T13:49:51.928848Z","shell.execute_reply":"2022-07-27T13:49:51.934479Z"},"trusted":true},"execution_count":38,"outputs":[{"name":"stdout","text":"[0.9787929]\n","output_type":"stream"}]},{"cell_type":"code","source":"score = model.evaluate(testdataset,verbose=0)\nprint('Test loss:', score[0])\nprint('Test accuracy:', 
score[1])","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:54:11.083538Z","iopub.execute_input":"2022-07-27T13:54:11.083921Z","iopub.status.idle":"2022-07-27T13:54:15.488750Z","shell.execute_reply.started":"2022-07-27T13:54:11.083888Z","shell.execute_reply":"2022-07-27T13:54:15.487836Z"},"trusted":true},"execution_count":50,"outputs":[{"name":"stdout","text":"Test loss: 0.5352293848991394\nTest accuracy: 0.7560975551605225\n","output_type":"stream"}]},{"cell_type":"code","source":"score = model.evaluate(traindataset,verbose=0)\nprint('Train loss:', score[0])\nprint('Train accuracy:', score[1])","metadata":{"execution":{"iopub.status.busy":"2022-07-27T13:54:50.260134Z","iopub.execute_input":"2022-07-27T13:54:50.260489Z","iopub.status.idle":"2022-07-27T13:55:01.990069Z","shell.execute_reply.started":"2022-07-27T13:54:50.260460Z","shell.execute_reply":"2022-07-27T13:55:01.988902Z"},"trusted":true},"execution_count":52,"outputs":[{"name":"stdout","text":"Train loss: 0.1777476817369461\nTrain accuracy: 0.9423337578773499\n","output_type":"stream"}]},{"cell_type":"markdown","source":"**Here the training loss is much smaller than the testing loss, which means the model is overfitting. To bring the two losses closer together, add more data variation to both the training and testing sets.**","metadata":{}},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/price-elasticity.ipynb:
--------------------------------------------------------------------------------
1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-09-30T04:16:29.358192Z","iopub.execute_input":"2022-09-30T04:16:29.358836Z","iopub.status.idle":"2022-09-30T04:16:29.377678Z","shell.execute_reply.started":"2022-09-30T04:16:29.358775Z","shell.execute_reply":"2022-09-30T04:16:29.375892Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/input/productsales/sales.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"pip install 
pyspark","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from pyspark.sql import SparkSession\n\nspark = SparkSession \\\n .builder \\\n .appName(\"Python Spark\") \\\n .config(\"spark.some.config.option\", \"some-value\") \\\n .getOrCreate()","metadata":{"execution":{"iopub.status.busy":"2022-09-30T04:17:23.806424Z","iopub.execute_input":"2022-09-30T04:17:23.807625Z","iopub.status.idle":"2022-09-30T04:17:24.788719Z","shell.execute_reply.started":"2022-09-30T04:17:23.807544Z","shell.execute_reply":"2022-09-30T04:17:24.787503Z"},"trusted":true},"execution_count":4,"outputs":[{"name":"stdout","text":"22/09/30 04:17:24 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.\n","output_type":"stream"}]},{"cell_type":"code","source":"from pyspark.sql.window import Window\nfrom pyspark.sql.functions import lag\nfrom pyspark.sql.functions import col\ndf = spark.read.csv(\"../input/productsales/sales.csv\", header=True, inferSchema=True)","metadata":{"execution":{"iopub.status.busy":"2022-09-30T04:17:58.073876Z","iopub.execute_input":"2022-09-30T04:17:58.074319Z","iopub.status.idle":"2022-09-30T04:17:58.453147Z","shell.execute_reply.started":"2022-09-30T04:17:58.074279Z","shell.execute_reply":"2022-09-30T04:17:58.452209Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"win = Window.partitionBy('id').orderBy('Discount_price')\ndf1=df.withColumn('perc_price_change', (df.Discount_price - 
lag(df['Discount_price']).over(win))/100)\ndf1=df1.withColumn('perc_demand_change',(df.Impression-lag(df['Impression']).over(win))/100)","metadata":{"execution":{"iopub.status.busy":"2022-09-30T04:17:53.485363Z","iopub.execute_input":"2022-09-30T04:17:53.485734Z","iopub.status.idle":"2022-09-30T04:17:53.674901Z","shell.execute_reply.started":"2022-09-30T04:17:53.485703Z","shell.execute_reply":"2022-09-30T04:17:53.673857Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"df1.select(\"Discount_price\",\"perc_price_change\",\"Impression\",\"perc_demand_change\").show()","metadata":{"execution":{"iopub.status.busy":"2022-09-30T04:18:12.192034Z","iopub.execute_input":"2022-09-30T04:18:12.192403Z","iopub.status.idle":"2022-09-30T04:18:13.388488Z","shell.execute_reply.started":"2022-09-30T04:18:12.192372Z","shell.execute_reply":"2022-09-30T04:18:13.387205Z"},"trusted":true},"execution_count":8,"outputs":[{"name":"stdout","text":"+--------------+-------------------+----------+------------------+\n|Discount_price| perc_price_change|Impression|perc_demand_change|\n+--------------+-------------------+----------+------------------+\n| 96.6| null| 1| null|\n| 106.99| 0.1039| 1| 0.0|\n| 174.99| 0.6800000000000002| 1| 0.0|\n| 179.99| 0.05| 5| 0.04|\n| 184.99| 0.05| 1| -0.04|\n| 189.0|0.04009999999999991| 1| 0.0|\n| 199.0| 0.1| 2| 0.01|\n| 219.0| 0.2| 1| -0.01|\n| 229.0| 0.1| 1| 0.0|\n+--------------+-------------------+----------+------------------+\n\n","output_type":"stream"}]},{"cell_type":"code","source":"df2=df1.withColumn(\"price_elasticiy\", df1.perc_demand_change 
/df1.perc_price_change)\ndf3=df2.select(\"Discount_price\",\"perc_price_change\",\"Impression\",\"perc_demand_change\",'price_elasticiy')\ndf3.show()\n","metadata":{"execution":{"iopub.status.busy":"2022-09-30T04:18:49.646406Z","iopub.execute_input":"2022-09-30T04:18:49.646805Z","iopub.status.idle":"2022-09-30T04:18:50.114716Z","shell.execute_reply.started":"2022-09-30T04:18:49.646771Z","shell.execute_reply":"2022-09-30T04:18:50.113517Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"+--------------+-------------------+----------+------------------+--------------------+\n|Discount_price| perc_price_change|Impression|perc_demand_change| price_elasticiy|\n+--------------+-------------------+----------+------------------+--------------------+\n| 96.6| null| 1| null| null|\n| 106.99| 0.1039| 1| 0.0| 0.0|\n| 174.99| 0.6800000000000002| 1| 0.0| 0.0|\n| 179.99| 0.05| 5| 0.04| 0.7999999999999999|\n| 184.99| 0.05| 1| -0.04| -0.7999999999999999|\n| 189.0|0.04009999999999991| 1| 0.0| 0.0|\n| 199.0| 0.1| 2| 0.01| 0.09999999999999999|\n| 219.0| 0.2| 1| -0.01|-0.04999999999999...|\n| 229.0| 0.1| 1| 0.0| 0.0|\n+--------------+-------------------+----------+------------------+--------------------+\n\n","output_type":"stream"}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------