├── .DS_Store
├── 2.deep-learning-fundamentals
├── .DS_Store
├── NVIDIA Certification - DL Fundamentals.pdf
└── 2.Activation_Functions_in_Tensorflow.ipynb
├── 1.machine-learning-fundamentals
├── .DS_Store
├── machine-learning-fundamentals.pdf
├── SeaPlaneTravel.csv
├── Advertising.csv
├── Social_Network_Ads.csv
├── Wholesale customers data.csv
├── 4. KNN Classifier.ipynb
├── 5. Support Vector Machines.ipynb
└── 7. XgBoost.ipynb
└── README.md
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manifoldailearning/NVIDIA-Certified-Associate--Generative-AI-LLMs-NCA-GENL/HEAD/.DS_Store
--------------------------------------------------------------------------------
/2.deep-learning-fundamentals/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manifoldailearning/NVIDIA-Certified-Associate--Generative-AI-LLMs-NCA-GENL/HEAD/2.deep-learning-fundamentals/.DS_Store
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manifoldailearning/NVIDIA-Certified-Associate--Generative-AI-LLMs-NCA-GENL/HEAD/1.machine-learning-fundamentals/.DS_Store
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/machine-learning-fundamentals.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manifoldailearning/NVIDIA-Certified-Associate--Generative-AI-LLMs-NCA-GENL/HEAD/1.machine-learning-fundamentals/machine-learning-fundamentals.pdf
--------------------------------------------------------------------------------
/2.deep-learning-fundamentals/NVIDIA Certification - DL Fundamentals.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manifoldailearning/NVIDIA-Certified-Associate--Generative-AI-LLMs-NCA-GENL/HEAD/2.deep-learning-fundamentals/NVIDIA Certification - DL Fundamentals.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NVIDIA-Certified Associate - Generative AI LLMs (NCA-GENL)
2 | A course by Manifold AI Learning to help practitioners prepare for the NVIDIA-Certified Associate - Generative AI LLMs (NCA-GENL) examination.
3 |
4 | Course link - https://www.manifoldailearning.in/courses/NVIDIA-Certified-Associate---Generative-AI-LLMs-NCA-GENL-662e207dd9c42436ec97fde0
5 |
6 | Join our free live workshops -
7 | https://www.manifoldailearning.in/courses/Webinar-Learning-Materials--Slides-64df2bc5e4b0267d331f9998
8 |
9 |
10 | Our other best courses:
11 |
12 | MLOps Bootcamp - https://www.manifoldailearning.in/courses/Complete-MLOps-BootCamp-654ddf11e4b004edc19e2649
13 |
14 | MLOps with AWS Bootcamp -
15 | https://www.manifoldailearning.in/courses/Master-Practical-MLOps-for-Data-Scientists--DevOps-on-AWS-65351f01e4b08600bc438698
16 |
17 |
18 | Created by the Top Instructor of Manifold AI Learning - Nachiketh Murthy (https://www.linkedin.com/in/nachiketh-murthy/)
19 |
20 | For support - support@manifoldailearning.in
21 |
22 | Thank you for all your support. Happy Learning!!!
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/SeaPlaneTravel.csv:
--------------------------------------------------------------------------------
1 | Month,#Passengers
2 | 2003-01,112
3 | 2003-02,118
4 | 2003-03,132
5 | 2003-04,129
6 | 2003-05,121
7 | 2003-06,135
8 | 2003-07,148
9 | 2003-08,148
10 | 2003-09,136
11 | 2003-10,119
12 | 2003-11,104
13 | 2003-12,118
14 | 2004-01,115
15 | 2004-02,126
16 | 2004-03,141
17 | 2004-04,135
18 | 2004-05,125
19 | 2004-06,149
20 | 2004-07,170
21 | 2004-08,170
22 | 2004-09,158
23 | 2004-10,133
24 | 2004-11,114
25 | 2004-12,140
26 | 2006-01,145
27 | 2006-02,150
28 | 2006-03,178
29 | 2006-04,163
30 | 2006-05,172
31 | 2006-06,178
32 | 2006-07,199
33 | 2006-08,199
34 | 2006-09,184
35 | 2006-10,162
36 | 2006-11,146
37 | 2006-12,166
38 | 2007-01,171
39 | 2007-02,180
40 | 2007-03,193
41 | 2007-04,181
42 | 2007-05,183
43 | 2007-06,218
44 | 2007-07,230
45 | 2007-08,242
46 | 2007-09,209
47 | 2007-10,191
48 | 2007-11,172
49 | 2007-12,194
50 | 2008-01,196
51 | 2008-02,196
52 | 2008-03,236
53 | 2008-04,235
54 | 2008-05,229
55 | 2008-06,243
56 | 2008-07,264
57 | 2008-08,272
58 | 2008-09,237
59 | 2008-10,211
60 | 2008-11,180
61 | 2008-12,201
62 | 2009-01,204
63 | 2009-02,188
64 | 2009-03,235
65 | 2009-04,227
66 | 2009-05,234
67 | 2009-06,264
68 | 2009-07,302
69 | 2009-08,293
70 | 2009-09,259
71 | 2009-10,229
72 | 2009-11,203
73 | 2009-12,229
74 | 2010-01,242
75 | 2010-02,233
76 | 2010-03,267
77 | 2010-04,269
78 | 2010-05,270
79 | 2010-06,315
80 | 2010-07,364
81 | 2010-08,347
82 | 2010-09,312
83 | 2010-10,274
84 | 2010-11,237
85 | 2010-12,278
86 | 2011-01,284
87 | 2011-02,277
88 | 2011-03,317
89 | 2011-04,313
90 | 2011-05,318
91 | 2011-06,374
92 | 2011-07,413
93 | 2011-08,405
94 | 2011-09,355
95 | 2011-10,306
96 | 2011-11,271
97 | 2011-12,306
98 | 2012-01,315
99 | 2012-02,301
100 | 2012-03,356
101 | 2012-04,348
102 | 2012-05,355
103 | 2012-06,422
104 | 2012-07,465
105 | 2012-08,467
106 | 2012-09,404
107 | 2012-10,347
108 | 2012-11,305
109 | 2012-12,336
110 | 2013-01,340
111 | 2013-02,318
112 | 2013-03,362
113 | 2013-04,348
114 | 2013-05,363
115 | 2013-06,435
116 | 2013-07,491
117 | 2013-08,505
118 | 2013-09,404
119 | 2013-10,359
120 | 2013-11,310
121 | 2013-12,337
122 | 2014-01,360
123 | 2014-02,342
124 | 2014-03,406
125 | 2014-04,396
126 | 2014-05,420
127 | 2014-06,472
128 | 2014-07,548
129 | 2014-08,559
130 | 2014-09,463
131 | 2014-10,407
132 | 2014-11,362
133 | 2014-12,405
134 | 2015-01,417
135 | 2015-02,391
136 | 2015-03,419
137 | 2015-04,461
138 | 2015-05,472
139 | 2015-06,535
140 | 2015-07,622
141 | 2015-08,606
142 | 2015-09,508
143 | 2015-10,461
144 | 2015-11,390
145 | 2015-12,432
146 |
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/Advertising.csv:
--------------------------------------------------------------------------------
1 | ,TV,radio,newspaper,sales
2 | 1,230.1,37.8,69.2,22.1
3 | 2,44.5,39.3,45.1,10.4
4 | 3,17.2,45.9,69.3,9.3
5 | 4,151.5,41.3,58.5,18.5
6 | 5,180.8,10.8,58.4,12.9
7 | 6,8.7,48.9,75,7.2
8 | 7,57.5,32.8,23.5,11.8
9 | 8,120.2,19.6,11.6,13.2
10 | 9,8.6,2.1,1,4.8
11 | 10,199.8,2.6,21.2,10.6
12 | 11,66.1,5.8,24.2,8.6
13 | 12,214.7,24,4,17.4
14 | 13,23.8,35.1,65.9,9.2
15 | 14,97.5,7.6,7.2,9.7
16 | 15,204.1,32.9,46,19
17 | 16,195.4,47.7,52.9,22.4
18 | 17,67.8,36.6,114,12.5
19 | 18,281.4,39.6,55.8,24.4
20 | 19,69.2,20.5,18.3,11.3
21 | 20,147.3,23.9,19.1,14.6
22 | 21,218.4,27.7,53.4,18
23 | 22,237.4,5.1,23.5,12.5
24 | 23,13.2,15.9,49.6,5.6
25 | 24,228.3,16.9,26.2,15.5
26 | 25,62.3,12.6,18.3,9.7
27 | 26,262.9,3.5,19.5,12
28 | 27,142.9,29.3,12.6,15
29 | 28,240.1,16.7,22.9,15.9
30 | 29,248.8,27.1,22.9,18.9
31 | 30,70.6,16,40.8,10.5
32 | 31,292.9,28.3,43.2,21.4
33 | 32,112.9,17.4,38.6,11.9
34 | 33,97.2,1.5,30,9.6
35 | 34,265.6,20,0.3,17.4
36 | 35,95.7,1.4,7.4,9.5
37 | 36,290.7,4.1,8.5,12.8
38 | 37,266.9,43.8,5,25.4
39 | 38,74.7,49.4,45.7,14.7
40 | 39,43.1,26.7,35.1,10.1
41 | 40,228,37.7,32,21.5
42 | 41,202.5,22.3,31.6,16.6
43 | 42,177,33.4,38.7,17.1
44 | 43,293.6,27.7,1.8,20.7
45 | 44,206.9,8.4,26.4,12.9
46 | 45,25.1,25.7,43.3,8.5
47 | 46,175.1,22.5,31.5,14.9
48 | 47,89.7,9.9,35.7,10.6
49 | 48,239.9,41.5,18.5,23.2
50 | 49,227.2,15.8,49.9,14.8
51 | 50,66.9,11.7,36.8,9.7
52 | 51,199.8,3.1,34.6,11.4
53 | 52,100.4,9.6,3.6,10.7
54 | 53,216.4,41.7,39.6,22.6
55 | 54,182.6,46.2,58.7,21.2
56 | 55,262.7,28.8,15.9,20.2
57 | 56,198.9,49.4,60,23.7
58 | 57,7.3,28.1,41.4,5.5
59 | 58,136.2,19.2,16.6,13.2
60 | 59,210.8,49.6,37.7,23.8
61 | 60,210.7,29.5,9.3,18.4
62 | 61,53.5,2,21.4,8.1
63 | 62,261.3,42.7,54.7,24.2
64 | 63,239.3,15.5,27.3,15.7
65 | 64,102.7,29.6,8.4,14
66 | 65,131.1,42.8,28.9,18
67 | 66,69,9.3,0.9,9.3
68 | 67,31.5,24.6,2.2,9.5
69 | 68,139.3,14.5,10.2,13.4
70 | 69,237.4,27.5,11,18.9
71 | 70,216.8,43.9,27.2,22.3
72 | 71,199.1,30.6,38.7,18.3
73 | 72,109.8,14.3,31.7,12.4
74 | 73,26.8,33,19.3,8.8
75 | 74,129.4,5.7,31.3,11
76 | 75,213.4,24.6,13.1,17
77 | 76,16.9,43.7,89.4,8.7
78 | 77,27.5,1.6,20.7,6.9
79 | 78,120.5,28.5,14.2,14.2
80 | 79,5.4,29.9,9.4,5.3
81 | 80,116,7.7,23.1,11
82 | 81,76.4,26.7,22.3,11.8
83 | 82,239.8,4.1,36.9,12.3
84 | 83,75.3,20.3,32.5,11.3
85 | 84,68.4,44.5,35.6,13.6
86 | 85,213.5,43,33.8,21.7
87 | 86,193.2,18.4,65.7,15.2
88 | 87,76.3,27.5,16,12
89 | 88,110.7,40.6,63.2,16
90 | 89,88.3,25.5,73.4,12.9
91 | 90,109.8,47.8,51.4,16.7
92 | 91,134.3,4.9,9.3,11.2
93 | 92,28.6,1.5,33,7.3
94 | 93,217.7,33.5,59,19.4
95 | 94,250.9,36.5,72.3,22.2
96 | 95,107.4,14,10.9,11.5
97 | 96,163.3,31.6,52.9,16.9
98 | 97,197.6,3.5,5.9,11.7
99 | 98,184.9,21,22,15.5
100 | 99,289.7,42.3,51.2,25.4
101 | 100,135.2,41.7,45.9,17.2
102 | 101,222.4,4.3,49.8,11.7
103 | 102,296.4,36.3,100.9,23.8
104 | 103,280.2,10.1,21.4,14.8
105 | 104,187.9,17.2,17.9,14.7
106 | 105,238.2,34.3,5.3,20.7
107 | 106,137.9,46.4,59,19.2
108 | 107,25,11,29.7,7.2
109 | 108,90.4,0.3,23.2,8.7
110 | 109,13.1,0.4,25.6,5.3
111 | 110,255.4,26.9,5.5,19.8
112 | 111,225.8,8.2,56.5,13.4
113 | 112,241.7,38,23.2,21.8
114 | 113,175.7,15.4,2.4,14.1
115 | 114,209.6,20.6,10.7,15.9
116 | 115,78.2,46.8,34.5,14.6
117 | 116,75.1,35,52.7,12.6
118 | 117,139.2,14.3,25.6,12.2
119 | 118,76.4,0.8,14.8,9.4
120 | 119,125.7,36.9,79.2,15.9
121 | 120,19.4,16,22.3,6.6
122 | 121,141.3,26.8,46.2,15.5
123 | 122,18.8,21.7,50.4,7
124 | 123,224,2.4,15.6,11.6
125 | 124,123.1,34.6,12.4,15.2
126 | 125,229.5,32.3,74.2,19.7
127 | 126,87.2,11.8,25.9,10.6
128 | 127,7.8,38.9,50.6,6.6
129 | 128,80.2,0,9.2,8.8
130 | 129,220.3,49,3.2,24.7
131 | 130,59.6,12,43.1,9.7
132 | 131,0.7,39.6,8.7,1.6
133 | 132,265.2,2.9,43,12.7
134 | 133,8.4,27.2,2.1,5.7
135 | 134,219.8,33.5,45.1,19.6
136 | 135,36.9,38.6,65.6,10.8
137 | 136,48.3,47,8.5,11.6
138 | 137,25.6,39,9.3,9.5
139 | 138,273.7,28.9,59.7,20.8
140 | 139,43,25.9,20.5,9.6
141 | 140,184.9,43.9,1.7,20.7
142 | 141,73.4,17,12.9,10.9
143 | 142,193.7,35.4,75.6,19.2
144 | 143,220.5,33.2,37.9,20.1
145 | 144,104.6,5.7,34.4,10.4
146 | 145,96.2,14.8,38.9,11.4
147 | 146,140.3,1.9,9,10.3
148 | 147,240.1,7.3,8.7,13.2
149 | 148,243.2,49,44.3,25.4
150 | 149,38,40.3,11.9,10.9
151 | 150,44.7,25.8,20.6,10.1
152 | 151,280.7,13.9,37,16.1
153 | 152,121,8.4,48.7,11.6
154 | 153,197.6,23.3,14.2,16.6
155 | 154,171.3,39.7,37.7,19
156 | 155,187.8,21.1,9.5,15.6
157 | 156,4.1,11.6,5.7,3.2
158 | 157,93.9,43.5,50.5,15.3
159 | 158,149.8,1.3,24.3,10.1
160 | 159,11.7,36.9,45.2,7.3
161 | 160,131.7,18.4,34.6,12.9
162 | 161,172.5,18.1,30.7,14.4
163 | 162,85.7,35.8,49.3,13.3
164 | 163,188.4,18.1,25.6,14.9
165 | 164,163.5,36.8,7.4,18
166 | 165,117.2,14.7,5.4,11.9
167 | 166,234.5,3.4,84.8,11.9
168 | 167,17.9,37.6,21.6,8
169 | 168,206.8,5.2,19.4,12.2
170 | 169,215.4,23.6,57.6,17.1
171 | 170,284.3,10.6,6.4,15
172 | 171,50,11.6,18.4,8.4
173 | 172,164.5,20.9,47.4,14.5
174 | 173,19.6,20.1,17,7.6
175 | 174,168.4,7.1,12.8,11.7
176 | 175,222.4,3.4,13.1,11.5
177 | 176,276.9,48.9,41.8,27
178 | 177,248.4,30.2,20.3,20.2
179 | 178,170.2,7.8,35.2,11.7
180 | 179,276.7,2.3,23.7,11.8
181 | 180,165.6,10,17.6,12.6
182 | 181,156.6,2.6,8.3,10.5
183 | 182,218.5,5.4,27.4,12.2
184 | 183,56.2,5.7,29.7,8.7
185 | 184,287.6,43,71.8,26.2
186 | 185,253.8,21.3,30,17.6
187 | 186,205,45.1,19.6,22.6
188 | 187,139.5,2.1,26.6,10.3
189 | 188,191.1,28.7,18.2,17.3
190 | 189,286,13.9,3.7,15.9
191 | 190,18.7,12.1,23.4,6.7
192 | 191,39.5,41.1,5.8,10.8
193 | 192,75.5,10.8,6,9.9
194 | 193,17.2,4.1,31.6,5.9
195 | 194,166.8,42,3.6,19.6
196 | 195,149.7,35.6,6,17.3
197 | 196,38.2,3.7,13.8,7.6
198 | 197,94.2,4.9,8.1,9.7
199 | 198,177,9.3,6.4,12.8
200 | 199,283.6,42,66.2,25.5
201 | 200,232.1,8.6,8.7,13.4
202 |
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | Age,EstimatedSalary,Purchased
2 | 19,19000,0
3 | 35,20000,0
4 | 26,43000,0
5 | 27,57000,0
6 | 19,76000,0
7 | 27,58000,0
8 | 27,84000,0
9 | 32,150000,1
10 | 25,33000,0
11 | 35,65000,0
12 | 26,80000,0
13 | 26,52000,0
14 | 20,86000,0
15 | 32,18000,0
16 | 18,82000,0
17 | 29,80000,0
18 | 47,25000,1
19 | 45,26000,1
20 | 46,28000,1
21 | 48,29000,1
22 | 45,22000,1
23 | 47,49000,1
24 | 48,41000,1
25 | 45,22000,1
26 | 46,23000,1
27 | 47,20000,1
28 | 49,28000,1
29 | 47,30000,1
30 | 29,43000,0
31 | 31,18000,0
32 | 31,74000,0
33 | 27,137000,1
34 | 21,16000,0
35 | 28,44000,0
36 | 27,90000,0
37 | 35,27000,0
38 | 33,28000,0
39 | 30,49000,0
40 | 26,72000,0
41 | 27,31000,0
42 | 27,17000,0
43 | 33,51000,0
44 | 35,108000,0
45 | 30,15000,0
46 | 28,84000,0
47 | 23,20000,0
48 | 25,79000,0
49 | 27,54000,0
50 | 30,135000,1
51 | 31,89000,0
52 | 24,32000,0
53 | 18,44000,0
54 | 29,83000,0
55 | 35,23000,0
56 | 27,58000,0
57 | 24,55000,0
58 | 23,48000,0
59 | 28,79000,0
60 | 22,18000,0
61 | 32,117000,0
62 | 27,20000,0
63 | 25,87000,0
64 | 23,66000,0
65 | 32,120000,1
66 | 59,83000,0
67 | 24,58000,0
68 | 24,19000,0
69 | 23,82000,0
70 | 22,63000,0
71 | 31,68000,0
72 | 25,80000,0
73 | 24,27000,0
74 | 20,23000,0
75 | 33,113000,0
76 | 32,18000,0
77 | 34,112000,1
78 | 18,52000,0
79 | 22,27000,0
80 | 28,87000,0
81 | 26,17000,0
82 | 30,80000,0
83 | 39,42000,0
84 | 20,49000,0
85 | 35,88000,0
86 | 30,62000,0
87 | 31,118000,1
88 | 24,55000,0
89 | 28,85000,0
90 | 26,81000,0
91 | 35,50000,0
92 | 22,81000,0
93 | 30,116000,0
94 | 26,15000,0
95 | 29,28000,0
96 | 29,83000,0
97 | 35,44000,0
98 | 35,25000,0
99 | 28,123000,1
100 | 35,73000,0
101 | 28,37000,0
102 | 27,88000,0
103 | 28,59000,0
104 | 32,86000,0
105 | 33,149000,1
106 | 19,21000,0
107 | 21,72000,0
108 | 26,35000,0
109 | 27,89000,0
110 | 26,86000,0
111 | 38,80000,0
112 | 39,71000,0
113 | 37,71000,0
114 | 38,61000,0
115 | 37,55000,0
116 | 42,80000,0
117 | 40,57000,0
118 | 35,75000,0
119 | 36,52000,0
120 | 40,59000,0
121 | 41,59000,0
122 | 36,75000,0
123 | 37,72000,0
124 | 40,75000,0
125 | 35,53000,0
126 | 41,51000,0
127 | 39,61000,0
128 | 42,65000,0
129 | 26,32000,0
130 | 30,17000,0
131 | 26,84000,0
132 | 31,58000,0
133 | 33,31000,0
134 | 30,87000,0
135 | 21,68000,0
136 | 28,55000,0
137 | 23,63000,0
138 | 20,82000,0
139 | 30,107000,1
140 | 28,59000,0
141 | 19,25000,0
142 | 19,85000,0
143 | 18,68000,0
144 | 35,59000,0
145 | 30,89000,0
146 | 34,25000,0
147 | 24,89000,0
148 | 27,96000,1
149 | 41,30000,0
150 | 29,61000,0
151 | 20,74000,0
152 | 26,15000,0
153 | 41,45000,0
154 | 31,76000,0
155 | 36,50000,0
156 | 40,47000,0
157 | 31,15000,0
158 | 46,59000,0
159 | 29,75000,0
160 | 26,30000,0
161 | 32,135000,1
162 | 32,100000,1
163 | 25,90000,0
164 | 37,33000,0
165 | 35,38000,0
166 | 33,69000,0
167 | 18,86000,0
168 | 22,55000,0
169 | 35,71000,0
170 | 29,148000,1
171 | 29,47000,0
172 | 21,88000,0
173 | 34,115000,0
174 | 26,118000,0
175 | 34,43000,0
176 | 34,72000,0
177 | 23,28000,0
178 | 35,47000,0
179 | 25,22000,0
180 | 24,23000,0
181 | 31,34000,0
182 | 26,16000,0
183 | 31,71000,0
184 | 32,117000,1
185 | 33,43000,0
186 | 33,60000,0
187 | 31,66000,0
188 | 20,82000,0
189 | 33,41000,0
190 | 35,72000,0
191 | 28,32000,0
192 | 24,84000,0
193 | 19,26000,0
194 | 29,43000,0
195 | 19,70000,0
196 | 28,89000,0
197 | 34,43000,0
198 | 30,79000,0
199 | 20,36000,0
200 | 26,80000,0
201 | 35,22000,0
202 | 35,39000,0
203 | 49,74000,0
204 | 39,134000,1
205 | 41,71000,0
206 | 58,101000,1
207 | 47,47000,0
208 | 55,130000,1
209 | 52,114000,0
210 | 40,142000,1
211 | 46,22000,0
212 | 48,96000,1
213 | 52,150000,1
214 | 59,42000,0
215 | 35,58000,0
216 | 47,43000,0
217 | 60,108000,1
218 | 49,65000,0
219 | 40,78000,0
220 | 46,96000,0
221 | 59,143000,1
222 | 41,80000,0
223 | 35,91000,1
224 | 37,144000,1
225 | 60,102000,1
226 | 35,60000,0
227 | 37,53000,0
228 | 36,126000,1
229 | 56,133000,1
230 | 40,72000,0
231 | 42,80000,1
232 | 35,147000,1
233 | 39,42000,0
234 | 40,107000,1
235 | 49,86000,1
236 | 38,112000,0
237 | 46,79000,1
238 | 40,57000,0
239 | 37,80000,0
240 | 46,82000,0
241 | 53,143000,1
242 | 42,149000,1
243 | 38,59000,0
244 | 50,88000,1
245 | 56,104000,1
246 | 41,72000,0
247 | 51,146000,1
248 | 35,50000,0
249 | 57,122000,1
250 | 41,52000,0
251 | 35,97000,1
252 | 44,39000,0
253 | 37,52000,0
254 | 48,134000,1
255 | 37,146000,1
256 | 50,44000,0
257 | 52,90000,1
258 | 41,72000,0
259 | 40,57000,0
260 | 58,95000,1
261 | 45,131000,1
262 | 35,77000,0
263 | 36,144000,1
264 | 55,125000,1
265 | 35,72000,0
266 | 48,90000,1
267 | 42,108000,1
268 | 40,75000,0
269 | 37,74000,0
270 | 47,144000,1
271 | 40,61000,0
272 | 43,133000,0
273 | 59,76000,1
274 | 60,42000,1
275 | 39,106000,1
276 | 57,26000,1
277 | 57,74000,1
278 | 38,71000,0
279 | 49,88000,1
280 | 52,38000,1
281 | 50,36000,1
282 | 59,88000,1
283 | 35,61000,0
284 | 37,70000,1
285 | 52,21000,1
286 | 48,141000,0
287 | 37,93000,1
288 | 37,62000,0
289 | 48,138000,1
290 | 41,79000,0
291 | 37,78000,1
292 | 39,134000,1
293 | 49,89000,1
294 | 55,39000,1
295 | 37,77000,0
296 | 35,57000,0
297 | 36,63000,0
298 | 42,73000,1
299 | 43,112000,1
300 | 45,79000,0
301 | 46,117000,1
302 | 58,38000,1
303 | 48,74000,1
304 | 37,137000,1
305 | 37,79000,1
306 | 40,60000,0
307 | 42,54000,0
308 | 51,134000,0
309 | 47,113000,1
310 | 36,125000,1
311 | 38,50000,0
312 | 42,70000,0
313 | 39,96000,1
314 | 38,50000,0
315 | 49,141000,1
316 | 39,79000,0
317 | 39,75000,1
318 | 54,104000,1
319 | 35,55000,0
320 | 45,32000,1
321 | 36,60000,0
322 | 52,138000,1
323 | 53,82000,1
324 | 41,52000,0
325 | 48,30000,1
326 | 48,131000,1
327 | 41,60000,0
328 | 41,72000,0
329 | 42,75000,0
330 | 36,118000,1
331 | 47,107000,1
332 | 38,51000,0
333 | 48,119000,1
334 | 42,65000,0
335 | 40,65000,0
336 | 57,60000,1
337 | 36,54000,0
338 | 58,144000,1
339 | 35,79000,0
340 | 38,55000,0
341 | 39,122000,1
342 | 53,104000,1
343 | 35,75000,0
344 | 38,65000,0
345 | 47,51000,1
346 | 47,105000,1
347 | 41,63000,0
348 | 53,72000,1
349 | 54,108000,1
350 | 39,77000,0
351 | 38,61000,0
352 | 38,113000,1
353 | 37,75000,0
354 | 42,90000,1
355 | 37,57000,0
356 | 36,99000,1
357 | 60,34000,1
358 | 54,70000,1
359 | 41,72000,0
360 | 40,71000,1
361 | 42,54000,0
362 | 43,129000,1
363 | 53,34000,1
364 | 47,50000,1
365 | 42,79000,0
366 | 42,104000,1
367 | 59,29000,1
368 | 58,47000,1
369 | 46,88000,1
370 | 38,71000,0
371 | 54,26000,1
372 | 60,46000,1
373 | 60,83000,1
374 | 39,73000,0
375 | 59,130000,1
376 | 37,80000,0
377 | 46,32000,1
378 | 46,74000,0
379 | 42,53000,0
380 | 41,87000,1
381 | 58,23000,1
382 | 42,64000,0
383 | 48,33000,1
384 | 44,139000,1
385 | 49,28000,1
386 | 57,33000,1
387 | 56,60000,1
388 | 49,39000,1
389 | 39,71000,0
390 | 47,34000,1
391 | 48,35000,1
392 | 48,33000,1
393 | 47,23000,1
394 | 45,45000,1
395 | 60,42000,1
396 | 39,59000,0
397 | 46,41000,1
398 | 51,23000,1
399 | 50,20000,1
400 | 36,33000,0
401 | 49,36000,1
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/Wholesale customers data.csv:
--------------------------------------------------------------------------------
1 | Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen
2 | 2,3,12669,9656,7561,214,2674,1338
3 | 2,3,7057,9810,9568,1762,3293,1776
4 | 2,3,6353,8808,7684,2405,3516,7844
5 | 1,3,13265,1196,4221,6404,507,1788
6 | 2,3,22615,5410,7198,3915,1777,5185
7 | 2,3,9413,8259,5126,666,1795,1451
8 | 2,3,12126,3199,6975,480,3140,545
9 | 2,3,7579,4956,9426,1669,3321,2566
10 | 1,3,5963,3648,6192,425,1716,750
11 | 2,3,6006,11093,18881,1159,7425,2098
12 | 2,3,3366,5403,12974,4400,5977,1744
13 | 2,3,13146,1124,4523,1420,549,497
14 | 2,3,31714,12319,11757,287,3881,2931
15 | 2,3,21217,6208,14982,3095,6707,602
16 | 2,3,24653,9465,12091,294,5058,2168
17 | 1,3,10253,1114,3821,397,964,412
18 | 2,3,1020,8816,12121,134,4508,1080
19 | 1,3,5876,6157,2933,839,370,4478
20 | 2,3,18601,6327,10099,2205,2767,3181
21 | 1,3,7780,2495,9464,669,2518,501
22 | 2,3,17546,4519,4602,1066,2259,2124
23 | 1,3,5567,871,2010,3383,375,569
24 | 1,3,31276,1917,4469,9408,2381,4334
25 | 2,3,26373,36423,22019,5154,4337,16523
26 | 2,3,22647,9776,13792,2915,4482,5778
27 | 2,3,16165,4230,7595,201,4003,57
28 | 1,3,9898,961,2861,3151,242,833
29 | 1,3,14276,803,3045,485,100,518
30 | 2,3,4113,20484,25957,1158,8604,5206
31 | 1,3,43088,2100,2609,1200,1107,823
32 | 1,3,18815,3610,11107,1148,2134,2963
33 | 1,3,2612,4339,3133,2088,820,985
34 | 1,3,21632,1318,2886,266,918,405
35 | 1,3,29729,4786,7326,6130,361,1083
36 | 1,3,1502,1979,2262,425,483,395
37 | 2,3,688,5491,11091,833,4239,436
38 | 1,3,29955,4362,5428,1729,862,4626
39 | 2,3,15168,10556,12477,1920,6506,714
40 | 2,3,4591,15729,16709,33,6956,433
41 | 1,3,56159,555,902,10002,212,2916
42 | 1,3,24025,4332,4757,9510,1145,5864
43 | 1,3,19176,3065,5956,2033,2575,2802
44 | 2,3,10850,7555,14961,188,6899,46
45 | 2,3,630,11095,23998,787,9529,72
46 | 2,3,9670,7027,10471,541,4618,65
47 | 2,3,5181,22044,21531,1740,7353,4985
48 | 2,3,3103,14069,21955,1668,6792,1452
49 | 2,3,44466,54259,55571,7782,24171,6465
50 | 2,3,11519,6152,10868,584,5121,1476
51 | 2,3,4967,21412,28921,1798,13583,1163
52 | 1,3,6269,1095,1980,3860,609,2162
53 | 1,3,3347,4051,6996,239,1538,301
54 | 2,3,40721,3916,5876,532,2587,1278
55 | 2,3,491,10473,11532,744,5611,224
56 | 1,3,27329,1449,1947,2436,204,1333
57 | 1,3,5264,3683,5005,1057,2024,1130
58 | 2,3,4098,29892,26866,2616,17740,1340
59 | 2,3,5417,9933,10487,38,7572,1282
60 | 1,3,13779,1970,1648,596,227,436
61 | 1,3,6137,5360,8040,129,3084,1603
62 | 2,3,8590,3045,7854,96,4095,225
63 | 2,3,35942,38369,59598,3254,26701,2017
64 | 2,3,7823,6245,6544,4154,4074,964
65 | 2,3,9396,11601,15775,2896,7677,1295
66 | 1,3,4760,1227,3250,3724,1247,1145
67 | 2,3,85,20959,45828,36,24231,1423
68 | 1,3,9,1534,7417,175,3468,27
69 | 2,3,19913,6759,13462,1256,5141,834
70 | 1,3,2446,7260,3993,5870,788,3095
71 | 1,3,8352,2820,1293,779,656,144
72 | 1,3,16705,2037,3202,10643,116,1365
73 | 1,3,18291,1266,21042,5373,4173,14472
74 | 1,3,4420,5139,2661,8872,1321,181
75 | 2,3,19899,5332,8713,8132,764,648
76 | 2,3,8190,6343,9794,1285,1901,1780
77 | 1,3,20398,1137,3,4407,3,975
78 | 1,3,717,3587,6532,7530,529,894
79 | 2,3,12205,12697,28540,869,12034,1009
80 | 1,3,10766,1175,2067,2096,301,167
81 | 1,3,1640,3259,3655,868,1202,1653
82 | 1,3,7005,829,3009,430,610,529
83 | 2,3,219,9540,14403,283,7818,156
84 | 2,3,10362,9232,11009,737,3537,2342
85 | 1,3,20874,1563,1783,2320,550,772
86 | 2,3,11867,3327,4814,1178,3837,120
87 | 2,3,16117,46197,92780,1026,40827,2944
88 | 2,3,22925,73498,32114,987,20070,903
89 | 1,3,43265,5025,8117,6312,1579,14351
90 | 1,3,7864,542,4042,9735,165,46
91 | 1,3,24904,3836,5330,3443,454,3178
92 | 1,3,11405,596,1638,3347,69,360
93 | 1,3,12754,2762,2530,8693,627,1117
94 | 2,3,9198,27472,32034,3232,18906,5130
95 | 1,3,11314,3090,2062,35009,71,2698
96 | 2,3,5626,12220,11323,206,5038,244
97 | 1,3,3,2920,6252,440,223,709
98 | 2,3,23,2616,8118,145,3874,217
99 | 1,3,403,254,610,774,54,63
100 | 1,3,503,112,778,895,56,132
101 | 1,3,9658,2182,1909,5639,215,323
102 | 2,3,11594,7779,12144,3252,8035,3029
103 | 2,3,1420,10810,16267,1593,6766,1838
104 | 2,3,2932,6459,7677,2561,4573,1386
105 | 1,3,56082,3504,8906,18028,1480,2498
106 | 1,3,14100,2132,3445,1336,1491,548
107 | 1,3,15587,1014,3970,910,139,1378
108 | 2,3,1454,6337,10704,133,6830,1831
109 | 2,3,8797,10646,14886,2471,8969,1438
110 | 2,3,1531,8397,6981,247,2505,1236
111 | 2,3,1406,16729,28986,673,836,3
112 | 1,3,11818,1648,1694,2276,169,1647
113 | 2,3,12579,11114,17569,805,6457,1519
114 | 1,3,19046,2770,2469,8853,483,2708
115 | 1,3,14438,2295,1733,3220,585,1561
116 | 1,3,18044,1080,2000,2555,118,1266
117 | 1,3,11134,793,2988,2715,276,610
118 | 1,3,11173,2521,3355,1517,310,222
119 | 1,3,6990,3880,5380,1647,319,1160
120 | 1,3,20049,1891,2362,5343,411,933
121 | 1,3,8258,2344,2147,3896,266,635
122 | 1,3,17160,1200,3412,2417,174,1136
123 | 1,3,4020,3234,1498,2395,264,255
124 | 1,3,12212,201,245,1991,25,860
125 | 2,3,11170,10769,8814,2194,1976,143
126 | 1,3,36050,1642,2961,4787,500,1621
127 | 1,3,76237,3473,7102,16538,778,918
128 | 1,3,19219,1840,1658,8195,349,483
129 | 2,3,21465,7243,10685,880,2386,2749
130 | 1,3,140,8847,3823,142,1062,3
131 | 1,3,42312,926,1510,1718,410,1819
132 | 1,3,7149,2428,699,6316,395,911
133 | 1,3,2101,589,314,346,70,310
134 | 1,3,14903,2032,2479,576,955,328
135 | 1,3,9434,1042,1235,436,256,396
136 | 1,3,7388,1882,2174,720,47,537
137 | 1,3,6300,1289,2591,1170,199,326
138 | 1,3,4625,8579,7030,4575,2447,1542
139 | 1,3,3087,8080,8282,661,721,36
140 | 1,3,13537,4257,5034,155,249,3271
141 | 1,3,5387,4979,3343,825,637,929
142 | 1,3,17623,4280,7305,2279,960,2616
143 | 1,3,30379,13252,5189,321,51,1450
144 | 1,3,37036,7152,8253,2995,20,3
145 | 1,3,10405,1596,1096,8425,399,318
146 | 1,3,18827,3677,1988,118,516,201
147 | 2,3,22039,8384,34792,42,12591,4430
148 | 1,3,7769,1936,2177,926,73,520
149 | 1,3,9203,3373,2707,1286,1082,526
150 | 1,3,5924,584,542,4052,283,434
151 | 1,3,31812,1433,1651,800,113,1440
152 | 1,3,16225,1825,1765,853,170,1067
153 | 1,3,1289,3328,2022,531,255,1774
154 | 1,3,18840,1371,3135,3001,352,184
155 | 1,3,3463,9250,2368,779,302,1627
156 | 1,3,622,55,137,75,7,8
157 | 2,3,1989,10690,19460,233,11577,2153
158 | 2,3,3830,5291,14855,317,6694,3182
159 | 1,3,17773,1366,2474,3378,811,418
160 | 2,3,2861,6570,9618,930,4004,1682
161 | 2,3,355,7704,14682,398,8077,303
162 | 2,3,1725,3651,12822,824,4424,2157
163 | 1,3,12434,540,283,1092,3,2233
164 | 1,3,15177,2024,3810,2665,232,610
165 | 2,3,5531,15726,26870,2367,13726,446
166 | 2,3,5224,7603,8584,2540,3674,238
167 | 2,3,15615,12653,19858,4425,7108,2379
168 | 2,3,4822,6721,9170,993,4973,3637
169 | 1,3,2926,3195,3268,405,1680,693
170 | 1,3,5809,735,803,1393,79,429
171 | 1,3,5414,717,2155,2399,69,750
172 | 2,3,260,8675,13430,1116,7015,323
173 | 2,3,200,25862,19816,651,8773,6250
174 | 1,3,955,5479,6536,333,2840,707
175 | 2,3,514,7677,19805,937,9836,716
176 | 1,3,286,1208,5241,2515,153,1442
177 | 2,3,2343,7845,11874,52,4196,1697
178 | 1,3,45640,6958,6536,7368,1532,230
179 | 1,3,12759,7330,4533,1752,20,2631
180 | 1,3,11002,7075,4945,1152,120,395
181 | 1,3,3157,4888,2500,4477,273,2165
182 | 1,3,12356,6036,8887,402,1382,2794
183 | 1,3,112151,29627,18148,16745,4948,8550
184 | 1,3,694,8533,10518,443,6907,156
185 | 1,3,36847,43950,20170,36534,239,47943
186 | 1,3,327,918,4710,74,334,11
187 | 1,3,8170,6448,1139,2181,58,247
188 | 1,3,3009,521,854,3470,949,727
189 | 1,3,2438,8002,9819,6269,3459,3
190 | 2,3,8040,7639,11687,2758,6839,404
191 | 2,3,834,11577,11522,275,4027,1856
192 | 1,3,16936,6250,1981,7332,118,64
193 | 1,3,13624,295,1381,890,43,84
194 | 1,3,5509,1461,2251,547,187,409
195 | 2,3,180,3485,20292,959,5618,666
196 | 1,3,7107,1012,2974,806,355,1142
197 | 1,3,17023,5139,5230,7888,330,1755
198 | 1,1,30624,7209,4897,18711,763,2876
199 | 2,1,2427,7097,10391,1127,4314,1468
200 | 1,1,11686,2154,6824,3527,592,697
201 | 1,1,9670,2280,2112,520,402,347
202 | 2,1,3067,13240,23127,3941,9959,731
203 | 2,1,4484,14399,24708,3549,14235,1681
204 | 1,1,25203,11487,9490,5065,284,6854
205 | 1,1,583,685,2216,469,954,18
206 | 1,1,1956,891,5226,1383,5,1328
207 | 2,1,1107,11711,23596,955,9265,710
208 | 1,1,6373,780,950,878,288,285
209 | 2,1,2541,4737,6089,2946,5316,120
210 | 1,1,1537,3748,5838,1859,3381,806
211 | 2,1,5550,12729,16767,864,12420,797
212 | 1,1,18567,1895,1393,1801,244,2100
213 | 2,1,12119,28326,39694,4736,19410,2870
214 | 1,1,7291,1012,2062,1291,240,1775
215 | 1,1,3317,6602,6861,1329,3961,1215
216 | 2,1,2362,6551,11364,913,5957,791
217 | 1,1,2806,10765,15538,1374,5828,2388
218 | 2,1,2532,16599,36486,179,13308,674
219 | 1,1,18044,1475,2046,2532,130,1158
220 | 2,1,18,7504,15205,1285,4797,6372
221 | 1,1,4155,367,1390,2306,86,130
222 | 1,1,14755,899,1382,1765,56,749
223 | 1,1,5396,7503,10646,91,4167,239
224 | 1,1,5041,1115,2856,7496,256,375
225 | 2,1,2790,2527,5265,5612,788,1360
226 | 1,1,7274,659,1499,784,70,659
227 | 1,1,12680,3243,4157,660,761,786
228 | 2,1,20782,5921,9212,1759,2568,1553
229 | 1,1,4042,2204,1563,2286,263,689
230 | 1,1,1869,577,572,950,4762,203
231 | 1,1,8656,2746,2501,6845,694,980
232 | 2,1,11072,5989,5615,8321,955,2137
233 | 1,1,2344,10678,3828,1439,1566,490
234 | 1,1,25962,1780,3838,638,284,834
235 | 1,1,964,4984,3316,937,409,7
236 | 1,1,15603,2703,3833,4260,325,2563
237 | 1,1,1838,6380,2824,1218,1216,295
238 | 1,1,8635,820,3047,2312,415,225
239 | 1,1,18692,3838,593,4634,28,1215
240 | 1,1,7363,475,585,1112,72,216
241 | 1,1,47493,2567,3779,5243,828,2253
242 | 1,1,22096,3575,7041,11422,343,2564
243 | 1,1,24929,1801,2475,2216,412,1047
244 | 1,1,18226,659,2914,3752,586,578
245 | 1,1,11210,3576,5119,561,1682,2398
246 | 1,1,6202,7775,10817,1183,3143,1970
247 | 2,1,3062,6154,13916,230,8933,2784
248 | 1,1,8885,2428,1777,1777,430,610
249 | 1,1,13569,346,489,2077,44,659
250 | 1,1,15671,5279,2406,559,562,572
251 | 1,1,8040,3795,2070,6340,918,291
252 | 1,1,3191,1993,1799,1730,234,710
253 | 2,1,6134,23133,33586,6746,18594,5121
254 | 1,1,6623,1860,4740,7683,205,1693
255 | 1,1,29526,7961,16966,432,363,1391
256 | 1,1,10379,17972,4748,4686,1547,3265
257 | 1,1,31614,489,1495,3242,111,615
258 | 1,1,11092,5008,5249,453,392,373
259 | 1,1,8475,1931,1883,5004,3593,987
260 | 1,1,56083,4563,2124,6422,730,3321
261 | 1,1,53205,4959,7336,3012,967,818
262 | 1,1,9193,4885,2157,327,780,548
263 | 1,1,7858,1110,1094,6818,49,287
264 | 1,1,23257,1372,1677,982,429,655
265 | 1,1,2153,1115,6684,4324,2894,411
266 | 2,1,1073,9679,15445,61,5980,1265
267 | 1,1,5909,23527,13699,10155,830,3636
268 | 2,1,572,9763,22182,2221,4882,2563
269 | 1,1,20893,1222,2576,3975,737,3628
270 | 2,1,11908,8053,19847,1069,6374,698
271 | 1,1,15218,258,1138,2516,333,204
272 | 1,1,4720,1032,975,5500,197,56
273 | 1,1,2083,5007,1563,1120,147,1550
274 | 1,1,514,8323,6869,529,93,1040
275 | 1,3,36817,3045,1493,4802,210,1824
276 | 1,3,894,1703,1841,744,759,1153
277 | 1,3,680,1610,223,862,96,379
278 | 1,3,27901,3749,6964,4479,603,2503
279 | 1,3,9061,829,683,16919,621,139
280 | 1,3,11693,2317,2543,5845,274,1409
281 | 2,3,17360,6200,9694,1293,3620,1721
282 | 1,3,3366,2884,2431,977,167,1104
283 | 2,3,12238,7108,6235,1093,2328,2079
284 | 1,3,49063,3965,4252,5970,1041,1404
285 | 1,3,25767,3613,2013,10303,314,1384
286 | 1,3,68951,4411,12609,8692,751,2406
287 | 1,3,40254,640,3600,1042,436,18
288 | 1,3,7149,2247,1242,1619,1226,128
289 | 1,3,15354,2102,2828,8366,386,1027
290 | 1,3,16260,594,1296,848,445,258
291 | 1,3,42786,286,471,1388,32,22
292 | 1,3,2708,2160,2642,502,965,1522
293 | 1,3,6022,3354,3261,2507,212,686
294 | 1,3,2838,3086,4329,3838,825,1060
295 | 2,2,3996,11103,12469,902,5952,741
296 | 1,2,21273,2013,6550,909,811,1854
297 | 2,2,7588,1897,5234,417,2208,254
298 | 1,2,19087,1304,3643,3045,710,898
299 | 2,2,8090,3199,6986,1455,3712,531
300 | 2,2,6758,4560,9965,934,4538,1037
301 | 1,2,444,879,2060,264,290,259
302 | 2,2,16448,6243,6360,824,2662,2005
303 | 2,2,5283,13316,20399,1809,8752,172
304 | 2,2,2886,5302,9785,364,6236,555
305 | 2,2,2599,3688,13829,492,10069,59
306 | 2,2,161,7460,24773,617,11783,2410
307 | 2,2,243,12939,8852,799,3909,211
308 | 2,2,6468,12867,21570,1840,7558,1543
309 | 1,2,17327,2374,2842,1149,351,925
310 | 1,2,6987,1020,3007,416,257,656
311 | 2,2,918,20655,13567,1465,6846,806
312 | 1,2,7034,1492,2405,12569,299,1117
313 | 1,2,29635,2335,8280,3046,371,117
314 | 2,2,2137,3737,19172,1274,17120,142
315 | 1,2,9784,925,2405,4447,183,297
316 | 1,2,10617,1795,7647,1483,857,1233
317 | 2,2,1479,14982,11924,662,3891,3508
318 | 1,2,7127,1375,2201,2679,83,1059
319 | 1,2,1182,3088,6114,978,821,1637
320 | 1,2,11800,2713,3558,2121,706,51
321 | 2,2,9759,25071,17645,1128,12408,1625
322 | 1,2,1774,3696,2280,514,275,834
323 | 1,2,9155,1897,5167,2714,228,1113
324 | 1,2,15881,713,3315,3703,1470,229
325 | 1,2,13360,944,11593,915,1679,573
326 | 1,2,25977,3587,2464,2369,140,1092
327 | 1,2,32717,16784,13626,60869,1272,5609
328 | 1,2,4414,1610,1431,3498,387,834
329 | 1,2,542,899,1664,414,88,522
330 | 1,2,16933,2209,3389,7849,210,1534
331 | 1,2,5113,1486,4583,5127,492,739
332 | 1,2,9790,1786,5109,3570,182,1043
333 | 2,2,11223,14881,26839,1234,9606,1102
334 | 1,2,22321,3216,1447,2208,178,2602
335 | 2,2,8565,4980,67298,131,38102,1215
336 | 2,2,16823,928,2743,11559,332,3486
337 | 2,2,27082,6817,10790,1365,4111,2139
338 | 1,2,13970,1511,1330,650,146,778
339 | 1,2,9351,1347,2611,8170,442,868
340 | 1,2,3,333,7021,15601,15,550
341 | 1,2,2617,1188,5332,9584,573,1942
342 | 2,3,381,4025,9670,388,7271,1371
343 | 2,3,2320,5763,11238,767,5162,2158
344 | 1,3,255,5758,5923,349,4595,1328
345 | 2,3,1689,6964,26316,1456,15469,37
346 | 1,3,3043,1172,1763,2234,217,379
347 | 1,3,1198,2602,8335,402,3843,303
348 | 2,3,2771,6939,15541,2693,6600,1115
349 | 2,3,27380,7184,12311,2809,4621,1022
350 | 1,3,3428,2380,2028,1341,1184,665
351 | 2,3,5981,14641,20521,2005,12218,445
352 | 1,3,3521,1099,1997,1796,173,995
353 | 2,3,1210,10044,22294,1741,12638,3137
354 | 1,3,608,1106,1533,830,90,195
355 | 2,3,117,6264,21203,228,8682,1111
356 | 1,3,14039,7393,2548,6386,1333,2341
357 | 1,3,190,727,2012,245,184,127
358 | 1,3,22686,134,218,3157,9,548
359 | 2,3,37,1275,22272,137,6747,110
360 | 1,3,759,18664,1660,6114,536,4100
361 | 1,3,796,5878,2109,340,232,776
362 | 1,3,19746,2872,2006,2601,468,503
363 | 1,3,4734,607,864,1206,159,405
364 | 1,3,2121,1601,2453,560,179,712
365 | 1,3,4627,997,4438,191,1335,314
366 | 1,3,2615,873,1524,1103,514,468
367 | 2,3,4692,6128,8025,1619,4515,3105
368 | 1,3,9561,2217,1664,1173,222,447
369 | 1,3,3477,894,534,1457,252,342
370 | 1,3,22335,1196,2406,2046,101,558
371 | 1,3,6211,337,683,1089,41,296
372 | 2,3,39679,3944,4955,1364,523,2235
373 | 1,3,20105,1887,1939,8164,716,790
374 | 1,3,3884,3801,1641,876,397,4829
375 | 2,3,15076,6257,7398,1504,1916,3113
376 | 1,3,6338,2256,1668,1492,311,686
377 | 1,3,5841,1450,1162,597,476,70
378 | 2,3,3136,8630,13586,5641,4666,1426
379 | 1,3,38793,3154,2648,1034,96,1242
380 | 1,3,3225,3294,1902,282,68,1114
381 | 2,3,4048,5164,10391,130,813,179
382 | 1,3,28257,944,2146,3881,600,270
383 | 1,3,17770,4591,1617,9927,246,532
384 | 1,3,34454,7435,8469,2540,1711,2893
385 | 1,3,1821,1364,3450,4006,397,361
386 | 1,3,10683,21858,15400,3635,282,5120
387 | 1,3,11635,922,1614,2583,192,1068
388 | 1,3,1206,3620,2857,1945,353,967
389 | 1,3,20918,1916,1573,1960,231,961
390 | 1,3,9785,848,1172,1677,200,406
391 | 1,3,9385,1530,1422,3019,227,684
392 | 1,3,3352,1181,1328,5502,311,1000
393 | 1,3,2647,2761,2313,907,95,1827
394 | 1,3,518,4180,3600,659,122,654
395 | 1,3,23632,6730,3842,8620,385,819
396 | 1,3,12377,865,3204,1398,149,452
397 | 1,3,9602,1316,1263,2921,841,290
398 | 2,3,4515,11991,9345,2644,3378,2213
399 | 1,3,11535,1666,1428,6838,64,743
400 | 1,3,11442,1032,582,5390,74,247
401 | 1,3,9612,577,935,1601,469,375
402 | 1,3,4446,906,1238,3576,153,1014
403 | 1,3,27167,2801,2128,13223,92,1902
404 | 1,3,26539,4753,5091,220,10,340
405 | 1,3,25606,11006,4604,127,632,288
406 | 1,3,18073,4613,3444,4324,914,715
407 | 1,3,6884,1046,1167,2069,593,378
408 | 1,3,25066,5010,5026,9806,1092,960
409 | 2,3,7362,12844,18683,2854,7883,553
410 | 2,3,8257,3880,6407,1646,2730,344
411 | 1,3,8708,3634,6100,2349,2123,5137
412 | 1,3,6633,2096,4563,1389,1860,1892
413 | 1,3,2126,3289,3281,1535,235,4365
414 | 1,3,97,3605,12400,98,2970,62
415 | 1,3,4983,4859,6633,17866,912,2435
416 | 1,3,5969,1990,3417,5679,1135,290
417 | 2,3,7842,6046,8552,1691,3540,1874
418 | 2,3,4389,10940,10908,848,6728,993
419 | 1,3,5065,5499,11055,364,3485,1063
420 | 2,3,660,8494,18622,133,6740,776
421 | 1,3,8861,3783,2223,633,1580,1521
422 | 1,3,4456,5266,13227,25,6818,1393
423 | 2,3,17063,4847,9053,1031,3415,1784
424 | 1,3,26400,1377,4172,830,948,1218
425 | 2,3,17565,3686,4657,1059,1803,668
426 | 2,3,16980,2884,12232,874,3213,249
427 | 1,3,11243,2408,2593,15348,108,1886
428 | 1,3,13134,9347,14316,3141,5079,1894
429 | 1,3,31012,16687,5429,15082,439,1163
430 | 1,3,3047,5970,4910,2198,850,317
431 | 1,3,8607,1750,3580,47,84,2501
432 | 1,3,3097,4230,16483,575,241,2080
433 | 1,3,8533,5506,5160,13486,1377,1498
434 | 1,3,21117,1162,4754,269,1328,395
435 | 1,3,1982,3218,1493,1541,356,1449
436 | 1,3,16731,3922,7994,688,2371,838
437 | 1,3,29703,12051,16027,13135,182,2204
438 | 1,3,39228,1431,764,4510,93,2346
439 | 2,3,14531,15488,30243,437,14841,1867
440 | 1,3,10290,1981,2232,1038,168,2125
441 | 1,3,2787,1698,2510,65,477,52
442 |
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/4. KNN Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "cabe5af1",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "import seaborn as sns\n",
14 | "%matplotlib inline"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "2ce4e204",
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/html": [
26 | "
\n",
27 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " \n",
44 | " Age \n",
45 | " EstimatedSalary \n",
46 | " Purchased \n",
47 | " \n",
48 | " \n",
49 | " \n",
50 | " \n",
51 | " 0 \n",
52 | " 19 \n",
53 | " 19000 \n",
54 | " 0 \n",
55 | " \n",
56 | " \n",
57 | " 1 \n",
58 | " 35 \n",
59 | " 20000 \n",
60 | " 0 \n",
61 | " \n",
62 | " \n",
63 | " 2 \n",
64 | " 26 \n",
65 | " 43000 \n",
66 | " 0 \n",
67 | " \n",
68 | " \n",
69 | " 3 \n",
70 | " 27 \n",
71 | " 57000 \n",
72 | " 0 \n",
73 | " \n",
74 | " \n",
75 | " 4 \n",
76 | " 19 \n",
77 | " 76000 \n",
78 | " 0 \n",
79 | " \n",
80 | " \n",
81 | "
\n",
82 | "
"
83 | ],
84 | "text/plain": [
85 | " Age EstimatedSalary Purchased\n",
86 | "0 19 19000 0\n",
87 | "1 35 20000 0\n",
88 | "2 26 43000 0\n",
89 | "3 27 57000 0\n",
90 | "4 19 76000 0"
91 | ]
92 | },
93 | "execution_count": 2,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "data = pd.read_csv(\"Social_Network_Ads.csv\")\n",
100 | "data.head()"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 3,
106 | "id": "f6ec42ee",
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "from sklearn.model_selection import train_test_split\n",
111 | "from sklearn.preprocessing import StandardScaler\n",
112 | "\n",
113 | "# Target is the 'Purchased' column; every other column is a feature. Hold out 20% with a fixed seed.\n",
114 | "y = data['Purchased']\n",
115 | "X = data.drop(columns=['Purchased'])\n",
116 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
117 | "# Fit the scaler on the training split only, then apply the same statistics to both splits.\n",
118 | "ss = StandardScaler().fit(X_train)\n",
119 | "X_train_transform, X_test_transform = ss.transform(X_train), ss.transform(X_test)"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 5,
125 | "id": "0d5a3a6c",
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/plain": [
131 | ""
132 | ]
133 | },
134 | "execution_count": 5,
135 | "metadata": {},
136 | "output_type": "execute_result"
137 | },
138 | {
139 | "data": {
140 | "image/png": "\n",
141 | "text/plain": [
142 | ""
143 | ]
144 | },
145 | "metadata": {},
146 | "output_type": "display_data"
147 | }
148 | ],
149 | "source": [
150 | "from sklearn.neighbors import KNeighborsClassifier\n",
151 | "\n",
152 | "# Misclassification rate on the training and test splits for k = 1..14.\n",
153 | "k_values = range(1,15)\n",
154 | "train_error, test_error = [], []\n",
155 | "for k in k_values:\n",
156 | "    knn = KNeighborsClassifier(n_neighbors=k)\n",
157 | "    knn.fit(X_train_transform, y_train)\n",
158 | "    y_pred_train, y_pred_test = knn.predict(X_train_transform), knn.predict(X_test_transform)\n",
159 | "    train_error.append(np.mean(y_train != y_pred_train))\n",
160 | "    test_error.append(np.mean(y_test != y_pred_test))\n",
161 | "# plt.figure(figsize=(10,5))\n",
162 | "for errors, name in ((train_error, \"train\"), (test_error, \"test\")):\n",
163 | "    plt.plot(k_values, errors, label=name)\n",
164 | "plt.xlabel('k Value')\n",
165 | "plt.ylabel('Error')\n",
166 | "plt.legend()"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 7,
172 | "id": "e045c364",
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "data": {
177 | "text/html": [
178 | "KNeighborsClassifier(n_neighbors=3) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
179 | ],
180 | "text/plain": [
181 | "KNeighborsClassifier(n_neighbors=3)"
182 | ]
183 | },
184 | "execution_count": 7,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "# Final model: k = 3\n",
191 | "knn = KNeighborsClassifier(n_neighbors=3)\n",
192 | "knn.fit(X_train_transform,y_train)"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 8,
198 | "id": "80af73d4",
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "0.95"
205 | ]
206 | },
207 | "execution_count": 8,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "from sklearn.metrics import accuracy_score\n",
214 | "y_pred = knn.predict(X_test_transform)\n",
215 | "accuracy_score(y_test,y_pred)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": null,
221 | "id": "623a74cd",
222 | "metadata": {},
223 | "outputs": [],
224 | "source": []
225 | }
226 | ],
227 | "metadata": {
228 | "kernelspec": {
229 | "display_name": "Python 3 (ipykernel)",
230 | "language": "python",
231 | "name": "python3"
232 | },
233 | "language_info": {
234 | "codemirror_mode": {
235 | "name": "ipython",
236 | "version": 3
237 | },
238 | "file_extension": ".py",
239 | "mimetype": "text/x-python",
240 | "name": "python",
241 | "nbconvert_exporter": "python",
242 | "pygments_lexer": "ipython3",
243 | "version": "3.9.12"
244 | }
245 | },
246 | "nbformat": 4,
247 | "nbformat_minor": 5
248 | }
249 |
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/5. Support Vector Machines.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 17,
6 | "id": "2de33bc4",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "import seaborn as sns\n",
14 | "%matplotlib inline"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 18,
20 | "id": "b7e8433c",
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/html": [
26 | "\n",
27 | "\n",
40 | "
\n",
41 | " \n",
42 | " \n",
43 | " \n",
44 | " age \n",
45 | " workclass \n",
46 | " fnlwgt \n",
47 | " education \n",
48 | " education-num \n",
49 | " marital-status \n",
50 | " occupation \n",
51 | " relationship \n",
52 | " race \n",
53 | " sex \n",
54 | " capital-gain \n",
55 | " capital-loss \n",
56 | " hours-per-week \n",
57 | " native-country \n",
58 | " income \n",
59 | " \n",
60 | " \n",
61 | " \n",
62 | " \n",
63 | " 0 \n",
64 | " 39 \n",
65 | " State-gov \n",
66 | " 77516 \n",
67 | " Bachelors \n",
68 | " 13 \n",
69 | " Never-married \n",
70 | " Adm-clerical \n",
71 | " Not-in-family \n",
72 | " White \n",
73 | " Male \n",
74 | " 2174 \n",
75 | " 0 \n",
76 | " 40 \n",
77 | " United-States \n",
78 | " <=50K \n",
79 | " \n",
80 | " \n",
81 | " 1 \n",
82 | " 50 \n",
83 | " Self-emp-not-inc \n",
84 | " 83311 \n",
85 | " Bachelors \n",
86 | " 13 \n",
87 | " Married-civ-spouse \n",
88 | " Exec-managerial \n",
89 | " Husband \n",
90 | " White \n",
91 | " Male \n",
92 | " 0 \n",
93 | " 0 \n",
94 | " 13 \n",
95 | " United-States \n",
96 | " <=50K \n",
97 | " \n",
98 | " \n",
99 | " 2 \n",
100 | " 38 \n",
101 | " Private \n",
102 | " 215646 \n",
103 | " HS-grad \n",
104 | " 9 \n",
105 | " Divorced \n",
106 | " Handlers-cleaners \n",
107 | " Not-in-family \n",
108 | " White \n",
109 | " Male \n",
110 | " 0 \n",
111 | " 0 \n",
112 | " 40 \n",
113 | " United-States \n",
114 | " <=50K \n",
115 | " \n",
116 | " \n",
117 | " 3 \n",
118 | " 53 \n",
119 | " Private \n",
120 | " 234721 \n",
121 | " 11th \n",
122 | " 7 \n",
123 | " Married-civ-spouse \n",
124 | " Handlers-cleaners \n",
125 | " Husband \n",
126 | " Black \n",
127 | " Male \n",
128 | " 0 \n",
129 | " 0 \n",
130 | " 40 \n",
131 | " United-States \n",
132 | " <=50K \n",
133 | " \n",
134 | " \n",
135 | " 4 \n",
136 | " 28 \n",
137 | " Private \n",
138 | " 338409 \n",
139 | " Bachelors \n",
140 | " 13 \n",
141 | " Married-civ-spouse \n",
142 | " Prof-specialty \n",
143 | " Wife \n",
144 | " Black \n",
145 | " Female \n",
146 | " 0 \n",
147 | " 0 \n",
148 | " 40 \n",
149 | " Cuba \n",
150 | " <=50K \n",
151 | " \n",
152 | " \n",
153 | "
\n",
154 | "
"
155 | ],
156 | "text/plain": [
157 | " age workclass fnlwgt education education-num \\\n",
158 | "0 39 State-gov 77516 Bachelors 13 \n",
159 | "1 50 Self-emp-not-inc 83311 Bachelors 13 \n",
160 | "2 38 Private 215646 HS-grad 9 \n",
161 | "3 53 Private 234721 11th 7 \n",
162 | "4 28 Private 338409 Bachelors 13 \n",
163 | "\n",
164 | " marital-status occupation relationship race sex \\\n",
165 | "0 Never-married Adm-clerical Not-in-family White Male \n",
166 | "1 Married-civ-spouse Exec-managerial Husband White Male \n",
167 | "2 Divorced Handlers-cleaners Not-in-family White Male \n",
168 | "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n",
169 | "4 Married-civ-spouse Prof-specialty Wife Black Female \n",
170 | "\n",
171 | " capital-gain capital-loss hours-per-week native-country income \n",
172 | "0 2174 0 40 United-States <=50K \n",
173 | "1 0 0 13 United-States <=50K \n",
174 | "2 0 0 40 United-States <=50K \n",
175 | "3 0 0 40 United-States <=50K \n",
176 | "4 0 0 40 Cuba <=50K "
177 | ]
178 | },
179 | "execution_count": 18,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "df = pd.read_csv(\"income_evaluation.csv\")\n",
186 | "df.head()"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 19,
192 | "id": "78303d81",
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "data": {
197 | "text/plain": [
198 | "Index(['age', ' workclass', ' fnlwgt', ' education', ' education-num',\n",
199 | " ' marital-status', ' occupation', ' relationship', ' race', ' sex',\n",
200 | " ' capital-gain', ' capital-loss', ' hours-per-week', ' native-country',\n",
201 | " ' income'],\n",
202 | " dtype='object')"
203 | ]
204 | },
205 | "execution_count": 19,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "df.columns"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 20,
217 | "id": "4d28eef5",
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "data": {
222 | "text/plain": [
223 | " <=50K 24720\n",
224 | " >50K 7841\n",
225 | "Name: income, dtype: int64"
226 | ]
227 | },
228 | "execution_count": 20,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "df[' income'].value_counts()"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 21,
240 | "id": "3473c261",
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/plain": [
246 | "array([' Never-married', ' Married-civ-spouse', ' Divorced',\n",
247 | " ' Married-spouse-absent', ' Separated', ' Married-AF-spouse',\n",
248 | " ' Widowed'], dtype=object)"
249 | ]
250 | },
251 | "execution_count": 21,
252 | "metadata": {},
253 | "output_type": "execute_result"
254 | }
255 | ],
256 | "source": [
257 | "df[' marital-status'].unique()"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 22,
263 | "id": "34e1f61d",
264 | "metadata": {},
265 | "outputs": [
266 | {
267 | "data": {
268 | "text/plain": [
269 | "['age',\n",
270 | " 'workclass',\n",
271 | " 'fnlwgt',\n",
272 | " 'education',\n",
273 | " 'education-num',\n",
274 | " 'marital-status',\n",
275 | " 'occupation',\n",
276 | " 'relationship',\n",
277 | " 'race',\n",
278 | " 'sex',\n",
279 | " 'capital-gain',\n",
280 | " 'capital-loss',\n",
281 | " 'hours-per-week',\n",
282 | " 'native-country',\n",
283 | " 'income']"
284 | ]
285 | },
286 | "execution_count": 22,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "col_names = df.columns\n",
293 | "col_names = [v.strip() for v in col_names]\n",
294 | "col_names"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 23,
300 | "id": "4b509bd8",
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "df.columns = col_names\n",
305 | "df.drop(columns=\"fnlwgt\",inplace=True)"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 24,
311 | "id": "cfd17372",
312 | "metadata": {},
313 | "outputs": [
314 | {
315 | "data": {
316 | "text/html": [
317 | "\n",
318 | "\n",
331 | "
\n",
332 | " \n",
333 | " \n",
334 | " \n",
335 | " age \n",
336 | " workclass \n",
337 | " education \n",
338 | " education-num \n",
339 | " marital-status \n",
340 | " occupation \n",
341 | " relationship \n",
342 | " race \n",
343 | " sex \n",
344 | " capital-gain \n",
345 | " capital-loss \n",
346 | " hours-per-week \n",
347 | " native-country \n",
348 | " income \n",
349 | " \n",
350 | " \n",
351 | " \n",
352 | " \n",
353 | " 0 \n",
354 | " 39 \n",
355 | " State-gov \n",
356 | " Bachelors \n",
357 | " 13 \n",
358 | " Never-married \n",
359 | " Adm-clerical \n",
360 | " Not-in-family \n",
361 | " White \n",
362 | " Male \n",
363 | " 2174 \n",
364 | " 0 \n",
365 | " 40 \n",
366 | " United-States \n",
367 | " <=50K \n",
368 | " \n",
369 | " \n",
370 | " 1 \n",
371 | " 50 \n",
372 | " Self-emp-not-inc \n",
373 | " Bachelors \n",
374 | " 13 \n",
375 | " Married-civ-spouse \n",
376 | " Exec-managerial \n",
377 | " Husband \n",
378 | " White \n",
379 | " Male \n",
380 | " 0 \n",
381 | " 0 \n",
382 | " 13 \n",
383 | " United-States \n",
384 | " <=50K \n",
385 | " \n",
386 | " \n",
387 | " 2 \n",
388 | " 38 \n",
389 | " Private \n",
390 | " HS-grad \n",
391 | " 9 \n",
392 | " Divorced \n",
393 | " Handlers-cleaners \n",
394 | " Not-in-family \n",
395 | " White \n",
396 | " Male \n",
397 | " 0 \n",
398 | " 0 \n",
399 | " 40 \n",
400 | " United-States \n",
401 | " <=50K \n",
402 | " \n",
403 | " \n",
404 | " 3 \n",
405 | " 53 \n",
406 | " Private \n",
407 | " 11th \n",
408 | " 7 \n",
409 | " Married-civ-spouse \n",
410 | " Handlers-cleaners \n",
411 | " Husband \n",
412 | " Black \n",
413 | " Male \n",
414 | " 0 \n",
415 | " 0 \n",
416 | " 40 \n",
417 | " United-States \n",
418 | " <=50K \n",
419 | " \n",
420 | " \n",
421 | " 4 \n",
422 | " 28 \n",
423 | " Private \n",
424 | " Bachelors \n",
425 | " 13 \n",
426 | " Married-civ-spouse \n",
427 | " Prof-specialty \n",
428 | " Wife \n",
429 | " Black \n",
430 | " Female \n",
431 | " 0 \n",
432 | " 0 \n",
433 | " 40 \n",
434 | " Cuba \n",
435 | " <=50K \n",
436 | " \n",
437 | " \n",
438 | "
\n",
439 | "
"
440 | ],
441 | "text/plain": [
442 | " age workclass education education-num marital-status \\\n",
443 | "0 39 State-gov Bachelors 13 Never-married \n",
444 | "1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n",
445 | "2 38 Private HS-grad 9 Divorced \n",
446 | "3 53 Private 11th 7 Married-civ-spouse \n",
447 | "4 28 Private Bachelors 13 Married-civ-spouse \n",
448 | "\n",
449 | " occupation relationship race sex capital-gain \\\n",
450 | "0 Adm-clerical Not-in-family White Male 2174 \n",
451 | "1 Exec-managerial Husband White Male 0 \n",
452 | "2 Handlers-cleaners Not-in-family White Male 0 \n",
453 | "3 Handlers-cleaners Husband Black Male 0 \n",
454 | "4 Prof-specialty Wife Black Female 0 \n",
455 | "\n",
456 | " capital-loss hours-per-week native-country income \n",
457 | "0 0 40 United-States <=50K \n",
458 | "1 0 13 United-States <=50K \n",
459 | "2 0 40 United-States <=50K \n",
460 | "3 0 40 United-States <=50K \n",
461 | "4 0 40 Cuba <=50K "
462 | ]
463 | },
464 | "execution_count": 24,
465 | "metadata": {},
466 | "output_type": "execute_result"
467 | }
468 | ],
469 | "source": [
470 | "df.head()"
471 | ]
472 | },
473 | {
474 | "cell_type": "code",
475 | "execution_count": 25,
476 | "id": "b700e726",
477 | "metadata": {},
478 | "outputs": [
479 | {
480 | "data": {
481 | "text/plain": [
482 | "age 0\n",
483 | "workclass 0\n",
484 | "education 0\n",
485 | "education-num 0\n",
486 | "marital-status 0\n",
487 | "occupation 0\n",
488 | "relationship 0\n",
489 | "race 0\n",
490 | "sex 0\n",
491 | "capital-gain 0\n",
492 | "capital-loss 0\n",
493 | "hours-per-week 0\n",
494 | "native-country 0\n",
495 | "income 0\n",
496 | "dtype: int64"
497 | ]
498 | },
499 | "execution_count": 25,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "df.isnull().sum()"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 26,
511 | "id": "eacc0e26",
512 | "metadata": {},
513 | "outputs": [],
514 | "source": [
515 | "bins, labels = [16,24,64,90], ['young','adult','old']\n",
516 | "# Bucket age with the edges above, then add a 0/1 flag for the ' >50K' class (values keep a leading space).\n",
517 | "df['age_types'] = pd.cut(df['age'], bins=bins, labels=labels)\n",
518 | "df['income_num'] = (df['income'] == \" >50K\").astype('int16')"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": 27,
524 | "id": "719aaa18",
525 | "metadata": {},
526 | "outputs": [
527 | {
528 | "data": {
529 | "text/html": [
530 | "\n",
531 | "\n",
544 | "
\n",
545 | " \n",
546 | " \n",
547 | " \n",
548 | " age \n",
549 | " workclass \n",
550 | " education \n",
551 | " education-num \n",
552 | " marital-status \n",
553 | " occupation \n",
554 | " relationship \n",
555 | " race \n",
556 | " sex \n",
557 | " capital-gain \n",
558 | " capital-loss \n",
559 | " hours-per-week \n",
560 | " native-country \n",
561 | " income \n",
562 | " age_types \n",
563 | " income_num \n",
564 | " \n",
565 | " \n",
566 | " \n",
567 | " \n",
568 | " 0 \n",
569 | " 39 \n",
570 | " State-gov \n",
571 | " Bachelors \n",
572 | " 13 \n",
573 | " Never-married \n",
574 | " Adm-clerical \n",
575 | " Not-in-family \n",
576 | " White \n",
577 | " Male \n",
578 | " 2174 \n",
579 | " 0 \n",
580 | " 40 \n",
581 | " United-States \n",
582 | " <=50K \n",
583 | " adult \n",
584 | " 0 \n",
585 | " \n",
586 | " \n",
587 | " 1 \n",
588 | " 50 \n",
589 | " Self-emp-not-inc \n",
590 | " Bachelors \n",
591 | " 13 \n",
592 | " Married-civ-spouse \n",
593 | " Exec-managerial \n",
594 | " Husband \n",
595 | " White \n",
596 | " Male \n",
597 | " 0 \n",
598 | " 0 \n",
599 | " 13 \n",
600 | " United-States \n",
601 | " <=50K \n",
602 | " adult \n",
603 | " 0 \n",
604 | " \n",
605 | " \n",
606 | " 2 \n",
607 | " 38 \n",
608 | " Private \n",
609 | " HS-grad \n",
610 | " 9 \n",
611 | " Divorced \n",
612 | " Handlers-cleaners \n",
613 | " Not-in-family \n",
614 | " White \n",
615 | " Male \n",
616 | " 0 \n",
617 | " 0 \n",
618 | " 40 \n",
619 | " United-States \n",
620 | " <=50K \n",
621 | " adult \n",
622 | " 0 \n",
623 | " \n",
624 | " \n",
625 | " 3 \n",
626 | " 53 \n",
627 | " Private \n",
628 | " 11th \n",
629 | " 7 \n",
630 | " Married-civ-spouse \n",
631 | " Handlers-cleaners \n",
632 | " Husband \n",
633 | " Black \n",
634 | " Male \n",
635 | " 0 \n",
636 | " 0 \n",
637 | " 40 \n",
638 | " United-States \n",
639 | " <=50K \n",
640 | " adult \n",
641 | " 0 \n",
642 | " \n",
643 | " \n",
644 | " 4 \n",
645 | " 28 \n",
646 | " Private \n",
647 | " Bachelors \n",
648 | " 13 \n",
649 | " Married-civ-spouse \n",
650 | " Prof-specialty \n",
651 | " Wife \n",
652 | " Black \n",
653 | " Female \n",
654 | " 0 \n",
655 | " 0 \n",
656 | " 40 \n",
657 | " Cuba \n",
658 | " <=50K \n",
659 | " adult \n",
660 | " 0 \n",
661 | " \n",
662 | " \n",
663 | "
\n",
664 | "
"
665 | ],
666 | "text/plain": [
667 | " age workclass education education-num marital-status \\\n",
668 | "0 39 State-gov Bachelors 13 Never-married \n",
669 | "1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n",
670 | "2 38 Private HS-grad 9 Divorced \n",
671 | "3 53 Private 11th 7 Married-civ-spouse \n",
672 | "4 28 Private Bachelors 13 Married-civ-spouse \n",
673 | "\n",
674 | " occupation relationship race sex capital-gain \\\n",
675 | "0 Adm-clerical Not-in-family White Male 2174 \n",
676 | "1 Exec-managerial Husband White Male 0 \n",
677 | "2 Handlers-cleaners Not-in-family White Male 0 \n",
678 | "3 Handlers-cleaners Husband Black Male 0 \n",
679 | "4 Prof-specialty Wife Black Female 0 \n",
680 | "\n",
681 | " capital-loss hours-per-week native-country income age_types income_num \n",
682 | "0 0 40 United-States <=50K adult 0 \n",
683 | "1 0 13 United-States <=50K adult 0 \n",
684 | "2 0 40 United-States <=50K adult 0 \n",
685 | "3 0 40 United-States <=50K adult 0 \n",
686 | "4 0 40 Cuba <=50K adult 0 "
687 | ]
688 | },
689 | "execution_count": 27,
690 | "metadata": {},
691 | "output_type": "execute_result"
692 | }
693 | ],
694 | "source": [
695 | "df.head()"
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 28,
701 | "id": "01261068",
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "# Values keep the CSV's leading space (' ?'), so strip before matching the missing-value marker.\n",
706 | "for col in ['workclass', 'occupation', 'native-country']:\n",
707 | "    df.loc[df[col].str.strip() == '?', col] = np.nan"
708 | ]
709 | },
710 | {
711 | "cell_type": "code",
712 | "execution_count": 32,
713 | "id": "d2de6659",
714 | "metadata": {},
715 | "outputs": [],
716 | "source": [
717 | "df = df.dropna(axis=0).reset_index(drop=True)  # drop incomplete rows; axis=1 would delete whole columns"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 35,
723 | "id": "2fa5c0d5",
724 | "metadata": {},
725 | "outputs": [],
726 | "source": [
727 | "from sklearn.preprocessing import LabelEncoder\n",
728 | "def label_encoder(a):\n",
729 | "    # Overwrite column `a` of the global df with integer codes from a fresh LabelEncoder.\n",
730 | "    df[a] = LabelEncoder().fit_transform(df[a])\n",
731 | "label_list = ['workclass', 'education', 'marital-status', 'occupation', 'relationship',\n",
732 | "              'race', 'sex', 'native-country', 'income']\n",
733 | "for i in label_list:\n",
734 | "    label_encoder(i)"
735 | ]
736 | },
737 | {
738 | "cell_type": "code",
739 | "execution_count": 36,
740 | "id": "2522fdf5",
741 | "metadata": {},
742 | "outputs": [],
743 | "source": [
744 | "from sklearn.preprocessing import MinMaxScaler"
745 | ]
746 | },
747 | {
748 | "cell_type": "code",
749 | "execution_count": 37,
750 | "id": "17a89425",
751 | "metadata": {},
752 | "outputs": [],
753 | "source": [
754 | "scaler = MinMaxScaler()"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": 38,
760 | "id": "87613583",
761 | "metadata": {},
762 | "outputs": [
763 | {
764 | "data": {
765 | "text/html": [
766 | "MinMaxScaler() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
767 | ],
768 | "text/plain": [
769 | "MinMaxScaler()"
770 | ]
771 | },
772 | "execution_count": 38,
773 | "metadata": {},
774 | "output_type": "execute_result"
775 | }
776 | ],
777 | "source": [
778 | "scaler.fit(df.drop(['income','age_types','income_num'],axis=1))"
779 | ]
780 | },
781 | {
782 | "cell_type": "code",
783 | "execution_count": 39,
784 | "id": "34ed8a08",
785 | "metadata": {},
786 | "outputs": [],
787 | "source": [
788 | "scaled_features = scaler.transform(df.drop(['income','age_types','income_num'],axis=1))"
789 | ]
790 | },
791 | {
792 | "cell_type": "code",
793 | "execution_count": 40,
794 | "id": "80d9d538",
795 | "metadata": {},
796 | "outputs": [],
797 | "source": [
798 | "columns=['age', 'workclass', 'education', 'education_num', 'marital_status',\n",
799 | " 'occupation', 'relationship', 'race', 'sex', 'capital_gain',\n",
800 | " 'capital_loss', 'hours_per_week', 'native_country']"
801 | ]
802 | },
803 | {
804 | "cell_type": "code",
805 | "execution_count": 41,
806 | "id": "b1023508",
807 | "metadata": {},
808 | "outputs": [
809 | {
810 | "data": {
811 | "text/html": [
812 | "\n",
813 | "\n",
826 | "
\n",
827 | " \n",
828 | " \n",
829 | " \n",
830 | " age \n",
831 | " workclass \n",
832 | " education \n",
833 | " education_num \n",
834 | " marital_status \n",
835 | " occupation \n",
836 | " relationship \n",
837 | " race \n",
838 | " sex \n",
839 | " capital_gain \n",
840 | " capital_loss \n",
841 | " hours_per_week \n",
842 | " native_country \n",
843 | " \n",
844 | " \n",
845 | " \n",
846 | " \n",
847 | " 0 \n",
848 | " 0.301370 \n",
849 | " 0.875 \n",
850 | " 0.600000 \n",
851 | " 0.800000 \n",
852 | " 0.666667 \n",
853 | " 0.071429 \n",
854 | " 0.2 \n",
855 | " 1.0 \n",
856 | " 1.0 \n",
857 | " 0.02174 \n",
858 | " 0.0 \n",
859 | " 0.397959 \n",
860 | " 0.951220 \n",
861 | " \n",
862 | " \n",
863 | " 1 \n",
864 | " 0.452055 \n",
865 | " 0.750 \n",
866 | " 0.600000 \n",
867 | " 0.800000 \n",
868 | " 0.333333 \n",
869 | " 0.285714 \n",
870 | " 0.0 \n",
871 | " 1.0 \n",
872 | " 1.0 \n",
873 | " 0.00000 \n",
874 | " 0.0 \n",
875 | " 0.122449 \n",
876 | " 0.951220 \n",
877 | " \n",
878 | " \n",
879 | " 2 \n",
880 | " 0.287671 \n",
881 | " 0.500 \n",
882 | " 0.733333 \n",
883 | " 0.533333 \n",
884 | " 0.000000 \n",
885 | " 0.428571 \n",
886 | " 0.2 \n",
887 | " 1.0 \n",
888 | " 1.0 \n",
889 | " 0.00000 \n",
890 | " 0.0 \n",
891 | " 0.397959 \n",
892 | " 0.951220 \n",
893 | " \n",
894 | " \n",
895 | " 3 \n",
896 | " 0.493151 \n",
897 | " 0.500 \n",
898 | " 0.066667 \n",
899 | " 0.400000 \n",
900 | " 0.333333 \n",
901 | " 0.428571 \n",
902 | " 0.0 \n",
903 | " 0.5 \n",
904 | " 1.0 \n",
905 | " 0.00000 \n",
906 | " 0.0 \n",
907 | " 0.397959 \n",
908 | " 0.951220 \n",
909 | " \n",
910 | " \n",
911 | " 4 \n",
912 | " 0.150685 \n",
913 | " 0.500 \n",
914 | " 0.600000 \n",
915 | " 0.800000 \n",
916 | " 0.333333 \n",
917 | " 0.714286 \n",
918 | " 1.0 \n",
919 | " 0.5 \n",
920 | " 0.0 \n",
921 | " 0.00000 \n",
922 | " 0.0 \n",
923 | " 0.397959 \n",
924 | " 0.121951 \n",
925 | " \n",
926 | " \n",
927 | "
\n",
928 | "
"
929 | ],
930 | "text/plain": [
931 | " age workclass education education_num marital_status occupation \\\n",
932 | "0 0.301370 0.875 0.600000 0.800000 0.666667 0.071429 \n",
933 | "1 0.452055 0.750 0.600000 0.800000 0.333333 0.285714 \n",
934 | "2 0.287671 0.500 0.733333 0.533333 0.000000 0.428571 \n",
935 | "3 0.493151 0.500 0.066667 0.400000 0.333333 0.428571 \n",
936 | "4 0.150685 0.500 0.600000 0.800000 0.333333 0.714286 \n",
937 | "\n",
938 | " relationship race sex capital_gain capital_loss hours_per_week \\\n",
939 | "0 0.2 1.0 1.0 0.02174 0.0 0.397959 \n",
940 | "1 0.0 1.0 1.0 0.00000 0.0 0.122449 \n",
941 | "2 0.2 1.0 1.0 0.00000 0.0 0.397959 \n",
942 | "3 0.0 0.5 1.0 0.00000 0.0 0.397959 \n",
943 | "4 1.0 0.5 0.0 0.00000 0.0 0.397959 \n",
944 | "\n",
945 | " native_country \n",
946 | "0 0.951220 \n",
947 | "1 0.951220 \n",
948 | "2 0.951220 \n",
949 | "3 0.951220 \n",
950 | "4 0.121951 "
951 | ]
952 | },
953 | "execution_count": 41,
954 | "metadata": {},
955 | "output_type": "execute_result"
956 | }
957 | ],
958 | "source": [
959 | "df_scaled = pd.DataFrame(scaled_features,columns=columns)\n",
960 | "df_scaled.head()"
961 | ]
962 | },
963 | {
964 | "cell_type": "code",
965 | "execution_count": 43,
966 | "id": "013bb6eb",
967 | "metadata": {},
968 | "outputs": [
969 | {
970 | "name": "stdout",
971 | "output_type": "stream",
972 | "text": [
973 | "Train Score: 0.8301671378391384\n",
974 | "Test Score: 0.8332987336516504\n"
975 | ]
976 | }
977 | ],
978 | "source": [
979 | "from imblearn.combine import SMOTETomek\n",
980 | "from imblearn.under_sampling import NearMiss\n",
981 | "from imblearn.pipeline import Pipeline\n",
982 | "\n",
983 | "X = df_scaled\n",
984 | "y= df.income\n",
985 | "\n",
986 | "from sklearn.model_selection import train_test_split\n",
987 | "# Split BEFORE resampling: resampling the full data first leaks synthetic neighbours of test rows into training.\n",
988 | "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.20,random_state=101,shuffle=True,stratify=y)\n",
989 | "from sklearn.model_selection import cross_val_score\n",
990 | "\n",
991 | "from sklearn.svm import SVC\n",
992 | "# Oversampling for the imbalanced target; inside the pipeline it is applied only to the rows being fitted.\n",
993 | "smk = SMOTETomek(random_state=42)\n",
994 | "svc = Pipeline([('resample', smk), ('svc', SVC(random_state = 101))])\n",
995 | "accuracies = cross_val_score(svc, X_train, y_train, cv=5)\n",
996 | "svc.fit(X_train,y_train)\n",
997 | "\n",
998 | "print(\"Train Score:\",np.mean(accuracies))\n",
999 | "print(\"Test Score:\",svc.score(X_test,y_test))"
1000 | ]
1001 | },
1002 | {
1003 | "cell_type": "code",
1004 | "execution_count": null,
1005 | "id": "40909656",
1006 | "metadata": {},
1007 | "outputs": [],
1008 | "source": []
1009 | }
1010 | ],
1011 | "metadata": {
1012 | "kernelspec": {
1013 | "display_name": "Python 3 (ipykernel)",
1014 | "language": "python",
1015 | "name": "python3"
1016 | },
1017 | "language_info": {
1018 | "codemirror_mode": {
1019 | "name": "ipython",
1020 | "version": 3
1021 | },
1022 | "file_extension": ".py",
1023 | "mimetype": "text/x-python",
1024 | "name": "python",
1025 | "nbconvert_exporter": "python",
1026 | "pygments_lexer": "ipython3",
1027 | "version": "3.9.12"
1028 | }
1029 | },
1030 | "nbformat": 4,
1031 | "nbformat_minor": 5
1032 | }
1033 |
--------------------------------------------------------------------------------
/1.machine-learning-fundamentals/7. XgBoost.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "b5bfd015",
6 | "metadata": {},
7 | "source": [
8 | "# XGBoost"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "33ac4a4b",
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "Requirement already satisfied: xgboost in /Users/nachikethpro/opt/miniconda3/lib/python3.9/site-packages (1.7.5)\n",
22 | "Requirement already satisfied: numpy in /Users/nachikethpro/opt/miniconda3/lib/python3.9/site-packages (from xgboost) (1.24.1)\n",
23 | "Requirement already satisfied: scipy in /Users/nachikethpro/opt/miniconda3/lib/python3.9/site-packages (from xgboost) (1.10.0)\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "!pip install xgboost"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "id": "2de33bc4",
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "import pandas as pd\n",
39 | "import numpy as np\n",
40 | "import matplotlib.pyplot as plt\n",
41 | "import seaborn as sns\n",
42 | "%matplotlib inline"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 3,
48 | "id": "b7e8433c",
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/html": [
54 | "\n",
55 | "\n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | " \n",
72 | " age \n",
73 | " workclass \n",
74 | " fnlwgt \n",
75 | " education \n",
76 | " education-num \n",
77 | " marital-status \n",
78 | " occupation \n",
79 | " relationship \n",
80 | " race \n",
81 | " sex \n",
82 | " capital-gain \n",
83 | " capital-loss \n",
84 | " hours-per-week \n",
85 | " native-country \n",
86 | " income \n",
87 | " \n",
88 | " \n",
89 | " \n",
90 | " \n",
91 | " 0 \n",
92 | " 39 \n",
93 | " State-gov \n",
94 | " 77516 \n",
95 | " Bachelors \n",
96 | " 13 \n",
97 | " Never-married \n",
98 | " Adm-clerical \n",
99 | " Not-in-family \n",
100 | " White \n",
101 | " Male \n",
102 | " 2174 \n",
103 | " 0 \n",
104 | " 40 \n",
105 | " United-States \n",
106 | " <=50K \n",
107 | " \n",
108 | " \n",
109 | " 1 \n",
110 | " 50 \n",
111 | " Self-emp-not-inc \n",
112 | " 83311 \n",
113 | " Bachelors \n",
114 | " 13 \n",
115 | " Married-civ-spouse \n",
116 | " Exec-managerial \n",
117 | " Husband \n",
118 | " White \n",
119 | " Male \n",
120 | " 0 \n",
121 | " 0 \n",
122 | " 13 \n",
123 | " United-States \n",
124 | " <=50K \n",
125 | " \n",
126 | " \n",
127 | " 2 \n",
128 | " 38 \n",
129 | " Private \n",
130 | " 215646 \n",
131 | " HS-grad \n",
132 | " 9 \n",
133 | " Divorced \n",
134 | " Handlers-cleaners \n",
135 | " Not-in-family \n",
136 | " White \n",
137 | " Male \n",
138 | " 0 \n",
139 | " 0 \n",
140 | " 40 \n",
141 | " United-States \n",
142 | " <=50K \n",
143 | " \n",
144 | " \n",
145 | " 3 \n",
146 | " 53 \n",
147 | " Private \n",
148 | " 234721 \n",
149 | " 11th \n",
150 | " 7 \n",
151 | " Married-civ-spouse \n",
152 | " Handlers-cleaners \n",
153 | " Husband \n",
154 | " Black \n",
155 | " Male \n",
156 | " 0 \n",
157 | " 0 \n",
158 | " 40 \n",
159 | " United-States \n",
160 | " <=50K \n",
161 | " \n",
162 | " \n",
163 | " 4 \n",
164 | " 28 \n",
165 | " Private \n",
166 | " 338409 \n",
167 | " Bachelors \n",
168 | " 13 \n",
169 | " Married-civ-spouse \n",
170 | " Prof-specialty \n",
171 | " Wife \n",
172 | " Black \n",
173 | " Female \n",
174 | " 0 \n",
175 | " 0 \n",
176 | " 40 \n",
177 | " Cuba \n",
178 | " <=50K \n",
179 | " \n",
180 | " \n",
181 | "
\n",
182 | "
"
183 | ],
184 | "text/plain": [
185 | " age workclass fnlwgt education education-num \\\n",
186 | "0 39 State-gov 77516 Bachelors 13 \n",
187 | "1 50 Self-emp-not-inc 83311 Bachelors 13 \n",
188 | "2 38 Private 215646 HS-grad 9 \n",
189 | "3 53 Private 234721 11th 7 \n",
190 | "4 28 Private 338409 Bachelors 13 \n",
191 | "\n",
192 | " marital-status occupation relationship race sex \\\n",
193 | "0 Never-married Adm-clerical Not-in-family White Male \n",
194 | "1 Married-civ-spouse Exec-managerial Husband White Male \n",
195 | "2 Divorced Handlers-cleaners Not-in-family White Male \n",
196 | "3 Married-civ-spouse Handlers-cleaners Husband Black Male \n",
197 | "4 Married-civ-spouse Prof-specialty Wife Black Female \n",
198 | "\n",
199 | " capital-gain capital-loss hours-per-week native-country income \n",
200 | "0 2174 0 40 United-States <=50K \n",
201 | "1 0 0 13 United-States <=50K \n",
202 | "2 0 0 40 United-States <=50K \n",
203 | "3 0 0 40 United-States <=50K \n",
204 | "4 0 0 40 Cuba <=50K "
205 | ]
206 | },
207 | "execution_count": 3,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "df = pd.read_csv(\"income_evaluation.csv\")\n",
214 | "df.head()"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 4,
220 | "id": "78303d81",
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "data": {
225 | "text/plain": [
226 | "Index(['age', ' workclass', ' fnlwgt', ' education', ' education-num',\n",
227 | " ' marital-status', ' occupation', ' relationship', ' race', ' sex',\n",
228 | " ' capital-gain', ' capital-loss', ' hours-per-week', ' native-country',\n",
229 | " ' income'],\n",
230 | " dtype='object')"
231 | ]
232 | },
233 | "execution_count": 4,
234 | "metadata": {},
235 | "output_type": "execute_result"
236 | }
237 | ],
238 | "source": [
239 | "df.columns"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 5,
245 | "id": "4d28eef5",
246 | "metadata": {},
247 | "outputs": [
248 | {
249 | "data": {
250 | "text/plain": [
251 | " <=50K 24720\n",
252 | " >50K 7841\n",
253 | "Name: income, dtype: int64"
254 | ]
255 | },
256 | "execution_count": 5,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "df[' income'].value_counts()"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 6,
268 | "id": "3473c261",
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "data": {
273 | "text/plain": [
274 | "array([' Never-married', ' Married-civ-spouse', ' Divorced',\n",
275 | " ' Married-spouse-absent', ' Separated', ' Married-AF-spouse',\n",
276 | " ' Widowed'], dtype=object)"
277 | ]
278 | },
279 | "execution_count": 6,
280 | "metadata": {},
281 | "output_type": "execute_result"
282 | }
283 | ],
284 | "source": [
285 | "df[' marital-status'].unique()"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 7,
291 | "id": "34e1f61d",
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "data": {
296 | "text/plain": [
297 | "['age',\n",
298 | " 'workclass',\n",
299 | " 'fnlwgt',\n",
300 | " 'education',\n",
301 | " 'education-num',\n",
302 | " 'marital-status',\n",
303 | " 'occupation',\n",
304 | " 'relationship',\n",
305 | " 'race',\n",
306 | " 'sex',\n",
307 | " 'capital-gain',\n",
308 | " 'capital-loss',\n",
309 | " 'hours-per-week',\n",
310 | " 'native-country',\n",
311 | " 'income']"
312 | ]
313 | },
314 | "execution_count": 7,
315 | "metadata": {},
316 | "output_type": "execute_result"
317 | }
318 | ],
319 | "source": [
320 | "col_names = df.columns\n",
321 | "col_names = [v.strip() for v in col_names]\n",
322 | "col_names"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 8,
328 | "id": "4b509bd8",
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "df.columns = col_names\n",
333 | "df.drop(columns=\"fnlwgt\",inplace=True)"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 9,
339 | "id": "cfd17372",
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "data": {
344 | "text/html": [
345 | "\n",
346 | "\n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " \n",
363 | " age \n",
364 | " workclass \n",
365 | " education \n",
366 | " education-num \n",
367 | " marital-status \n",
368 | " occupation \n",
369 | " relationship \n",
370 | " race \n",
371 | " sex \n",
372 | " capital-gain \n",
373 | " capital-loss \n",
374 | " hours-per-week \n",
375 | " native-country \n",
376 | " income \n",
377 | " \n",
378 | " \n",
379 | " \n",
380 | " \n",
381 | " 0 \n",
382 | " 39 \n",
383 | " State-gov \n",
384 | " Bachelors \n",
385 | " 13 \n",
386 | " Never-married \n",
387 | " Adm-clerical \n",
388 | " Not-in-family \n",
389 | " White \n",
390 | " Male \n",
391 | " 2174 \n",
392 | " 0 \n",
393 | " 40 \n",
394 | " United-States \n",
395 | " <=50K \n",
396 | " \n",
397 | " \n",
398 | " 1 \n",
399 | " 50 \n",
400 | " Self-emp-not-inc \n",
401 | " Bachelors \n",
402 | " 13 \n",
403 | " Married-civ-spouse \n",
404 | " Exec-managerial \n",
405 | " Husband \n",
406 | " White \n",
407 | " Male \n",
408 | " 0 \n",
409 | " 0 \n",
410 | " 13 \n",
411 | " United-States \n",
412 | " <=50K \n",
413 | " \n",
414 | " \n",
415 | " 2 \n",
416 | " 38 \n",
417 | " Private \n",
418 | " HS-grad \n",
419 | " 9 \n",
420 | " Divorced \n",
421 | " Handlers-cleaners \n",
422 | " Not-in-family \n",
423 | " White \n",
424 | " Male \n",
425 | " 0 \n",
426 | " 0 \n",
427 | " 40 \n",
428 | " United-States \n",
429 | " <=50K \n",
430 | " \n",
431 | " \n",
432 | " 3 \n",
433 | " 53 \n",
434 | " Private \n",
435 | " 11th \n",
436 | " 7 \n",
437 | " Married-civ-spouse \n",
438 | " Handlers-cleaners \n",
439 | " Husband \n",
440 | " Black \n",
441 | " Male \n",
442 | " 0 \n",
443 | " 0 \n",
444 | " 40 \n",
445 | " United-States \n",
446 | " <=50K \n",
447 | " \n",
448 | " \n",
449 | " 4 \n",
450 | " 28 \n",
451 | " Private \n",
452 | " Bachelors \n",
453 | " 13 \n",
454 | " Married-civ-spouse \n",
455 | " Prof-specialty \n",
456 | " Wife \n",
457 | " Black \n",
458 | " Female \n",
459 | " 0 \n",
460 | " 0 \n",
461 | " 40 \n",
462 | " Cuba \n",
463 | " <=50K \n",
464 | " \n",
465 | " \n",
466 | "
\n",
467 | "
"
468 | ],
469 | "text/plain": [
470 | " age workclass education education-num marital-status \\\n",
471 | "0 39 State-gov Bachelors 13 Never-married \n",
472 | "1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n",
473 | "2 38 Private HS-grad 9 Divorced \n",
474 | "3 53 Private 11th 7 Married-civ-spouse \n",
475 | "4 28 Private Bachelors 13 Married-civ-spouse \n",
476 | "\n",
477 | " occupation relationship race sex capital-gain \\\n",
478 | "0 Adm-clerical Not-in-family White Male 2174 \n",
479 | "1 Exec-managerial Husband White Male 0 \n",
480 | "2 Handlers-cleaners Not-in-family White Male 0 \n",
481 | "3 Handlers-cleaners Husband Black Male 0 \n",
482 | "4 Prof-specialty Wife Black Female 0 \n",
483 | "\n",
484 | " capital-loss hours-per-week native-country income \n",
485 | "0 0 40 United-States <=50K \n",
486 | "1 0 13 United-States <=50K \n",
487 | "2 0 40 United-States <=50K \n",
488 | "3 0 40 United-States <=50K \n",
489 | "4 0 40 Cuba <=50K "
490 | ]
491 | },
492 | "execution_count": 9,
493 | "metadata": {},
494 | "output_type": "execute_result"
495 | }
496 | ],
497 | "source": [
498 | "df.head()"
499 | ]
500 | },
501 | {
502 | "cell_type": "code",
503 | "execution_count": 10,
504 | "id": "b700e726",
505 | "metadata": {},
506 | "outputs": [
507 | {
508 | "data": {
509 | "text/plain": [
510 | "age 0\n",
511 | "workclass 0\n",
512 | "education 0\n",
513 | "education-num 0\n",
514 | "marital-status 0\n",
515 | "occupation 0\n",
516 | "relationship 0\n",
517 | "race 0\n",
518 | "sex 0\n",
519 | "capital-gain 0\n",
520 | "capital-loss 0\n",
521 | "hours-per-week 0\n",
522 | "native-country 0\n",
523 | "income 0\n",
524 | "dtype: int64"
525 | ]
526 | },
527 | "execution_count": 10,
528 | "metadata": {},
529 | "output_type": "execute_result"
530 | }
531 | ],
532 | "source": [
533 | "df.isnull().sum()"
534 | ]
535 | },
536 | {
537 | "cell_type": "code",
538 | "execution_count": 11,
539 | "id": "eacc0e26",
540 | "metadata": {},
541 | "outputs": [],
542 | "source": [
543 | "bins = [16,24,64,90]\n",
544 | "labels=['young','adult','old']\n",
545 | "df['age_types'] = pd.cut(df['age'], bins=bins,labels=labels)\n",
546 | "df['income_num'] = np.where(df['income'] == \" >50K\",1,0).astype('int16')"
547 | ]
548 | },
549 | {
550 | "cell_type": "code",
551 | "execution_count": 12,
552 | "id": "719aaa18",
553 | "metadata": {},
554 | "outputs": [
555 | {
556 | "data": {
557 | "text/html": [
558 | "\n",
559 | "\n",
572 | "
\n",
573 | " \n",
574 | " \n",
575 | " \n",
576 | " age \n",
577 | " workclass \n",
578 | " education \n",
579 | " education-num \n",
580 | " marital-status \n",
581 | " occupation \n",
582 | " relationship \n",
583 | " race \n",
584 | " sex \n",
585 | " capital-gain \n",
586 | " capital-loss \n",
587 | " hours-per-week \n",
588 | " native-country \n",
589 | " income \n",
590 | " age_types \n",
591 | " income_num \n",
592 | " \n",
593 | " \n",
594 | " \n",
595 | " \n",
596 | " 0 \n",
597 | " 39 \n",
598 | " State-gov \n",
599 | " Bachelors \n",
600 | " 13 \n",
601 | " Never-married \n",
602 | " Adm-clerical \n",
603 | " Not-in-family \n",
604 | " White \n",
605 | " Male \n",
606 | " 2174 \n",
607 | " 0 \n",
608 | " 40 \n",
609 | " United-States \n",
610 | " <=50K \n",
611 | " adult \n",
612 | " 0 \n",
613 | " \n",
614 | " \n",
615 | " 1 \n",
616 | " 50 \n",
617 | " Self-emp-not-inc \n",
618 | " Bachelors \n",
619 | " 13 \n",
620 | " Married-civ-spouse \n",
621 | " Exec-managerial \n",
622 | " Husband \n",
623 | " White \n",
624 | " Male \n",
625 | " 0 \n",
626 | " 0 \n",
627 | " 13 \n",
628 | " United-States \n",
629 | " <=50K \n",
630 | " adult \n",
631 | " 0 \n",
632 | " \n",
633 | " \n",
634 | " 2 \n",
635 | " 38 \n",
636 | " Private \n",
637 | " HS-grad \n",
638 | " 9 \n",
639 | " Divorced \n",
640 | " Handlers-cleaners \n",
641 | " Not-in-family \n",
642 | " White \n",
643 | " Male \n",
644 | " 0 \n",
645 | " 0 \n",
646 | " 40 \n",
647 | " United-States \n",
648 | " <=50K \n",
649 | " adult \n",
650 | " 0 \n",
651 | " \n",
652 | " \n",
653 | " 3 \n",
654 | " 53 \n",
655 | " Private \n",
656 | " 11th \n",
657 | " 7 \n",
658 | " Married-civ-spouse \n",
659 | " Handlers-cleaners \n",
660 | " Husband \n",
661 | " Black \n",
662 | " Male \n",
663 | " 0 \n",
664 | " 0 \n",
665 | " 40 \n",
666 | " United-States \n",
667 | " <=50K \n",
668 | " adult \n",
669 | " 0 \n",
670 | " \n",
671 | " \n",
672 | " 4 \n",
673 | " 28 \n",
674 | " Private \n",
675 | " Bachelors \n",
676 | " 13 \n",
677 | " Married-civ-spouse \n",
678 | " Prof-specialty \n",
679 | " Wife \n",
680 | " Black \n",
681 | " Female \n",
682 | " 0 \n",
683 | " 0 \n",
684 | " 40 \n",
685 | " Cuba \n",
686 | " <=50K \n",
687 | " adult \n",
688 | " 0 \n",
689 | " \n",
690 | " \n",
691 | "
\n",
692 | "
"
693 | ],
694 | "text/plain": [
695 | " age workclass education education-num marital-status \\\n",
696 | "0 39 State-gov Bachelors 13 Never-married \n",
697 | "1 50 Self-emp-not-inc Bachelors 13 Married-civ-spouse \n",
698 | "2 38 Private HS-grad 9 Divorced \n",
699 | "3 53 Private 11th 7 Married-civ-spouse \n",
700 | "4 28 Private Bachelors 13 Married-civ-spouse \n",
701 | "\n",
702 | " occupation relationship race sex capital-gain \\\n",
703 | "0 Adm-clerical Not-in-family White Male 2174 \n",
704 | "1 Exec-managerial Husband White Male 0 \n",
705 | "2 Handlers-cleaners Not-in-family White Male 0 \n",
706 | "3 Handlers-cleaners Husband Black Male 0 \n",
707 | "4 Prof-specialty Wife Black Female 0 \n",
708 | "\n",
709 | " capital-loss hours-per-week native-country income age_types income_num \n",
710 | "0 0 40 United-States <=50K adult 0 \n",
711 | "1 0 13 United-States <=50K adult 0 \n",
712 | "2 0 40 United-States <=50K adult 0 \n",
713 | "3 0 40 United-States <=50K adult 0 \n",
714 | "4 0 40 Cuba <=50K adult 0 "
715 | ]
716 | },
717 | "execution_count": 12,
718 | "metadata": {},
719 | "output_type": "execute_result"
720 | }
721 | ],
722 | "source": [
723 | "df.head()"
724 | ]
725 | },
726 | {
727 | "cell_type": "code",
728 | "execution_count": 13,
729 | "id": "01261068",
730 | "metadata": {},
731 | "outputs": [],
732 | "source": [
733 | "df.loc[df['workclass']=='?', 'workclass']= np.NaN\n",
734 | "df.loc[df['occupation']=='?', 'occupation']= np.NaN\n",
735 | "df.loc[df['native-country']=='?', 'native_country']= np.NaN"
736 | ]
737 | },
738 | {
739 | "cell_type": "code",
740 | "execution_count": 14,
741 | "id": "d2de6659",
742 | "metadata": {},
743 | "outputs": [],
744 | "source": [
745 | "df = df.dropna(axis=1)"
746 | ]
747 | },
748 | {
749 | "cell_type": "code",
750 | "execution_count": 15,
751 | "id": "2fa5c0d5",
752 | "metadata": {},
753 | "outputs": [],
754 | "source": [
755 | "from sklearn.preprocessing import LabelEncoder\n",
756 | "def label_encoder(a):\n",
757 | " le = LabelEncoder()\n",
758 | " df[a] = le.fit_transform(df[a])\n",
759 | "label_list = ['workclass', 'education','marital-status',\n",
760 | " 'occupation', 'relationship', 'race', 'sex','native-country', 'income']\n",
761 | "for i in label_list:\n",
762 | " label_encoder(i)"
763 | ]
764 | },
765 | {
766 | "cell_type": "code",
767 | "execution_count": 16,
768 | "id": "2522fdf5",
769 | "metadata": {},
770 | "outputs": [],
771 | "source": [
772 | "from sklearn.preprocessing import MinMaxScaler"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "execution_count": 17,
778 | "id": "17a89425",
779 | "metadata": {},
780 | "outputs": [],
781 | "source": [
782 | "scaler = MinMaxScaler()"
783 | ]
784 | },
785 | {
786 | "cell_type": "code",
787 | "execution_count": 18,
788 | "id": "87613583",
789 | "metadata": {},
790 | "outputs": [
791 | {
792 | "data": {
793 | "text/html": [
794 | "MinMaxScaler() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
795 | ],
796 | "text/plain": [
797 | "MinMaxScaler()"
798 | ]
799 | },
800 | "execution_count": 18,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "scaler.fit(df.drop(['income','age_types','income_num'],axis=1))"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": 19,
812 | "id": "34ed8a08",
813 | "metadata": {},
814 | "outputs": [],
815 | "source": [
816 | "scaled_features = scaler.transform(df.drop(['income','age_types','income_num'],axis=1))"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": 20,
822 | "id": "80d9d538",
823 | "metadata": {},
824 | "outputs": [],
825 | "source": [
826 | "columns=['age', 'workclass', 'education', 'education_num', 'marital_status',\n",
827 | " 'occupation', 'relationship', 'race', 'sex', 'capital_gain',\n",
828 | " 'capital_loss', 'hours_per_week', 'native_country']"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": 21,
834 | "id": "b1023508",
835 | "metadata": {},
836 | "outputs": [
837 | {
838 | "data": {
839 | "text/html": [
840 | "\n",
841 | "\n",
854 | "
\n",
855 | " \n",
856 | " \n",
857 | " \n",
858 | " age \n",
859 | " workclass \n",
860 | " education \n",
861 | " education_num \n",
862 | " marital_status \n",
863 | " occupation \n",
864 | " relationship \n",
865 | " race \n",
866 | " sex \n",
867 | " capital_gain \n",
868 | " capital_loss \n",
869 | " hours_per_week \n",
870 | " native_country \n",
871 | " \n",
872 | " \n",
873 | " \n",
874 | " \n",
875 | " 0 \n",
876 | " 0.301370 \n",
877 | " 0.875 \n",
878 | " 0.600000 \n",
879 | " 0.800000 \n",
880 | " 0.666667 \n",
881 | " 0.071429 \n",
882 | " 0.2 \n",
883 | " 1.0 \n",
884 | " 1.0 \n",
885 | " 0.02174 \n",
886 | " 0.0 \n",
887 | " 0.397959 \n",
888 | " 0.951220 \n",
889 | " \n",
890 | " \n",
891 | " 1 \n",
892 | " 0.452055 \n",
893 | " 0.750 \n",
894 | " 0.600000 \n",
895 | " 0.800000 \n",
896 | " 0.333333 \n",
897 | " 0.285714 \n",
898 | " 0.0 \n",
899 | " 1.0 \n",
900 | " 1.0 \n",
901 | " 0.00000 \n",
902 | " 0.0 \n",
903 | " 0.122449 \n",
904 | " 0.951220 \n",
905 | " \n",
906 | " \n",
907 | " 2 \n",
908 | " 0.287671 \n",
909 | " 0.500 \n",
910 | " 0.733333 \n",
911 | " 0.533333 \n",
912 | " 0.000000 \n",
913 | " 0.428571 \n",
914 | " 0.2 \n",
915 | " 1.0 \n",
916 | " 1.0 \n",
917 | " 0.00000 \n",
918 | " 0.0 \n",
919 | " 0.397959 \n",
920 | " 0.951220 \n",
921 | " \n",
922 | " \n",
923 | " 3 \n",
924 | " 0.493151 \n",
925 | " 0.500 \n",
926 | " 0.066667 \n",
927 | " 0.400000 \n",
928 | " 0.333333 \n",
929 | " 0.428571 \n",
930 | " 0.0 \n",
931 | " 0.5 \n",
932 | " 1.0 \n",
933 | " 0.00000 \n",
934 | " 0.0 \n",
935 | " 0.397959 \n",
936 | " 0.951220 \n",
937 | " \n",
938 | " \n",
939 | " 4 \n",
940 | " 0.150685 \n",
941 | " 0.500 \n",
942 | " 0.600000 \n",
943 | " 0.800000 \n",
944 | " 0.333333 \n",
945 | " 0.714286 \n",
946 | " 1.0 \n",
947 | " 0.5 \n",
948 | " 0.0 \n",
949 | " 0.00000 \n",
950 | " 0.0 \n",
951 | " 0.397959 \n",
952 | " 0.121951 \n",
953 | " \n",
954 | " \n",
955 | "
\n",
956 | "
"
957 | ],
958 | "text/plain": [
959 | " age workclass education education_num marital_status occupation \\\n",
960 | "0 0.301370 0.875 0.600000 0.800000 0.666667 0.071429 \n",
961 | "1 0.452055 0.750 0.600000 0.800000 0.333333 0.285714 \n",
962 | "2 0.287671 0.500 0.733333 0.533333 0.000000 0.428571 \n",
963 | "3 0.493151 0.500 0.066667 0.400000 0.333333 0.428571 \n",
964 | "4 0.150685 0.500 0.600000 0.800000 0.333333 0.714286 \n",
965 | "\n",
966 | " relationship race sex capital_gain capital_loss hours_per_week \\\n",
967 | "0 0.2 1.0 1.0 0.02174 0.0 0.397959 \n",
968 | "1 0.0 1.0 1.0 0.00000 0.0 0.122449 \n",
969 | "2 0.2 1.0 1.0 0.00000 0.0 0.397959 \n",
970 | "3 0.0 0.5 1.0 0.00000 0.0 0.397959 \n",
971 | "4 1.0 0.5 0.0 0.00000 0.0 0.397959 \n",
972 | "\n",
973 | " native_country \n",
974 | "0 0.951220 \n",
975 | "1 0.951220 \n",
976 | "2 0.951220 \n",
977 | "3 0.951220 \n",
978 | "4 0.121951 "
979 | ]
980 | },
981 | "execution_count": 21,
982 | "metadata": {},
983 | "output_type": "execute_result"
984 | }
985 | ],
986 | "source": [
987 | "df_scaled = pd.DataFrame(scaled_features,columns=columns)\n",
988 | "df_scaled.head()"
989 | ]
990 | },
991 | {
992 | "cell_type": "code",
993 | "execution_count": 22,
994 | "id": "013bb6eb",
995 | "metadata": {},
996 | "outputs": [],
997 | "source": [
998 | "from imblearn.combine import SMOTETomek\n",
999 | "from imblearn.under_sampling import NearMiss\n",
1000 | "\n",
1001 | "X = df_scaled\n",
1002 | "y= df.income\n",
1003 | "\n",
1004 | "# Resample with SMOTE + Tomek links (combined over- and under-sampling) to handle the imbalanced income classes\n",
1005 | "smk = SMOTETomek(random_state=42)\n",
1006 | "X_res,y_res=smk.fit_resample(X,y)\n",
1007 | "\n",
1008 | "from sklearn.model_selection import train_test_split\n",
1009 | "X_train, X_test, y_train, y_test = train_test_split(X_res,y_res,test_size=0.20,random_state=101,shuffle=True)"
1010 | ]
1011 | },
1012 | {
1013 | "cell_type": "code",
1014 | "execution_count": 23,
1015 | "id": "40909656",
1016 | "metadata": {},
1017 | "outputs": [],
1018 | "source": [
1019 | "import xgboost as xgb"
1020 | ]
1021 | },
1022 | {
1023 | "cell_type": "code",
1024 | "execution_count": 24,
1025 | "id": "4c4f556a",
1026 | "metadata": {},
1027 | "outputs": [],
1028 | "source": [
1029 | "dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=X_train.columns)\n",
1030 | "dtest = xgb.DMatrix(X_test, label=y_test, feature_names=X_train.columns)"
1031 | ]
1032 | },
1033 | {
1034 | "cell_type": "code",
1035 | "execution_count": 26,
1036 | "id": "00379905",
1037 | "metadata": {},
1038 | "outputs": [],
1039 | "source": [
1040 | "xgb_params = {\n",
1041 | " 'eta': 0.3,\n",
1042 | " 'max_depth': 6,\n",
1043 | " 'min_child_weight': 1,\n",
1044 | " \n",
1045 | " 'objective': 'binary:logistic',\n",
1046 | " 'nthread': 8,\n",
1047 | " 'seed': 1,\n",
1048 | " 'silent': 1\n",
1049 | "}"
1050 | ]
1051 | },
1052 | {
1053 | "cell_type": "code",
1054 | "execution_count": 27,
1055 | "id": "b192eb3e",
1056 | "metadata": {},
1057 | "outputs": [
1058 | {
1059 | "name": "stdout",
1060 | "output_type": "stream",
1061 | "text": [
1062 | "[13:07:36] WARNING: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-11.0-arm64-cpython-38/xgboost/src/learner.cc:767: \n",
1063 | "Parameters: { \"silent\" } are not used.\n",
1064 | "\n"
1065 | ]
1066 | }
1067 | ],
1068 | "source": [
1069 | "model = xgb.train(xgb_params, dtrain, num_boost_round=10)"
1070 | ]
1071 | },
1072 | {
1073 | "cell_type": "code",
1074 | "execution_count": 28,
1075 | "id": "424f8dca",
1076 | "metadata": {},
1077 | "outputs": [],
1078 | "source": [
1079 | "y_pred = model.predict(dtest)"
1080 | ]
1081 | },
1082 | {
1083 | "cell_type": "code",
1084 | "execution_count": 29,
1085 | "id": "72fa42cd",
1086 | "metadata": {},
1087 | "outputs": [
1088 | {
1089 | "data": {
1090 | "text/plain": [
1091 | "array([0.06784513, 0.08248003, 0.6497586 , 0.9176678 , 0.02431703],\n",
1092 | " dtype=float32)"
1093 | ]
1094 | },
1095 | "execution_count": 29,
1096 | "metadata": {},
1097 | "output_type": "execute_result"
1098 | }
1099 | ],
1100 | "source": [
1101 | "y_pred[:5]"
1102 | ]
1103 | },
1104 | {
1105 | "cell_type": "code",
1106 | "execution_count": 30,
1107 | "id": "b19e49d6",
1108 | "metadata": {},
1109 | "outputs": [
1110 | {
1111 | "data": {
1112 | "text/plain": [
1113 | "0.9399015348717228"
1114 | ]
1115 | },
1116 | "execution_count": 30,
1117 | "metadata": {},
1118 | "output_type": "execute_result"
1119 | }
1120 | ],
1121 | "source": [
1122 | "from sklearn.metrics import roc_auc_score\n",
1123 | "roc_auc_score(y_test, y_pred)"
1124 | ]
1125 | },
1126 | {
1127 | "cell_type": "code",
1128 | "execution_count": 31,
1129 | "id": "5b6bae0c",
1130 | "metadata": {},
1131 | "outputs": [],
1132 | "source": [
1133 | "watchlist = [(dtrain, 'train'), (dtest, 'test')]"
1134 | ]
1135 | },
1136 | {
1137 | "cell_type": "code",
1138 | "execution_count": 32,
1139 | "id": "0b3438d4",
1140 | "metadata": {},
1141 | "outputs": [],
1142 | "source": [
1143 | "xgb_params = {\n",
1144 | " 'eta': 0.3,\n",
1145 | " 'max_depth': 6,\n",
1146 | " 'min_child_weight': 1,\n",
1147 | " \n",
1148 | " 'objective': 'binary:logistic',\n",
1149 | " 'eval_metric': 'auc',\n",
1150 | " 'nthread': 8,\n",
1151 | " 'seed': 1,\n",
1152 | " 'silent': 1\n",
1153 | "}"
1154 | ]
1155 | },
1156 | {
1157 | "cell_type": "code",
1158 | "execution_count": 33,
1159 | "id": "cd925feb",
1160 | "metadata": {},
1161 | "outputs": [],
1162 | "source": [
1163 | "import warnings \n",
1164 | "warnings.filterwarnings('ignore') "
1165 | ]
1166 | },
1167 | {
1168 | "cell_type": "code",
1169 | "execution_count": 34,
1170 | "id": "6ffab777",
1171 | "metadata": {},
1172 | "outputs": [
1173 | {
1174 | "name": "stdout",
1175 | "output_type": "stream",
1176 | "text": [
1177 | "[13:10:23] WARNING: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-11.0-arm64-cpython-38/xgboost/src/learner.cc:767: \n",
1178 | "Parameters: { \"silent\" } are not used.\n",
1179 | "\n",
1180 | "[0]\ttrain-auc:0.91291\ttest-auc:0.91128\n",
1181 | "[10]\ttrain-auc:0.94806\ttest-auc:0.94449\n",
1182 | "[20]\ttrain-auc:0.96152\ttest-auc:0.95790\n",
1183 | "[30]\ttrain-auc:0.96850\ttest-auc:0.96393\n",
1184 | "[40]\ttrain-auc:0.97321\ttest-auc:0.96813\n",
1185 | "[50]\ttrain-auc:0.97633\ttest-auc:0.97088\n",
1186 | "[60]\ttrain-auc:0.97958\ttest-auc:0.97373\n",
1187 | "[70]\ttrain-auc:0.98086\ttest-auc:0.97497\n",
1188 | "[80]\ttrain-auc:0.98219\ttest-auc:0.97601\n",
1189 | "[90]\ttrain-auc:0.98324\ttest-auc:0.97659\n",
1190 | "[99]\ttrain-auc:0.98395\ttest-auc:0.97690\n"
1191 | ]
1192 | }
1193 | ],
1194 | "source": [
1195 | "model = xgb.train(xgb_params, dtrain,\n",
1196 | " num_boost_round=100,\n",
1197 | " evals=watchlist, verbose_eval=10)"
1198 | ]
1199 | },
1200 | {
1201 | "cell_type": "code",
1202 | "execution_count": null,
1203 | "id": "8f85aa7a",
1204 | "metadata": {},
1205 | "outputs": [],
1206 | "source": []
1207 | }
1208 | ],
1209 | "metadata": {
1210 | "kernelspec": {
1211 | "display_name": "Python 3 (ipykernel)",
1212 | "language": "python",
1213 | "name": "python3"
1214 | },
1215 | "language_info": {
1216 | "codemirror_mode": {
1217 | "name": "ipython",
1218 | "version": 3
1219 | },
1220 | "file_extension": ".py",
1221 | "mimetype": "text/x-python",
1222 | "name": "python",
1223 | "nbconvert_exporter": "python",
1224 | "pygments_lexer": "ipython3",
1225 | "version": "3.9.12"
1226 | }
1227 | },
1228 | "nbformat": 4,
1229 | "nbformat_minor": 5
1230 | }
1231 |
--------------------------------------------------------------------------------
/2.deep-learning-fundamentals/2.Activation_Functions_in_Tensorflow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {
20 | "id": "Vs8PJe7J7Jal"
21 | },
22 | "source": [
23 | "# Activation Functions\n"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "oiUNYAyKFEf7"
30 | },
31 | "source": [
32 | "## Sigmoid Activation"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "metadata": {
38 | "id": "jf2uv8yM6yeB"
39 | },
40 | "source": [
41 | "#Imports\n",
42 | "import tensorflow as tf\n",
43 | "from tensorflow import keras\n",
44 | "import matplotlib.pyplot as plt\n",
45 | "plt.style.use(\"seaborn\")"
46 | ],
47 | "execution_count": null,
48 | "outputs": []
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "PLughPw17S15"
54 | },
55 | "source": [
56 | "#Generate Data Points\n",
57 | "X = tf.linspace(-10,10,100)\n",
58 | "X"
59 | ],
60 | "execution_count": null,
61 | "outputs": []
62 | },
63 | {
64 | "cell_type": "code",
65 | "metadata": {
66 | "id": "DNX3FgKFV90M"
67 | },
68 | "source": [
69 | "X.shape"
70 | ],
71 | "execution_count": null,
72 | "outputs": []
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {
77 | "id": "F_WuF8As7nzc"
78 | },
79 | "source": [
80 | "Activations : https://www.tensorflow.org/api_docs/python/tf/keras/activations\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {
86 | "id": "c1Pb3__D72s4"
87 | },
88 | "source": [
89 | "sigmoid(x) = 1 / (1 + exp(-x))"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "metadata": {
95 | "id": "U266pfs17hRq"
96 | },
97 | "source": [
98 | "#Activations\n",
99 | "y = tf.keras.activations.sigmoid(X)\n",
100 | "\n",
101 | "#Visualize Graph\n",
102 | "plt.plot(X,y,label = \"sigmoid(x) = 1 / (1 + exp(-x))\" )\n",
103 | "plt.title(\"Sigmoid\")\n",
104 | "plt.xlabel(\"X\")\n",
105 | "plt.ylabel(\"Sigmoid of X\")\n",
106 | "plt.legend()\n",
107 | "plt.show()"
108 | ],
109 | "execution_count": null,
110 | "outputs": []
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {
115 | "id": "b8wOT7128nXY"
116 | },
117 | "source": [
118 | "## Tanh - Hyperbolic Tangent Function"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "metadata": {
124 | "id": "g1bO5AYh8GAf"
125 | },
126 | "source": [
127 | "#Activations\n",
128 | "y = tf.keras.activations.tanh(X)\n",
129 | "\n",
130 | "#Visualize Graph\n",
131 | "plt.plot(X,y,label = \"tanh(x)\" )\n",
132 | "plt.title(\"Hyperbolic Tangent\")\n",
133 | "plt.xlabel(\"X\")\n",
134 | "plt.ylabel(\"tanh(x)\")\n",
135 | "plt.legend()\n",
136 | "plt.show()"
137 | ],
138 | "execution_count": null,
139 | "outputs": []
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {
144 | "id": "rsuW8qf5-yuy"
145 | },
146 | "source": [
147 | "## Relu Activation"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {
153 | "id": "ibVw5_Xm-0hO"
154 | },
155 | "source": [
156 | "Relu Activation :\n",
157 | "y = max(x,0)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "metadata": {
163 | "id": "l8V-0uUK-UcA"
164 | },
165 | "source": [
166 | "#Activations\n",
167 | "y = tf.keras.activations.relu(X)\n",
168 | "\n",
169 | "#Visualize Graph\n",
170 | "plt.plot(X,y,label = \"y = max(x,0)\" )\n",
171 | "plt.title(\"Relu\")\n",
172 | "plt.xlabel(\"X\")\n",
173 | "plt.ylabel(\"max(x,0)\")\n",
174 | "plt.legend()\n",
175 | "plt.show()"
176 | ],
177 | "execution_count": null,
178 | "outputs": []
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "source": [
183 | "## Leaky Relu"
184 | ],
185 | "metadata": {
186 | "id": "TZtNRw5Ua6Mg"
187 | }
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "source": [
192 | ""
193 | ],
194 | "metadata": {
195 | "id": "n6SKyPNnbSk9"
196 | }
197 | },
198 | {
199 | "cell_type": "code",
200 | "source": [
201 | "#Activations\n",
202 | "y = tf.nn.leaky_relu(X)\n",
203 | "\n",
204 | "#Visualize Graph\n",
205 | "plt.plot(X,y )\n",
206 | "plt.title(\"Leaky Relu\")\n",
207 | "plt.xlabel(\"X\")\n",
208 | "plt.ylabel(\"Leaky Relu\")\n",
209 | "plt.legend()\n",
210 | "plt.show()"
211 | ],
212 | "metadata": {
213 | "id": "HXsa5H4Ea8C7"
214 | },
215 | "execution_count": null,
216 | "outputs": []
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {
221 | "id": "EQc5WUBL_VHi"
222 | },
223 | "source": [
224 | "## Elu Activation Function - Exponential Linear Unit \n",
225 | "\n",
226 | "y = x if x > 0, and y = alpha * (exp(x) - 1) if x <= 0."
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "metadata": {
232 | "id": "62sr4LOt_NsZ"
233 | },
234 | "source": [
235 | "#Activations\n",
236 | "y = tf.keras.activations.elu(X,alpha = 0.9)\n",
237 | "\n",
238 | "#Visualize Graph\n",
239 | "plt.plot(X,y,label = \"y = x if x>0 else alpha * (exp(x) - 1) if x < 0 \" )\n",
240 | "plt.title(\"Elu\")\n",
241 | "plt.xlabel(\"X\")\n",
242 | "plt.ylabel(\"elu(x)\")\n",
243 | "plt.legend()\n",
244 | "plt.show()"
245 | ],
246 | "execution_count": null,
247 | "outputs": []
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {
252 | "id": "kvHV8AuX_-GG"
253 | },
254 | "source": [
255 | "## SELU Activation Function - Scaled Exponential Linear Unit\n",
256 | "\n",
257 | "if x > 0: return scale * x\n",
258 | "\n",
259 | "if x <= 0: return scale * alpha * (exp(x) - 1)\n",
260 | "\n",
261 | "where alpha and scale are pre-defined constants (alpha=1.67326324 and scale=1.05070098)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "metadata": {
267 | "id": "OVLdS57G_3vO"
268 | },
269 | "source": [
270 | "#Activations\n",
271 | "y = tf.keras.activations.selu(X)\n",
272 | "\n",
273 | "#Visualize Graph\n",
274 | "plt.plot(X,y,label = \"selu(x)\" )\n",
275 | "plt.title(\"SELU\")\n",
276 | "plt.xlabel(\"X\")\n",
277 | "plt.ylabel(\"selu(x)\")\n",
278 | "plt.legend()\n",
279 | "plt.show()"
280 | ],
281 | "execution_count": null,
282 | "outputs": []
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {
287 | "id": "IRyJGq5OEO_n"
288 | },
289 | "source": [
290 | "## Swish Activation\n",
291 | "swish(x) = x * sigmoid(x)"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "metadata": {
297 | "id": "-lUOmajvESfo"
298 | },
299 | "source": [
300 | "#Activations\n",
301 | "y = tf.keras.activations.swish(X)\n",
302 | "\n",
303 | "#Visualize Graph\n",
304 | "plt.plot(X,y,label = \"swish(x) = x * sigmoid(x)\" )\n",
305 | "plt.title(\"Swish\")\n",
306 | "plt.xlabel(\"X\")\n",
307 | "plt.ylabel(\"swish(x)\")\n",
308 | "plt.legend()\n",
309 | "plt.show()"
310 | ],
311 | "execution_count": null,
312 | "outputs": []
313 | },
314 | {
315 | "cell_type": "markdown",
316 | "metadata": {
317 | "id": "ZzfIVJouAwhU"
318 | },
319 | "source": [
320 | "## Softmax\n",
321 | "\n",
322 | "converts a vector of values to a probability distribution"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {
328 | "id": "E0188SOcCG_K"
329 | },
330 | "source": [
331 | ""
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "metadata": {
337 | "id": "unairtA2Ac95"
338 | },
339 | "source": [
340 | "x = tf.constant([150,50,10],dtype=tf.float32)\n",
341 | "x = tf.expand_dims(x,axis=0)\n",
342 | "print(x.shape,x)\n",
343 | "y = tf.keras.activations.softmax(x)\n",
344 | "print(y)"
345 | ],
346 | "execution_count": null,
347 | "outputs": []
348 | },
349 | {
350 | "cell_type": "code",
351 | "metadata": {
352 | "id": "IlzbNsfAU6N_"
353 | },
354 | "source": [
355 | "tf.math.reduce_sum(y)"
356 | ],
357 | "execution_count": null,
358 | "outputs": []
359 | },
360 | {
361 | "cell_type": "code",
362 | "metadata": {
363 | "id": "ZT62fwfzU510"
364 | },
365 | "source": [],
366 | "execution_count": null,
367 | "outputs": []
368 | },
369 | {
370 | "cell_type": "code",
371 | "metadata": {
372 | "id": "Zw6D_vQyA_Ad"
373 | },
374 | "source": [
375 | "# He initializer\n",
376 | "keras.layers.Dense(10,activation=\"relu\",kernel_initializer=\"he_normal\")"
377 | ],
378 | "execution_count": null,
379 | "outputs": []
380 | },
381 | {
382 | "cell_type": "code",
383 | "metadata": {
384 | "id": "yl7x23ga7TIs"
385 | },
386 | "source": [
387 | "#Optimizers\n",
388 | "1. SGD - Lowest Speed - Good convergence\n",
389 | "2. Momentum SGD - Medium Speed - Good convergence\n",
390 | "3. Adagrad - Very high Speed (it stops early) - Convergence quality - lowest\n",
391 | "4. RMSprop - high speed & Medium to high quality\n",
392 | "5. Adam - high speed & Medium to high quality\n",
393 | "\n",
394 | "Nadam\n",
395 | "Adamax"
396 | ],
397 | "execution_count": null,
398 | "outputs": []
399 | }
400 | ]
401 | }
--------------------------------------------------------------------------------