├── README.md ├── LICENSE └── Customer Behaviour Prediction ├── Customer_Behaviour.csv └── Customer_Behaviour_Prediction.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Feature-Engineering 2 | A collection of notebooks on feature engineering. Each notebook checks whether applying feature engineering steps improves the model's performance. 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Sonu Kumar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Customer Behaviour Prediction/Customer_Behaviour.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 
15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 
15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 
15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 
15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 
15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 
367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 402 | -------------------------------------------------------------------------------- /Customer Behaviour Prediction/Customer_Behaviour_Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Customer Behaviour Prediction.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | } 13 | }, 14 | "cells": [ 15 | { 16 | "cell_type": "code", 17 | "metadata": { 18 | "id": "Jq2bZura6Feu" 19 | }, 20 | "source": [ 21 | "import pandas as pd\n", 22 | "import numpy as np\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from sklearn.linear_model import LogisticRegression\n", 27 | "from 
sklearn.model_selection import train_test_split" 28 | ], 29 | "execution_count": 1, 30 | "outputs": [] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "metadata": { 35 | "id": "E-aj4zwE7aua" 36 | }, 37 | "source": [ 38 | "data = pd.read_csv(\"drive/My Drive/Colab Notebooks/Customer_Behaviour.csv\")" 39 | ], 40 | "execution_count": 2, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "colab": { 47 | "base_uri": "https://localhost:8080/", 48 | "height": 419 49 | }, 50 | "id": "_9yncnQz7j8l", 51 | "outputId": "cc74e7dd-ee16-4035-b037-8366692572a9" 52 | }, 53 | "source": [ 54 | "data" 55 | ], 56 | "execution_count": 3, 57 | "outputs": [ 58 | { 59 | "output_type": "execute_result", 60 | "data": { 61 | "text/html": [ 62 | "
\n", 63 | "\n", 76 | "\n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
User IDGenderAgeEstimatedSalaryPurchased
015624510Male19190000
115810944Male35200000
215668575Female26430000
315603246Female27570000
415804002Male19760000
..................
39515691863Female46410001
39615706071Male51230001
39715654296Female50200001
39815755018Male36330000
39915594041Female49360001
\n", 178 | "

400 rows × 5 columns

\n", 179 | "
" 180 | ], 181 | "text/plain": [ 182 | " User ID Gender Age EstimatedSalary Purchased\n", 183 | "0 15624510 Male 19 19000 0\n", 184 | "1 15810944 Male 35 20000 0\n", 185 | "2 15668575 Female 26 43000 0\n", 186 | "3 15603246 Female 27 57000 0\n", 187 | "4 15804002 Male 19 76000 0\n", 188 | ".. ... ... ... ... ...\n", 189 | "395 15691863 Female 46 41000 1\n", 190 | "396 15706071 Male 51 23000 1\n", 191 | "397 15654296 Female 50 20000 1\n", 192 | "398 15755018 Male 36 33000 0\n", 193 | "399 15594041 Female 49 36000 1\n", 194 | "\n", 195 | "[400 rows x 5 columns]" 196 | ] 197 | }, 198 | "metadata": { 199 | "tags": [] 200 | }, 201 | "execution_count": 3 202 | } 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "metadata": { 208 | "colab": { 209 | "base_uri": "https://localhost:8080/" 210 | }, 211 | "id": "I_6CMjD-7lZB", 212 | "outputId": "93478686-3263-42cc-823b-f4cc611b1d5c" 213 | }, 214 | "source": [ 215 | "data.info()" 216 | ], 217 | "execution_count": 4, 218 | "outputs": [ 219 | { 220 | "output_type": "stream", 221 | "text": [ 222 | "\n", 223 | "RangeIndex: 400 entries, 0 to 399\n", 224 | "Data columns (total 5 columns):\n", 225 | " # Column Non-Null Count Dtype \n", 226 | "--- ------ -------------- ----- \n", 227 | " 0 User ID 400 non-null int64 \n", 228 | " 1 Gender 400 non-null object\n", 229 | " 2 Age 400 non-null int64 \n", 230 | " 3 EstimatedSalary 400 non-null int64 \n", 231 | " 4 Purchased 400 non-null int64 \n", 232 | "dtypes: int64(4), object(1)\n", 233 | "memory usage: 15.8+ KB\n" 234 | ], 235 | "name": "stdout" 236 | } 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "metadata": { 242 | "colab": { 243 | "base_uri": "https://localhost:8080/", 244 | "height": 297 245 | }, 246 | "id": "Wyqqp2IL7rnE", 247 | "outputId": "a22f4e4a-acab-4566-dc9b-5c62ed74e037" 248 | }, 249 | "source": [ 250 | "data.describe()" 251 | ], 252 | "execution_count": 5, 253 | "outputs": [ 254 | { 255 | "output_type": "execute_result", 256 | "data": { 257 | 
"text/html": [ 258 | "
\n", 259 | "\n", 272 | "\n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | "
User IDAgeEstimatedSalaryPurchased
count4.000000e+02400.000000400.000000400.000000
mean1.569154e+0737.65500069742.5000000.357500
std7.165832e+0410.48287734096.9602820.479864
min1.556669e+0718.00000015000.0000000.000000
25%1.562676e+0729.75000043000.0000000.000000
50%1.569434e+0737.00000070000.0000000.000000
75%1.575036e+0746.00000088000.0000001.000000
max1.581524e+0760.000000150000.0000001.000000
\n", 341 | "
" 342 | ], 343 | "text/plain": [ 344 | " User ID Age EstimatedSalary Purchased\n", 345 | "count 4.000000e+02 400.000000 400.000000 400.000000\n", 346 | "mean 1.569154e+07 37.655000 69742.500000 0.357500\n", 347 | "std 7.165832e+04 10.482877 34096.960282 0.479864\n", 348 | "min 1.556669e+07 18.000000 15000.000000 0.000000\n", 349 | "25% 1.562676e+07 29.750000 43000.000000 0.000000\n", 350 | "50% 1.569434e+07 37.000000 70000.000000 0.000000\n", 351 | "75% 1.575036e+07 46.000000 88000.000000 1.000000\n", 352 | "max 1.581524e+07 60.000000 150000.000000 1.000000" 353 | ] 354 | }, 355 | "metadata": { 356 | "tags": [] 357 | }, 358 | "execution_count": 5 359 | } 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "metadata": { 365 | "id": "2FhZMbKL7wlN" 366 | }, 367 | "source": [ 368 | "def preprocess_inputs(df, engineer_features=False):\n", 369 | " df = df.copy()\n", 370 | " #Drop User ID column\n", 371 | " df = df.drop('User ID', axis=1)\n", 372 | " #Binary encode gender column\n", 373 | " df['Gender'] = df['Gender'].replace({\"Female\":0, \"Male\":1})\n", 374 | "\n", 375 | " #feature engineering\n", 376 | " if engineer_features == True:\n", 377 | " threshold_salary = df['EstimatedSalary'].quantile(0.95)\n", 378 | " df['High Income'] = df['EstimatedSalary'].apply(lambda x: 1 if x>= threshold_salary else 0)\n", 379 | "\n", 380 | " old_age_threshold = df['Age'].quantile(0.75)\n", 381 | " young_age_threshold = df['Age'].quantile(0.25)\n", 382 | "\n", 383 | " df['Old Age'] = df['Age'].apply(lambda x:1 if x>= old_age_threshold else 0)\n", 384 | " df['Young Age'] = df['Age'].apply(lambda x:1 if x <= young_age_threshold else 0)\n", 385 | "\n", 386 | " \n", 387 | "\n", 388 | "\n", 389 | "\n", 390 | " #Drop and Split the dataset\n", 391 | " y = df['Purchased']\n", 392 | " X = df.drop('Purchased', axis=1)\n", 393 | "\n", 394 | " X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, shuffle=True, random_state=42)\n", 395 | " #scale the data\n", 396 | " 
scaler = StandardScaler()\n", 397 | " scaler.fit(X_train)\n", 398 | " X_train = pd.DataFrame(scaler.transform(X_train), index= X_train.index, columns = X_train.columns)\n", 399 | " X_test = pd.DataFrame(scaler.transform(X_test), index = X_test.index, columns = X_test.columns)\n", 400 | " return X_train, X_test, y_train, y_test" 401 | ], 402 | "execution_count": 47, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "metadata": { 408 | "id": "s1xqjGpw8E23" 409 | }, 410 | "source": [ 411 | "X_train, X_test, y_train, y_test = preprocess_inputs(data, engineer_features=False)" 412 | ], 413 | "execution_count": 35, 414 | "outputs": [] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "metadata": { 419 | "colab": { 420 | "base_uri": "https://localhost:8080/", 421 | "height": 419 422 | }, 423 | "id": "BdlPKFDp8P5W", 424 | "outputId": "ced3f438-e7d6-4059-b0b0-ad7cbe94835f" 425 | }, 426 | "source": [ 427 | "X_train" 428 | ], 429 | "execution_count": 36, 430 | "outputs": [ 431 | { 432 | "output_type": "execute_result", 433 | "data": { 434 | "text/html": [ 435 | "
\n", 436 | "\n", 449 | "\n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | "
GenderAgeEstimatedSalary
247-0.9867541.8925891.521894
110-0.9867540.1250380.032132
161.0134230.910616-1.311575
661.013423-1.347922-1.486841
153-0.986754-0.169554-0.581299
............
71-0.986754-1.347922-1.253153
106-0.986754-1.151527-1.019465
270-0.9867540.5178271.843215
3481.0134230.1250380.207398
102-0.986754-0.5623430.470297
\n", 527 | "

300 rows × 3 columns

\n", 528 | "
" 529 | ], 530 | "text/plain": [ 531 | " Gender Age EstimatedSalary\n", 532 | "247 -0.986754 1.892589 1.521894\n", 533 | "110 -0.986754 0.125038 0.032132\n", 534 | "16 1.013423 0.910616 -1.311575\n", 535 | "66 1.013423 -1.347922 -1.486841\n", 536 | "153 -0.986754 -0.169554 -0.581299\n", 537 | ".. ... ... ...\n", 538 | "71 -0.986754 -1.347922 -1.253153\n", 539 | "106 -0.986754 -1.151527 -1.019465\n", 540 | "270 -0.986754 0.517827 1.843215\n", 541 | "348 1.013423 0.125038 0.207398\n", 542 | "102 -0.986754 -0.562343 0.470297\n", 543 | "\n", 544 | "[300 rows x 3 columns]" 545 | ] 546 | }, 547 | "metadata": { 548 | "tags": [] 549 | }, 550 | "execution_count": 36 551 | } 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "metadata": { 557 | "colab": { 558 | "base_uri": "https://localhost:8080/", 559 | "height": 419 560 | }, 561 | "id": "z8YZTJvV8Qrr", 562 | "outputId": "80fd5274-6c28-4920-a65a-0695264f54c0" 563 | }, 564 | "source": [ 565 | "X_test" 566 | ], 567 | "execution_count": 37, 568 | "outputs": [ 569 | { 570 | "output_type": "execute_result", 571 | "data": { 572 | "text/html": [ 573 | "
\n", 574 | "\n", 587 | "\n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | "
GenderAgeEstimatedSalary
209-0.9867540.812419-1.399208
280-0.9867542.0889840.528719
33-0.986754-0.955132-0.756565
210-0.9867541.0088140.762408
93-0.986754-0.856935-1.223942
............
314-0.9867540.1250380.265820
3731.0134232.0889841.755582
3801.0134230.419630-0.172345
239-0.9867541.4998002.135325
751.013423-0.3659491.229784
\n", 665 | "

100 rows × 3 columns

\n", 666 | "
" 667 | ], 668 | "text/plain": [ 669 | " Gender Age EstimatedSalary\n", 670 | "209 -0.986754 0.812419 -1.399208\n", 671 | "280 -0.986754 2.088984 0.528719\n", 672 | "33 -0.986754 -0.955132 -0.756565\n", 673 | "210 -0.986754 1.008814 0.762408\n", 674 | "93 -0.986754 -0.856935 -1.223942\n", 675 | ".. ... ... ...\n", 676 | "314 -0.986754 0.125038 0.265820\n", 677 | "373 1.013423 2.088984 1.755582\n", 678 | "380 1.013423 0.419630 -0.172345\n", 679 | "239 -0.986754 1.499800 2.135325\n", 680 | "75 1.013423 -0.365949 1.229784\n", 681 | "\n", 682 | "[100 rows x 3 columns]" 683 | ] 684 | }, 685 | "metadata": { 686 | "tags": [] 687 | }, 688 | "execution_count": 37 689 | } 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": { 695 | "id": "kehN9a3aA9D1" 696 | }, 697 | "source": [ 698 | "## Training the Model without Feature Engineering" 699 | ] 700 | }, 701 | { 702 | "cell_type": "code", 703 | "metadata": { 704 | "id": "oGs1EXkq9K4W" 705 | }, 706 | "source": [ 707 | "model = LogisticRegression()\n", 708 | "model = model.fit(X_train, y_train)" 709 | ], 710 | "execution_count": 38, 711 | "outputs": [] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "metadata": { 716 | "colab": { 717 | "base_uri": "https://localhost:8080/" 718 | }, 719 | "id": "jUQoo7CJBK56", 720 | "outputId": "0af615d6-cc10-45e8-935c-ba6b0a3363ce" 721 | }, 722 | "source": [ 723 | "model.score(X_test, y_test)" 724 | ], 725 | "execution_count": 39, 726 | "outputs": [ 727 | { 728 | "output_type": "execute_result", 729 | "data": { 730 | "text/plain": [ 731 | "0.88" 732 | ] 733 | }, 734 | "metadata": { 735 | "tags": [] 736 | }, 737 | "execution_count": 39 738 | } 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "metadata": { 744 | "id": "IZbOfcoLBOPc" 745 | }, 746 | "source": [ 747 | "y_pred = model.predict(X_test)" 748 | ], 749 | "execution_count": 40, 750 | "outputs": [] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "metadata": { 755 | "colab": { 756 | "base_uri": 
"https://localhost:8080/" 757 | }, 758 | "id": "yD8M2_c2Bfmf", 759 | "outputId": "7d038a2a-c74b-4732-f457-bcd57ef231d0" 760 | }, 761 | "source": [ 762 | "y_pred" 763 | ], 764 | "execution_count": 41, 765 | "outputs": [ 766 | { 767 | "output_type": "execute_result", 768 | "data": { 769 | "text/plain": [ 770 | "array([0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,\n", 771 | " 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 772 | " 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", 773 | " 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,\n", 774 | " 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0])" 775 | ] 776 | }, 777 | "metadata": { 778 | "tags": [] 779 | }, 780 | "execution_count": 41 781 | } 782 | ] 783 | }, 784 | { 785 | "cell_type": "markdown", 786 | "metadata": { 787 | "id": "UU2e47K4DaNC" 788 | }, 789 | "source": [ 790 | "## Training with Feature Engineering" 791 | ] 792 | }, 793 | { 794 | "cell_type": "code", 795 | "metadata": { 796 | "colab": { 797 | "base_uri": "https://localhost:8080/", 798 | "height": 419 799 | }, 800 | "id": "kciVgmnjBgyz", 801 | "outputId": "81dfc820-3776-4d4c-c282-d1c1e56bfba7" 802 | }, 803 | "source": [ 804 | "X_train, X_test, y_train, y_test = preprocess_inputs(data, engineer_features=True)\n", 805 | "X_train" 806 | ], 807 | "execution_count": 48, 808 | "outputs": [ 809 | { 810 | "output_type": "execute_result", 811 | "data": { 812 | "text/html": [ 813 | "
\n", 814 | "\n", 827 | "\n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | "
GenderAgeEstimatedSalaryHigh IncomeOld AgeYoung Age
247-0.9867541.8925891.521894-0.2294161.763403-0.546536
110-0.9867540.1250380.032132-0.229416-0.567085-0.546536
161.0134230.910616-1.311575-0.2294161.763403-0.546536
661.013423-1.347922-1.486841-0.229416-0.5670851.829707
153-0.986754-0.169554-0.581299-0.229416-0.567085-0.546536
.....................
71-0.986754-1.347922-1.253153-0.229416-0.5670851.829707
106-0.986754-1.151527-1.019465-0.229416-0.5670851.829707
270-0.9867540.5178271.843215-0.229416-0.567085-0.546536
3481.0134230.1250380.207398-0.229416-0.567085-0.546536
102-0.986754-0.5623430.470297-0.229416-0.567085-0.546536
\n", 941 | "

300 rows × 6 columns

\n", 942 | "
" 943 | ], 944 | "text/plain": [ 945 | " Gender Age EstimatedSalary High Income Old Age Young Age\n", 946 | "247 -0.986754 1.892589 1.521894 -0.229416 1.763403 -0.546536\n", 947 | "110 -0.986754 0.125038 0.032132 -0.229416 -0.567085 -0.546536\n", 948 | "16 1.013423 0.910616 -1.311575 -0.229416 1.763403 -0.546536\n", 949 | "66 1.013423 -1.347922 -1.486841 -0.229416 -0.567085 1.829707\n", 950 | "153 -0.986754 -0.169554 -0.581299 -0.229416 -0.567085 -0.546536\n", 951 | ".. ... ... ... ... ... ...\n", 952 | "71 -0.986754 -1.347922 -1.253153 -0.229416 -0.567085 1.829707\n", 953 | "106 -0.986754 -1.151527 -1.019465 -0.229416 -0.567085 1.829707\n", 954 | "270 -0.986754 0.517827 1.843215 -0.229416 -0.567085 -0.546536\n", 955 | "348 1.013423 0.125038 0.207398 -0.229416 -0.567085 -0.546536\n", 956 | "102 -0.986754 -0.562343 0.470297 -0.229416 -0.567085 -0.546536\n", 957 | "\n", 958 | "[300 rows x 6 columns]" 959 | ] 960 | }, 961 | "metadata": { 962 | "tags": [] 963 | }, 964 | "execution_count": 48 965 | } 966 | ] 967 | }, 968 | { 969 | "cell_type": "code", 970 | "metadata": { 971 | "id": "7RfPhpQwDtIw" 972 | }, 973 | "source": [ 974 | "model_1 = LogisticRegression()\n", 975 | "model_1 = model_1.fit(X_train, y_train)" 976 | ], 977 | "execution_count": 49, 978 | "outputs": [] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "metadata": { 983 | "colab": { 984 | "base_uri": "https://localhost:8080/" 985 | }, 986 | "id": "kLT0iQTRD748", 987 | "outputId": "dc48abd8-5ce4-45e3-830c-306bcca8817b" 988 | }, 989 | "source": [ 990 | "model_1.score(X_test, y_test)" 991 | ], 992 | "execution_count": 50, 993 | "outputs": [ 994 | { 995 | "output_type": "execute_result", 996 | "data": { 997 | "text/plain": [ 998 | "0.93" 999 | ] 1000 | }, 1001 | "metadata": { 1002 | "tags": [] 1003 | }, 1004 | "execution_count": 50 1005 | } 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "metadata": { 1011 | "id": "yge-NHQbD-lq" 1012 | }, 1013 | "source": [ 1014 | "" 1015 | ], 1016 | 
"execution_count": null, 1017 | "outputs": [] 1018 | } 1019 | ] 1020 | } --------------------------------------------------------------------------------