├── README.md ├── 50_startups Success Rate Prediction ├── 50_Startups.csv └── Multiple_Linear_Regression.ipynb ├── House_Price_Prediction Detailed Analysis └── Feature_Selection.ipynb └── Fraud Detection Using ML └── fraud_detection.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning Projects 2 | 3 | Welcome to my Machine Learning Projects repository! This repository contains a collection of machine learning projects that cover various topics. 4 | -------------------------------------------------------------------------------- /50_startups Success Rate Prediction/50_Startups.csv: -------------------------------------------------------------------------------- 1 | R&D Spend,Administration,Marketing Spend,State,Profit 2 | 165349.2,136897.8,471784.1,New York,192261.83 3 | 162597.7,151377.59,443898.53,California,191792.06 4 | 153441.51,101145.55,407934.54,Florida,191050.39 5 | 144372.41,118671.85,383199.62,New York,182901.99 6 | 142107.34,91391.77,366168.42,Florida,166187.94 7 | 131876.9,99814.71,362861.36,New York,156991.12 8 | 134615.46,147198.87,127716.82,California,156122.51 9 | 130298.13,145530.06,323876.68,Florida,155752.6 10 | 120542.52,148718.95,311613.29,New York,152211.77 11 | 123334.88,108679.17,304981.62,California,149759.96 12 | 101913.08,110594.11,229160.95,Florida,146121.95 13 | 100671.96,91790.61,249744.55,California,144259.4 14 | 93863.75,127320.38,249839.44,Florida,141585.52 15 | 91992.39,135495.07,252664.93,California,134307.35 16 | 119943.24,156547.42,256512.92,Florida,132602.65 17 | 114523.61,122616.84,261776.23,New York,129917.04 18 | 78013.11,121597.55,264346.06,California,126992.93 19 | 94657.16,145077.58,282574.31,New York,125370.37 20 | 91749.16,114175.79,294919.57,Florida,124266.9 21 | 86419.7,153514.11,0,New York,122776.86 22 | 76253.86,113867.3,298664.47,California,118474.03 23 | 78389.47,153773.43,299737.29,New York,111313.02 24 | 
73994.56,122782.75,303319.26,Florida,110352.25 25 | 67532.53,105751.03,304768.73,Florida,108733.99 26 | 77044.01,99281.34,140574.81,New York,108552.04 27 | 64664.71,139553.16,137962.62,California,107404.34 28 | 75328.87,144135.98,134050.07,Florida,105733.54 29 | 72107.6,127864.55,353183.81,New York,105008.31 30 | 66051.52,182645.56,118148.2,Florida,103282.38 31 | 65605.48,153032.06,107138.38,New York,101004.64 32 | 61994.48,115641.28,91131.24,Florida,99937.59 33 | 61136.38,152701.92,88218.23,New York,97483.56 34 | 63408.86,129219.61,46085.25,California,97427.84 35 | 55493.95,103057.49,214634.81,Florida,96778.92 36 | 46426.07,157693.92,210797.67,California,96712.8 37 | 46014.02,85047.44,205517.64,New York,96479.51 38 | 28663.76,127056.21,201126.82,Florida,90708.19 39 | 44069.95,51283.14,197029.42,California,89949.14 40 | 20229.59,65947.93,185265.1,New York,81229.06 41 | 38558.51,82982.09,174999.3,California,81005.76 42 | 28754.33,118546.05,172795.67,California,78239.91 43 | 27892.92,84710.77,164470.71,Florida,77798.83 44 | 23640.93,96189.63,148001.11,California,71498.49 45 | 15505.73,127382.3,35534.17,New York,69758.98 46 | 22177.74,154806.14,28334.72,California,65200.33 47 | 1000.23,124153.04,1903.93,New York,64926.08 48 | 1315.46,115816.21,297114.46,Florida,49490.75 49 | 0,135426.92,0,California,42559.73 50 | 542.05,51743.15,0,New York,35673.41 51 | 0,116983.8,45173.06,California,14681.4 -------------------------------------------------------------------------------- /50_startups Success Rate Prediction/Multiple_Linear_Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 |
"source": [ 20 | "df = pd.read_csv('50_Startups.csv')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/html": [ 31 | "
\n", 32 | "\n", 45 | "\n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | "
R&D SpendAdministrationMarketing SpendStateProfit
0165349.20136897.80471784.10New York192261.83
1162597.70151377.59443898.53California191792.06
2153441.51101145.55407934.54Florida191050.39
3144372.41118671.85383199.62New York182901.99
4142107.3491391.77366168.42Florida166187.94
\n", 99 | "
" 100 | ], 101 | "text/plain": [ 102 | " R&D Spend Administration Marketing Spend State Profit\n", 103 | "0 165349.20 136897.80 471784.10 New York 192261.83\n", 104 | "1 162597.70 151377.59 443898.53 California 191792.06\n", 105 | "2 153441.51 101145.55 407934.54 Florida 191050.39\n", 106 | "3 144372.41 118671.85 383199.62 New York 182901.99\n", 107 | "4 142107.34 91391.77 366168.42 Florida 166187.94" 108 | ] 109 | }, 110 | "execution_count": 3, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "df.head()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 4, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "(50, 5)" 128 | ] 129 | }, 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "df.shape" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "X = df.iloc[:, :-1] # independent data\n", 146 | "y = df.iloc[:, -1] # dependent data" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "New York 17\n", 158 | "California 17\n", 159 | "Florida 16\n", 160 | "Name: State, dtype: int64" 161 | ] 162 | }, 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "df['State'].value_counts() ## count the startups per state" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 7, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "R&D Spend 0\n", 181 | "Administration 0\n", 182 | "Marketing Spend 0\n", 183 | "State 0\n", 184 | "Profit 0\n", 185 | "dtype: int64" 186 | ] 187 | }, 188 | "execution_count": 7, 189 | "metadata": {}, 190 | "output_type":
"execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "df.isnull().sum() ## checking null values" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 9, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "states = pd.get_dummies(X['State'], drop_first=True) # Converting State column into one-hot-encoding" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 12, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "X = X.drop('State', axis=1) # dropping the State column" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "X = pd.concat([X, states], axis=1)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 15, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/html": [ 232 | "
\n", 233 | "\n", 246 | "\n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | "
R&D SpendAdministrationMarketing SpendFloridaNew York
0165349.20136897.80471784.1001
1162597.70151377.59443898.5300
2153441.51101145.55407934.5410
3144372.41118671.85383199.6201
4142107.3491391.77366168.4210
\n", 300 | "
" 301 | ], 302 | "text/plain": [ 303 | " R&D Spend Administration Marketing Spend Florida New York\n", 304 | "0 165349.20 136897.80 471784.10 0 1\n", 305 | "1 162597.70 151377.59 443898.53 0 0\n", 306 | "2 153441.51 101145.55 407934.54 1 0\n", 307 | "3 144372.41 118671.85 383199.62 0 1\n", 308 | "4 142107.34 91391.77 366168.42 1 0" 309 | ] 310 | }, 311 | "execution_count": 15, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "X.head()" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 16, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "from sklearn.model_selection import train_test_split" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 17, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 18, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "from sklearn.linear_model import LinearRegression" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 19, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "regressor = LinearRegression()" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": 20, 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "LinearRegression()" 365 | ] 366 | }, 367 | "execution_count": 20, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "regressor.fit(X_train, y_train)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 21, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "y_pred = regressor.predict(X_test)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 24, 388 | "metadata": {}, 389 | "outputs": [ 
390 | { 391 | "name": "stdout", 392 | "output_type": "stream", 393 | "text": [ 394 | "[103015.20159796 132582.27760816 132447.73845174 71976.09851258\n", 395 | " 178537.48221055]\n", 396 | "28 103282.38\n", 397 | "11 144259.40\n", 398 | "10 146121.95\n", 399 | "41 77798.83\n", 400 | "2 191050.39\n", 401 | "27 105008.31\n", 402 | "38 81229.06\n", 403 | "31 97483.56\n", 404 | "22 110352.25\n", 405 | "4 166187.94\n", 406 | "Name: Profit, dtype: float64\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "print(y_pred[:5])\n", 412 | "print(y_test[:])" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 28, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "from sklearn.metrics import r2_score" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 29, 427 | "metadata": {}, 428 | "outputs": [], 429 | "source": [ 430 | "score = r2_score(y_test, y_pred)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 30, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "0.9347068473282423\n" 443 | ] 444 | } 445 | ], 446 | "source": [ 447 | "print(score)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [] 456 | } 457 | ], 458 | "metadata": { 459 | "kernelspec": { 460 | "display_name": "Python 3", 461 | "language": "python", 462 | "name": "python3" 463 | }, 464 | "language_info": { 465 | "codemirror_mode": { 466 | "name": "ipython", 467 | "version": 3 468 | }, 469 | "file_extension": ".py", 470 | "mimetype": "text/x-python", 471 | "name": "python", 472 | "nbconvert_exporter": "python", 473 | "pygments_lexer": "ipython3", 474 | "version": "3.8.5" 475 | } 476 | }, 477 | "nbformat": 4, 478 | "nbformat_minor": 4 479 | } 480 | -------------------------------------------------------------------------------- /House_Price_Prediction 
Detailed Analysis/Feature_Selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%matplotlib inline\n", 14 | "\n", 15 | "# for feature selection\n", 16 | "from sklearn.linear_model import Lasso\n", 17 | "from sklearn.feature_selection import SelectFromModel\n", 18 | "\n", 19 | "# to visualize all the columns of the dataframe\n", 20 | "pd.set_option('display.max_columns', None)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "dataset = pd.read_csv('train.csv', index_col=0)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 
210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " 
\n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | 
" \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | "
Unnamed: 0IdSalePriceMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilitiesLotConfigLandSlopeNeighborhoodCondition1Condition2BldgTypeHouseStyleOverallQualOverallCondYearBuiltYearRemodAddRoofStyleRoofMatlExterior1stExterior2ndMasVnrTypeMasVnrAreaExterQualExterCondFoundationBsmtQualBsmtCondBsmtExposureBsmtFinType1BsmtFinSF1BsmtFinType2BsmtFinSF2BsmtUnfSFTotalBsmtSFHeatingHeatingQCCentralAirElectrical1stFlrSF2ndFlrSFLowQualFinSFGrLivAreaBsmtFullBathBsmtHalfBathFullBathHalfBathBedroomAbvGrKitchenAbvGrKitchenQualTotRmsAbvGrdFunctionalFireplacesFireplaceQuGarageTypeGarageYrBltGarageFinishGarageCarsGarageAreaGarageQualGarageCondPavedDriveWoodDeckSFOpenPorchSFEnclosedPorch3SsnPorchScreenPorchPoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionLotFrontagenanMasVnrAreananGarageYrBltnan
00112.2476940.2352940.50.4182080.3663440.00.51.01.00.00.750.00.1818180.40.00.00.40.6666670.5000.0367650.0983610.00.00.80.80.250.122500.3333331.00.500.51.001.000.3333330.1250891.00.00.0642120.1400980.00.01.01.00.3561550.4135590.00.5777120.3333330.00.6666670.50.3750.3333330.6666670.5000001.00.0000000.60.00.0467290.6666670.500.3864601.01.01.00.0000000.1115170.0000000.00.00.00.00.50.00.00.0909090.501.00.50.00.00.0
11212.1090110.0000000.50.4950640.3913170.00.51.01.00.00.500.00.7272730.20.00.00.20.5555560.8750.2279410.5245900.00.00.40.40.500.000001.0000001.00.250.51.000.250.0000000.1732811.00.00.1215750.2065470.00.01.01.00.5030560.0000000.00.4702450.0000000.50.6666670.00.3750.3333331.0000000.3333331.00.3333331.00.00.2897200.6666670.500.3244011.01.01.00.3477250.0000000.0000000.00.00.00.00.50.00.00.3636360.251.00.50.00.00.0
22312.3171670.2352940.50.4349090.4223590.00.50.01.00.00.750.00.1818180.40.00.00.40.6666670.5000.0514710.1147540.00.00.80.80.250.101250.3333331.00.500.51.000.750.3333330.0861091.00.00.1857880.1505730.00.01.01.00.3834410.4193700.00.5930950.3333330.00.6666670.50.3750.3333330.6666670.3333331.00.3333331.00.00.0654210.6666670.500.4287731.01.01.00.0000000.0767820.0000000.00.00.00.00.50.00.00.7272730.501.00.50.00.00.0
33411.8493980.2941180.50.3885810.3902950.00.50.01.00.00.000.00.2272730.40.00.00.40.6666670.5000.6691180.6065570.00.00.91.00.500.000001.0000001.00.001.00.251.000.0000000.0382711.00.00.2311640.1237320.00.51.01.00.3999410.3661020.00.5791570.3333330.00.3333330.00.3750.3333330.6666670.4166671.00.3333330.40.60.0747661.0000000.750.4527501.01.01.00.0000000.0639850.4927540.00.00.00.00.50.00.00.0909090.001.00.00.00.00.0
44512.4292160.2352940.50.5131230.4687610.00.50.01.00.00.500.00.5909090.40.00.00.40.7777780.5000.0588240.1475410.00.00.80.80.250.218750.3333331.00.500.51.000.000.3333330.1160521.00.00.2097600.1873980.00.01.01.00.4662370.5099270.00.6665230.3333330.00.6666670.50.5000.3333330.6666670.5833331.00.3333331.00.00.0747660.6666670.750.5895631.01.01.00.2240370.1535650.0000000.00.00.00.00.50.00.01.0000000.501.00.50.00.00.0
\n", 588 | "
" 589 | ], 590 | "text/plain": [ 591 | " Unnamed: 0 Id SalePrice MSSubClass MSZoning LotFrontage LotArea \\\n", 592 | "0 0 1 12.247694 0.235294 0.5 0.418208 0.366344 \n", 593 | "1 1 2 12.109011 0.000000 0.5 0.495064 0.391317 \n", 594 | "2 2 3 12.317167 0.235294 0.5 0.434909 0.422359 \n", 595 | "3 3 4 11.849398 0.294118 0.5 0.388581 0.390295 \n", 596 | "4 4 5 12.429216 0.235294 0.5 0.513123 0.468761 \n", 597 | "\n", 598 | " Street Alley LotShape LandContour Utilities LotConfig LandSlope \\\n", 599 | "0 0.0 0.5 1.0 1.0 0.0 0.75 0.0 \n", 600 | "1 0.0 0.5 1.0 1.0 0.0 0.50 0.0 \n", 601 | "2 0.0 0.5 0.0 1.0 0.0 0.75 0.0 \n", 602 | "3 0.0 0.5 0.0 1.0 0.0 0.00 0.0 \n", 603 | "4 0.0 0.5 0.0 1.0 0.0 0.50 0.0 \n", 604 | "\n", 605 | " Neighborhood Condition1 Condition2 BldgType HouseStyle OverallQual \\\n", 606 | "0 0.181818 0.4 0.0 0.0 0.4 0.666667 \n", 607 | "1 0.727273 0.2 0.0 0.0 0.2 0.555556 \n", 608 | "2 0.181818 0.4 0.0 0.0 0.4 0.666667 \n", 609 | "3 0.227273 0.4 0.0 0.0 0.4 0.666667 \n", 610 | "4 0.590909 0.4 0.0 0.0 0.4 0.777778 \n", 611 | "\n", 612 | " OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl Exterior1st \\\n", 613 | "0 0.500 0.036765 0.098361 0.0 0.0 0.8 \n", 614 | "1 0.875 0.227941 0.524590 0.0 0.0 0.4 \n", 615 | "2 0.500 0.051471 0.114754 0.0 0.0 0.8 \n", 616 | "3 0.500 0.669118 0.606557 0.0 0.0 0.9 \n", 617 | "4 0.500 0.058824 0.147541 0.0 0.0 0.8 \n", 618 | "\n", 619 | " Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation \\\n", 620 | "0 0.8 0.25 0.12250 0.333333 1.0 0.50 \n", 621 | "1 0.4 0.50 0.00000 1.000000 1.0 0.25 \n", 622 | "2 0.8 0.25 0.10125 0.333333 1.0 0.50 \n", 623 | "3 1.0 0.50 0.00000 1.000000 1.0 0.00 \n", 624 | "4 0.8 0.25 0.21875 0.333333 1.0 0.50 \n", 625 | "\n", 626 | " BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2 \\\n", 627 | "0 0.5 1.00 1.00 0.333333 0.125089 1.0 \n", 628 | "1 0.5 1.00 0.25 0.000000 0.173281 1.0 \n", 629 | "2 0.5 1.00 0.75 0.333333 0.086109 1.0 \n", 630 | "3 1.0 0.25 1.00 
0.000000 0.038271 1.0 \n", 631 | "4 0.5 1.00 0.00 0.333333 0.116052 1.0 \n", 632 | "\n", 633 | " BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir \\\n", 634 | "0 0.0 0.064212 0.140098 0.0 0.0 1.0 \n", 635 | "1 0.0 0.121575 0.206547 0.0 0.0 1.0 \n", 636 | "2 0.0 0.185788 0.150573 0.0 0.0 1.0 \n", 637 | "3 0.0 0.231164 0.123732 0.0 0.5 1.0 \n", 638 | "4 0.0 0.209760 0.187398 0.0 0.0 1.0 \n", 639 | "\n", 640 | " Electrical 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath \\\n", 641 | "0 1.0 0.356155 0.413559 0.0 0.577712 0.333333 \n", 642 | "1 1.0 0.503056 0.000000 0.0 0.470245 0.000000 \n", 643 | "2 1.0 0.383441 0.419370 0.0 0.593095 0.333333 \n", 644 | "3 1.0 0.399941 0.366102 0.0 0.579157 0.333333 \n", 645 | "4 1.0 0.466237 0.509927 0.0 0.666523 0.333333 \n", 646 | "\n", 647 | " BsmtHalfBath FullBath HalfBath BedroomAbvGr KitchenAbvGr KitchenQual \\\n", 648 | "0 0.0 0.666667 0.5 0.375 0.333333 0.666667 \n", 649 | "1 0.5 0.666667 0.0 0.375 0.333333 1.000000 \n", 650 | "2 0.0 0.666667 0.5 0.375 0.333333 0.666667 \n", 651 | "3 0.0 0.333333 0.0 0.375 0.333333 0.666667 \n", 652 | "4 0.0 0.666667 0.5 0.500 0.333333 0.666667 \n", 653 | "\n", 654 | " TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt \\\n", 655 | "0 0.500000 1.0 0.000000 0.6 0.0 0.046729 \n", 656 | "1 0.333333 1.0 0.333333 1.0 0.0 0.289720 \n", 657 | "2 0.333333 1.0 0.333333 1.0 0.0 0.065421 \n", 658 | "3 0.416667 1.0 0.333333 0.4 0.6 0.074766 \n", 659 | "4 0.583333 1.0 0.333333 1.0 0.0 0.074766 \n", 660 | "\n", 661 | " GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive \\\n", 662 | "0 0.666667 0.50 0.386460 1.0 1.0 1.0 \n", 663 | "1 0.666667 0.50 0.324401 1.0 1.0 1.0 \n", 664 | "2 0.666667 0.50 0.428773 1.0 1.0 1.0 \n", 665 | "3 1.000000 0.75 0.452750 1.0 1.0 1.0 \n", 666 | "4 0.666667 0.75 0.589563 1.0 1.0 1.0 \n", 667 | "\n", 668 | " WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea \\\n", 669 | "0 0.000000 0.111517 0.000000 0.0 0.0 
0.0 \n", 670 | "1 0.347725 0.000000 0.000000 0.0 0.0 0.0 \n", 671 | "2 0.000000 0.076782 0.000000 0.0 0.0 0.0 \n", 672 | "3 0.000000 0.063985 0.492754 0.0 0.0 0.0 \n", 673 | "4 0.224037 0.153565 0.000000 0.0 0.0 0.0 \n", 674 | "\n", 675 | " PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType \\\n", 676 | "0 0.0 0.5 0.0 0.0 0.090909 0.50 1.0 \n", 677 | "1 0.0 0.5 0.0 0.0 0.363636 0.25 1.0 \n", 678 | "2 0.0 0.5 0.0 0.0 0.727273 0.50 1.0 \n", 679 | "3 0.0 0.5 0.0 0.0 0.090909 0.00 1.0 \n", 680 | "4 0.0 0.5 0.0 0.0 1.000000 0.50 1.0 \n", 681 | "\n", 682 | " SaleCondition LotFrontagenan MasVnrAreanan GarageYrBltnan \n", 683 | "0 0.5 0.0 0.0 0.0 \n", 684 | "1 0.5 0.0 0.0 0.0 \n", 685 | "2 0.5 0.0 0.0 0.0 \n", 686 | "3 0.0 0.0 0.0 0.0 \n", 687 | "4 0.5 0.0 0.0 0.0 " 688 | ] 689 | }, 690 | "execution_count": 3, 691 | "metadata": {}, 692 | "output_type": "execute_result" 693 | } 694 | ], 695 | "source": [ 696 | "dataset.head()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 4, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "# capture the dependent dataset\n", 706 | "y_train = dataset[['SalePrice']]" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 5, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [ 715 | "# drop the dependent feature from dataset\n", 716 | "X_train = dataset.drop(['Id', 'SalePrice'], axis=1)" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 6, 722 | "metadata": {}, 723 | "outputs": [ 724 | { 725 | "data": { 726 | "text/plain": [ 727 | "SelectFromModel(estimator=Lasso(alpha=0.05, random_state=0))" 728 | ] 729 | }, 730 | "execution_count": 6, 731 | "metadata": {}, 732 | "output_type": "execute_result" 733 | } 734 | ], 735 | "source": [ 736 | "# Apply Feature Selection \n", 737 | "# First, I specify the Lasso Regression model, and I \n", 738 | "# Selected a suitable alpha (equivalent of penalty).\n", 739 | "# The bigger the alpha the less 
features that will be selected\n", 740 | "\n", 741 | "# Then I use selectfromModel object from sklearn which\n", 742 | "# will select the feature which co-officients are non zero\n", 743 | "\n", 744 | "feature_sel_model = SelectFromModel(Lasso(alpha=0.05, random_state=0))\n", 745 | "feature_sel_model.fit(X_train, y_train)" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 7, 751 | "metadata": {}, 752 | "outputs": [ 753 | { 754 | "data": { 755 | "text/plain": [ 756 | "array([ True, False, False, False, False, False, False, False, False,\n", 757 | " False, False, False, False, False, False, False, False, False,\n", 758 | " False, False, True, False, False, False, False, False, False,\n", 759 | " True, False, False, True, False, False, False, False, False,\n", 760 | " False, False, False, False, False, False, False, False, False,\n", 761 | " False, False, False, False, False, False, False, False, False,\n", 762 | " False, False, False, False, False, False, False, False, False,\n", 763 | " False, False, False, False, False, False, False, False, False,\n", 764 | " False, False, False, False, False, False, False, False, False,\n", 765 | " False, False])" 766 | ] 767 | }, 768 | "execution_count": 7, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "feature_sel_model.get_support()" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": 14, 780 | "metadata": {}, 781 | "outputs": [ 782 | { 783 | "name": "stdout", 784 | "output_type": "stream", 785 | "text": [ 786 | "Total features: 83\n", 787 | "selected featrues: 4\n", 788 | "features with cofficients shrank to zero: 79\n" 789 | ] 790 | } 791 | ], 792 | "source": [ 793 | "# Let's print the number of total and selected features\n", 794 | "# this is how we can make a list of the selected features\n", 795 | "\n", 796 | "selected_feat = X_train.columns[(feature_sel_model.get_support())]\n", 797 | "\n", 798 | "# let's print some 
stats\n", 799 | "print('Total features: {}'.format(X_train.shape[1]))\n", 800 | "print(\"selected featrues: {}\".format(len(selected_feat)))\n", 801 | "print('features with cofficients shrank to zero: {}'.format(\n", 802 | " np.sum(feature_sel_model.estimator_.coef_ == 0)))" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 16, 808 | "metadata": {}, 809 | "outputs": [], 810 | "source": [ 811 | "X_train = X_train[selected_feat]" 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "execution_count": 17, 817 | "metadata": {}, 818 | "outputs": [ 819 | { 820 | "data": { 821 | "text/html": [ 822 | "
\n", 823 | "\n", 836 | "\n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | "
Unnamed: 0YearRemodAddExterQualBsmtQual
000.0983610.3333330.5
110.5245901.0000000.5
220.1147540.3333330.5
330.6065571.0000001.0
440.1475410.3333330.5
\n", 884 | "
" 885 | ], 886 | "text/plain": [ 887 | " Unnamed: 0 YearRemodAdd ExterQual BsmtQual\n", 888 | "0 0 0.098361 0.333333 0.5\n", 889 | "1 1 0.524590 1.000000 0.5\n", 890 | "2 2 0.114754 0.333333 0.5\n", 891 | "3 3 0.606557 1.000000 1.0\n", 892 | "4 4 0.147541 0.333333 0.5" 893 | ] 894 | }, 895 | "execution_count": 17, 896 | "metadata": {}, 897 | "output_type": "execute_result" 898 | } 899 | ], 900 | "source": [ 901 | "X_train.head()" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": null, 907 | "metadata": {}, 908 | "outputs": [], 909 | "source": [] 910 | } 911 | ], 912 | "metadata": { 913 | "kernelspec": { 914 | "display_name": "Python 3", 915 | "language": "python", 916 | "name": "python3" 917 | }, 918 | "language_info": { 919 | "codemirror_mode": { 920 | "name": "ipython", 921 | "version": 3 922 | }, 923 | "file_extension": ".py", 924 | "mimetype": "text/x-python", 925 | "name": "python", 926 | "nbconvert_exporter": "python", 927 | "pygments_lexer": "ipython3", 928 | "version": "3.8.5" 929 | } 930 | }, 931 | "nbformat": 4, 932 | "nbformat_minor": 4 933 | } 934 | -------------------------------------------------------------------------------- /Fraud Detection Using ML/fraud_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np \n", 10 | "import pandas as pd \n", 11 | "import seaborn as sns \n", 12 | "import matplotlib.pyplot as plt \n", 13 | "%matplotlib inline\n", 14 | "from sklearn.model_selection import train_test_split \n", 15 | "from sklearn.preprocessing import StandardScaler\n", 16 | "from sklearn.metrics import classification_report, accuracy_score, confusion_matrix\n", 17 | "from sklearn.linear_model import LogisticRegression \n", 18 | "\n", 19 | "sns.set_style('darkgrid')" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | 
"execution_count": 35, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "## Dataset \n", 29 | "df = pd.read_csv('payment_fraud.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 36, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/html": [ 40 | "
\n", 41 | "\n", 54 | "\n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | "
accountAgeDaysnumItemslocalTimepaymentMethodpaymentMethodAgeDayslabel
02914.745402paypal28.2048610
172514.742303storecredit0.0000000
284514.921318creditcard0.0000000
350314.886641creditcard0.0000000
4200015.040929creditcard0.0000000
\n", 114 | "
" 115 | ], 116 | "text/plain": [ 117 | " accountAgeDays numItems localTime paymentMethod paymentMethodAgeDays \\\n", 118 | "0 29 1 4.745402 paypal 28.204861 \n", 119 | "1 725 1 4.742303 storecredit 0.000000 \n", 120 | "2 845 1 4.921318 creditcard 0.000000 \n", 121 | "3 503 1 4.886641 creditcard 0.000000 \n", 122 | "4 2000 1 5.040929 creditcard 0.000000 \n", 123 | "\n", 124 | " label \n", 125 | "0 0 \n", 126 | "1 0 \n", 127 | "2 0 \n", 128 | "3 0 \n", 129 | "4 0 " 130 | ] 131 | }, 132 | "execution_count": 36, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "df.head()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 37, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "accountAgeDays 0\n", 150 | "numItems 0\n", 151 | "localTime 0\n", 152 | "paymentMethod 0\n", 153 | "paymentMethodAgeDays 0\n", 154 | "label 0\n", 155 | "dtype: int64" 156 | ] 157 | }, 158 | "execution_count": 37, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "df.isnull().sum() ## checking the null values " 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 39, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "0 38661\n", 176 | "1 560\n", 177 | "Name: label, dtype: int64" 178 | ] 179 | }, 180 | "execution_count": 39, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "df.label.value_counts() ## count the number of 0's and 1's" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 40, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "## converting paymentMethod column into label encoding\n", 196 | "paymthd_label = {v:k for k, v in enumerate(df.paymentMethod.unique())}\n", 197 | "\n", 198 | "df.paymentMethod = df.paymentMethod.map(paymthd_label)" 199 | ] 200 | }, 201 | {
202 | "cell_type": "code", 203 | "execution_count": 41, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/html": [ 209 | "
\n", 210 | "\n", 223 | "\n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | "
accountAgeDaysnumItemslocalTimepaymentMethodpaymentMethodAgeDayslabel
02914.745402028.2048610
172514.74230310.0000000
284514.92131820.0000000
350314.88664120.0000000
4200015.04092920.0000000
\n", 283 | "
" 284 | ], 285 | "text/plain": [ 286 | " accountAgeDays numItems localTime paymentMethod paymentMethodAgeDays \\\n", 287 | "0 29 1 4.745402 0 28.204861 \n", 288 | "1 725 1 4.742303 1 0.000000 \n", 289 | "2 845 1 4.921318 2 0.000000 \n", 290 | "3 503 1 4.886641 2 0.000000 \n", 291 | "4 2000 1 5.040929 2 0.000000 \n", 292 | "\n", 293 | " label \n", 294 | "0 0 \n", 295 | "1 0 \n", 296 | "2 0 \n", 297 | "3 0 \n", 298 | "4 0 " 299 | ] 300 | }, 301 | "execution_count": 41, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df.head()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 42, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "image/png": "", 318 | "text/plain": [ 319 | "
" 320 | ] 321 | }, 322 | "metadata": {}, 323 | "output_type": "display_data" 324 | } 325 | ], 326 | "source": [ 327 | "## corr(): it gives the correlation between the features\n", 328 | "plt.figure(figsize=(10, 10))\n", 329 | "sns.heatmap(df.corr(), annot=True);" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 43, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/html": [ 340 | "
\n", 341 | "\n", 354 | "\n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | "
accountAgeDaysnumItemslocalTimepaymentMethodpaymentMethodAgeDayslabel
count39221.00000039221.00000039221.00000039221.00000039221.00000039221.000000
mean857.5639841.0847514.7482321.476811122.6413260.014278
std804.7882120.5668990.3893600.850805283.5691770.118636
min1.0000001.0000000.4212140.0000000.0000000.000000
25%72.0000001.0000004.7423031.0000000.0000000.000000
50%603.0000001.0000004.8866412.0000000.0125000.000000
75%1804.0000001.0000004.9620552.00000087.5104170.000000
max2000.00000029.0000005.0409292.0000001999.5805561.000000
\n", 441 | "
" 442 | ], 443 | "text/plain": [ 444 | " accountAgeDays numItems localTime paymentMethod \\\n", 445 | "count 39221.000000 39221.000000 39221.000000 39221.000000 \n", 446 | "mean 857.563984 1.084751 4.748232 1.476811 \n", 447 | "std 804.788212 0.566899 0.389360 0.850805 \n", 448 | "min 1.000000 1.000000 0.421214 0.000000 \n", 449 | "25% 72.000000 1.000000 4.742303 1.000000 \n", 450 | "50% 603.000000 1.000000 4.886641 2.000000 \n", 451 | "75% 1804.000000 1.000000 4.962055 2.000000 \n", 452 | "max 2000.000000 29.000000 5.040929 2.000000 \n", 453 | "\n", 454 | " paymentMethodAgeDays label \n", 455 | "count 39221.000000 39221.000000 \n", 456 | "mean 122.641326 0.014278 \n", 457 | "std 283.569177 0.118636 \n", 458 | "min 0.000000 0.000000 \n", 459 | "25% 0.000000 0.000000 \n", 460 | "50% 0.012500 0.000000 \n", 461 | "75% 87.510417 0.000000 \n", 462 | "max 1999.580556 1.000000 " 463 | ] 464 | }, 465 | "execution_count": 43, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "df.describe()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 44, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "## independent and dependent features\n", 481 | "X = df.iloc[:, :-1].values\n", 482 | "y = df.iloc[:, -1].values" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 45, 488 | "metadata": {}, 489 | "outputs": [], 490 | "source": [ 491 | "## scaling \n", 492 | "\n", 493 | "sc = StandardScaler()\n", 494 | "X = sc.fit_transform(X)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 46, 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [ 503 | "## train test split \n", 504 | "\n", 505 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 48, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | 
"output_type": "stream", 516 | "text": [ 517 | "X_train shape: (29415, 5)\n", 518 | "X_test shape: (9806, 5)\n", 519 | "y_train shape: (29415,)\n", 520 | "y_test shape: (9806,)\n" 521 | ] 522 | } 523 | ], 524 | "source": [ 525 | "print(\"X_train shape: \", X_train.shape)\n", 526 | "print(\"X_test shape: \", X_test.shape)\n", 527 | "print(\"y_train shape: \", y_train.shape)\n", 528 | "print(\"y_test shape: \", y_test.shape)" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 49, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "data": { 538 | "text/plain": [ 539 | "LogisticRegression()" 540 | ] 541 | }, 542 | "execution_count": 49, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "## logisticRegression Model\n", 549 | "lg = LogisticRegression()\n", 550 | "\n", 551 | "## training\n", 552 | "lg.fit(X_train, y_train)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 50, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "## prediction \n", 562 | "pred = lg.predict(X_test)" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": {}, 576 | "outputs": [], 577 | "source": [] 578 | } 579 | ], 580 | "metadata": { 581 | "kernelspec": { 582 | "display_name": "Python 3", 583 | "language": "python", 584 | "name": "python3" 585 | }, 586 | "language_info": { 587 | "codemirror_mode": { 588 | "name": "ipython", 589 | "version": 3 590 | }, 591 | "file_extension": ".py", 592 | "mimetype": "text/x-python", 593 | "name": "python", 594 | "nbconvert_exporter": "python", 595 | "pygments_lexer": "ipython3", 596 | "version": "3.8.5" 597 | } 598 | }, 599 | "nbformat": 4, 600 | "nbformat_minor": 4 601 | } 602 | --------------------------------------------------------------------------------