└── Credit_Card_Fraud_Detection.ipynb
/Credit_Card_Fraud_Detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Credit Card Fraud Detection.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "authorship_tag": "ABX9TyOvXvWa08x2LlWfoGmI6x8T",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
28 | "
"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "MhC-OrS7Cn48"
35 | },
36 | "source": [
37 | "Importing the Dependencies"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "metadata": {
43 | "id": "FK6vtiaB8T51"
44 | },
45 | "source": [
46 | "import numpy as np\n",
47 | "import pandas as pd\n",
48 | "from sklearn.model_selection import train_test_split\n",
49 | "from sklearn.linear_model import LogisticRegression\n",
50 | "from sklearn.metrics import accuracy_score"
51 | ],
52 | "execution_count": 1,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "code",
57 | "metadata": {
58 | "id": "L44gD2PlCptM"
59 | },
60 | "source": [
61 | "#loading the dataset to a Pandas DataFrame\n",
62 | "credit_card_data = pd.read_csv('/content/creditcard.csv')"
63 | ],
64 | "execution_count": 2,
65 | "outputs": []
66 | },
67 | {
68 | "cell_type": "code",
69 | "metadata": {
70 | "colab": {
71 | "base_uri": "https://localhost:8080/",
72 | "height": 223
73 | },
74 | "id": "hRrcaIP6Cv_0",
75 | "outputId": "5265fc7b-abf6-4c24-a66e-a896acb997a6"
76 | },
77 | "source": [
78 | "credit_card_data.head()"
79 | ],
80 | "execution_count": 3,
81 | "outputs": [
82 | {
83 | "output_type": "execute_result",
84 | "data": {
85 | "text/html": [
86 | "
\n",
87 | "\n",
100 | "
\n",
101 | " \n",
102 | " \n",
103 | " | \n",
104 | " Time | \n",
105 | " V1 | \n",
106 | " V2 | \n",
107 | " V3 | \n",
108 | " V4 | \n",
109 | " V5 | \n",
110 | " V6 | \n",
111 | " V7 | \n",
112 | " V8 | \n",
113 | " V9 | \n",
114 | " V10 | \n",
115 | " V11 | \n",
116 | " V12 | \n",
117 | " V13 | \n",
118 | " V14 | \n",
119 | " V15 | \n",
120 | " V16 | \n",
121 | " V17 | \n",
122 | " V18 | \n",
123 | " V19 | \n",
124 | " V20 | \n",
125 | " V21 | \n",
126 | " V22 | \n",
127 | " V23 | \n",
128 | " V24 | \n",
129 | " V25 | \n",
130 | " V26 | \n",
131 | " V27 | \n",
132 | " V28 | \n",
133 | " Amount | \n",
134 | " Class | \n",
135 | "
\n",
136 | " \n",
137 | " \n",
138 | " \n",
139 | " 0 | \n",
140 | " 0.0 | \n",
141 | " -1.359807 | \n",
142 | " -0.072781 | \n",
143 | " 2.536347 | \n",
144 | " 1.378155 | \n",
145 | " -0.338321 | \n",
146 | " 0.462388 | \n",
147 | " 0.239599 | \n",
148 | " 0.098698 | \n",
149 | " 0.363787 | \n",
150 | " 0.090794 | \n",
151 | " -0.551600 | \n",
152 | " -0.617801 | \n",
153 | " -0.991390 | \n",
154 | " -0.311169 | \n",
155 | " 1.468177 | \n",
156 | " -0.470401 | \n",
157 | " 0.207971 | \n",
158 | " 0.025791 | \n",
159 | " 0.403993 | \n",
160 | " 0.251412 | \n",
161 | " -0.018307 | \n",
162 | " 0.277838 | \n",
163 | " -0.110474 | \n",
164 | " 0.066928 | \n",
165 | " 0.128539 | \n",
166 | " -0.189115 | \n",
167 | " 0.133558 | \n",
168 | " -0.021053 | \n",
169 | " 149.62 | \n",
170 | " 0 | \n",
171 | "
\n",
172 | " \n",
173 | " 1 | \n",
174 | " 0.0 | \n",
175 | " 1.191857 | \n",
176 | " 0.266151 | \n",
177 | " 0.166480 | \n",
178 | " 0.448154 | \n",
179 | " 0.060018 | \n",
180 | " -0.082361 | \n",
181 | " -0.078803 | \n",
182 | " 0.085102 | \n",
183 | " -0.255425 | \n",
184 | " -0.166974 | \n",
185 | " 1.612727 | \n",
186 | " 1.065235 | \n",
187 | " 0.489095 | \n",
188 | " -0.143772 | \n",
189 | " 0.635558 | \n",
190 | " 0.463917 | \n",
191 | " -0.114805 | \n",
192 | " -0.183361 | \n",
193 | " -0.145783 | \n",
194 | " -0.069083 | \n",
195 | " -0.225775 | \n",
196 | " -0.638672 | \n",
197 | " 0.101288 | \n",
198 | " -0.339846 | \n",
199 | " 0.167170 | \n",
200 | " 0.125895 | \n",
201 | " -0.008983 | \n",
202 | " 0.014724 | \n",
203 | " 2.69 | \n",
204 | " 0 | \n",
205 | "
\n",
206 | " \n",
207 | " 2 | \n",
208 | " 1.0 | \n",
209 | " -1.358354 | \n",
210 | " -1.340163 | \n",
211 | " 1.773209 | \n",
212 | " 0.379780 | \n",
213 | " -0.503198 | \n",
214 | " 1.800499 | \n",
215 | " 0.791461 | \n",
216 | " 0.247676 | \n",
217 | " -1.514654 | \n",
218 | " 0.207643 | \n",
219 | " 0.624501 | \n",
220 | " 0.066084 | \n",
221 | " 0.717293 | \n",
222 | " -0.165946 | \n",
223 | " 2.345865 | \n",
224 | " -2.890083 | \n",
225 | " 1.109969 | \n",
226 | " -0.121359 | \n",
227 | " -2.261857 | \n",
228 | " 0.524980 | \n",
229 | " 0.247998 | \n",
230 | " 0.771679 | \n",
231 | " 0.909412 | \n",
232 | " -0.689281 | \n",
233 | " -0.327642 | \n",
234 | " -0.139097 | \n",
235 | " -0.055353 | \n",
236 | " -0.059752 | \n",
237 | " 378.66 | \n",
238 | " 0 | \n",
239 | "
\n",
240 | " \n",
241 | " 3 | \n",
242 | " 1.0 | \n",
243 | " -0.966272 | \n",
244 | " -0.185226 | \n",
245 | " 1.792993 | \n",
246 | " -0.863291 | \n",
247 | " -0.010309 | \n",
248 | " 1.247203 | \n",
249 | " 0.237609 | \n",
250 | " 0.377436 | \n",
251 | " -1.387024 | \n",
252 | " -0.054952 | \n",
253 | " -0.226487 | \n",
254 | " 0.178228 | \n",
255 | " 0.507757 | \n",
256 | " -0.287924 | \n",
257 | " -0.631418 | \n",
258 | " -1.059647 | \n",
259 | " -0.684093 | \n",
260 | " 1.965775 | \n",
261 | " -1.232622 | \n",
262 | " -0.208038 | \n",
263 | " -0.108300 | \n",
264 | " 0.005274 | \n",
265 | " -0.190321 | \n",
266 | " -1.175575 | \n",
267 | " 0.647376 | \n",
268 | " -0.221929 | \n",
269 | " 0.062723 | \n",
270 | " 0.061458 | \n",
271 | " 123.50 | \n",
272 | " 0 | \n",
273 | "
\n",
274 | " \n",
275 | " 4 | \n",
276 | " 2.0 | \n",
277 | " -1.158233 | \n",
278 | " 0.877737 | \n",
279 | " 1.548718 | \n",
280 | " 0.403034 | \n",
281 | " -0.407193 | \n",
282 | " 0.095921 | \n",
283 | " 0.592941 | \n",
284 | " -0.270533 | \n",
285 | " 0.817739 | \n",
286 | " 0.753074 | \n",
287 | " -0.822843 | \n",
288 | " 0.538196 | \n",
289 | " 1.345852 | \n",
290 | " -1.119670 | \n",
291 | " 0.175121 | \n",
292 | " -0.451449 | \n",
293 | " -0.237033 | \n",
294 | " -0.038195 | \n",
295 | " 0.803487 | \n",
296 | " 0.408542 | \n",
297 | " -0.009431 | \n",
298 | " 0.798278 | \n",
299 | " -0.137458 | \n",
300 | " 0.141267 | \n",
301 | " -0.206010 | \n",
302 | " 0.502292 | \n",
303 | " 0.219422 | \n",
304 | " 0.215153 | \n",
305 | " 69.99 | \n",
306 | " 0 | \n",
307 | "
\n",
308 | " \n",
309 | "
\n",
310 | "
"
311 | ],
312 | "text/plain": [
313 | " Time V1 V2 V3 ... V27 V28 Amount Class\n",
314 | "0 0.0 -1.359807 -0.072781 2.536347 ... 0.133558 -0.021053 149.62 0\n",
315 | "1 0.0 1.191857 0.266151 0.166480 ... -0.008983 0.014724 2.69 0\n",
316 | "2 1.0 -1.358354 -1.340163 1.773209 ... -0.055353 -0.059752 378.66 0\n",
317 | "3 1.0 -0.966272 -0.185226 1.792993 ... 0.062723 0.061458 123.50 0\n",
318 | "4 2.0 -1.158233 0.877737 1.548718 ... 0.219422 0.215153 69.99 0\n",
319 | "\n",
320 | "[5 rows x 31 columns]"
321 | ]
322 | },
323 | "metadata": {
324 | "tags": []
325 | },
326 | "execution_count": 3
327 | }
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "metadata": {
333 | "colab": {
334 | "base_uri": "https://localhost:8080/",
335 | "height": 223
336 | },
337 | "id": "0MaBF9kuCyk7",
338 | "outputId": "4bd74368-a078-4f14-db36-57d3e4d3a524"
339 | },
340 | "source": [
341 | "credit_card_data.tail()"
342 | ],
343 | "execution_count": 4,
344 | "outputs": [
345 | {
346 | "output_type": "execute_result",
347 | "data": {
348 | "text/html": [
349 | "\n",
350 | "\n",
363 | "
\n",
364 | " \n",
365 | " \n",
366 | " | \n",
367 | " Time | \n",
368 | " V1 | \n",
369 | " V2 | \n",
370 | " V3 | \n",
371 | " V4 | \n",
372 | " V5 | \n",
373 | " V6 | \n",
374 | " V7 | \n",
375 | " V8 | \n",
376 | " V9 | \n",
377 | " V10 | \n",
378 | " V11 | \n",
379 | " V12 | \n",
380 | " V13 | \n",
381 | " V14 | \n",
382 | " V15 | \n",
383 | " V16 | \n",
384 | " V17 | \n",
385 | " V18 | \n",
386 | " V19 | \n",
387 | " V20 | \n",
388 | " V21 | \n",
389 | " V22 | \n",
390 | " V23 | \n",
391 | " V24 | \n",
392 | " V25 | \n",
393 | " V26 | \n",
394 | " V27 | \n",
395 | " V28 | \n",
396 | " Amount | \n",
397 | " Class | \n",
398 | "
\n",
399 | " \n",
400 | " \n",
401 | " \n",
402 | " 284802 | \n",
403 | " 172786.0 | \n",
404 | " -11.881118 | \n",
405 | " 10.071785 | \n",
406 | " -9.834783 | \n",
407 | " -2.066656 | \n",
408 | " -5.364473 | \n",
409 | " -2.606837 | \n",
410 | " -4.918215 | \n",
411 | " 7.305334 | \n",
412 | " 1.914428 | \n",
413 | " 4.356170 | \n",
414 | " -1.593105 | \n",
415 | " 2.711941 | \n",
416 | " -0.689256 | \n",
417 | " 4.626942 | \n",
418 | " -0.924459 | \n",
419 | " 1.107641 | \n",
420 | " 1.991691 | \n",
421 | " 0.510632 | \n",
422 | " -0.682920 | \n",
423 | " 1.475829 | \n",
424 | " 0.213454 | \n",
425 | " 0.111864 | \n",
426 | " 1.014480 | \n",
427 | " -0.509348 | \n",
428 | " 1.436807 | \n",
429 | " 0.250034 | \n",
430 | " 0.943651 | \n",
431 | " 0.823731 | \n",
432 | " 0.77 | \n",
433 | " 0 | \n",
434 | "
\n",
435 | " \n",
436 | " 284803 | \n",
437 | " 172787.0 | \n",
438 | " -0.732789 | \n",
439 | " -0.055080 | \n",
440 | " 2.035030 | \n",
441 | " -0.738589 | \n",
442 | " 0.868229 | \n",
443 | " 1.058415 | \n",
444 | " 0.024330 | \n",
445 | " 0.294869 | \n",
446 | " 0.584800 | \n",
447 | " -0.975926 | \n",
448 | " -0.150189 | \n",
449 | " 0.915802 | \n",
450 | " 1.214756 | \n",
451 | " -0.675143 | \n",
452 | " 1.164931 | \n",
453 | " -0.711757 | \n",
454 | " -0.025693 | \n",
455 | " -1.221179 | \n",
456 | " -1.545556 | \n",
457 | " 0.059616 | \n",
458 | " 0.214205 | \n",
459 | " 0.924384 | \n",
460 | " 0.012463 | \n",
461 | " -1.016226 | \n",
462 | " -0.606624 | \n",
463 | " -0.395255 | \n",
464 | " 0.068472 | \n",
465 | " -0.053527 | \n",
466 | " 24.79 | \n",
467 | " 0 | \n",
468 | "
\n",
469 | " \n",
470 | " 284804 | \n",
471 | " 172788.0 | \n",
472 | " 1.919565 | \n",
473 | " -0.301254 | \n",
474 | " -3.249640 | \n",
475 | " -0.557828 | \n",
476 | " 2.630515 | \n",
477 | " 3.031260 | \n",
478 | " -0.296827 | \n",
479 | " 0.708417 | \n",
480 | " 0.432454 | \n",
481 | " -0.484782 | \n",
482 | " 0.411614 | \n",
483 | " 0.063119 | \n",
484 | " -0.183699 | \n",
485 | " -0.510602 | \n",
486 | " 1.329284 | \n",
487 | " 0.140716 | \n",
488 | " 0.313502 | \n",
489 | " 0.395652 | \n",
490 | " -0.577252 | \n",
491 | " 0.001396 | \n",
492 | " 0.232045 | \n",
493 | " 0.578229 | \n",
494 | " -0.037501 | \n",
495 | " 0.640134 | \n",
496 | " 0.265745 | \n",
497 | " -0.087371 | \n",
498 | " 0.004455 | \n",
499 | " -0.026561 | \n",
500 | " 67.88 | \n",
501 | " 0 | \n",
502 | "
\n",
503 | " \n",
504 | " 284805 | \n",
505 | " 172788.0 | \n",
506 | " -0.240440 | \n",
507 | " 0.530483 | \n",
508 | " 0.702510 | \n",
509 | " 0.689799 | \n",
510 | " -0.377961 | \n",
511 | " 0.623708 | \n",
512 | " -0.686180 | \n",
513 | " 0.679145 | \n",
514 | " 0.392087 | \n",
515 | " -0.399126 | \n",
516 | " -1.933849 | \n",
517 | " -0.962886 | \n",
518 | " -1.042082 | \n",
519 | " 0.449624 | \n",
520 | " 1.962563 | \n",
521 | " -0.608577 | \n",
522 | " 0.509928 | \n",
523 | " 1.113981 | \n",
524 | " 2.897849 | \n",
525 | " 0.127434 | \n",
526 | " 0.265245 | \n",
527 | " 0.800049 | \n",
528 | " -0.163298 | \n",
529 | " 0.123205 | \n",
530 | " -0.569159 | \n",
531 | " 0.546668 | \n",
532 | " 0.108821 | \n",
533 | " 0.104533 | \n",
534 | " 10.00 | \n",
535 | " 0 | \n",
536 | "
\n",
537 | " \n",
538 | " 284806 | \n",
539 | " 172792.0 | \n",
540 | " -0.533413 | \n",
541 | " -0.189733 | \n",
542 | " 0.703337 | \n",
543 | " -0.506271 | \n",
544 | " -0.012546 | \n",
545 | " -0.649617 | \n",
546 | " 1.577006 | \n",
547 | " -0.414650 | \n",
548 | " 0.486180 | \n",
549 | " -0.915427 | \n",
550 | " -1.040458 | \n",
551 | " -0.031513 | \n",
552 | " -0.188093 | \n",
553 | " -0.084316 | \n",
554 | " 0.041333 | \n",
555 | " -0.302620 | \n",
556 | " -0.660377 | \n",
557 | " 0.167430 | \n",
558 | " -0.256117 | \n",
559 | " 0.382948 | \n",
560 | " 0.261057 | \n",
561 | " 0.643078 | \n",
562 | " 0.376777 | \n",
563 | " 0.008797 | \n",
564 | " -0.473649 | \n",
565 | " -0.818267 | \n",
566 | " -0.002415 | \n",
567 | " 0.013649 | \n",
568 | " 217.00 | \n",
569 | " 0 | \n",
570 | "
\n",
571 | " \n",
572 | "
\n",
573 | "
"
574 | ],
575 | "text/plain": [
576 | " Time V1 V2 ... V28 Amount Class\n",
577 | "284802 172786.0 -11.881118 10.071785 ... 0.823731 0.77 0\n",
578 | "284803 172787.0 -0.732789 -0.055080 ... -0.053527 24.79 0\n",
579 | "284804 172788.0 1.919565 -0.301254 ... -0.026561 67.88 0\n",
580 | "284805 172788.0 -0.240440 0.530483 ... 0.104533 10.00 0\n",
581 | "284806 172792.0 -0.533413 -0.189733 ... 0.013649 217.00 0\n",
582 | "\n",
583 | "[5 rows x 31 columns]"
584 | ]
585 | },
586 | "metadata": {
587 | "tags": []
588 | },
589 | "execution_count": 4
590 | }
591 | ]
592 | },
593 | {
594 | "cell_type": "code",
595 | "metadata": {
596 | "colab": {
597 | "base_uri": "https://localhost:8080/"
598 | },
599 | "id": "H7bD0daoC0Jl",
600 | "outputId": "7546f901-1fb8-4f3f-c095-f64e096f1296"
601 | },
602 | "source": [
603 | "credit_card_data.info()"
604 | ],
605 | "execution_count": 5,
606 | "outputs": [
607 | {
608 | "output_type": "stream",
609 | "text": [
610 | "\n",
611 | "RangeIndex: 284807 entries, 0 to 284806\n",
612 | "Data columns (total 31 columns):\n",
613 | " # Column Non-Null Count Dtype \n",
614 | "--- ------ -------------- ----- \n",
615 | " 0 Time 284807 non-null float64\n",
616 | " 1 V1 284807 non-null float64\n",
617 | " 2 V2 284807 non-null float64\n",
618 | " 3 V3 284807 non-null float64\n",
619 | " 4 V4 284807 non-null float64\n",
620 | " 5 V5 284807 non-null float64\n",
621 | " 6 V6 284807 non-null float64\n",
622 | " 7 V7 284807 non-null float64\n",
623 | " 8 V8 284807 non-null float64\n",
624 | " 9 V9 284807 non-null float64\n",
625 | " 10 V10 284807 non-null float64\n",
626 | " 11 V11 284807 non-null float64\n",
627 | " 12 V12 284807 non-null float64\n",
628 | " 13 V13 284807 non-null float64\n",
629 | " 14 V14 284807 non-null float64\n",
630 | " 15 V15 284807 non-null float64\n",
631 | " 16 V16 284807 non-null float64\n",
632 | " 17 V17 284807 non-null float64\n",
633 | " 18 V18 284807 non-null float64\n",
634 | " 19 V19 284807 non-null float64\n",
635 | " 20 V20 284807 non-null float64\n",
636 | " 21 V21 284807 non-null float64\n",
637 | " 22 V22 284807 non-null float64\n",
638 | " 23 V23 284807 non-null float64\n",
639 | " 24 V24 284807 non-null float64\n",
640 | " 25 V25 284807 non-null float64\n",
641 | " 26 V26 284807 non-null float64\n",
642 | " 27 V27 284807 non-null float64\n",
643 | " 28 V28 284807 non-null float64\n",
644 | " 29 Amount 284807 non-null float64\n",
645 | " 30 Class 284807 non-null int64 \n",
646 | "dtypes: float64(30), int64(1)\n",
647 | "memory usage: 67.4 MB\n"
648 | ],
649 | "name": "stdout"
650 | }
651 | ]
652 | },
653 | {
654 | "cell_type": "code",
655 | "metadata": {
656 | "colab": {
657 | "base_uri": "https://localhost:8080/"
658 | },
659 | "id": "O57rEj5DC1uD",
660 | "outputId": "aff23b2f-6bb3-40dd-b029-709a44f0204a"
661 | },
662 | "source": [
663 | "#checking the number of missing values in each column\n",
664 | "credit_card_data.isnull().sum()"
665 | ],
666 | "execution_count": 6,
667 | "outputs": [
668 | {
669 | "output_type": "execute_result",
670 | "data": {
671 | "text/plain": [
672 | "Time 0\n",
673 | "V1 0\n",
674 | "V2 0\n",
675 | "V3 0\n",
676 | "V4 0\n",
677 | "V5 0\n",
678 | "V6 0\n",
679 | "V7 0\n",
680 | "V8 0\n",
681 | "V9 0\n",
682 | "V10 0\n",
683 | "V11 0\n",
684 | "V12 0\n",
685 | "V13 0\n",
686 | "V14 0\n",
687 | "V15 0\n",
688 | "V16 0\n",
689 | "V17 0\n",
690 | "V18 0\n",
691 | "V19 0\n",
692 | "V20 0\n",
693 | "V21 0\n",
694 | "V22 0\n",
695 | "V23 0\n",
696 | "V24 0\n",
697 | "V25 0\n",
698 | "V26 0\n",
699 | "V27 0\n",
700 | "V28 0\n",
701 | "Amount 0\n",
702 | "Class 0\n",
703 | "dtype: int64"
704 | ]
705 | },
706 | "metadata": {
707 | "tags": []
708 | },
709 | "execution_count": 6
710 | }
711 | ]
712 | },
713 | {
714 | "cell_type": "code",
715 | "metadata": {
716 | "colab": {
717 | "base_uri": "https://localhost:8080/"
718 | },
719 | "id": "kHZ3xqtwC5Eo",
720 | "outputId": "3a427f5e-3f25-4005-de8d-28bc58f51a55"
721 | },
722 | "source": [
723 | "#distribution of legit transactions & fraudulent transactions\n",
724 | "credit_card_data['Class'].value_counts()"
725 | ],
726 | "execution_count": 7,
727 | "outputs": [
728 | {
729 | "output_type": "execute_result",
730 | "data": {
731 | "text/plain": [
732 | "0 284315\n",
733 | "1 492\n",
734 | "Name: Class, dtype: int64"
735 | ]
736 | },
737 | "metadata": {
738 | "tags": []
739 | },
740 | "execution_count": 7
741 | }
742 | ]
743 | },
744 | {
745 | "cell_type": "markdown",
746 | "metadata": {
747 | "id": "SIYpCTxxC_u7"
748 | },
749 | "source": [
750 | "This dataset is highly imbalanced: 284,315 normal transactions versus only 492 fraudulent ones (about 0.17% of all rows).\n",
751 | "\n",
752 | "- `0` --> Normal Transaction\n",
753 | "- `1` --> Fraudulent Transaction"
754 | ]
755 | },
756 | {
757 | "cell_type": "code",
758 | "metadata": {
759 | "id": "QfADx3mdC8c1"
760 | },
761 | "source": [
762 | "#separating the data for analysis\n",
763 | "legit = credit_card_data[credit_card_data.Class == 0]\n",
764 | "fraud = credit_card_data[credit_card_data.Class == 1]"
765 | ],
766 | "execution_count": 8,
767 | "outputs": []
768 | },
769 | {
770 | "cell_type": "code",
771 | "metadata": {
772 | "colab": {
773 | "base_uri": "https://localhost:8080/"
774 | },
775 | "id": "OAYzlLtHDKoy",
776 | "outputId": "7fa72ac0-b00d-45a8-fbcf-3540052b05d1"
777 | },
778 | "source": [
779 | "print(legit.shape)\n",
780 | "print(fraud.shape)"
781 | ],
782 | "execution_count": 9,
783 | "outputs": [
784 | {
785 | "output_type": "stream",
786 | "text": [
787 | "(284315, 31)\n",
788 | "(492, 31)\n"
789 | ],
790 | "name": "stdout"
791 | }
792 | ]
793 | },
794 | {
795 | "cell_type": "code",
796 | "metadata": {
797 | "colab": {
798 | "base_uri": "https://localhost:8080/"
799 | },
800 | "id": "cir6ALxkDMIa",
801 | "outputId": "9bedb752-8856-4218-d997-798382491c25"
802 | },
803 | "source": [
804 | "#statistical measures of the data\n",
805 | "legit.Amount.describe()"
806 | ],
807 | "execution_count": 10,
808 | "outputs": [
809 | {
810 | "output_type": "execute_result",
811 | "data": {
812 | "text/plain": [
813 | "count 284315.000000\n",
814 | "mean 88.291022\n",
815 | "std 250.105092\n",
816 | "min 0.000000\n",
817 | "25% 5.650000\n",
818 | "50% 22.000000\n",
819 | "75% 77.050000\n",
820 | "max 25691.160000\n",
821 | "Name: Amount, dtype: float64"
822 | ]
823 | },
824 | "metadata": {
825 | "tags": []
826 | },
827 | "execution_count": 10
828 | }
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "metadata": {
834 | "colab": {
835 | "base_uri": "https://localhost:8080/"
836 | },
837 | "id": "rs1vrV2-DOor",
838 | "outputId": "3f029505-fd95-42ac-ec57-42e6a9b1093d"
839 | },
840 | "source": [
841 | "fraud.Amount.describe()"
842 | ],
843 | "execution_count": 11,
844 | "outputs": [
845 | {
846 | "output_type": "execute_result",
847 | "data": {
848 | "text/plain": [
849 | "count 492.000000\n",
850 | "mean 122.211321\n",
851 | "std 256.683288\n",
852 | "min 0.000000\n",
853 | "25% 1.000000\n",
854 | "50% 9.250000\n",
855 | "75% 105.890000\n",
856 | "max 2125.870000\n",
857 | "Name: Amount, dtype: float64"
858 | ]
859 | },
860 | "metadata": {
861 | "tags": []
862 | },
863 | "execution_count": 11
864 | }
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "metadata": {
870 | "colab": {
871 | "base_uri": "https://localhost:8080/",
872 | "height": 162
873 | },
874 | "id": "xQP2XYh-DQYc",
875 | "outputId": "bcc5d05b-001d-40e8-ad80-82b4a1630988"
876 | },
877 | "source": [
878 | "#compare the values for both transactions\n",
879 | "credit_card_data.groupby('Class').mean()"
880 | ],
881 | "execution_count": 12,
882 | "outputs": [
883 | {
884 | "output_type": "execute_result",
885 | "data": {
886 | "text/html": [
887 | "\n",
888 | "\n",
901 | "
\n",
902 | " \n",
903 | " \n",
904 | " | \n",
905 | " Time | \n",
906 | " V1 | \n",
907 | " V2 | \n",
908 | " V3 | \n",
909 | " V4 | \n",
910 | " V5 | \n",
911 | " V6 | \n",
912 | " V7 | \n",
913 | " V8 | \n",
914 | " V9 | \n",
915 | " V10 | \n",
916 | " V11 | \n",
917 | " V12 | \n",
918 | " V13 | \n",
919 | " V14 | \n",
920 | " V15 | \n",
921 | " V16 | \n",
922 | " V17 | \n",
923 | " V18 | \n",
924 | " V19 | \n",
925 | " V20 | \n",
926 | " V21 | \n",
927 | " V22 | \n",
928 | " V23 | \n",
929 | " V24 | \n",
930 | " V25 | \n",
931 | " V26 | \n",
932 | " V27 | \n",
933 | " V28 | \n",
934 | " Amount | \n",
935 | "
\n",
936 | " \n",
937 | " Class | \n",
938 | " | \n",
939 | " | \n",
940 | " | \n",
941 | " | \n",
942 | " | \n",
943 | " | \n",
944 | " | \n",
945 | " | \n",
946 | " | \n",
947 | " | \n",
948 | " | \n",
949 | " | \n",
950 | " | \n",
951 | " | \n",
952 | " | \n",
953 | " | \n",
954 | " | \n",
955 | " | \n",
956 | " | \n",
957 | " | \n",
958 | " | \n",
959 | " | \n",
960 | " | \n",
961 | " | \n",
962 | " | \n",
963 | " | \n",
964 | " | \n",
965 | " | \n",
966 | " | \n",
967 | " | \n",
968 | "
\n",
969 | " \n",
970 | " \n",
971 | " \n",
972 | " 0 | \n",
973 | " 94838.202258 | \n",
974 | " 0.008258 | \n",
975 | " -0.006271 | \n",
976 | " 0.012171 | \n",
977 | " -0.007860 | \n",
978 | " 0.005453 | \n",
979 | " 0.002419 | \n",
980 | " 0.009637 | \n",
981 | " -0.000987 | \n",
982 | " 0.004467 | \n",
983 | " 0.009824 | \n",
984 | " -0.006576 | \n",
985 | " 0.010832 | \n",
986 | " 0.000189 | \n",
987 | " 0.012064 | \n",
988 | " 0.000161 | \n",
989 | " 0.007164 | \n",
990 | " 0.011535 | \n",
991 | " 0.003887 | \n",
992 | " -0.001178 | \n",
993 | " -0.000644 | \n",
994 | " -0.001235 | \n",
995 | " -0.000024 | \n",
996 | " 0.000070 | \n",
997 | " 0.000182 | \n",
998 | " -0.000072 | \n",
999 | " -0.000089 | \n",
1000 | " -0.000295 | \n",
1001 | " -0.000131 | \n",
1002 | " 88.291022 | \n",
1003 | "
\n",
1004 | " \n",
1005 | " 1 | \n",
1006 | " 80746.806911 | \n",
1007 | " -4.771948 | \n",
1008 | " 3.623778 | \n",
1009 | " -7.033281 | \n",
1010 | " 4.542029 | \n",
1011 | " -3.151225 | \n",
1012 | " -1.397737 | \n",
1013 | " -5.568731 | \n",
1014 | " 0.570636 | \n",
1015 | " -2.581123 | \n",
1016 | " -5.676883 | \n",
1017 | " 3.800173 | \n",
1018 | " -6.259393 | \n",
1019 | " -0.109334 | \n",
1020 | " -6.971723 | \n",
1021 | " -0.092929 | \n",
1022 | " -4.139946 | \n",
1023 | " -6.665836 | \n",
1024 | " -2.246308 | \n",
1025 | " 0.680659 | \n",
1026 | " 0.372319 | \n",
1027 | " 0.713588 | \n",
1028 | " 0.014049 | \n",
1029 | " -0.040308 | \n",
1030 | " -0.105130 | \n",
1031 | " 0.041449 | \n",
1032 | " 0.051648 | \n",
1033 | " 0.170575 | \n",
1034 | " 0.075667 | \n",
1035 | " 122.211321 | \n",
1036 | "
\n",
1037 | " \n",
1038 | "
\n",
1039 | "
"
1040 | ],
1041 | "text/plain": [
1042 | " Time V1 V2 ... V27 V28 Amount\n",
1043 | "Class ... \n",
1044 | "0 94838.202258 0.008258 -0.006271 ... -0.000295 -0.000131 88.291022\n",
1045 | "1 80746.806911 -4.771948 3.623778 ... 0.170575 0.075667 122.211321\n",
1046 | "\n",
1047 | "[2 rows x 30 columns]"
1048 | ]
1049 | },
1050 | "metadata": {
1051 | "tags": []
1052 | },
1053 | "execution_count": 12
1054 | }
1055 | ]
1056 | },
1057 | {
1058 | "cell_type": "markdown",
1059 | "metadata": {
1060 | "id": "TbUcihrEDVM0"
1061 | },
1062 | "source": [
1063 | "Under-Sampling:\n",
1064 | "\n",
1065 | "Build a balanced sample dataset that contains the same number of Normal Transactions and Fraudulent Transactions, by randomly drawing as many normal transactions as there are fraudulent ones."
1066 | ]
1067 | },
1068 | {
1069 | "cell_type": "markdown",
1070 | "metadata": {
1071 | "id": "0wqv0N1dDhTT"
1072 | },
1073 | "source": [
1074 | "Number of Fraudulent Transactions --> 492"
1075 | ]
1076 | },
1077 | {
1078 | "cell_type": "code",
1079 | "metadata": {
1080 | "id": "kMhRAKtCDS94"
1081 | },
1082 | "source": [
1083 | "legit_sample = legit.sample(n=len(fraud), random_state=42)  # match the fraud count; fixed seed keeps the sample reproducible across re-runs"
1084 | ],
1085 | "execution_count": 13,
1086 | "outputs": []
1087 | },
1088 | {
1089 | "cell_type": "markdown",
1090 | "metadata": {
1091 | "id": "2zLLgbr_DlEs"
1092 | },
1093 | "source": [
1094 | "Concatenating the two DataFrames (the sampled normal transactions and all fraudulent transactions):"
1095 | ]
1096 | },
1097 | {
1098 | "cell_type": "code",
1099 | "metadata": {
1100 | "id": "YGi0YqQWDjch"
1101 | },
1102 | "source": [
1103 | "new_dataset = pd.concat([legit_sample, fraud], axis=0)"
1104 | ],
1105 | "execution_count": 14,
1106 | "outputs": []
1107 | },
1108 | {
1109 | "cell_type": "code",
1110 | "metadata": {
1111 | "colab": {
1112 | "base_uri": "https://localhost:8080/",
1113 | "height": 223
1114 | },
1115 | "id": "Lr0VHCwBDqs9",
1116 | "outputId": "6c2fb935-1570-4d04-8c57-dc62925ffa68"
1117 | },
1118 | "source": [
1119 | "new_dataset.head()"
1120 | ],
1121 | "execution_count": 15,
1122 | "outputs": [
1123 | {
1124 | "output_type": "execute_result",
1125 | "data": {
1126 | "text/html": [
1127 | "\n",
1128 | "\n",
1141 | "
\n",
1142 | " \n",
1143 | " \n",
1144 | " | \n",
1145 | " Time | \n",
1146 | " V1 | \n",
1147 | " V2 | \n",
1148 | " V3 | \n",
1149 | " V4 | \n",
1150 | " V5 | \n",
1151 | " V6 | \n",
1152 | " V7 | \n",
1153 | " V8 | \n",
1154 | " V9 | \n",
1155 | " V10 | \n",
1156 | " V11 | \n",
1157 | " V12 | \n",
1158 | " V13 | \n",
1159 | " V14 | \n",
1160 | " V15 | \n",
1161 | " V16 | \n",
1162 | " V17 | \n",
1163 | " V18 | \n",
1164 | " V19 | \n",
1165 | " V20 | \n",
1166 | " V21 | \n",
1167 | " V22 | \n",
1168 | " V23 | \n",
1169 | " V24 | \n",
1170 | " V25 | \n",
1171 | " V26 | \n",
1172 | " V27 | \n",
1173 | " V28 | \n",
1174 | " Amount | \n",
1175 | " Class | \n",
1176 | "
\n",
1177 | " \n",
1178 | " \n",
1179 | " \n",
1180 | " 135283 | \n",
1181 | " 81184.0 | \n",
1182 | " -1.692426 | \n",
1183 | " -1.426632 | \n",
1184 | " 2.048187 | \n",
1185 | " -0.103634 | \n",
1186 | " 0.598736 | \n",
1187 | " -1.825026 | \n",
1188 | " 0.131808 | \n",
1189 | " -0.271939 | \n",
1190 | " -1.366525 | \n",
1191 | " 0.321437 | \n",
1192 | " -0.314836 | \n",
1193 | " -0.277355 | \n",
1194 | " 0.019219 | \n",
1195 | " -0.045576 | \n",
1196 | " 0.312121 | \n",
1197 | " -1.389728 | \n",
1198 | " -0.362287 | \n",
1199 | " 1.182754 | \n",
1200 | " -1.886769 | \n",
1201 | " 0.172073 | \n",
1202 | " -0.165291 | \n",
1203 | " -0.585107 | \n",
1204 | " 0.503270 | \n",
1205 | " 0.826948 | \n",
1206 | " 0.039087 | \n",
1207 | " -0.682189 | \n",
1208 | " -0.146479 | \n",
1209 | " -0.060679 | \n",
1210 | " 139.89 | \n",
1211 | " 0 | \n",
1212 | "
\n",
1213 | " \n",
1214 | " 51737 | \n",
1215 | " 45096.0 | \n",
1216 | " 1.254523 | \n",
1217 | " 0.426688 | \n",
1218 | " -0.659489 | \n",
1219 | " 0.744471 | \n",
1220 | " 1.044157 | \n",
1221 | " 0.434640 | \n",
1222 | " 0.444509 | \n",
1223 | " -0.048640 | \n",
1224 | " -0.690491 | \n",
1225 | " 0.187445 | \n",
1226 | " 0.527436 | \n",
1227 | " 1.137836 | \n",
1228 | " 1.297485 | \n",
1229 | " 0.475704 | \n",
1230 | " 0.167112 | \n",
1231 | " 0.072254 | \n",
1232 | " -0.859530 | \n",
1233 | " -0.028722 | \n",
1234 | " 0.123554 | \n",
1235 | " -0.029020 | \n",
1236 | " 0.014457 | \n",
1237 | " 0.123478 | \n",
1238 | " -0.321232 | \n",
1239 | " -1.302410 | \n",
1240 | " 0.958048 | \n",
1241 | " -0.188910 | \n",
1242 | " 0.006375 | \n",
1243 | " -0.011336 | \n",
1244 | " 16.44 | \n",
1245 | " 0 | \n",
1246 | "
\n",
1247 | " \n",
1248 | " 207351 | \n",
1249 | " 136634.0 | \n",
1250 | " 1.578821 | \n",
1251 | " -1.037392 | \n",
1252 | " -0.700904 | \n",
1253 | " 0.442737 | \n",
1254 | " -0.871919 | \n",
1255 | " -0.767364 | \n",
1256 | " -0.020586 | \n",
1257 | " -0.181463 | \n",
1258 | " 1.334681 | \n",
1259 | " -0.364394 | \n",
1260 | " -0.915410 | \n",
1261 | " 0.579226 | \n",
1262 | " -0.170789 | \n",
1263 | " 0.004850 | \n",
1264 | " 0.022921 | \n",
1265 | " -0.126676 | \n",
1266 | " -0.207565 | \n",
1267 | " -0.637480 | \n",
1268 | " 0.289813 | \n",
1269 | " 0.204680 | \n",
1270 | " -0.276729 | \n",
1271 | " -1.100509 | \n",
1272 | " 0.257801 | \n",
1273 | " 0.012313 | \n",
1274 | " -0.580246 | \n",
1275 | " -0.438032 | \n",
1276 | " -0.041466 | \n",
1277 | " -0.001984 | \n",
1278 | " 225.96 | \n",
1279 | " 0 | \n",
1280 | "
\n",
1281 | " \n",
1282 | " 104157 | \n",
1283 | " 68934.0 | \n",
1284 | " -0.723808 | \n",
1285 | " 1.277781 | \n",
1286 | " 0.907896 | \n",
1287 | " 0.123921 | \n",
1288 | " -0.311881 | \n",
1289 | " -1.065993 | \n",
1290 | " 0.308144 | \n",
1291 | " 0.387264 | \n",
1292 | " -0.478737 | \n",
1293 | " -0.721150 | \n",
1294 | " -0.457502 | \n",
1295 | " -0.209393 | \n",
1296 | " -0.364916 | \n",
1297 | " -0.001098 | \n",
1298 | " 0.917757 | \n",
1299 | " 0.532673 | \n",
1300 | " 0.207107 | \n",
1301 | " -0.028286 | \n",
1302 | " -0.092203 | \n",
1303 | " -0.004759 | \n",
1304 | " -0.220555 | \n",
1305 | " -0.751067 | \n",
1306 | " 0.058320 | \n",
1307 | " 0.312616 | \n",
1308 | " -0.152713 | \n",
1309 | " 0.075810 | \n",
1310 | " 0.115830 | \n",
1311 | " 0.026851 | \n",
1312 | " 8.99 | \n",
1313 | " 0 | \n",
1314 | "
\n",
1315 | " \n",
1316 | " 180887 | \n",
1317 | " 124736.0 | \n",
1318 | " 0.990966 | \n",
1319 | " -2.717490 | \n",
1320 | " -3.215571 | \n",
1321 | " -0.290394 | \n",
1322 | " -0.665947 | \n",
1323 | " -1.638020 | \n",
1324 | " 1.248703 | \n",
1325 | " -0.755283 | \n",
1326 | " -0.826472 | \n",
1327 | " 0.680349 | \n",
1328 | " -1.059235 | \n",
1329 | " -1.720531 | \n",
1330 | " -1.996273 | \n",
1331 | " 0.994732 | \n",
1332 | " -0.185029 | \n",
1333 | " 0.199565 | \n",
1334 | " 0.721654 | \n",
1335 | " -1.448993 | \n",
1336 | " 0.502027 | \n",
1337 | " 1.161925 | \n",
1338 | " 0.863855 | \n",
1339 | " 0.964671 | \n",
1340 | " -0.800307 | \n",
1341 | " -0.000461 | \n",
1342 | " 0.443869 | \n",
1343 | " 0.242508 | \n",
1344 | " -0.232639 | \n",
1345 | " 0.018657 | \n",
1346 | " 685.10 | \n",
1347 | " 0 | \n",
1348 | "
\n",
1349 | " \n",
1350 | "
\n",
1351 | "
"
1352 | ],
1353 | "text/plain": [
1354 | " Time V1 V2 ... V28 Amount Class\n",
1355 | "135283 81184.0 -1.692426 -1.426632 ... -0.060679 139.89 0\n",
1356 | "51737 45096.0 1.254523 0.426688 ... -0.011336 16.44 0\n",
1357 | "207351 136634.0 1.578821 -1.037392 ... -0.001984 225.96 0\n",
1358 | "104157 68934.0 -0.723808 1.277781 ... 0.026851 8.99 0\n",
1359 | "180887 124736.0 0.990966 -2.717490 ... 0.018657 685.10 0\n",
1360 | "\n",
1361 | "[5 rows x 31 columns]"
1362 | ]
1363 | },
1364 | "metadata": {
1365 | "tags": []
1366 | },
1367 | "execution_count": 15
1368 | }
1369 | ]
1370 | },
1371 | {
1372 | "cell_type": "code",
1373 | "metadata": {
1374 | "colab": {
1375 | "base_uri": "https://localhost:8080/",
1376 | "height": 223
1377 | },
1378 | "id": "YdvEWT33DsCC",
1379 | "outputId": "6b6c7624-219a-40d7-9cdb-241754493097"
1380 | },
1381 | "source": [
1382 | "new_dataset.tail()"
1383 | ],
1384 | "execution_count": 16,
1385 | "outputs": [
1386 | {
1387 | "output_type": "execute_result",
1388 | "data": {
1389 | "text/html": [
1390 | "\n",
1391 | "\n",
1404 | "
\n",
1405 | " \n",
1406 | " \n",
1407 | " | \n",
1408 | " Time | \n",
1409 | " V1 | \n",
1410 | " V2 | \n",
1411 | " V3 | \n",
1412 | " V4 | \n",
1413 | " V5 | \n",
1414 | " V6 | \n",
1415 | " V7 | \n",
1416 | " V8 | \n",
1417 | " V9 | \n",
1418 | " V10 | \n",
1419 | " V11 | \n",
1420 | " V12 | \n",
1421 | " V13 | \n",
1422 | " V14 | \n",
1423 | " V15 | \n",
1424 | " V16 | \n",
1425 | " V17 | \n",
1426 | " V18 | \n",
1427 | " V19 | \n",
1428 | " V20 | \n",
1429 | " V21 | \n",
1430 | " V22 | \n",
1431 | " V23 | \n",
1432 | " V24 | \n",
1433 | " V25 | \n",
1434 | " V26 | \n",
1435 | " V27 | \n",
1436 | " V28 | \n",
1437 | " Amount | \n",
1438 | " Class | \n",
1439 | "
\n",
1440 | " \n",
1441 | " \n",
1442 | " \n",
1443 | " 279863 | \n",
1444 | " 169142.0 | \n",
1445 | " -1.927883 | \n",
1446 | " 1.125653 | \n",
1447 | " -4.518331 | \n",
1448 | " 1.749293 | \n",
1449 | " -1.566487 | \n",
1450 | " -2.010494 | \n",
1451 | " -0.882850 | \n",
1452 | " 0.697211 | \n",
1453 | " -2.064945 | \n",
1454 | " -5.587794 | \n",
1455 | " 2.115795 | \n",
1456 | " -5.417424 | \n",
1457 | " -1.235123 | \n",
1458 | " -6.665177 | \n",
1459 | " 0.401701 | \n",
1460 | " -2.897825 | \n",
1461 | " -4.570529 | \n",
1462 | " -1.315147 | \n",
1463 | " 0.391167 | \n",
1464 | " 1.252967 | \n",
1465 | " 0.778584 | \n",
1466 | " -0.319189 | \n",
1467 | " 0.639419 | \n",
1468 | " -0.294885 | \n",
1469 | " 0.537503 | \n",
1470 | " 0.788395 | \n",
1471 | " 0.292680 | \n",
1472 | " 0.147968 | \n",
1473 | " 390.00 | \n",
1474 | " 1 | \n",
1475 | "
\n",
1476 | " \n",
1477 | " 280143 | \n",
1478 | " 169347.0 | \n",
1479 | " 1.378559 | \n",
1480 | " 1.289381 | \n",
1481 | " -5.004247 | \n",
1482 | " 1.411850 | \n",
1483 | " 0.442581 | \n",
1484 | " -1.326536 | \n",
1485 | " -1.413170 | \n",
1486 | " 0.248525 | \n",
1487 | " -1.127396 | \n",
1488 | " -3.232153 | \n",
1489 | " 2.858466 | \n",
1490 | " -3.096915 | \n",
1491 | " -0.792532 | \n",
1492 | " -5.210141 | \n",
1493 | " -0.613803 | \n",
1494 | " -2.155297 | \n",
1495 | " -3.267116 | \n",
1496 | " -0.688505 | \n",
1497 | " 0.737657 | \n",
1498 | " 0.226138 | \n",
1499 | " 0.370612 | \n",
1500 | " 0.028234 | \n",
1501 | " -0.145640 | \n",
1502 | " -0.081049 | \n",
1503 | " 0.521875 | \n",
1504 | " 0.739467 | \n",
1505 | " 0.389152 | \n",
1506 | " 0.186637 | \n",
1507 | " 0.76 | \n",
1508 | " 1 | \n",
1509 | "
\n",
1510 | " \n",
1511 | " 280149 | \n",
1512 | " 169351.0 | \n",
1513 | " -0.676143 | \n",
1514 | " 1.126366 | \n",
1515 | " -2.213700 | \n",
1516 | " 0.468308 | \n",
1517 | " -1.120541 | \n",
1518 | " -0.003346 | \n",
1519 | " -2.234739 | \n",
1520 | " 1.210158 | \n",
1521 | " -0.652250 | \n",
1522 | " -3.463891 | \n",
1523 | " 1.794969 | \n",
1524 | " -2.775022 | \n",
1525 | " -0.418950 | \n",
1526 | " -4.057162 | \n",
1527 | " -0.712616 | \n",
1528 | " -1.603015 | \n",
1529 | " -5.035326 | \n",
1530 | " -0.507000 | \n",
1531 | " 0.266272 | \n",
1532 | " 0.247968 | \n",
1533 | " 0.751826 | \n",
1534 | " 0.834108 | \n",
1535 | " 0.190944 | \n",
1536 | " 0.032070 | \n",
1537 | " -0.739695 | \n",
1538 | " 0.471111 | \n",
1539 | " 0.385107 | \n",
1540 | " 0.194361 | \n",
1541 | " 77.89 | \n",
1542 | " 1 | \n",
1543 | "
\n",
1544 | " \n",
1545 | " 281144 | \n",
1546 | " 169966.0 | \n",
1547 | " -3.113832 | \n",
1548 | " 0.585864 | \n",
1549 | " -5.399730 | \n",
1550 | " 1.817092 | \n",
1551 | " -0.840618 | \n",
1552 | " -2.943548 | \n",
1553 | " -2.208002 | \n",
1554 | " 1.058733 | \n",
1555 | " -1.632333 | \n",
1556 | " -5.245984 | \n",
1557 | " 1.933520 | \n",
1558 | " -5.030465 | \n",
1559 | " -1.127455 | \n",
1560 | " -6.416628 | \n",
1561 | " 0.141237 | \n",
1562 | " -2.549498 | \n",
1563 | " -4.614717 | \n",
1564 | " -1.478138 | \n",
1565 | " -0.035480 | \n",
1566 | " 0.306271 | \n",
1567 | " 0.583276 | \n",
1568 | " -0.269209 | \n",
1569 | " -0.456108 | \n",
1570 | " -0.183659 | \n",
1571 | " -0.328168 | \n",
1572 | " 0.606116 | \n",
1573 | " 0.884876 | \n",
1574 | " -0.253700 | \n",
1575 | " 245.00 | \n",
1576 | " 1 | \n",
1577 | "
\n",
1578 | " \n",
1579 | " 281674 | \n",
1580 | " 170348.0 | \n",
1581 | " 1.991976 | \n",
1582 | " 0.158476 | \n",
1583 | " -2.583441 | \n",
1584 | " 0.408670 | \n",
1585 | " 1.151147 | \n",
1586 | " -0.096695 | \n",
1587 | " 0.223050 | \n",
1588 | " -0.068384 | \n",
1589 | " 0.577829 | \n",
1590 | " -0.888722 | \n",
1591 | " 0.491140 | \n",
1592 | " 0.728903 | \n",
1593 | " 0.380428 | \n",
1594 | " -1.948883 | \n",
1595 | " -0.832498 | \n",
1596 | " 0.519436 | \n",
1597 | " 0.903562 | \n",
1598 | " 1.197315 | \n",
1599 | " 0.593509 | \n",
1600 | " -0.017652 | \n",
1601 | " -0.164350 | \n",
1602 | " -0.295135 | \n",
1603 | " -0.072173 | \n",
1604 | " -0.450261 | \n",
1605 | " 0.313267 | \n",
1606 | " -0.289617 | \n",
1607 | " 0.002988 | \n",
1608 | " -0.015309 | \n",
1609 | " 42.53 | \n",
1610 | " 1 | \n",
1611 | "
\n",
1612 | " \n",
1613 | "
\n",
1614 | "
"
1615 | ],
1616 | "text/plain": [
1617 | " Time V1 V2 ... V28 Amount Class\n",
1618 | "279863 169142.0 -1.927883 1.125653 ... 0.147968 390.00 1\n",
1619 | "280143 169347.0 1.378559 1.289381 ... 0.186637 0.76 1\n",
1620 | "280149 169351.0 -0.676143 1.126366 ... 0.194361 77.89 1\n",
1621 | "281144 169966.0 -3.113832 0.585864 ... -0.253700 245.00 1\n",
1622 | "281674 170348.0 1.991976 0.158476 ... -0.015309 42.53 1\n",
1623 | "\n",
1624 | "[5 rows x 31 columns]"
1625 | ]
1626 | },
1627 | "metadata": {
1628 | "tags": []
1629 | },
1630 | "execution_count": 16
1631 | }
1632 | ]
1633 | },
1634 | {
1635 | "cell_type": "code",
1636 | "metadata": {
1637 | "colab": {
1638 | "base_uri": "https://localhost:8080/"
1639 | },
1640 | "id": "SJjyXuTHDuQr",
1641 | "outputId": "768d1286-ce01-413b-f297-4cb7c133b41c"
1642 | },
1643 | "source": [
1644 | "new_dataset['Class'].value_counts()"
1645 | ],
1646 | "execution_count": 17,
1647 | "outputs": [
1648 | {
1649 | "output_type": "execute_result",
1650 | "data": {
1651 | "text/plain": [
1652 | "1 492\n",
1653 | "0 492\n",
1654 | "Name: Class, dtype: int64"
1655 | ]
1656 | },
1657 | "metadata": {
1658 | "tags": []
1659 | },
1660 | "execution_count": 17
1661 | }
1662 | ]
1663 | },
1664 | {
1665 | "cell_type": "code",
1666 | "metadata": {
1667 | "colab": {
1668 | "base_uri": "https://localhost:8080/",
1669 | "height": 162
1670 | },
1671 | "id": "Lchuy7YfDv3R",
1672 | "outputId": "40478540-d57c-4ee0-dc6f-32740b8a8890"
1673 | },
1674 | "source": [
1675 | "new_dataset.groupby('Class').mean()"
1676 | ],
1677 | "execution_count": 18,
1678 | "outputs": [
1679 | {
1680 | "output_type": "execute_result",
1681 | "data": {
1682 | "text/html": [
1683 | "\n",
1684 | "\n",
1697 | "
\n",
1698 | " \n",
1699 | " \n",
1700 | " | \n",
1701 | " Time | \n",
1702 | " V1 | \n",
1703 | " V2 | \n",
1704 | " V3 | \n",
1705 | " V4 | \n",
1706 | " V5 | \n",
1707 | " V6 | \n",
1708 | " V7 | \n",
1709 | " V8 | \n",
1710 | " V9 | \n",
1711 | " V10 | \n",
1712 | " V11 | \n",
1713 | " V12 | \n",
1714 | " V13 | \n",
1715 | " V14 | \n",
1716 | " V15 | \n",
1717 | " V16 | \n",
1718 | " V17 | \n",
1719 | " V18 | \n",
1720 | " V19 | \n",
1721 | " V20 | \n",
1722 | " V21 | \n",
1723 | " V22 | \n",
1724 | " V23 | \n",
1725 | " V24 | \n",
1726 | " V25 | \n",
1727 | " V26 | \n",
1728 | " V27 | \n",
1729 | " V28 | \n",
1730 | " Amount | \n",
1731 | "
\n",
1732 | " \n",
1733 | " Class | \n",
1734 | " | \n",
1735 | " | \n",
1736 | " | \n",
1737 | " | \n",
1738 | " | \n",
1739 | " | \n",
1740 | " | \n",
1741 | " | \n",
1742 | " | \n",
1743 | " | \n",
1744 | " | \n",
1745 | " | \n",
1746 | " | \n",
1747 | " | \n",
1748 | " | \n",
1749 | " | \n",
1750 | " | \n",
1751 | " | \n",
1752 | " | \n",
1753 | " | \n",
1754 | " | \n",
1755 | " | \n",
1756 | " | \n",
1757 | " | \n",
1758 | " | \n",
1759 | " | \n",
1760 | " | \n",
1761 | " | \n",
1762 | " | \n",
1763 | " | \n",
1764 | "
\n",
1765 | " \n",
1766 | " \n",
1767 | " \n",
1768 | " 0 | \n",
1769 | " 91689.526423 | \n",
1770 | " -0.004396 | \n",
1771 | " -0.072772 | \n",
1772 | " 0.078174 | \n",
1773 | " -0.056607 | \n",
1774 | " -0.036143 | \n",
1775 | " 0.002296 | \n",
1776 | " 0.015672 | \n",
1777 | " 0.036220 | \n",
1778 | " 0.018225 | \n",
1779 | " -0.063194 | \n",
1780 | " 0.008175 | \n",
1781 | " 0.052299 | \n",
1782 | " -0.009125 | \n",
1783 | " 0.007462 | \n",
1784 | " -0.023142 | \n",
1785 | " -0.002752 | \n",
1786 | " 0.037757 | \n",
1787 | " -0.050499 | \n",
1788 | " -0.029256 | \n",
1789 | " 0.057827 | \n",
1790 | " 0.007346 | \n",
1791 | " 0.011248 | \n",
1792 | " -0.006803 | \n",
1793 | " -0.006657 | \n",
1794 | " -0.005947 | \n",
1795 | " 0.017890 | \n",
1796 | " 0.000840 | \n",
1797 | " 0.029621 | \n",
1798 | " 104.587378 | \n",
1799 | "
\n",
1800 | " \n",
1801 | " 1 | \n",
1802 | " 80746.806911 | \n",
1803 | " -4.771948 | \n",
1804 | " 3.623778 | \n",
1805 | " -7.033281 | \n",
1806 | " 4.542029 | \n",
1807 | " -3.151225 | \n",
1808 | " -1.397737 | \n",
1809 | " -5.568731 | \n",
1810 | " 0.570636 | \n",
1811 | " -2.581123 | \n",
1812 | " -5.676883 | \n",
1813 | " 3.800173 | \n",
1814 | " -6.259393 | \n",
1815 | " -0.109334 | \n",
1816 | " -6.971723 | \n",
1817 | " -0.092929 | \n",
1818 | " -4.139946 | \n",
1819 | " -6.665836 | \n",
1820 | " -2.246308 | \n",
1821 | " 0.680659 | \n",
1822 | " 0.372319 | \n",
1823 | " 0.713588 | \n",
1824 | " 0.014049 | \n",
1825 | " -0.040308 | \n",
1826 | " -0.105130 | \n",
1827 | " 0.041449 | \n",
1828 | " 0.051648 | \n",
1829 | " 0.170575 | \n",
1830 | " 0.075667 | \n",
1831 | " 122.211321 | \n",
1832 | "
\n",
1833 | " \n",
1834 | "
\n",
1835 | "
"
1836 | ],
1837 | "text/plain": [
1838 | " Time V1 V2 ... V27 V28 Amount\n",
1839 | "Class ... \n",
1840 | "0 91689.526423 -0.004396 -0.072772 ... 0.000840 0.029621 104.587378\n",
1841 | "1 80746.806911 -4.771948 3.623778 ... 0.170575 0.075667 122.211321\n",
1842 | "\n",
1843 | "[2 rows x 30 columns]"
1844 | ]
1845 | },
1846 | "metadata": {
1847 | "tags": []
1848 | },
1849 | "execution_count": 18
1850 | }
1851 | ]
1852 | },
1853 | {
1854 | "cell_type": "markdown",
1855 | "metadata": {
1856 | "id": "jVdxPud4DzPe"
1857 | },
1858 | "source": [
1859 | "Splitting the data into Features and Targets"
1860 | ]
1861 | },
1862 | {
1863 | "cell_type": "code",
1864 | "metadata": {
1865 | "id": "uF10Oe5RDxlX"
1866 | },
1867 | "source": [
1868 | "X = new_dataset.drop(columns='Class')  # features: every column except the 'Class' label\n",
1869 | "Y = new_dataset['Class']  # target: the 'Class' label column"
1870 | ],
1871 | "execution_count": 19,
1872 | "outputs": []
1873 | },
1874 | {
1875 | "cell_type": "code",
1876 | "metadata": {
1877 | "colab": {
1878 | "base_uri": "https://localhost:8080/"
1879 | },
1880 | "id": "rj3oUiHMD10n",
1881 | "outputId": "fe729953-b3c0-4c96-f475-1db4d2677b0d"
1882 | },
1883 | "source": [
1884 | "print(X)"
1885 | ],
1886 | "execution_count": 20,
1887 | "outputs": [
1888 | {
1889 | "output_type": "stream",
1890 | "text": [
1891 | " Time V1 V2 ... V27 V28 Amount\n",
1892 | "135283 81184.0 -1.692426 -1.426632 ... -0.146479 -0.060679 139.89\n",
1893 | "51737 45096.0 1.254523 0.426688 ... 0.006375 -0.011336 16.44\n",
1894 | "207351 136634.0 1.578821 -1.037392 ... -0.041466 -0.001984 225.96\n",
1895 | "104157 68934.0 -0.723808 1.277781 ... 0.115830 0.026851 8.99\n",
1896 | "180887 124736.0 0.990966 -2.717490 ... -0.232639 0.018657 685.10\n",
1897 | "... ... ... ... ... ... ... ...\n",
1898 | "279863 169142.0 -1.927883 1.125653 ... 0.292680 0.147968 390.00\n",
1899 | "280143 169347.0 1.378559 1.289381 ... 0.389152 0.186637 0.76\n",
1900 | "280149 169351.0 -0.676143 1.126366 ... 0.385107 0.194361 77.89\n",
1901 | "281144 169966.0 -3.113832 0.585864 ... 0.884876 -0.253700 245.00\n",
1902 | "281674 170348.0 1.991976 0.158476 ... 0.002988 -0.015309 42.53\n",
1903 | "\n",
1904 | "[984 rows x 30 columns]\n"
1905 | ],
1906 | "name": "stdout"
1907 | }
1908 | ]
1909 | },
1910 | {
1911 | "cell_type": "code",
1912 | "metadata": {
1913 | "colab": {
1914 | "base_uri": "https://localhost:8080/"
1915 | },
1916 | "id": "m5GvzTNJD3eE",
1917 | "outputId": "ae12650f-76ef-4248-fb90-10bfd1dfe1d1"
1918 | },
1919 | "source": [
1920 | "print(Y)"
1921 | ],
1922 | "execution_count": 21,
1923 | "outputs": [
1924 | {
1925 | "output_type": "stream",
1926 | "text": [
1927 | "135283 0\n",
1928 | "51737 0\n",
1929 | "207351 0\n",
1930 | "104157 0\n",
1931 | "180887 0\n",
1932 | " ..\n",
1933 | "279863 1\n",
1934 | "280143 1\n",
1935 | "280149 1\n",
1936 | "281144 1\n",
1937 | "281674 1\n",
1938 | "Name: Class, Length: 984, dtype: int64\n"
1939 | ],
1940 | "name": "stdout"
1941 | }
1942 | ]
1943 | },
1944 | {
1945 | "cell_type": "markdown",
1946 | "metadata": {
1947 | "id": "L3nrBewjD_Pb"
1948 | },
1949 | "source": [
1950 | "Splitting the Data into Training and Test Sets"
1951 | ]
1952 | },
1953 | {
1954 | "cell_type": "code",
1955 | "metadata": {
1956 | "id": "UDvZRJoyD4xQ"
1957 | },
1958 | "source": [
1959 | "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)"
1960 | ],
1961 | "execution_count": 22,
1962 | "outputs": []
1963 | },
1964 | {
1965 | "cell_type": "code",
1966 | "metadata": {
1967 | "colab": {
1968 | "base_uri": "https://localhost:8080/"
1969 | },
1970 | "id": "f0g6WjruEBJl",
1971 | "outputId": "be7e97cc-0c5d-43c8-ef3c-b336eccbd0ec"
1972 | },
1973 | "source": [
1974 | "print(X.shape, X_train.shape, X_test.shape)"
1975 | ],
1976 | "execution_count": 23,
1977 | "outputs": [
1978 | {
1979 | "output_type": "stream",
1980 | "text": [
1981 | "(984, 30) (787, 30) (197, 30)\n"
1982 | ],
1983 | "name": "stdout"
1984 | }
1985 | ]
1986 | },
1987 | {
1988 | "cell_type": "markdown",
1989 | "metadata": {
1990 | "id": "_NumLl3qEEX9"
1991 | },
1992 | "source": [
1993 | "Logistic Regression Model Training"
1994 | ]
1995 | },
1996 | {
1997 | "cell_type": "code",
1998 | "metadata": {
1999 | "id": "ASdVtR5fECNj"
2000 | },
2001 | "source": [
2002 | "model = LogisticRegression()"
2003 | ],
2004 | "execution_count": 24,
2005 | "outputs": []
2006 | },
2007 | {
2008 | "cell_type": "code",
2009 | "metadata": {
2010 | "colab": {
2011 | "base_uri": "https://localhost:8080/"
2012 | },
2013 | "id": "poA9v678EGtW",
2014 | "outputId": "c41e0334-d44d-4705-9990-9b2beace01d9"
2015 | },
2016 | "source": [
2017 | "#training the Logistic Regression Model with Training Data\n",
2018 | "model.fit(X_train, Y_train)"
2019 | ],
2020 | "execution_count": 25,
2021 | "outputs": [
2022 | {
2023 | "output_type": "execute_result",
2024 | "data": {
2025 | "text/plain": [
2026 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
2027 | " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
2028 | " multi_class='auto', n_jobs=None, penalty='l2',\n",
2029 | " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
2030 | " warm_start=False)"
2031 | ]
2032 | },
2033 | "metadata": {
2034 | "tags": []
2035 | },
2036 | "execution_count": 25
2037 | }
2038 | ]
2039 | },
2040 | {
2041 | "cell_type": "markdown",
2042 | "metadata": {
2043 | "id": "YilGdZ-qELK8"
2044 | },
2045 | "source": [
2046 | "Model Evaluation: Accuracy Score"
2047 | ]
2048 | },
2049 | {
2050 | "cell_type": "code",
2051 | "metadata": {
2052 | "id": "W3yCpyPZEIqv"
2053 | },
2054 | "source": [
2055 | "# Accuracy on the training data; accuracy_score expects (y_true, y_pred) in that order\n",
2056 | "train_predictions = model.predict(X_train)\n",
2057 | "training_data_accuracy = accuracy_score(Y_train, train_predictions)"
2058 | ],
2059 | "execution_count": 26,
2060 | "outputs": []
2061 | },
2062 | {
2063 | "cell_type": "code",
2064 | "metadata": {
2065 | "colab": {
2066 | "base_uri": "https://localhost:8080/"
2067 | },
2068 | "id": "NheDGoa7EPFA",
2069 | "outputId": "9970a7fb-c119-40af-a850-befa902c709e"
2070 | },
2071 | "source": [
2072 | "print('Accuracy on Training Data : ', training_data_accuracy)"
2073 | ],
2074 | "execution_count": 27,
2075 | "outputs": [
2076 | {
2077 | "output_type": "stream",
2078 | "text": [
2079 | "Accuracy on Training Data : 0.9364675984752223\n"
2080 | ],
2081 | "name": "stdout"
2082 | }
2083 | ]
2084 | },
2085 | {
2086 | "cell_type": "code",
2087 | "metadata": {
2088 | "id": "eUXUuaE5ERM2"
2089 | },
2090 | "source": [
2091 | "# Accuracy on the held-out test data; accuracy_score expects (y_true, y_pred) in that order\n",
2092 | "test_predictions = model.predict(X_test)\n",
2093 | "test_data_accuracy = accuracy_score(Y_test, test_predictions)"
2094 | ],
2095 | "execution_count": 28,
2096 | "outputs": []
2097 | },
2098 | {
2099 | "cell_type": "code",
2100 | "metadata": {
2101 | "colab": {
2102 | "base_uri": "https://localhost:8080/"
2103 | },
2104 | "id": "CYtrUlRMEUjM",
2105 | "outputId": "3b1b4299-394e-4538-d39b-a88e4a88f70c"
2106 | },
2107 | "source": [
2108 | "print('Accuracy score on Test Data : ', test_data_accuracy)"
2109 | ],
2110 | "execution_count": 29,
2111 | "outputs": [
2112 | {
2113 | "output_type": "stream",
2114 | "text": [
2115 | "Accuracy score on Test Data : 0.9289340101522843\n"
2116 | ],
2117 | "name": "stdout"
2118 | }
2119 | ]
2120 | },
2121 | {
2122 | "cell_type": "code",
2123 | "metadata": {
2124 | "id": "QajzXv5GEWmj"
2125 | },
2126 | "source": [
2127 | ""
2128 | ],
2129 | "execution_count": null,
2130 | "outputs": []
2131 | }
2132 | ]
2133 | }
--------------------------------------------------------------------------------