├── Movie_Genre_Classification.ipynb
├── README.md
├── SPAM_SMS_DETECTION.ipynb
└── credit_card_fraud_Detection.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # CodeWay-ML-Internship
2 |
3 | Welcome to the repository for my machine learning internship at Codeway!
4 |
5 | Task-1: MOVIE GENRE CLASSIFICATION
6 | -Creating a machine learning model that can predict the genre of a
7 | movie based on its plot summary or other textual information. We
8 | can use techniques like TF-IDF or word embeddings with classifiers
9 | such as Naive Bayes, Logistic Regression, or Support Vector
10 | Machines.
11 |
12 | DATA SET LINK: www.kaggle.com/datasets/hijest/genre-classification-dataset-imdb
13 |
14 | Task-2: CREDIT CARD FRAUD DETECTION
15 | -Building a model to detect fraudulent credit card transactions. Use a
16 | dataset containing information about credit card transactions, and
17 | experiment with algorithms like Logistic Regression, Decision Trees,
18 | or Random Forests to classify transactions as fraudulent or legitimate.
19 |
20 | DATA SET LINK: www.kaggle.com/datasets/kartik2112/fraud-detection
21 |
22 | Task-3: CUSTOMER CHURN PREDICTION
23 | -Developing a model to predict customer churn for a subscription-based
24 | service or business. Use historical customer data, including features like
25 | usage behavior and customer demographics, and try algorithms like
26 | Logistic Regression, Random Forests, or Gradient Boosting to predict
27 | churn.
28 |
29 | DATA SET LINK: www.kaggle.com/datasets/shantanudhakadd/bank-customer-churn-prediction
30 |
31 | Task-4: SPAM SMS DETECTION
32 | -Building an AI model that can classify SMS messages as spam or legitimate.
33 | Use techniques like TF-IDF or word embeddings with classifiers like
34 | Naive Bayes, Logistic Regression, or Support Vector Machines to identify spam messages.
35 |
36 | DATA SET LINK: www.kaggle.com/datasets/uciml/sms-spam-collection-dataset
37 |
38 | Task-5: HANDWRITTEN TEXT GENERATION
39 | -Implementing a character-level recurrent neural network (RNN) to generate
40 | handwritten-like text. Train the model on a dataset of handwritten text
41 | examples, and let it generate new text based on the learned patterns.
42 |
43 | DATA SET LINK: www.paperswithcode.com/dataset/deepwriting
44 |
45 | DATA SET LINK: www.paperswithcode.com/dataset/iam
46 |
--------------------------------------------------------------------------------
/credit_card_fraud_Detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "authorship_tag": "ABX9TyMbEY1vks5Dwg9BIzeZIV30",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 | " "
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 36,
32 | "metadata": {
33 | "id": "nkdTwCvQYLU0"
34 | },
35 | "outputs": [],
36 | "source": [
37 | "import numpy as np\n",
38 | "import pandas as pd\n",
39 | "import matplotlib.pyplot as plt\n",
40 | "import seaborn as sns\n",
41 | "import plotly.express as px"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "source": [
47 | "from google.colab import drive\n",
48 | "drive.mount('/content/drive')"
49 | ],
50 | "metadata": {
51 | "id": "4BtsOfP4L2Gd",
52 | "colab": {
53 | "base_uri": "https://localhost:8080/"
54 | },
55 | "outputId": "1775b664-0d0c-4acc-c572-c6c8246808e8"
56 | },
57 | "execution_count": 37,
58 | "outputs": [
59 | {
60 | "output_type": "stream",
61 | "name": "stdout",
62 | "text": [
63 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
64 | ]
65 | }
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "source": [
71 | "raw_data=pd.read_csv(\"/content/drive/MyDrive/FML_DATA_SETS/credit card fraud Detection /PS_20174392719_1491204439457_log.csv\")\n",
72 | "raw_data.head()"
73 | ],
74 | "metadata": {
75 | "id": "tFf6NpkCL3Uv",
76 | "colab": {
77 | "base_uri": "https://localhost:8080/",
78 | "height": 206
79 | },
80 | "outputId": "d64fcb4a-926b-46f3-ce47-a7fc3e9091fc"
81 | },
82 | "execution_count": 38,
83 | "outputs": [
84 | {
85 | "output_type": "execute_result",
86 | "data": {
87 | "text/plain": [
88 | " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n",
89 | "0 1 PAYMENT 9839.64 C1231006815 170136.0 160296.36 \n",
90 | "1 1 PAYMENT 1864.28 C1666544295 21249.0 19384.72 \n",
91 | "2 1 TRANSFER 181.00 C1305486145 181.0 0.00 \n",
92 | "3 1 CASH_OUT 181.00 C840083671 181.0 0.00 \n",
93 | "4 1 PAYMENT 11668.14 C2048537720 41554.0 29885.86 \n",
94 | "\n",
95 | " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n",
96 | "0 M1979787155 0.0 0.0 0 0 \n",
97 | "1 M2044282225 0.0 0.0 0 0 \n",
98 | "2 C553264065 0.0 0.0 1 0 \n",
99 | "3 C38997010 21182.0 0.0 1 0 \n",
100 | "4 M1230701703 0.0 0.0 0 0 "
101 | ],
102 | "text/html": [
103 | "\n",
104 | "
\n",
105 | "
\n",
106 | "\n",
119 | "
\n",
120 | " \n",
121 | " \n",
122 | " \n",
123 | " step \n",
124 | " type \n",
125 | " amount \n",
126 | " nameOrig \n",
127 | " oldbalanceOrg \n",
128 | " newbalanceOrig \n",
129 | " nameDest \n",
130 | " oldbalanceDest \n",
131 | " newbalanceDest \n",
132 | " isFraud \n",
133 | " isFlaggedFraud \n",
134 | " \n",
135 | " \n",
136 | " \n",
137 | " \n",
138 | " 0 \n",
139 | " 1 \n",
140 | " PAYMENT \n",
141 | " 9839.64 \n",
142 | " C1231006815 \n",
143 | " 170136.0 \n",
144 | " 160296.36 \n",
145 | " M1979787155 \n",
146 | " 0.0 \n",
147 | " 0.0 \n",
148 | " 0 \n",
149 | " 0 \n",
150 | " \n",
151 | " \n",
152 | " 1 \n",
153 | " 1 \n",
154 | " PAYMENT \n",
155 | " 1864.28 \n",
156 | " C1666544295 \n",
157 | " 21249.0 \n",
158 | " 19384.72 \n",
159 | " M2044282225 \n",
160 | " 0.0 \n",
161 | " 0.0 \n",
162 | " 0 \n",
163 | " 0 \n",
164 | " \n",
165 | " \n",
166 | " 2 \n",
167 | " 1 \n",
168 | " TRANSFER \n",
169 | " 181.00 \n",
170 | " C1305486145 \n",
171 | " 181.0 \n",
172 | " 0.00 \n",
173 | " C553264065 \n",
174 | " 0.0 \n",
175 | " 0.0 \n",
176 | " 1 \n",
177 | " 0 \n",
178 | " \n",
179 | " \n",
180 | " 3 \n",
181 | " 1 \n",
182 | " CASH_OUT \n",
183 | " 181.00 \n",
184 | " C840083671 \n",
185 | " 181.0 \n",
186 | " 0.00 \n",
187 | " C38997010 \n",
188 | " 21182.0 \n",
189 | " 0.0 \n",
190 | " 1 \n",
191 | " 0 \n",
192 | " \n",
193 | " \n",
194 | " 4 \n",
195 | " 1 \n",
196 | " PAYMENT \n",
197 | " 11668.14 \n",
198 | " C2048537720 \n",
199 | " 41554.0 \n",
200 | " 29885.86 \n",
201 | " M1230701703 \n",
202 | " 0.0 \n",
203 | " 0.0 \n",
204 | " 0 \n",
205 | " 0 \n",
206 | " \n",
207 | " \n",
208 | "
\n",
209 | "
\n",
210 | "
\n",
417 | "
\n"
418 | ]
419 | },
420 | "metadata": {},
421 | "execution_count": 38
422 | }
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "source": [
428 | "data=raw_data.iloc[:2620].copy()  # explicit copy: later column drops/assignments must act on an independent frame, not a slice of raw_data"
429 | ],
430 | "metadata": {
431 | "id": "-8jzuXzEMf3n"
432 | },
433 | "execution_count": 39,
434 | "outputs": []
435 | },
436 | {
437 | "cell_type": "code",
438 | "source": [
439 | "data.shape"
440 | ],
441 | "metadata": {
442 | "colab": {
443 | "base_uri": "https://localhost:8080/"
444 | },
445 | "id": "8S3vS7h8MuFR",
446 | "outputId": "f8231664-bc1b-4a4c-85df-2fbbc3d8542d"
447 | },
448 | "execution_count": 40,
449 | "outputs": [
450 | {
451 | "output_type": "execute_result",
452 | "data": {
453 | "text/plain": [
454 | "(2620, 11)"
455 | ]
456 | },
457 | "metadata": {},
458 | "execution_count": 40
459 | }
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "source": [
465 | "data.describe()"
466 | ],
467 | "metadata": {
468 | "colab": {
469 | "base_uri": "https://localhost:8080/",
470 | "height": 300
471 | },
472 | "id": "ByZFhuUbM00B",
473 | "outputId": "13ba0028-452f-4581-ef59-698e185af00b"
474 | },
475 | "execution_count": 41,
476 | "outputs": [
477 | {
478 | "output_type": "execute_result",
479 | "data": {
480 | "text/plain": [
481 | " step amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
482 | "count 2620.0 2.620000e+03 2.620000e+03 2.620000e+03 2.620000e+03 \n",
483 | "mean 1.0 1.055404e+05 8.207836e+05 8.398497e+05 7.024779e+05 \n",
484 | "std 0.0 2.468862e+05 1.936319e+06 1.980422e+06 2.143229e+06 \n",
485 | "min 1.0 6.420000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
486 | "25% 1.0 3.915938e+03 9.150250e+01 0.000000e+00 0.000000e+00 \n",
487 | "50% 1.0 9.910020e+03 2.211219e+04 1.386267e+04 0.000000e+00 \n",
488 | "75% 1.0 1.146133e+05 2.011165e+05 2.015433e+05 3.560640e+05 \n",
489 | "max 1.0 3.776389e+06 1.010284e+07 1.024625e+07 1.951612e+07 \n",
490 | "\n",
491 | " newbalanceDest isFraud isFlaggedFraud \n",
492 | "count 2.620000e+03 2620.000000 2620.0 \n",
493 | "mean 9.559343e+05 0.006107 0.0 \n",
494 | "std 2.860721e+06 0.077922 0.0 \n",
495 | "min 0.000000e+00 0.000000 0.0 \n",
496 | "25% 0.000000e+00 0.000000 0.0 \n",
497 | "50% 0.000000e+00 0.000000 0.0 \n",
498 | "75% 2.650924e+05 0.000000 0.0 \n",
499 | "max 1.916920e+07 1.000000 0.0 "
500 | ],
501 | "text/html": [
502 | "\n",
503 | " \n",
504 | "
\n",
505 | "\n",
518 | "
\n",
519 | " \n",
520 | " \n",
521 | " \n",
522 | " step \n",
523 | " amount \n",
524 | " oldbalanceOrg \n",
525 | " newbalanceOrig \n",
526 | " oldbalanceDest \n",
527 | " newbalanceDest \n",
528 | " isFraud \n",
529 | " isFlaggedFraud \n",
530 | " \n",
531 | " \n",
532 | " \n",
533 | " \n",
534 | " count \n",
535 | " 2620.0 \n",
536 | " 2.620000e+03 \n",
537 | " 2.620000e+03 \n",
538 | " 2.620000e+03 \n",
539 | " 2.620000e+03 \n",
540 | " 2.620000e+03 \n",
541 | " 2620.000000 \n",
542 | " 2620.0 \n",
543 | " \n",
544 | " \n",
545 | " mean \n",
546 | " 1.0 \n",
547 | " 1.055404e+05 \n",
548 | " 8.207836e+05 \n",
549 | " 8.398497e+05 \n",
550 | " 7.024779e+05 \n",
551 | " 9.559343e+05 \n",
552 | " 0.006107 \n",
553 | " 0.0 \n",
554 | " \n",
555 | " \n",
556 | " std \n",
557 | " 0.0 \n",
558 | " 2.468862e+05 \n",
559 | " 1.936319e+06 \n",
560 | " 1.980422e+06 \n",
561 | " 2.143229e+06 \n",
562 | " 2.860721e+06 \n",
563 | " 0.077922 \n",
564 | " 0.0 \n",
565 | " \n",
566 | " \n",
567 | " min \n",
568 | " 1.0 \n",
569 | " 6.420000e+00 \n",
570 | " 0.000000e+00 \n",
571 | " 0.000000e+00 \n",
572 | " 0.000000e+00 \n",
573 | " 0.000000e+00 \n",
574 | " 0.000000 \n",
575 | " 0.0 \n",
576 | " \n",
577 | " \n",
578 | " 25% \n",
579 | " 1.0 \n",
580 | " 3.915938e+03 \n",
581 | " 9.150250e+01 \n",
582 | " 0.000000e+00 \n",
583 | " 0.000000e+00 \n",
584 | " 0.000000e+00 \n",
585 | " 0.000000 \n",
586 | " 0.0 \n",
587 | " \n",
588 | " \n",
589 | " 50% \n",
590 | " 1.0 \n",
591 | " 9.910020e+03 \n",
592 | " 2.211219e+04 \n",
593 | " 1.386267e+04 \n",
594 | " 0.000000e+00 \n",
595 | " 0.000000e+00 \n",
596 | " 0.000000 \n",
597 | " 0.0 \n",
598 | " \n",
599 | " \n",
600 | " 75% \n",
601 | " 1.0 \n",
602 | " 1.146133e+05 \n",
603 | " 2.011165e+05 \n",
604 | " 2.015433e+05 \n",
605 | " 3.560640e+05 \n",
606 | " 2.650924e+05 \n",
607 | " 0.000000 \n",
608 | " 0.0 \n",
609 | " \n",
610 | " \n",
611 | " max \n",
612 | " 1.0 \n",
613 | " 3.776389e+06 \n",
614 | " 1.010284e+07 \n",
615 | " 1.024625e+07 \n",
616 | " 1.951612e+07 \n",
617 | " 1.916920e+07 \n",
618 | " 1.000000 \n",
619 | " 0.0 \n",
620 | " \n",
621 | " \n",
622 | "
\n",
623 | "
\n",
624 | "
\n",
831 | "
\n"
832 | ]
833 | },
834 | "metadata": {},
835 | "execution_count": 41
836 | }
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "source": [
842 | "data['isFraud'].value_counts()"
843 | ],
844 | "metadata": {
845 | "id": "Nb8OmICVNCE7",
846 | "colab": {
847 | "base_uri": "https://localhost:8080/"
848 | },
849 | "outputId": "df1a6a72-35dd-437a-f684-5908435d3abf"
850 | },
851 | "execution_count": 42,
852 | "outputs": [
853 | {
854 | "output_type": "execute_result",
855 | "data": {
856 | "text/plain": [
857 | "0 2604\n",
858 | "1 16\n",
859 | "Name: isFraud, dtype: int64"
860 | ]
861 | },
862 | "metadata": {},
863 | "execution_count": 42
864 | }
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "source": [
870 | "data = data.drop(columns=['nameOrig', 'nameDest'], errors='ignore')  # rebind instead of inplace=True: re-runnable, and never mutates a slice of raw_data\n"
871 | ],
872 | "metadata": {
873 | "colab": {
874 | "base_uri": "https://localhost:8080/"
875 | },
876 | "id": "8Qb9m-A6ORB_",
877 | "outputId": "251e7470-8135-4c35-c186-dd100e303d9f"
878 | },
879 | "execution_count": 43,
880 | "outputs": [
881 | {
882 | "output_type": "stream",
883 | "name": "stderr",
884 | "text": [
885 | ":1: SettingWithCopyWarning:\n",
886 | "\n",
887 | "\n",
888 | "A value is trying to be set on a copy of a slice from a DataFrame\n",
889 | "\n",
890 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
891 | "\n"
892 | ]
893 | }
894 | ]
895 | },
896 | {
897 | "cell_type": "code",
898 | "source": [
899 | "data.head()"
900 | ],
901 | "metadata": {
902 | "colab": {
903 | "base_uri": "https://localhost:8080/",
904 | "height": 206
905 | },
906 | "id": "2CE77zbVO5LS",
907 | "outputId": "ae3b3629-dc0b-4138-8f25-94dcae5b08ca"
908 | },
909 | "execution_count": 44,
910 | "outputs": [
911 | {
912 | "output_type": "execute_result",
913 | "data": {
914 | "text/plain": [
915 | " step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
916 | "0 1 PAYMENT 9839.64 170136.0 160296.36 0.0 \n",
917 | "1 1 PAYMENT 1864.28 21249.0 19384.72 0.0 \n",
918 | "2 1 TRANSFER 181.00 181.0 0.00 0.0 \n",
919 | "3 1 CASH_OUT 181.00 181.0 0.00 21182.0 \n",
920 | "4 1 PAYMENT 11668.14 41554.0 29885.86 0.0 \n",
921 | "\n",
922 | " newbalanceDest isFraud isFlaggedFraud \n",
923 | "0 0.0 0 0 \n",
924 | "1 0.0 0 0 \n",
925 | "2 0.0 1 0 \n",
926 | "3 0.0 1 0 \n",
927 | "4 0.0 0 0 "
928 | ],
929 | "text/html": [
930 | "\n",
931 | " \n",
932 | "
\n",
933 | "\n",
946 | "
\n",
947 | " \n",
948 | " \n",
949 | " \n",
950 | " step \n",
951 | " type \n",
952 | " amount \n",
953 | " oldbalanceOrg \n",
954 | " newbalanceOrig \n",
955 | " oldbalanceDest \n",
956 | " newbalanceDest \n",
957 | " isFraud \n",
958 | " isFlaggedFraud \n",
959 | " \n",
960 | " \n",
961 | " \n",
962 | " \n",
963 | " 0 \n",
964 | " 1 \n",
965 | " PAYMENT \n",
966 | " 9839.64 \n",
967 | " 170136.0 \n",
968 | " 160296.36 \n",
969 | " 0.0 \n",
970 | " 0.0 \n",
971 | " 0 \n",
972 | " 0 \n",
973 | " \n",
974 | " \n",
975 | " 1 \n",
976 | " 1 \n",
977 | " PAYMENT \n",
978 | " 1864.28 \n",
979 | " 21249.0 \n",
980 | " 19384.72 \n",
981 | " 0.0 \n",
982 | " 0.0 \n",
983 | " 0 \n",
984 | " 0 \n",
985 | " \n",
986 | " \n",
987 | " 2 \n",
988 | " 1 \n",
989 | " TRANSFER \n",
990 | " 181.00 \n",
991 | " 181.0 \n",
992 | " 0.00 \n",
993 | " 0.0 \n",
994 | " 0.0 \n",
995 | " 1 \n",
996 | " 0 \n",
997 | " \n",
998 | " \n",
999 | " 3 \n",
1000 | " 1 \n",
1001 | " CASH_OUT \n",
1002 | " 181.00 \n",
1003 | " 181.0 \n",
1004 | " 0.00 \n",
1005 | " 21182.0 \n",
1006 | " 0.0 \n",
1007 | " 1 \n",
1008 | " 0 \n",
1009 | " \n",
1010 | " \n",
1011 | " 4 \n",
1012 | " 1 \n",
1013 | " PAYMENT \n",
1014 | " 11668.14 \n",
1015 | " 41554.0 \n",
1016 | " 29885.86 \n",
1017 | " 0.0 \n",
1018 | " 0.0 \n",
1019 | " 0 \n",
1020 | " 0 \n",
1021 | " \n",
1022 | " \n",
1023 | "
\n",
1024 | "
\n",
1025 | "
\n",
1232 | "
\n"
1233 | ]
1234 | },
1235 | "metadata": {},
1236 | "execution_count": 44
1237 | }
1238 | ]
1239 | },
1240 | {
1241 | "cell_type": "code",
1242 | "source": [
1243 | "from sklearn.preprocessing import LabelEncoder\n",
1244 | "encoder=LabelEncoder()\n",
1245 | "data['type']=encoder.fit_transform(data['type'])"
1246 | ],
1247 | "metadata": {
1248 | "id": "rLdvROOSPFW0",
1249 | "colab": {
1250 | "base_uri": "https://localhost:8080/"
1251 | },
1252 | "outputId": "51593faf-7cba-44f5-c4d2-0a53a1ef3e1a"
1253 | },
1254 | "execution_count": 45,
1255 | "outputs": [
1256 | {
1257 | "output_type": "stream",
1258 | "name": "stderr",
1259 | "text": [
1260 | ":3: SettingWithCopyWarning:\n",
1261 | "\n",
1262 | "\n",
1263 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
1264 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
1265 | "\n",
1266 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1267 | "\n"
1268 | ]
1269 | }
1270 | ]
1271 | },
1272 | {
1273 | "cell_type": "code",
1274 | "source": [
1275 | "data.head()"
1276 | ],
1277 | "metadata": {
1278 | "colab": {
1279 | "base_uri": "https://localhost:8080/",
1280 | "height": 206
1281 | },
1282 | "id": "Bjzd0w1My4rY",
1283 | "outputId": "1fc53da8-2076-440c-e092-b4cc0dbaab58"
1284 | },
1285 | "execution_count": 46,
1286 | "outputs": [
1287 | {
1288 | "output_type": "execute_result",
1289 | "data": {
1290 | "text/plain": [
1291 | " step type amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
1292 | "0 1 3 9839.64 170136.0 160296.36 0.0 \n",
1293 | "1 1 3 1864.28 21249.0 19384.72 0.0 \n",
1294 | "2 1 4 181.00 181.0 0.00 0.0 \n",
1295 | "3 1 1 181.00 181.0 0.00 21182.0 \n",
1296 | "4 1 3 11668.14 41554.0 29885.86 0.0 \n",
1297 | "\n",
1298 | " newbalanceDest isFraud isFlaggedFraud \n",
1299 | "0 0.0 0 0 \n",
1300 | "1 0.0 0 0 \n",
1301 | "2 0.0 1 0 \n",
1302 | "3 0.0 1 0 \n",
1303 | "4 0.0 0 0 "
1304 | ],
1305 | "text/html": [
1306 | "\n",
1307 | " \n",
1308 | "
\n",
1309 | "\n",
1322 | "
\n",
1323 | " \n",
1324 | " \n",
1325 | " \n",
1326 | " step \n",
1327 | " type \n",
1328 | " amount \n",
1329 | " oldbalanceOrg \n",
1330 | " newbalanceOrig \n",
1331 | " oldbalanceDest \n",
1332 | " newbalanceDest \n",
1333 | " isFraud \n",
1334 | " isFlaggedFraud \n",
1335 | " \n",
1336 | " \n",
1337 | " \n",
1338 | " \n",
1339 | " 0 \n",
1340 | " 1 \n",
1341 | " 3 \n",
1342 | " 9839.64 \n",
1343 | " 170136.0 \n",
1344 | " 160296.36 \n",
1345 | " 0.0 \n",
1346 | " 0.0 \n",
1347 | " 0 \n",
1348 | " 0 \n",
1349 | " \n",
1350 | " \n",
1351 | " 1 \n",
1352 | " 1 \n",
1353 | " 3 \n",
1354 | " 1864.28 \n",
1355 | " 21249.0 \n",
1356 | " 19384.72 \n",
1357 | " 0.0 \n",
1358 | " 0.0 \n",
1359 | " 0 \n",
1360 | " 0 \n",
1361 | " \n",
1362 | " \n",
1363 | " 2 \n",
1364 | " 1 \n",
1365 | " 4 \n",
1366 | " 181.00 \n",
1367 | " 181.0 \n",
1368 | " 0.00 \n",
1369 | " 0.0 \n",
1370 | " 0.0 \n",
1371 | " 1 \n",
1372 | " 0 \n",
1373 | " \n",
1374 | " \n",
1375 | " 3 \n",
1376 | " 1 \n",
1377 | " 1 \n",
1378 | " 181.00 \n",
1379 | " 181.0 \n",
1380 | " 0.00 \n",
1381 | " 21182.0 \n",
1382 | " 0.0 \n",
1383 | " 1 \n",
1384 | " 0 \n",
1385 | " \n",
1386 | " \n",
1387 | " 4 \n",
1388 | " 1 \n",
1389 | " 3 \n",
1390 | " 11668.14 \n",
1391 | " 41554.0 \n",
1392 | " 29885.86 \n",
1393 | " 0.0 \n",
1394 | " 0.0 \n",
1395 | " 0 \n",
1396 | " 0 \n",
1397 | " \n",
1398 | " \n",
1399 | "
\n",
1400 | "
\n",
1401 | "
\n",
1608 | "
\n"
1609 | ]
1610 | },
1611 | "metadata": {},
1612 | "execution_count": 46
1613 | }
1614 | ]
1615 | },
1616 | {
1617 | "cell_type": "code",
1618 | "source": [
1619 | "data.isna().sum()"
1620 | ],
1621 | "metadata": {
1622 | "colab": {
1623 | "base_uri": "https://localhost:8080/"
1624 | },
1625 | "id": "ntfOh8Ew7BkS",
1626 | "outputId": "15962cea-5a74-41b7-a86d-82db2ea87083"
1627 | },
1628 | "execution_count": 47,
1629 | "outputs": [
1630 | {
1631 | "output_type": "execute_result",
1632 | "data": {
1633 | "text/plain": [
1634 | "step 0\n",
1635 | "type 0\n",
1636 | "amount 0\n",
1637 | "oldbalanceOrg 0\n",
1638 | "newbalanceOrig 0\n",
1639 | "oldbalanceDest 0\n",
1640 | "newbalanceDest 0\n",
1641 | "isFraud 0\n",
1642 | "isFlaggedFraud 0\n",
1643 | "dtype: int64"
1644 | ]
1645 | },
1646 | "metadata": {},
1647 | "execution_count": 47
1648 | }
1649 | ]
1650 | },
1651 | {
1652 | "cell_type": "code",
1653 | "source": [
1654 | "fig = px.scatter(data, x = 'amount', y =data.index, color = data.isFraud,\n",
1655 | " title = 'Distribution of Amount Values')\n",
1656 | "fig.update_layout(xaxis_title='Transaction Amount (in €)',\n",
1657 | " yaxis_title='Transactions')\n",
1658 | "fig.show()"
1659 | ],
1660 | "metadata": {
1661 | "colab": {
1662 | "base_uri": "https://localhost:8080/",
1663 | "height": 542
1664 | },
1665 | "id": "OP05kcp4y7Ux",
1666 | "outputId": "afe85dd4-4bc1-4bc2-8edf-9545c7e19627"
1667 | },
1668 | "execution_count": 48,
1669 | "outputs": [
1670 | {
1671 | "output_type": "display_data",
1672 | "data": {
1673 | "text/html": [
1674 | "\n",
1675 | " \n",
1676 | "\n",
1677 | " \n",
1703 | "\n",
1704 | ""
1705 | ]
1706 | },
1707 | "metadata": {}
1708 | }
1709 | ]
1710 | },
1711 | {
1712 | "cell_type": "code",
1713 | "source": [
1714 | "X = data.drop(columns='isFraud')  # features: every column except the label\n",
1715 | "Y = data['isFraud']  # target: the isFraud column"
1716 | ],
1717 | "metadata": {
1718 | "id": "tvcLfN924yHi"
1719 | },
1720 | "execution_count": 49,
1721 | "outputs": []
1722 | },
1723 | {
1724 | "cell_type": "code",
1725 | "source": [
1726 | "from sklearn.model_selection import train_test_split\n",
1727 | "xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.27,random_state=41,stratify=Y)  # stratify: keep the rare fraud class proportionally represented in both splits"
1728 | ],
1729 | "metadata": {
1730 | "id": "ngE3UCEz0R-g"
1731 | },
1732 | "execution_count": 50,
1733 | "outputs": []
1734 | },
1735 | {
1736 | "cell_type": "code",
1737 | "source": [
1738 | "# Oversample the minority (fraud) class with SMOTE on the training split only; seeded so the resampled set is reproducible\n",
1739 | "from imblearn.over_sampling import SMOTE\n",
1740 | "xtrain,ytrain = SMOTE(random_state=41).fit_resample(xtrain,ytrain)"
1741 | ],
1742 | "metadata": {
1743 | "id": "-Xj5aj1p6Wub"
1744 | },
1745 | "execution_count": 51,
1746 | "outputs": []
1747 | },
1748 | {
1749 | "cell_type": "code",
1750 | "source": [
1751 | "ytrain.value_counts()"
1752 | ],
1753 | "metadata": {
1754 | "colab": {
1755 | "base_uri": "https://localhost:8080/"
1756 | },
1757 | "id": "c48pQ-yB6nMF",
1758 | "outputId": "dc50a523-e903-498f-8562-01028fea03d5"
1759 | },
1760 | "execution_count": 52,
1761 | "outputs": [
1762 | {
1763 | "output_type": "execute_result",
1764 | "data": {
1765 | "text/plain": [
1766 | "0 1900\n",
1767 | "1 1900\n",
1768 | "Name: isFraud, dtype: int64"
1769 | ]
1770 | },
1771 | "metadata": {},
1772 | "execution_count": 52
1773 | }
1774 | ]
1775 | },
1776 | {
1777 | "cell_type": "code",
1778 | "source": [
1779 | "xtrain.info()"
1780 | ],
1781 | "metadata": {
1782 | "colab": {
1783 | "base_uri": "https://localhost:8080/"
1784 | },
1785 | "id": "d1Zio1jo6w0N",
1786 | "outputId": "97449573-636e-4f87-a4f6-f92d7ab8ae4a"
1787 | },
1788 | "execution_count": 53,
1789 | "outputs": [
1790 | {
1791 | "output_type": "stream",
1792 | "name": "stdout",
1793 | "text": [
1794 | "\n",
1795 | "RangeIndex: 3800 entries, 0 to 3799\n",
1796 | "Data columns (total 8 columns):\n",
1797 | " # Column Non-Null Count Dtype \n",
1798 | "--- ------ -------------- ----- \n",
1799 | " 0 step 3800 non-null int64 \n",
1800 | " 1 type 3800 non-null int64 \n",
1801 | " 2 amount 3800 non-null float64\n",
1802 | " 3 oldbalanceOrg 3800 non-null float64\n",
1803 | " 4 newbalanceOrig 3800 non-null float64\n",
1804 | " 5 oldbalanceDest 3800 non-null float64\n",
1805 | " 6 newbalanceDest 3800 non-null float64\n",
1806 | " 7 isFlaggedFraud 3800 non-null int64 \n",
1807 | "dtypes: float64(5), int64(3)\n",
1808 | "memory usage: 237.6 KB\n"
1809 | ]
1810 | }
1811 | ]
1812 | },
1813 | {
1814 | "cell_type": "code",
1815 | "source": [
1816 | "from sklearn.linear_model import LogisticRegression\n",
1817 | "lr=LogisticRegression()\n",
1818 | "lr.fit(xtrain,ytrain)"
1819 | ],
1820 | "metadata": {
1821 | "colab": {
1822 | "base_uri": "https://localhost:8080/",
1823 | "height": 74
1824 | },
1825 | "id": "2rai8l9466iQ",
1826 | "outputId": "6f625d27-07a4-414e-fadb-eb394177c654"
1827 | },
1828 | "execution_count": 54,
1829 | "outputs": [
1830 | {
1831 | "output_type": "execute_result",
1832 | "data": {
1833 | "text/plain": [
1834 | "LogisticRegression()"
1835 | ],
1836 | "text/html": [
1837 | "LogisticRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
1838 | ]
1839 | },
1840 | "metadata": {},
1841 | "execution_count": 54
1842 | }
1843 | ]
1844 | },
1845 | {
1846 | "cell_type": "code",
1847 | "source": [
1848 | "ypred=lr.predict(xtest)\n"
1849 | ],
1850 | "metadata": {
1851 | "id": "CJRIfdtS8ynz"
1852 | },
1853 | "execution_count": 61,
1854 | "outputs": []
1855 | },
1856 | {
1857 | "cell_type": "code",
1858 | "source": [
1859 | "from sklearn.metrics import accuracy_score, recall_score, precision_score\n",
1860 | "print(\"accuracy_score :\", accuracy_score(ytest, ypred))\n",
1861 | "print(\"recall_score :\", recall_score(ytest, ypred))\n",
1862 | "print(\"precision_score :\", precision_score(ytest, ypred))\n"
1863 | ],
1864 | "metadata": {
1865 | "colab": {
1866 | "base_uri": "https://localhost:8080/"
1867 | },
1868 | "id": "Sqmyo3_t84E7",
1869 | "outputId": "60d35975-5a83-4059-8573-821fb37236a4"
1870 | },
1871 | "execution_count": 57,
1872 | "outputs": [
1873 | {
1874 | "output_type": "stream",
1875 | "name": "stdout",
1876 | "text": [
1877 | "accuracy_score : 0.9081920903954802\n",
1878 | "recall_score : 1.0\n",
1879 | "precision_score : 0.057971014492753624\n"
1880 | ]
1881 | }
1882 | ]
1883 | },
1884 | {
1885 | "cell_type": "code",
1886 | "source": [
1887 | "from sklearn.metrics import confusion_matrix\n",
1888 | "cm=confusion_matrix(ytest, ypred)\n",
1889 | "print(cm)\n",
1890 | "sns.heatmap(cm,annot=True)"
1891 | ],
1892 | "metadata": {
1893 | "id": "x4KvjUaI9I_k",
1894 | "colab": {
1895 | "base_uri": "https://localhost:8080/",
1896 | "height": 482
1897 | },
1898 | "outputId": "1fe5fa23-4cd0-4976-fb19-2447482beea8"
1899 | },
1900 | "execution_count": 59,
1901 | "outputs": [
1902 | {
1903 | "output_type": "stream",
1904 | "name": "stdout",
1905 | "text": [
1906 | "[[639 65]\n",
1907 | " [ 0 4]]\n"
1908 | ]
1909 | },
1910 | {
1911 | "output_type": "execute_result",
1912 | "data": {
1913 | "text/plain": [
1914 | ""
1915 | ]
1916 | },
1917 | "metadata": {},
1918 | "execution_count": 59
1919 | },
1920 | {
1921 | "output_type": "display_data",
1922 | "data": {
1923 | "text/plain": [
1924 | ""
1925 | ],
1926 | "image/png": "\n"
1927 | },
1928 | "metadata": {}
1929 | }
1930 | ]
1931 | },
1932 | {
1933 | "cell_type": "code",
1934 | "source": [
1935 | "from sklearn.ensemble import RandomForestClassifier\n",
1936 | "rfc = RandomForestClassifier(n_estimators=10, random_state=41).fit(xtrain, ytrain)  # seeded so the reported metrics are repeatable across runs\n"
1937 | ],
1938 | "metadata": {
1939 | "id": "vN7HfaB4Cssk"
1940 | },
1941 | "execution_count": 64,
1942 | "outputs": []
1943 | },
1944 | {
1945 | "cell_type": "code",
1946 | "source": [
1947 | "ypred1=rfc.predict(xtest)"
1948 | ],
1949 | "metadata": {
1950 | "id": "ORoQetdiFs0F"
1951 | },
1952 | "execution_count": 65,
1953 | "outputs": []
1954 | },
1955 | {
1956 | "cell_type": "code",
1957 | "source": [
1958 | "print(\"accuracy_score :\", accuracy_score(ytest, ypred1))\n",
1959 | "print(\"recall_score :\", recall_score(ytest, ypred1))\n",
1960 | "print(\"precision_score :\", precision_score(ytest, ypred1))  # label now matches the metric being printed\n",
1961 | "cm=confusion_matrix(ytest, ypred1)  # rows = true class, columns = predicted class\n",
1962 | "print(\"confusion_matrix :\", cm)\n",
1963 | "sns.heatmap(cm,annot=True)"
1964 | ],
1965 | "metadata": {
1966 | "colab": {
1967 | "base_uri": "https://localhost:8080/",
1968 | "height": 569
1969 | },
1970 | "id": "C49N6K68F2vA",
1971 | "outputId": "89d809be-7ca2-4a34-f33d-7d00a3b7f06f"
1972 | },
1973 | "execution_count": 66,
1974 | "outputs": [
1975 | {
1976 | "output_type": "stream",
1977 | "name": "stdout",
1978 | "text": [
1979 | "accuracy_score : 0.9915254237288136\n",
1980 | "recall_score : 0.75\n",
1981 | "precision_score : 0.375\n",
1982 | "precision_score : [[699 5]\n",
1983 | " [ 1 3]]\n",
1984 | "[[699 5]\n",
1985 | " [ 1 3]]\n"
1986 | ]
1987 | },
1988 | {
1989 | "output_type": "execute_result",
1990 | "data": {
1991 | "text/plain": [
1992 | ""
1993 | ]
1994 | },
1995 | "metadata": {},
1996 | "execution_count": 66
1997 | },
1998 | {
1999 | "output_type": "display_data",
2000 | "data": {
2001 | "text/plain": [
2002 | ""
2003 | ],
2004 | "image/png": "\n"
2005 | },
2006 | "metadata": {}
2007 | }
2008 | ]
2009 | },
2010 | {
2011 | "cell_type": "code",
2012 | "source": [],
2013 | "metadata": {
2014 | "id": "cIR_TkyiH9BG"
2015 | },
2016 | "execution_count": null,
2017 | "outputs": []
2018 | }
2019 | ]
2020 | }
--------------------------------------------------------------------------------