├── LICENSE
├── README.md
├── Gaussian Naive Bayes Classifier.ipynb
├── K Nearest Neighbours.ipynb
├── Logistic Regression Classifier.ipynb
├── Random Forest Classifier.ipynb
├── Neural Network Classifier.ipynb
└── Multiple Linear Regression.ipynb
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Azhan Mohammed
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques
3 |
4 |
5 |
6 |
7 | This repository contains the files for the various machine learning and deep learning algorithms used in our paper, "Credit Card Fraud Detection using Machine Learning and Deep Learning Techniques", submitted to the 2020 3rd International Conference on Intelligent Sustainable Systems (ICISS). The paper can be found [here](https://ieeexplore.ieee.org/abstract/document/9316002).
8 | The ML and DL algorithms that have been used are:
9 | - [Multiple Linear Regression](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/Multiple%20Linear%20Regression.ipynb)
10 | - [Logistic Regression](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/Logistic%20Regression%20Classifier.ipynb)
11 | - [K Nearest Neighbours](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/K%20Nearest%20Neighbours.ipynb)
12 | - [Naive Bayes Classifier](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/Gaussian%20Naive%20Bayes%20Classifier.ipynb)
13 | - [Random Forest Classifier](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/Random%20Forest%20Classifier.ipynb)
14 | - [Neural Network Classifier](https://github.com/sheikhazhanmohammed/Credit-Card-Fraud-Detection-using-Machine-Learning-and-Deep-Learning-Techniques/blob/main/Neural%20Network%20Classifier.ipynb)
15 |
16 | The neural network classifier contains a custom loss function, which helps deal with the imbalance in the dataset. The dataset is taken from Kaggle and is available in the `dataset` folder.
17 |
18 | #### Contact
19 |
20 | - Azhan Mohammed: azhanmohammed1999@gmail.com
21 | - Shazli Meraj: shazlimeraj@gmail.com
22 |
--------------------------------------------------------------------------------
/Gaussian Naive Bayes Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.preprocessing import StandardScaler\n",
17 | "from sklearn.naive_bayes import GaussianNB"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " \n",
46 | " Time \n",
47 | " V1 \n",
48 | " V2 \n",
49 | " V3 \n",
50 | " V4 \n",
51 | " V5 \n",
52 | " V6 \n",
53 | " V7 \n",
54 | " V8 \n",
55 | " V9 \n",
56 | " ... \n",
57 | " V21 \n",
58 | " V22 \n",
59 | " V23 \n",
60 | " V24 \n",
61 | " V25 \n",
62 | " V26 \n",
63 | " V27 \n",
64 | " V28 \n",
65 | " Amount \n",
66 | " Class \n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 \n",
72 | " 0.0 \n",
73 | " -1.359807 \n",
74 | " -0.072781 \n",
75 | " 2.536347 \n",
76 | " 1.378155 \n",
77 | " -0.338321 \n",
78 | " 0.462388 \n",
79 | " 0.239599 \n",
80 | " 0.098698 \n",
81 | " 0.363787 \n",
82 | " ... \n",
83 | " -0.018307 \n",
84 | " 0.277838 \n",
85 | " -0.110474 \n",
86 | " 0.066928 \n",
87 | " 0.128539 \n",
88 | " -0.189115 \n",
89 | " 0.133558 \n",
90 | " -0.021053 \n",
91 | " 149.62 \n",
92 | " 0 \n",
93 | " \n",
94 | " \n",
95 | " 1 \n",
96 | " 0.0 \n",
97 | " 1.191857 \n",
98 | " 0.266151 \n",
99 | " 0.166480 \n",
100 | " 0.448154 \n",
101 | " 0.060018 \n",
102 | " -0.082361 \n",
103 | " -0.078803 \n",
104 | " 0.085102 \n",
105 | " -0.255425 \n",
106 | " ... \n",
107 | " -0.225775 \n",
108 | " -0.638672 \n",
109 | " 0.101288 \n",
110 | " -0.339846 \n",
111 | " 0.167170 \n",
112 | " 0.125895 \n",
113 | " -0.008983 \n",
114 | " 0.014724 \n",
115 | " 2.69 \n",
116 | " 0 \n",
117 | " \n",
118 | " \n",
119 | " 2 \n",
120 | " 1.0 \n",
121 | " -1.358354 \n",
122 | " -1.340163 \n",
123 | " 1.773209 \n",
124 | " 0.379780 \n",
125 | " -0.503198 \n",
126 | " 1.800499 \n",
127 | " 0.791461 \n",
128 | " 0.247676 \n",
129 | " -1.514654 \n",
130 | " ... \n",
131 | " 0.247998 \n",
132 | " 0.771679 \n",
133 | " 0.909412 \n",
134 | " -0.689281 \n",
135 | " -0.327642 \n",
136 | " -0.139097 \n",
137 | " -0.055353 \n",
138 | " -0.059752 \n",
139 | " 378.66 \n",
140 | " 0 \n",
141 | " \n",
142 | " \n",
143 | " 3 \n",
144 | " 1.0 \n",
145 | " -0.966272 \n",
146 | " -0.185226 \n",
147 | " 1.792993 \n",
148 | " -0.863291 \n",
149 | " -0.010309 \n",
150 | " 1.247203 \n",
151 | " 0.237609 \n",
152 | " 0.377436 \n",
153 | " -1.387024 \n",
154 | " ... \n",
155 | " -0.108300 \n",
156 | " 0.005274 \n",
157 | " -0.190321 \n",
158 | " -1.175575 \n",
159 | " 0.647376 \n",
160 | " -0.221929 \n",
161 | " 0.062723 \n",
162 | " 0.061458 \n",
163 | " 123.50 \n",
164 | " 0 \n",
165 | " \n",
166 | " \n",
167 | " 4 \n",
168 | " 2.0 \n",
169 | " -1.158233 \n",
170 | " 0.877737 \n",
171 | " 1.548718 \n",
172 | " 0.403034 \n",
173 | " -0.407193 \n",
174 | " 0.095921 \n",
175 | " 0.592941 \n",
176 | " -0.270533 \n",
177 | " 0.817739 \n",
178 | " ... \n",
179 | " -0.009431 \n",
180 | " 0.798278 \n",
181 | " -0.137458 \n",
182 | " 0.141267 \n",
183 | " -0.206010 \n",
184 | " 0.502292 \n",
185 | " 0.219422 \n",
186 | " 0.215153 \n",
187 | " 69.99 \n",
188 | " 0 \n",
189 | " \n",
190 | " \n",
191 | "
\n",
192 | "
5 rows × 31 columns
\n",
193 | "
"
194 | ],
195 | "text/plain": [
196 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
197 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
198 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
199 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
200 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
201 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
202 | "\n",
203 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
204 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
205 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
206 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
207 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
208 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
209 | "\n",
210 | " V26 V27 V28 Amount Class \n",
211 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
212 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
213 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
214 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
215 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
216 | "\n",
217 | "[5 rows x 31 columns]"
218 | ]
219 | },
220 | "execution_count": 2,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "#load the dataset and preview its first rows\n",
227 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
228 | "dataset.head()"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 3,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 3,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "#visualize the target class 'Class'\n",
261 | "sns.countplot(x='Class', data=dataset)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 4,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "#separating positive and negative classes\n",
271 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
272 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "#creating training and testing sets with the negative class split 1:1 and the positive class split 4:1; random_state is fixed only for the positive split, the negative split is unseeded\n",
282 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
283 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5)\n",
284 | "trainDataset = positiveTrain.append(negativeTrain)\n",
285 | "testDataset = positiveTest.append(negativeTest)"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
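294 | "#create the Gaussian Naive Bayes classifier and standardize the features (note: the scaler is re-fit on the test features rather than reusing the training fit)\n",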
295 | "classifier = GaussianNB()\n",
296 | "yTrain = trainDataset['Class']\n",
297 | "yTest = testDataset['Class']\n",
298 | "xTrain = trainDataset.drop(columns=['Class'])\n",
299 | "xTest = testDataset.drop(columns=['Class'])\n",
300 | "scaler = StandardScaler()\n",
301 | "xTrain = scaler.fit_transform(xTrain)\n",
302 | "xTest = scaler.fit_transform(xTest)"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 7,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "GaussianNB()"
314 | ]
315 | },
316 | "execution_count": 7,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#fit the classifier on the training data\n",
323 | "classifier.fit(xTrain, yTrain)"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 8,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | " precision recall f1-score support\n",
336 | "\n",
337 | " 0 1.00 0.98 0.99 142157\n",
338 | " 1 0.10 0.83 0.17 393\n",
339 | "\n",
340 | " accuracy 0.98 142550\n",
341 | " macro avg 0.55 0.90 0.58 142550\n",
342 | "weighted avg 1.00 0.98 0.99 142550\n",
343 | "\n",
344 | "ROC AUC Score: 0.90383724578178\n"
345 | ]
346 | }
347 | ],
348 | "source": [
349 | "#predict on the training data and check the results\n",
350 | "predTrain = classifier.predict(xTrain)\n",
351 | "print(classification_report(yTrain, predTrain))\n",
352 | "print('ROC AUC Score: ',roc(yTrain, predTrain))"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 9,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "name": "stdout",
362 | "output_type": "stream",
363 | "text": [
364 | " precision recall f1-score support\n",
365 | "\n",
366 | " 0 1.00 0.98 0.99 142158\n",
367 | " 1 0.02 0.86 0.05 99\n",
368 | "\n",
369 | " accuracy 0.98 142257\n",
370 | " macro avg 0.51 0.92 0.52 142257\n",
371 | "weighted avg 1.00 0.98 0.99 142257\n",
372 | "\n",
373 | "ROC AUC Score: 0.9173062665655414\n"
374 | ]
375 | }
376 | ],
377 | "source": [
378 | "#predict test values and check results\n",
379 | "predTest = classifier.predict(xTest)\n",
380 | "print(classification_report(yTest, predTest))\n",
381 | "print('ROC AUC Score: ',roc(yTest, predTest))"
382 | ]
383 | }
384 | ],
385 | "metadata": {
386 | "kernelspec": {
387 | "display_name": "Python 3",
388 | "language": "python",
389 | "name": "python3"
390 | },
391 | "language_info": {
392 | "codemirror_mode": {
393 | "name": "ipython",
394 | "version": 3
395 | },
396 | "file_extension": ".py",
397 | "mimetype": "text/x-python",
398 | "name": "python",
399 | "nbconvert_exporter": "python",
400 | "pygments_lexer": "ipython3",
401 | "version": "3.8.3"
402 | }
403 | },
404 | "nbformat": 4,
405 | "nbformat_minor": 4
406 | }
407 |
--------------------------------------------------------------------------------
/K Nearest Neighbours.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.preprocessing import StandardScaler\n",
17 | "from sklearn.neighbors import KNeighborsClassifier"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " \n",
46 | " Time \n",
47 | " V1 \n",
48 | " V2 \n",
49 | " V3 \n",
50 | " V4 \n",
51 | " V5 \n",
52 | " V6 \n",
53 | " V7 \n",
54 | " V8 \n",
55 | " V9 \n",
56 | " ... \n",
57 | " V21 \n",
58 | " V22 \n",
59 | " V23 \n",
60 | " V24 \n",
61 | " V25 \n",
62 | " V26 \n",
63 | " V27 \n",
64 | " V28 \n",
65 | " Amount \n",
66 | " Class \n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 \n",
72 | " 0.0 \n",
73 | " -1.359807 \n",
74 | " -0.072781 \n",
75 | " 2.536347 \n",
76 | " 1.378155 \n",
77 | " -0.338321 \n",
78 | " 0.462388 \n",
79 | " 0.239599 \n",
80 | " 0.098698 \n",
81 | " 0.363787 \n",
82 | " ... \n",
83 | " -0.018307 \n",
84 | " 0.277838 \n",
85 | " -0.110474 \n",
86 | " 0.066928 \n",
87 | " 0.128539 \n",
88 | " -0.189115 \n",
89 | " 0.133558 \n",
90 | " -0.021053 \n",
91 | " 149.62 \n",
92 | " 0 \n",
93 | " \n",
94 | " \n",
95 | " 1 \n",
96 | " 0.0 \n",
97 | " 1.191857 \n",
98 | " 0.266151 \n",
99 | " 0.166480 \n",
100 | " 0.448154 \n",
101 | " 0.060018 \n",
102 | " -0.082361 \n",
103 | " -0.078803 \n",
104 | " 0.085102 \n",
105 | " -0.255425 \n",
106 | " ... \n",
107 | " -0.225775 \n",
108 | " -0.638672 \n",
109 | " 0.101288 \n",
110 | " -0.339846 \n",
111 | " 0.167170 \n",
112 | " 0.125895 \n",
113 | " -0.008983 \n",
114 | " 0.014724 \n",
115 | " 2.69 \n",
116 | " 0 \n",
117 | " \n",
118 | " \n",
119 | " 2 \n",
120 | " 1.0 \n",
121 | " -1.358354 \n",
122 | " -1.340163 \n",
123 | " 1.773209 \n",
124 | " 0.379780 \n",
125 | " -0.503198 \n",
126 | " 1.800499 \n",
127 | " 0.791461 \n",
128 | " 0.247676 \n",
129 | " -1.514654 \n",
130 | " ... \n",
131 | " 0.247998 \n",
132 | " 0.771679 \n",
133 | " 0.909412 \n",
134 | " -0.689281 \n",
135 | " -0.327642 \n",
136 | " -0.139097 \n",
137 | " -0.055353 \n",
138 | " -0.059752 \n",
139 | " 378.66 \n",
140 | " 0 \n",
141 | " \n",
142 | " \n",
143 | " 3 \n",
144 | " 1.0 \n",
145 | " -0.966272 \n",
146 | " -0.185226 \n",
147 | " 1.792993 \n",
148 | " -0.863291 \n",
149 | " -0.010309 \n",
150 | " 1.247203 \n",
151 | " 0.237609 \n",
152 | " 0.377436 \n",
153 | " -1.387024 \n",
154 | " ... \n",
155 | " -0.108300 \n",
156 | " 0.005274 \n",
157 | " -0.190321 \n",
158 | " -1.175575 \n",
159 | " 0.647376 \n",
160 | " -0.221929 \n",
161 | " 0.062723 \n",
162 | " 0.061458 \n",
163 | " 123.50 \n",
164 | " 0 \n",
165 | " \n",
166 | " \n",
167 | " 4 \n",
168 | " 2.0 \n",
169 | " -1.158233 \n",
170 | " 0.877737 \n",
171 | " 1.548718 \n",
172 | " 0.403034 \n",
173 | " -0.407193 \n",
174 | " 0.095921 \n",
175 | " 0.592941 \n",
176 | " -0.270533 \n",
177 | " 0.817739 \n",
178 | " ... \n",
179 | " -0.009431 \n",
180 | " 0.798278 \n",
181 | " -0.137458 \n",
182 | " 0.141267 \n",
183 | " -0.206010 \n",
184 | " 0.502292 \n",
185 | " 0.219422 \n",
186 | " 0.215153 \n",
187 | " 69.99 \n",
188 | " 0 \n",
189 | " \n",
190 | " \n",
191 | "
\n",
192 | "
5 rows × 31 columns
\n",
193 | "
"
194 | ],
195 | "text/plain": [
196 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
197 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
198 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
199 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
200 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
201 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
202 | "\n",
203 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
204 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
205 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
206 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
207 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
208 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
209 | "\n",
210 | " V26 V27 V28 Amount Class \n",
211 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
212 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
213 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
214 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
215 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
216 | "\n",
217 | "[5 rows x 31 columns]"
218 | ]
219 | },
220 | "execution_count": 2,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "#load the dataset and preview its first rows\n",
227 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
228 | "dataset.head()"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 3,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 3,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "#visualize the target class 'Class'\n",
261 | "sns.countplot(x='Class', data=dataset)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 4,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "#separating positive and negative classes\n",
271 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
272 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "#create training and testing sets: negative class split 1:1, positive class split 4:1; random_state is fixed on both splits so the split is reproducible\n",
282 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
283 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5, random_state=21)\n",
284 | "trainDataset = pd.concat([positiveTrain, negativeTrain])  #DataFrame.append was removed in pandas 2.0\n",
285 | "testDataset = pd.concat([positiveTest, negativeTest])"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "#create the classifier object and scale the dataset\n",
295 | "classifier = KNeighborsClassifier(n_neighbors=5)\n",
296 | "yTrain = trainDataset['Class']\n",
297 | "yTest = testDataset['Class']\n",
298 | "xTrain = trainDataset.drop(columns=['Class'])\n",
299 | "xTest = testDataset.drop(columns=['Class'])\n",
300 | "scaler = StandardScaler()\n",
301 | "xTrain = scaler.fit_transform(xTrain)  #learn mean/std from the training features only\n",
302 | "xTest = scaler.transform(xTest)  #reuse the training statistics; refitting on the test set would scale it differently"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 7,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "KNeighborsClassifier()"
314 | ]
315 | },
316 | "execution_count": 7,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#fit the classifier to the training data\n",
323 | "classifier.fit(xTrain, yTrain)"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 8,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | " precision recall f1-score support\n",
336 | "\n",
337 | " 0 1.00 1.00 1.00 142157\n",
338 | " 1 0.94 0.81 0.87 393\n",
339 | "\n",
340 | " accuracy 1.00 142550\n",
341 | " macro avg 0.97 0.90 0.93 142550\n",
342 | "weighted avg 1.00 1.00 1.00 142550\n",
343 | "\n",
344 | "ROC AUC Score: 0.9045098079120887\n"
345 | ]
346 | }
347 | ],
348 | "source": [
349 | "#predict on the training set and check results\n",
350 | "predTrain = classifier.predict(xTrain)\n",
351 | "print(classification_report(yTrain, predTrain))\n",
352 | "print('ROC AUC Score: ',roc(yTrain, classifier.predict_proba(xTrain)[:, 1]))  #AUC needs positive-class scores, not hard 0/1 labels"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 9,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "name": "stdout",
362 | "output_type": "stream",
363 | "text": [
364 | " precision recall f1-score support\n",
365 | "\n",
366 | " 0 1.00 1.00 1.00 142158\n",
367 | " 1 0.75 0.81 0.78 99\n",
368 | "\n",
369 | " accuracy 1.00 142257\n",
370 | " macro avg 0.88 0.90 0.89 142257\n",
371 | "weighted avg 1.00 1.00 1.00 142257\n",
372 | "\n",
373 | "ROC AUC Score: 0.9039489564961224\n"
374 | ]
375 | }
376 | ],
377 | "source": [
378 | "#predict on the test set and check results\n",
379 | "predTest = classifier.predict(xTest)\n",
380 | "print(classification_report(yTest, predTest))\n",
381 | "print('ROC AUC Score: ',roc(yTest, classifier.predict_proba(xTest)[:, 1]))  #AUC needs positive-class scores, not hard 0/1 labels"
382 | ]
383 | }
384 | ],
385 | "metadata": {
386 | "kernelspec": {
387 | "display_name": "Python 3",
388 | "language": "python",
389 | "name": "python3"
390 | },
391 | "language_info": {
392 | "codemirror_mode": {
393 | "name": "ipython",
394 | "version": 3
395 | },
396 | "file_extension": ".py",
397 | "mimetype": "text/x-python",
398 | "name": "python",
399 | "nbconvert_exporter": "python",
400 | "pygments_lexer": "ipython3",
401 | "version": "3.8.3"
402 | }
403 | },
404 | "nbformat": 4,
405 | "nbformat_minor": 4
406 | }
407 |
--------------------------------------------------------------------------------
/Logistic Regression Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.linear_model import LogisticRegression\n",
17 | "from sklearn.preprocessing import StandardScaler"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " \n",
46 | " Time \n",
47 | " V1 \n",
48 | " V2 \n",
49 | " V3 \n",
50 | " V4 \n",
51 | " V5 \n",
52 | " V6 \n",
53 | " V7 \n",
54 | " V8 \n",
55 | " V9 \n",
56 | " ... \n",
57 | " V21 \n",
58 | " V22 \n",
59 | " V23 \n",
60 | " V24 \n",
61 | " V25 \n",
62 | " V26 \n",
63 | " V27 \n",
64 | " V28 \n",
65 | " Amount \n",
66 | " Class \n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 \n",
72 | " 0.0 \n",
73 | " -1.359807 \n",
74 | " -0.072781 \n",
75 | " 2.536347 \n",
76 | " 1.378155 \n",
77 | " -0.338321 \n",
78 | " 0.462388 \n",
79 | " 0.239599 \n",
80 | " 0.098698 \n",
81 | " 0.363787 \n",
82 | " ... \n",
83 | " -0.018307 \n",
84 | " 0.277838 \n",
85 | " -0.110474 \n",
86 | " 0.066928 \n",
87 | " 0.128539 \n",
88 | " -0.189115 \n",
89 | " 0.133558 \n",
90 | " -0.021053 \n",
91 | " 149.62 \n",
92 | " 0 \n",
93 | " \n",
94 | " \n",
95 | " 1 \n",
96 | " 0.0 \n",
97 | " 1.191857 \n",
98 | " 0.266151 \n",
99 | " 0.166480 \n",
100 | " 0.448154 \n",
101 | " 0.060018 \n",
102 | " -0.082361 \n",
103 | " -0.078803 \n",
104 | " 0.085102 \n",
105 | " -0.255425 \n",
106 | " ... \n",
107 | " -0.225775 \n",
108 | " -0.638672 \n",
109 | " 0.101288 \n",
110 | " -0.339846 \n",
111 | " 0.167170 \n",
112 | " 0.125895 \n",
113 | " -0.008983 \n",
114 | " 0.014724 \n",
115 | " 2.69 \n",
116 | " 0 \n",
117 | " \n",
118 | " \n",
119 | " 2 \n",
120 | " 1.0 \n",
121 | " -1.358354 \n",
122 | " -1.340163 \n",
123 | " 1.773209 \n",
124 | " 0.379780 \n",
125 | " -0.503198 \n",
126 | " 1.800499 \n",
127 | " 0.791461 \n",
128 | " 0.247676 \n",
129 | " -1.514654 \n",
130 | " ... \n",
131 | " 0.247998 \n",
132 | " 0.771679 \n",
133 | " 0.909412 \n",
134 | " -0.689281 \n",
135 | " -0.327642 \n",
136 | " -0.139097 \n",
137 | " -0.055353 \n",
138 | " -0.059752 \n",
139 | " 378.66 \n",
140 | " 0 \n",
141 | " \n",
142 | " \n",
143 | " 3 \n",
144 | " 1.0 \n",
145 | " -0.966272 \n",
146 | " -0.185226 \n",
147 | " 1.792993 \n",
148 | " -0.863291 \n",
149 | " -0.010309 \n",
150 | " 1.247203 \n",
151 | " 0.237609 \n",
152 | " 0.377436 \n",
153 | " -1.387024 \n",
154 | " ... \n",
155 | " -0.108300 \n",
156 | " 0.005274 \n",
157 | " -0.190321 \n",
158 | " -1.175575 \n",
159 | " 0.647376 \n",
160 | " -0.221929 \n",
161 | " 0.062723 \n",
162 | " 0.061458 \n",
163 | " 123.50 \n",
164 | " 0 \n",
165 | " \n",
166 | " \n",
167 | " 4 \n",
168 | " 2.0 \n",
169 | " -1.158233 \n",
170 | " 0.877737 \n",
171 | " 1.548718 \n",
172 | " 0.403034 \n",
173 | " -0.407193 \n",
174 | " 0.095921 \n",
175 | " 0.592941 \n",
176 | " -0.270533 \n",
177 | " 0.817739 \n",
178 | " ... \n",
179 | " -0.009431 \n",
180 | " 0.798278 \n",
181 | " -0.137458 \n",
182 | " 0.141267 \n",
183 | " -0.206010 \n",
184 | " 0.502292 \n",
185 | " 0.219422 \n",
186 | " 0.215153 \n",
187 | " 69.99 \n",
188 | " 0 \n",
189 | " \n",
190 | " \n",
191 | "
\n",
192 | "
5 rows × 31 columns
\n",
193 | "
"
194 | ],
195 | "text/plain": [
196 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
197 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
198 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
199 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
200 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
201 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
202 | "\n",
203 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
204 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
205 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
206 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
207 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
208 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
209 | "\n",
210 | " V26 V27 V28 Amount Class \n",
211 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
212 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
213 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
214 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
215 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
216 | "\n",
217 | "[5 rows x 31 columns]"
218 | ]
219 | },
220 | "execution_count": 2,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "#load the dataset and preview its first rows\n",
227 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
228 | "dataset.head()"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 3,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 3,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "#visualize the target class 'Class'\n",
261 | "sns.countplot(x='Class', data=dataset)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 4,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "#separating positive and negative classes\n",
271 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
272 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "#create training and testing sets: negative class split 1:1, positive class split 4:1; random_state is fixed on both splits so the split is reproducible\n",
282 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
283 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5, random_state=21)\n",
284 | "trainDataset = pd.concat([positiveTrain, negativeTrain])  #DataFrame.append was removed in pandas 2.0\n",
285 | "testDataset = pd.concat([positiveTest, negativeTest])"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "#create the LogisticRegression classifier object and scale the dataset\n",
295 | "classifier = LogisticRegression(random_state=21)\n",
296 | "yTrain = trainDataset['Class']\n",
297 | "yTest = testDataset['Class']\n",
298 | "xTrain = trainDataset.drop(columns=['Class'])\n",
299 | "xTest = testDataset.drop(columns=['Class'])\n",
300 | "scaler = StandardScaler()\n",
301 | "xTrain = scaler.fit_transform(xTrain)  #learn mean/std from the training features only\n",
302 | "xTest = scaler.transform(xTest)  #reuse the training statistics; refitting on the test set would scale it differently"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 7,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "LogisticRegression(random_state=21)"
314 | ]
315 | },
316 | "execution_count": 7,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#fit the classifier to the training data\n",
323 | "classifier.fit(xTrain, yTrain)"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 8,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | " precision recall f1-score support\n",
336 | "\n",
337 | " 0 1.00 1.00 1.00 142157\n",
338 | " 1 0.91 0.72 0.80 393\n",
339 | "\n",
340 | " accuracy 1.00 142550\n",
341 | " macro avg 0.95 0.86 0.90 142550\n",
342 | "weighted avg 1.00 1.00 1.00 142550\n",
343 | "\n",
344 | "ROC AUC Score: 0.8574078786596213\n"
345 | ]
346 | }
347 | ],
348 | "source": [
349 | "#predict on the training set and check results\n",
350 | "predTrain = classifier.predict(xTrain)\n",
351 | "print(classification_report(yTrain, predTrain))\n",
352 | "print('ROC AUC Score: ',roc(yTrain, classifier.predict_proba(xTrain)[:, 1]))  #AUC needs positive-class scores, not hard 0/1 labels"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 9,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "name": "stdout",
362 | "output_type": "stream",
363 | "text": [
364 | " precision recall f1-score support\n",
365 | "\n",
366 | " 0 1.00 1.00 1.00 142158\n",
367 | " 1 0.66 0.76 0.70 99\n",
368 | "\n",
369 | " accuracy 1.00 142257\n",
370 | " macro avg 0.83 0.88 0.85 142257\n",
371 | "weighted avg 1.00 1.00 1.00 142257\n",
372 | "\n",
373 | "ROC AUC Score: 0.8786507074714562\n"
374 | ]
375 | }
376 | ],
377 | "source": [
378 | "#predict on the test set and check results\n",
379 | "predTest = classifier.predict(xTest)\n",
380 | "print(classification_report(yTest, predTest))\n",
381 | "print('ROC AUC Score: ',roc(yTest, classifier.predict_proba(xTest)[:, 1]))  #AUC needs positive-class scores, not hard 0/1 labels"
382 | ]
383 | }
384 | ],
385 | "metadata": {
386 | "kernelspec": {
387 | "display_name": "Python 3",
388 | "language": "python",
389 | "name": "python3"
390 | },
391 | "language_info": {
392 | "codemirror_mode": {
393 | "name": "ipython",
394 | "version": 3
395 | },
396 | "file_extension": ".py",
397 | "mimetype": "text/x-python",
398 | "name": "python",
399 | "nbconvert_exporter": "python",
400 | "pygments_lexer": "ipython3",
401 | "version": "3.8.3"
402 | }
403 | },
404 | "nbformat": 4,
405 | "nbformat_minor": 4
406 | }
407 |
--------------------------------------------------------------------------------
/Random Forest Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.preprocessing import StandardScaler\n",
17 | "from sklearn.ensemble import RandomForestClassifier"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " \n",
46 | " Time \n",
47 | " V1 \n",
48 | " V2 \n",
49 | " V3 \n",
50 | " V4 \n",
51 | " V5 \n",
52 | " V6 \n",
53 | " V7 \n",
54 | " V8 \n",
55 | " V9 \n",
56 | " ... \n",
57 | " V21 \n",
58 | " V22 \n",
59 | " V23 \n",
60 | " V24 \n",
61 | " V25 \n",
62 | " V26 \n",
63 | " V27 \n",
64 | " V28 \n",
65 | " Amount \n",
66 | " Class \n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 \n",
72 | " 0.0 \n",
73 | " -1.359807 \n",
74 | " -0.072781 \n",
75 | " 2.536347 \n",
76 | " 1.378155 \n",
77 | " -0.338321 \n",
78 | " 0.462388 \n",
79 | " 0.239599 \n",
80 | " 0.098698 \n",
81 | " 0.363787 \n",
82 | " ... \n",
83 | " -0.018307 \n",
84 | " 0.277838 \n",
85 | " -0.110474 \n",
86 | " 0.066928 \n",
87 | " 0.128539 \n",
88 | " -0.189115 \n",
89 | " 0.133558 \n",
90 | " -0.021053 \n",
91 | " 149.62 \n",
92 | " 0 \n",
93 | " \n",
94 | " \n",
95 | " 1 \n",
96 | " 0.0 \n",
97 | " 1.191857 \n",
98 | " 0.266151 \n",
99 | " 0.166480 \n",
100 | " 0.448154 \n",
101 | " 0.060018 \n",
102 | " -0.082361 \n",
103 | " -0.078803 \n",
104 | " 0.085102 \n",
105 | " -0.255425 \n",
106 | " ... \n",
107 | " -0.225775 \n",
108 | " -0.638672 \n",
109 | " 0.101288 \n",
110 | " -0.339846 \n",
111 | " 0.167170 \n",
112 | " 0.125895 \n",
113 | " -0.008983 \n",
114 | " 0.014724 \n",
115 | " 2.69 \n",
116 | " 0 \n",
117 | " \n",
118 | " \n",
119 | " 2 \n",
120 | " 1.0 \n",
121 | " -1.358354 \n",
122 | " -1.340163 \n",
123 | " 1.773209 \n",
124 | " 0.379780 \n",
125 | " -0.503198 \n",
126 | " 1.800499 \n",
127 | " 0.791461 \n",
128 | " 0.247676 \n",
129 | " -1.514654 \n",
130 | " ... \n",
131 | " 0.247998 \n",
132 | " 0.771679 \n",
133 | " 0.909412 \n",
134 | " -0.689281 \n",
135 | " -0.327642 \n",
136 | " -0.139097 \n",
137 | " -0.055353 \n",
138 | " -0.059752 \n",
139 | " 378.66 \n",
140 | " 0 \n",
141 | " \n",
142 | " \n",
143 | " 3 \n",
144 | " 1.0 \n",
145 | " -0.966272 \n",
146 | " -0.185226 \n",
147 | " 1.792993 \n",
148 | " -0.863291 \n",
149 | " -0.010309 \n",
150 | " 1.247203 \n",
151 | " 0.237609 \n",
152 | " 0.377436 \n",
153 | " -1.387024 \n",
154 | " ... \n",
155 | " -0.108300 \n",
156 | " 0.005274 \n",
157 | " -0.190321 \n",
158 | " -1.175575 \n",
159 | " 0.647376 \n",
160 | " -0.221929 \n",
161 | " 0.062723 \n",
162 | " 0.061458 \n",
163 | " 123.50 \n",
164 | " 0 \n",
165 | " \n",
166 | " \n",
167 | " 4 \n",
168 | " 2.0 \n",
169 | " -1.158233 \n",
170 | " 0.877737 \n",
171 | " 1.548718 \n",
172 | " 0.403034 \n",
173 | " -0.407193 \n",
174 | " 0.095921 \n",
175 | " 0.592941 \n",
176 | " -0.270533 \n",
177 | " 0.817739 \n",
178 | " ... \n",
179 | " -0.009431 \n",
180 | " 0.798278 \n",
181 | " -0.137458 \n",
182 | " 0.141267 \n",
183 | " -0.206010 \n",
184 | " 0.502292 \n",
185 | " 0.219422 \n",
186 | " 0.215153 \n",
187 | " 69.99 \n",
188 | " 0 \n",
189 | " \n",
190 | " \n",
191 | "
\n",
192 | "
5 rows × 31 columns
\n",
193 | "
"
194 | ],
195 | "text/plain": [
196 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
197 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
198 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
199 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
200 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
201 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
202 | "\n",
203 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
204 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
205 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
206 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
207 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
208 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
209 | "\n",
210 | " V26 V27 V28 Amount Class \n",
211 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
212 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
213 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
214 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
215 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
216 | "\n",
217 | "[5 rows x 31 columns]"
218 | ]
219 | },
220 | "execution_count": 2,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "#load the dataset and preview its first rows\n",
227 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
228 | "dataset.head()"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 3,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 3,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "#visualize the target class 'Class'\n",
261 | "sns.countplot(x='Class', data=dataset)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 4,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "#separating positive and negative classes\n",
271 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
272 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "#creating training and testing sets with the negative class split 1:1 and the positive class split 4:1; random_state is fixed only for the positive split (the negative split passes no random_state)\n",
282 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
283 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5)\n",
284 | "trainDataset = positiveTrain.append(negativeTrain)\n",
285 | "testDataset = positiveTest.append(negativeTest)"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "#create Random Forest classifier object and scale the dataset\n",
295 | "classifier = RandomForestClassifier(n_estimators=500, max_depth=3)\n",
296 | "yTrain = trainDataset['Class']\n",
297 | "yTest = testDataset['Class']\n",
298 | "xTrain = trainDataset.drop(columns=['Class'])\n",
299 | "xTest = testDataset.drop(columns=['Class'])\n",
300 | "scaler = StandardScaler()\n",
301 | "xTrain = scaler.fit_transform(xTrain)\n",
302 | "xTest = scaler.fit_transform(xTest)"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 7,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "RandomForestClassifier(max_depth=3, n_estimators=500)"
314 | ]
315 | },
316 | "execution_count": 7,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#fit the classifier to the training data\n",
323 | "classifier.fit(xTrain, yTrain)"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 8,
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | " precision recall f1-score support\n",
336 | "\n",
337 | " 0 1.00 1.00 1.00 142157\n",
338 | " 1 0.93 0.68 0.79 393\n",
339 | "\n",
340 | " accuracy 1.00 142550\n",
341 | " macro avg 0.96 0.84 0.89 142550\n",
342 | "weighted avg 1.00 1.00 1.00 142550\n",
343 | "\n",
344 | "ROC AUC Score: 0.8421688409909691\n"
345 | ]
346 | }
347 | ],
348 | "source": [
349 | "#predict on the training set and check results\n",
350 | "predTrain = classifier.predict(xTrain)\n",
351 | "print(classification_report(yTrain, predTrain))\n",
352 | "print('ROC AUC Score: ',roc(yTrain, predTrain))"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 9,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "name": "stdout",
362 | "output_type": "stream",
363 | "text": [
364 | " precision recall f1-score support\n",
365 | "\n",
366 | " 0 1.00 1.00 1.00 142158\n",
367 | " 1 0.69 0.75 0.71 99\n",
368 | "\n",
369 | " accuracy 1.00 142257\n",
370 | " macro avg 0.84 0.87 0.86 142257\n",
371 | "weighted avg 1.00 1.00 1.00 142257\n",
372 | "\n",
373 | "ROC AUC Score: 0.8736177884871592\n"
374 | ]
375 | }
376 | ],
377 | "source": [
378 | "#predict test values and check results\n",
379 | "predTest = classifier.predict(xTest)\n",
380 | "print(classification_report(yTest, predTest))\n",
381 | "print('ROC AUC Score: ',roc(yTest, predTest))"
382 | ]
383 | }
384 | ],
385 | "metadata": {
386 | "kernelspec": {
387 | "display_name": "Python 3",
388 | "language": "python",
389 | "name": "python3"
390 | },
391 | "language_info": {
392 | "codemirror_mode": {
393 | "name": "ipython",
394 | "version": 3
395 | },
396 | "file_extension": ".py",
397 | "mimetype": "text/x-python",
398 | "name": "python",
399 | "nbconvert_exporter": "python",
400 | "pygments_lexer": "ipython3",
401 | "version": "3.8.3"
402 | }
403 | },
404 | "nbformat": 4,
405 | "nbformat_minor": 4
406 | }
407 |
--------------------------------------------------------------------------------
/Neural Network Classifier.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.preprocessing import StandardScaler\n",
17 | "import tensorflow as tf\n",
18 | "from tensorflow import keras"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " \n",
47 | " Time \n",
48 | " V1 \n",
49 | " V2 \n",
50 | " V3 \n",
51 | " V4 \n",
52 | " V5 \n",
53 | " V6 \n",
54 | " V7 \n",
55 | " V8 \n",
56 | " V9 \n",
57 | " ... \n",
58 | " V21 \n",
59 | " V22 \n",
60 | " V23 \n",
61 | " V24 \n",
62 | " V25 \n",
63 | " V26 \n",
64 | " V27 \n",
65 | " V28 \n",
66 | " Amount \n",
67 | " Class \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " \n",
72 | " 0 \n",
73 | " 0.0 \n",
74 | " -1.359807 \n",
75 | " -0.072781 \n",
76 | " 2.536347 \n",
77 | " 1.378155 \n",
78 | " -0.338321 \n",
79 | " 0.462388 \n",
80 | " 0.239599 \n",
81 | " 0.098698 \n",
82 | " 0.363787 \n",
83 | " ... \n",
84 | " -0.018307 \n",
85 | " 0.277838 \n",
86 | " -0.110474 \n",
87 | " 0.066928 \n",
88 | " 0.128539 \n",
89 | " -0.189115 \n",
90 | " 0.133558 \n",
91 | " -0.021053 \n",
92 | " 149.62 \n",
93 | " 0 \n",
94 | " \n",
95 | " \n",
96 | " 1 \n",
97 | " 0.0 \n",
98 | " 1.191857 \n",
99 | " 0.266151 \n",
100 | " 0.166480 \n",
101 | " 0.448154 \n",
102 | " 0.060018 \n",
103 | " -0.082361 \n",
104 | " -0.078803 \n",
105 | " 0.085102 \n",
106 | " -0.255425 \n",
107 | " ... \n",
108 | " -0.225775 \n",
109 | " -0.638672 \n",
110 | " 0.101288 \n",
111 | " -0.339846 \n",
112 | " 0.167170 \n",
113 | " 0.125895 \n",
114 | " -0.008983 \n",
115 | " 0.014724 \n",
116 | " 2.69 \n",
117 | " 0 \n",
118 | " \n",
119 | " \n",
120 | " 2 \n",
121 | " 1.0 \n",
122 | " -1.358354 \n",
123 | " -1.340163 \n",
124 | " 1.773209 \n",
125 | " 0.379780 \n",
126 | " -0.503198 \n",
127 | " 1.800499 \n",
128 | " 0.791461 \n",
129 | " 0.247676 \n",
130 | " -1.514654 \n",
131 | " ... \n",
132 | " 0.247998 \n",
133 | " 0.771679 \n",
134 | " 0.909412 \n",
135 | " -0.689281 \n",
136 | " -0.327642 \n",
137 | " -0.139097 \n",
138 | " -0.055353 \n",
139 | " -0.059752 \n",
140 | " 378.66 \n",
141 | " 0 \n",
142 | " \n",
143 | " \n",
144 | " 3 \n",
145 | " 1.0 \n",
146 | " -0.966272 \n",
147 | " -0.185226 \n",
148 | " 1.792993 \n",
149 | " -0.863291 \n",
150 | " -0.010309 \n",
151 | " 1.247203 \n",
152 | " 0.237609 \n",
153 | " 0.377436 \n",
154 | " -1.387024 \n",
155 | " ... \n",
156 | " -0.108300 \n",
157 | " 0.005274 \n",
158 | " -0.190321 \n",
159 | " -1.175575 \n",
160 | " 0.647376 \n",
161 | " -0.221929 \n",
162 | " 0.062723 \n",
163 | " 0.061458 \n",
164 | " 123.50 \n",
165 | " 0 \n",
166 | " \n",
167 | " \n",
168 | " 4 \n",
169 | " 2.0 \n",
170 | " -1.158233 \n",
171 | " 0.877737 \n",
172 | " 1.548718 \n",
173 | " 0.403034 \n",
174 | " -0.407193 \n",
175 | " 0.095921 \n",
176 | " 0.592941 \n",
177 | " -0.270533 \n",
178 | " 0.817739 \n",
179 | " ... \n",
180 | " -0.009431 \n",
181 | " 0.798278 \n",
182 | " -0.137458 \n",
183 | " 0.141267 \n",
184 | " -0.206010 \n",
185 | " 0.502292 \n",
186 | " 0.219422 \n",
187 | " 0.215153 \n",
188 | " 69.99 \n",
189 | " 0 \n",
190 | " \n",
191 | " \n",
192 | "
\n",
193 | "
5 rows × 31 columns
\n",
194 | "
"
195 | ],
196 | "text/plain": [
197 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
198 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
199 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
200 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
201 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
202 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
203 | "\n",
204 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
205 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
206 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
207 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
208 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
209 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
210 | "\n",
211 | " V26 V27 V28 Amount Class \n",
212 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
213 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
214 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
215 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
216 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
217 | "\n",
218 | "[5 rows x 31 columns]"
219 | ]
220 | },
221 | "execution_count": 2,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "#import the dataset and visualize the dataset\n",
228 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
229 | "dataset.head()"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 3,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "data": {
239 | "text/plain": [
240 | ""
241 | ]
242 | },
243 | "execution_count": 3,
244 | "metadata": {},
245 | "output_type": "execute_result"
246 | },
247 | {
248 | "data": {
249 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
250 | "text/plain": [
251 | ""
252 | ]
253 | },
254 | "metadata": {
255 | "needs_background": "light"
256 | },
257 | "output_type": "display_data"
258 | }
259 | ],
260 | "source": [
261 | "#visualize the target class 'Class'\n",
262 | "sns.countplot(x='Class', data=dataset)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 4,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "#separating positive and negative classes\n",
272 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
273 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 5,
279 | "metadata": {},
280 | "outputs": [],
281 | "source": [
282 | "#creating training and testing sets with the negative class split 1:1 and the positive class split 4:1; random_state is fixed only for the positive split (the negative split passes no random_state)\n",
283 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
284 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5)\n",
285 | "trainDataset = positiveTrain.append(negativeTrain)\n",
286 | "testDataset = positiveTest.append(negativeTest)"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 6,
292 | "metadata": {},
293 | "outputs": [],
294 | "source": [
295 | "#create classifier\n",
296 | "def classifierCreator():\n",
297 | " clf = keras.models.Sequential()\n",
298 | " clf.add(keras.layers.Dense(10, activation='relu'))\n",
299 | " clf.add(keras.layers.Dense(5, activation='relu'))\n",
300 | " clf.add(keras.layers.Dense(1, activation='sigmoid'))\n",
301 | " return clf"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 7,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": [
310 | "#create Classifier object and scale the dataset\n",
311 | "classifier = classifierCreator()\n",
312 | "classifier.compile(loss='binary_crossentropy', optimizer='sgd')\n",
313 | "yTrain = trainDataset['Class']\n",
314 | "yTest = testDataset['Class']\n",
315 | "xTrain = trainDataset.drop(columns=['Class'])\n",
316 | "xTest = testDataset.drop(columns=['Class'])\n",
317 | "scaler = StandardScaler()\n",
318 | "xTrain = scaler.fit_transform(xTrain)\n",
319 | "xTest = scaler.fit_transform(xTest)"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 26,
325 | "metadata": {},
326 | "outputs": [],
327 | "source": [
328 | "#fit the classifier to the training data\n",
329 | "weights = {0:1, 1:1.5}\n",
330 | "history = classifier.fit(xTrain, yTrain, class_weight=weights, epochs=15, verbose=0)"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 27,
336 | "metadata": {},
337 | "outputs": [
338 | {
339 | "name": "stdout",
340 | "output_type": "stream",
341 | "text": [
342 | " precision recall f1-score support\n",
343 | "\n",
344 | " 0 1.00 1.00 1.00 142157\n",
345 | " 1 0.86 0.85 0.86 393\n",
346 | "\n",
347 | " accuracy 1.00 142550\n",
348 | " macro avg 0.93 0.93 0.93 142550\n",
349 | "weighted avg 1.00 1.00 1.00 142550\n",
350 | "\n",
351 | "ROC AUC Score: 0.9272909851794332\n"
352 | ]
353 | }
354 | ],
355 | "source": [
356 | "#predict on the training set and check results\n",
357 | "predTrain = classifier.predict_classes(xTrain)\n",
358 | "print(classification_report(yTrain, predTrain))\n",
359 | "print('ROC AUC Score: ',roc(yTrain, predTrain))"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 28,
365 | "metadata": {},
366 | "outputs": [
367 | {
368 | "name": "stdout",
369 | "output_type": "stream",
370 | "text": [
371 | " precision recall f1-score support\n",
372 | "\n",
373 | " 0 1.00 1.00 1.00 142158\n",
374 | " 1 0.58 0.83 0.68 99\n",
375 | "\n",
376 | " accuracy 1.00 142257\n",
377 | " macro avg 0.79 0.91 0.84 142257\n",
378 | "weighted avg 1.00 1.00 1.00 142257\n",
379 | "\n",
380 | "ROC AUC Score: 0.9139338985601595\n"
381 | ]
382 | }
383 | ],
384 | "source": [
385 | "#predict test values and check results\n",
386 | "predTest = classifier.predict_classes(xTest)\n",
387 | "print(classification_report(yTest, predTest))\n",
388 | "print('ROC AUC Score: ',roc(yTest, predTest))"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 32,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "name": "stdout",
398 | "output_type": "stream",
399 | "text": [
400 | "Ratio of weights: 1: 1.01\n",
401 | " precision recall f1-score support\n",
402 | "\n",
403 | " 0 1.00 1.00 1.00 142157\n",
404 | " 1 0.91 0.78 0.84 393\n",
405 | "\n",
406 | " accuracy 1.00 142550\n",
407 | " macro avg 0.95 0.89 0.92 142550\n",
408 | "weighted avg 1.00 1.00 1.00 142550\n",
409 | "\n",
410 | "ROC AUC Score: 0.887928160852726\n",
411 | " precision recall f1-score support\n",
412 | "\n",
413 | " 0 1.00 1.00 1.00 142158\n",
414 | " 1 0.74 0.80 0.77 99\n",
415 | "\n",
416 | " accuracy 1.00 142257\n",
417 | " macro avg 0.87 0.90 0.88 142257\n",
418 | "weighted avg 1.00 1.00 1.00 142257\n",
419 | "\n",
420 | "ROC AUC Score: 0.8988914170191341\n",
421 | "Ratio of weights: 1: 1.02\n",
422 | " precision recall f1-score support\n",
423 | "\n",
424 | " 0 1.00 1.00 1.00 142157\n",
425 | " 1 0.90 0.81 0.85 393\n",
426 | "\n",
427 | " accuracy 1.00 142550\n",
428 | " macro avg 0.95 0.90 0.92 142550\n",
429 | "weighted avg 1.00 1.00 1.00 142550\n",
430 | "\n",
431 | "ROC AUC Score: 0.9031812674733117\n",
432 | " precision recall f1-score support\n",
433 | "\n",
434 | " 0 1.00 1.00 1.00 142158\n",
435 | " 1 0.72 0.81 0.76 99\n",
436 | "\n",
437 | " accuracy 1.00 142257\n",
438 | " macro avg 0.86 0.90 0.88 142257\n",
439 | "weighted avg 1.00 1.00 1.00 142257\n",
440 | "\n",
441 | "ROC AUC Score: 0.9039313704299141\n",
442 | "Ratio of weights: 1: 1.03\n"
443 | ]
444 | },
445 | {
446 | "name": "stderr",
447 | "output_type": "stream",
448 | "text": [
449 | "/home/azhanmohammed/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
450 | " _warn_prf(average, modifier, msg_start, len(result))\n"
451 | ]
452 | },
453 | {
454 | "name": "stdout",
455 | "output_type": "stream",
456 | "text": [
457 | " precision recall f1-score support\n",
458 | "\n",
459 | " 0 1.00 1.00 1.00 142157\n",
460 | " 1 0.00 0.00 0.00 393\n",
461 | "\n",
462 | " accuracy 1.00 142550\n",
463 | " macro avg 0.50 0.50 0.50 142550\n",
464 | "weighted avg 0.99 1.00 1.00 142550\n",
465 | "\n",
466 | "ROC AUC Score: 0.5\n"
467 | ]
468 | },
469 | {
470 | "name": "stderr",
471 | "output_type": "stream",
472 | "text": [
473 | "/home/azhanmohammed/anaconda3/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
474 | " _warn_prf(average, modifier, msg_start, len(result))\n"
475 | ]
476 | },
477 | {
478 | "name": "stdout",
479 | "output_type": "stream",
480 | "text": [
481 | " precision recall f1-score support\n",
482 | "\n",
483 | " 0 1.00 1.00 1.00 142158\n",
484 | " 1 0.00 0.00 0.00 99\n",
485 | "\n",
486 | " accuracy 1.00 142257\n",
487 | " macro avg 0.50 0.50 0.50 142257\n",
488 | "weighted avg 1.00 1.00 1.00 142257\n",
489 | "\n",
490 | "ROC AUC Score: 0.5\n",
491 | "Ratio of weights: 1: 1.04\n",
492 | " precision recall f1-score support\n",
493 | "\n",
494 | " 0 1.00 1.00 1.00 142157\n",
495 | " 1 0.92 0.76 0.83 393\n",
496 | "\n",
497 | " accuracy 1.00 142550\n",
498 | " macro avg 0.96 0.88 0.92 142550\n",
499 | "weighted avg 1.00 1.00 1.00 142550\n",
500 | "\n",
501 | "ROC AUC Score: 0.8815879411254097\n",
502 | " precision recall f1-score support\n",
503 | "\n",
504 | " 0 1.00 1.00 1.00 142158\n",
505 | " 1 0.75 0.81 0.78 99\n",
506 | "\n",
507 | " accuracy 1.00 142257\n",
508 | " macro avg 0.87 0.90 0.89 142257\n",
509 | "weighted avg 1.00 1.00 1.00 142257\n",
510 | "\n",
511 | "ROC AUC Score: 0.9039454392828807\n",
512 | "Ratio of weights: 1: 1.05\n",
513 | " precision recall f1-score support\n",
514 | "\n",
515 | " 0 1.00 1.00 1.00 142157\n",
516 | " 1 0.91 0.63 0.75 393\n",
517 | "\n",
518 | " accuracy 1.00 142550\n",
519 | " macro avg 0.95 0.82 0.87 142550\n",
520 | "weighted avg 1.00 1.00 1.00 142550\n",
521 | "\n",
522 | "ROC AUC Score: 0.8167024449422036\n",
523 | " precision recall f1-score support\n",
524 | "\n",
525 | " 0 1.00 1.00 1.00 142158\n",
526 | " 1 0.75 0.63 0.68 99\n",
527 | "\n",
528 | " accuracy 1.00 142257\n",
529 | " macro avg 0.87 0.81 0.84 142257\n",
530 | "weighted avg 1.00 1.00 1.00 142257\n",
531 | "\n",
532 | "ROC AUC Score: 0.8130574516532395\n",
533 | "Ratio of weights: 1: 1.06\n",
534 | " precision recall f1-score support\n",
535 | "\n",
536 | " 0 1.00 1.00 1.00 142157\n",
537 | " 1 0.90 0.79 0.84 393\n",
538 | "\n",
539 | " accuracy 1.00 142550\n",
540 | " macro avg 0.95 0.90 0.92 142550\n",
541 | "weighted avg 1.00 1.00 1.00 142550\n",
542 | "\n",
543 | "ROC AUC Score: 0.8955476796870521\n",
544 | " precision recall f1-score support\n",
545 | "\n",
546 | " 0 1.00 1.00 1.00 142158\n",
547 | " 1 0.72 0.79 0.75 99\n",
548 | "\n",
549 | " accuracy 1.00 142257\n",
550 | " macro avg 0.86 0.89 0.88 142257\n",
551 | "weighted avg 1.00 1.00 1.00 142257\n",
552 | "\n",
553 | "ROC AUC Score: 0.8938338775421458\n",
554 | "Ratio of weights: 1: 1.07\n",
555 | " precision recall f1-score support\n",
556 | "\n",
557 | " 0 1.00 1.00 1.00 142157\n",
558 | " 1 0.90 0.76 0.83 393\n",
559 | "\n",
560 | " accuracy 1.00 142550\n",
561 | " macro avg 0.95 0.88 0.91 142550\n",
562 | "weighted avg 1.00 1.00 1.00 142550\n",
563 | "\n",
564 | "ROC AUC Score: 0.8815668376975097\n",
565 | " precision recall f1-score support\n",
566 | "\n",
567 | " 0 1.00 1.00 1.00 142158\n",
568 | " 1 0.73 0.79 0.76 99\n",
569 | "\n",
570 | " accuracy 1.00 142257\n",
571 | " macro avg 0.86 0.89 0.88 142257\n",
572 | "weighted avg 1.00 1.00 1.00 142257\n",
573 | "\n",
574 | "ROC AUC Score: 0.8938373947553874\n",
575 | "Ratio of weights: 1: 1.08\n",
576 | " precision recall f1-score support\n",
577 | "\n",
578 | " 0 1.00 1.00 1.00 142157\n",
579 | " 1 0.90 0.72 0.80 393\n",
580 | "\n",
581 | " accuracy 1.00 142550\n",
582 | " macro avg 0.95 0.86 0.90 142550\n",
583 | "weighted avg 1.00 1.00 1.00 142550\n",
584 | "\n",
585 | "ROC AUC Score: 0.8612141208388009\n",
586 | " precision recall f1-score support\n",
587 | "\n",
588 | " 0 1.00 1.00 1.00 142158\n",
589 | " 1 0.75 0.76 0.75 99\n",
590 | "\n",
591 | " accuracy 1.00 142257\n",
592 | " macro avg 0.87 0.88 0.88 142257\n",
593 | "weighted avg 1.00 1.00 1.00 142257\n",
594 | "\n",
595 | "ROC AUC Score: 0.8786999484568386\n",
596 | "Ratio of weights: 1: 1.09\n",
597 | " precision recall f1-score support\n",
598 | "\n",
599 | " 0 1.00 1.00 1.00 142157\n",
600 | " 1 0.90 0.79 0.84 393\n",
601 | "\n",
602 | " accuracy 1.00 142550\n",
603 | " macro avg 0.95 0.90 0.92 142550\n",
604 | "weighted avg 1.00 1.00 1.00 142550\n",
605 | "\n",
606 | "ROC AUC Score: 0.8955476796870521\n",
607 | " precision recall f1-score support\n",
608 | "\n",
609 | " 0 1.00 1.00 1.00 142158\n",
610 | " 1 0.74 0.79 0.76 99\n",
611 | "\n",
612 | " accuracy 1.00 142257\n",
613 | " macro avg 0.87 0.89 0.88 142257\n",
614 | "weighted avg 1.00 1.00 1.00 142257\n",
615 | "\n",
616 | "ROC AUC Score: 0.8938444291818706\n"
617 | ]
618 | }
619 | ],
620 | "source": [
621 | "#finding the best weight ratio\n",
622 | "for i in range(1, 10, 1):\n",
623 | " print('Ratio of weights: 1:',1+(i*0.01))\n",
624 | " classifier = classifierCreator()\n",
625 | " classifier.compile(loss='binary_crossentropy', optimizer='sgd')\n",
626 | " weights = {0:1, 1:1+(i*0.01)}\n",
627 | " history = classifier.fit(xTrain, yTrain, class_weight=weights, epochs=15, verbose=0)\n",
628 | " #predict the model on the train values and check results\n",
629 | " predTrain = classifier.predict_classes(xTrain)\n",
630 | " print(classification_report(yTrain, predTrain))\n",
631 | " print('ROC AUC Score: ',roc(yTrain, predTrain))\n",
632 | " #predict test values and check results\n",
633 | " predTest = classifier.predict_classes(xTest)\n",
634 | " print(classification_report(yTest, predTest))\n",
635 | " print('ROC AUC Score: ',roc(yTest, predTest))"
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": null,
641 | "metadata": {},
642 | "outputs": [],
643 | "source": []
644 | }
645 | ],
646 | "metadata": {
647 | "kernelspec": {
648 | "display_name": "Python 3",
649 | "language": "python",
650 | "name": "python3"
651 | },
652 | "language_info": {
653 | "codemirror_mode": {
654 | "name": "ipython",
655 | "version": 3
656 | },
657 | "file_extension": ".py",
658 | "mimetype": "text/x-python",
659 | "name": "python",
660 | "nbconvert_exporter": "python",
661 | "pygments_lexer": "ipython3",
662 | "version": "3.8.3"
663 | }
664 | },
665 | "nbformat": 4,
666 | "nbformat_minor": 4
667 | }
668 |
--------------------------------------------------------------------------------
/Multiple Linear Regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import important libraries\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "import seaborn as sns\n",
13 | "from sklearn.metrics import classification_report\n",
14 | "from sklearn.metrics import roc_auc_score as roc\n",
15 | "from sklearn.model_selection import train_test_split as tts\n",
16 | "from sklearn.linear_model import LinearRegression\n",
17 | "from sklearn.preprocessing import StandardScaler"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/html": [
28 | "\n",
29 | "\n",
42 | "
\n",
43 | " \n",
44 | " \n",
45 | " \n",
46 | " Time \n",
47 | " V1 \n",
48 | " V2 \n",
49 | " V3 \n",
50 | " V4 \n",
51 | " V5 \n",
52 | " V6 \n",
53 | " V7 \n",
54 | " V8 \n",
55 | " V9 \n",
56 | " ... \n",
57 | " V21 \n",
58 | " V22 \n",
59 | " V23 \n",
60 | " V24 \n",
61 | " V25 \n",
62 | " V26 \n",
63 | " V27 \n",
64 | " V28 \n",
65 | " Amount \n",
66 | " Class \n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 \n",
72 | " 0.0 \n",
73 | " -1.359807 \n",
74 | " -0.072781 \n",
75 | " 2.536347 \n",
76 | " 1.378155 \n",
77 | " -0.338321 \n",
78 | " 0.462388 \n",
79 | " 0.239599 \n",
80 | " 0.098698 \n",
81 | " 0.363787 \n",
82 | " ... \n",
83 | " -0.018307 \n",
84 | " 0.277838 \n",
85 | " -0.110474 \n",
86 | " 0.066928 \n",
87 | " 0.128539 \n",
88 | " -0.189115 \n",
89 | " 0.133558 \n",
90 | " -0.021053 \n",
91 | " 149.62 \n",
92 | " 0 \n",
93 | " \n",
94 | " \n",
95 | " 1 \n",
96 | " 0.0 \n",
97 | " 1.191857 \n",
98 | " 0.266151 \n",
99 | " 0.166480 \n",
100 | " 0.448154 \n",
101 | " 0.060018 \n",
102 | " -0.082361 \n",
103 | " -0.078803 \n",
104 | " 0.085102 \n",
105 | " -0.255425 \n",
106 | " ... \n",
107 | " -0.225775 \n",
108 | " -0.638672 \n",
109 | " 0.101288 \n",
110 | " -0.339846 \n",
111 | " 0.167170 \n",
112 | " 0.125895 \n",
113 | " -0.008983 \n",
114 | " 0.014724 \n",
115 | " 2.69 \n",
116 | " 0 \n",
117 | " \n",
118 | " \n",
119 | " 2 \n",
120 | " 1.0 \n",
121 | " -1.358354 \n",
122 | " -1.340163 \n",
123 | " 1.773209 \n",
124 | " 0.379780 \n",
125 | " -0.503198 \n",
126 | " 1.800499 \n",
127 | " 0.791461 \n",
128 | " 0.247676 \n",
129 | " -1.514654 \n",
130 | " ... \n",
131 | " 0.247998 \n",
132 | " 0.771679 \n",
133 | " 0.909412 \n",
134 | " -0.689281 \n",
135 | " -0.327642 \n",
136 | " -0.139097 \n",
137 | " -0.055353 \n",
138 | " -0.059752 \n",
139 | " 378.66 \n",
140 | " 0 \n",
141 | " \n",
142 | " \n",
143 | " 3 \n",
144 | " 1.0 \n",
145 | " -0.966272 \n",
146 | " -0.185226 \n",
147 | " 1.792993 \n",
148 | " -0.863291 \n",
149 | " -0.010309 \n",
150 | " 1.247203 \n",
151 | " 0.237609 \n",
152 | " 0.377436 \n",
153 | " -1.387024 \n",
154 | " ... \n",
155 | " -0.108300 \n",
156 | " 0.005274 \n",
157 | " -0.190321 \n",
158 | " -1.175575 \n",
159 | " 0.647376 \n",
160 | " -0.221929 \n",
161 | " 0.062723 \n",
162 | " 0.061458 \n",
163 | " 123.50 \n",
164 | " 0 \n",
165 | " \n",
166 | " \n",
167 | " 4 \n",
168 | " 2.0 \n",
169 | " -1.158233 \n",
170 | " 0.877737 \n",
171 | " 1.548718 \n",
172 | " 0.403034 \n",
173 | " -0.407193 \n",
174 | " 0.095921 \n",
175 | " 0.592941 \n",
176 | " -0.270533 \n",
177 | " 0.817739 \n",
178 | " ... \n",
179 | " -0.009431 \n",
180 | " 0.798278 \n",
181 | " -0.137458 \n",
182 | " 0.141267 \n",
183 | " -0.206010 \n",
184 | " 0.502292 \n",
185 | " 0.219422 \n",
186 | " 0.215153 \n",
187 | " 69.99 \n",
188 | " 0 \n",
189 | " \n",
190 | " \n",
191 | "
\n",
192 | "
5 rows × 31 columns
\n",
193 | "
"
194 | ],
195 | "text/plain": [
196 | " Time V1 V2 V3 V4 V5 V6 V7 \\\n",
197 | "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n",
198 | "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n",
199 | "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n",
200 | "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n",
201 | "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n",
202 | "\n",
203 | " V8 V9 ... V21 V22 V23 V24 V25 \\\n",
204 | "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n",
205 | "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n",
206 | "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n",
207 | "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n",
208 | "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n",
209 | "\n",
210 | " V26 V27 V28 Amount Class \n",
211 | "0 -0.189115 0.133558 -0.021053 149.62 0 \n",
212 | "1 0.125895 -0.008983 0.014724 2.69 0 \n",
213 | "2 -0.139097 -0.055353 -0.059752 378.66 0 \n",
214 | "3 -0.221929 0.062723 0.061458 123.50 0 \n",
215 | "4 0.502292 0.219422 0.215153 69.99 0 \n",
216 | "\n",
217 | "[5 rows x 31 columns]"
218 | ]
219 | },
220 | "execution_count": 2,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "#load the dataset and preview its first five rows\n",
227 | "dataset = pd.read_csv('./dataset/creditcard.csv')\n",
228 | "dataset.head()"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 3,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | ""
240 | ]
241 | },
242 | "execution_count": 3,
243 | "metadata": {},
244 | "output_type": "execute_result"
245 | },
246 | {
247 | "data": {
248 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEGCAYAAABYV4NmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATPUlEQVR4nO3df6zd9X3f8ecrOKV0DcyAQ4nNYlacasBWUjwHNdqUDs32Km0mHbQ3U2Nrs+YKkampokpQaSMCWSpaUlaShokMhx/qAAua4mmh1IW0WTUKXEfWjM0QXmDBwcNObQGdBIud9/44nxuOr48v1+793GPs50M6Ot/z/n4/n/P5IksvPt/v53xvqgpJkuba+8Y9AEnSqcmAkSR1YcBIkrowYCRJXRgwkqQuFox7ACeL888/v5YuXTruYUjSe8q2bdu+X1WLRu0zYJqlS5cyOTk57mFI0ntKkv99rH1eIpMkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdeEv+efQlb9537iHoJPQtn+/dtxDkMbCGYwkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK66BYwSS5K8s0kzyfZmeTXW/3zSb6XZHt7/eJQm5uS7E7yQpJVQ/Urk+xo++5IklY/M8lDrf50kqVDbdYlebG91vU6T0nSaAs69n0I+FxVfTvJB4BtSba2fbdX1ReGD05yKTABXAZ8CPiTJB+pqsPAncAG4C+AbwCrgceA9cDBqrokyQRwG/ArSc4FbgaWA9W+e0tVHex4vpKkId1mMFW1t6q+3bbfBJ4HFs/QZA3wYFW9XVUvAbuBFUkuBM6uqqeqqoD7gGuG2tzbth8Grm6zm1XA1qo60EJlK4NQkiTNk3m5B9MuXX0UeLqVPpPkfyTZlGRhqy0GXhlqtqfVFrft6fUj2lTVIeB14LwZ+po+rg1JJpNM7t+//4TPT5J0tO4Bk+QngUeAz1bVGwwud/00cAWwF/ji1KEjmtcM9RNt806h6q6qWl5VyxctWjTjeUiSjk/XgEnyfgbh8vtV9QcAVfVaVR2uqh8CXwVWtMP3ABcNNV8CvNrqS0bUj2iTZAFwDnBghr4kSfOk5yqyAHcDz1fV7wzVLxw67JPAc217CzDRVoZdDCwDnqmqvcCbSa5qfa4FHh1qM7VC7FrgyXaf5nFgZZKF7RLcylaTJM2TnqvIPg58GtiRZHur/RbwqSRXMLhk9TLwawBVtTPJZmAXgxVoN7QVZADXA/cAZzFYPfZYq98N3J9kN4OZy0Tr60CSW4Fn23G3VNWBTucpSRqhW8BU1Z8z+l7IN2ZosxHYOKI+CVw+ov4WcN0x+toEbJrteCVJc8tf8kuSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySi5J8M8nzSXYm+fVWPzfJ1iQvtveFQ21uSrI7yQtJVg3Vr0yyo+27I0la/cwkD7X600mWDrVZ177jxSTrep2nJGm0njOYQ8DnqurvAFcBNyS5FLgReKKqlgFPtM+0fRPAZcBq4CtJzmh93QlsAJa11+pWXw8crKpLgNuB21pf5wI3Ax8DVgA3DweZJKm/bgFTVXur6ttt+03geWAxsAa4tx12L3BN214DPFhVb1fVS8BuYEWSC4Gzq+qpqirgvmltpvp6GLi6zW5WAVur6kBVHQS28k4oSZLmwbzcg2mXrj4KPA1cUFV7YRBC
wAfbYYuBV4aa7Wm1xW17ev2INlV1CHgdOG+GvqaPa0OSySST+/fvP/ETlCQdpXvAJPlJ4BHgs1X1xkyHjqjVDPUTbfNOoequqlpeVcsXLVo0w9AkScera8AkeT+DcPn9qvqDVn6tXfaive9r9T3ARUPNlwCvtvqSEfUj2iRZAJwDHJihL0nSPOm5iizA3cDzVfU7Q7u2AFOrutYBjw7VJ9rKsIsZ3Mx/pl1GezPJVa3PtdPaTPV1LfBku0/zOLAyycJ2c39lq0mS5smCjn1/HPg0sCPJ9lb7LeC3gc1J1gPfBa4DqKqdSTYDuxisQLuhqg63dtcD9wBnAY+1FwwC7P4kuxnMXCZaXweS3Ao82467paoO9DpRSdLRugVMVf05o++FAFx9jDYbgY0j6pPA5SPqb9ECasS+TcCm2Y5XkjS3/CW/JKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHUxq4BJ8sRsapIkTVkw084kPw78BHB+koVA2q6zgQ91Hpsk6T1sxoABfg34LIMw2cY7AfMG8HsdxyVJeo+bMWCq6neB303yb6rqS/M0JknSKeDdZjAAVNWXkvw8sHS4TVXd12lckqT3uFkFTJL7gZ8GtgOHW7kAA0aSNNKsAgZYDlxaVdVzMJKkU8dsfwfzHPBTx9Nxkk1J9iV5bqj2+STfS7K9vX5xaN9NSXYneSHJqqH6lUl2tH13JEmrn5nkoVZ/OsnSoTbrkrzYXuuOZ9ySpLkx2xnM+cCuJM8Ab08Vq+qfzdDmHuDLHH0Z7faq+sJwIcmlwARwGYMVa3+S5CNVdRi4E9gA/AXwDWA18BiwHjhYVZckmQBuA34lybnAzQxmXQVsS7Klqg7O8lwlSXNgtgHz+ePtuKq+NTyreBdrgAer6m3gpSS7gRVJXgbOrqqnAJLcB1zDIGDWDI3rYeDLbXazCthaVQdam60MQumB4z0HSdKJm+0qsj+bw+/8TJK1wCTwuTazWMxghjJlT6v9oG1Pr9PeX2njO5TkdeC84fqINpKkeTLbR8W8meSN9noryeEkb5zA993JYDXaFcBe4ItTXzHi2JqhfqJtjpBkQ5LJJJP79++fadySpOM0q4Cpqg9U1dnt9ePAP2dwf+W4VNVrVXW4qn4IfBVY0XbtAS4aOnQJ8GqrLxlRP6JNkgXAOcCBGfoaNZ67qmp5VS1ftGjR8Z6OJGkGJ/Q05ar6Q+AfHW+7JBcOffwkg9VpAFuAibYy7GJgGfBMVe0F3kxyVbu/shZ4dKjN1Aqxa4En2zLqx4GVSRa256etbDVJ0jya7Q8tf2no4/t4Z4XWTG0eAD7B4EGZexis7PpEkita25cZPOuMqtqZZDOwCzgE3NBWkAFcz2BF2lkMbu4/1up3A/e3BQEHGKxCo6oOJLkVeLYdd8vUDX9J0vyZ7Sqyfzq0fYhBOKyZqUFVfWpE+e4Zjt8IbBxRnwQuH1F/C7juGH1tAjbNND5JUl+zXUX2L3sPRJJ0apntKrIlSb7efpn/WpJHkix595aSpNPVbG/yf43BTfUPMfhNyX9pNUmSRpptwCyqqq9V1aH2ugdwXa8k6ZhmGzDfT/KrSc5or18F/rLnwCRJ722zDZh/Bfwy8H8Y/AL/WsAb/5KkY5rtMuVbgXVTTyRuTyz+AoPgkSTpKLOdwfy94cfdtx8ufrTPkCRJp4LZBsz72mNXgB/NYGY7+5EknYZmGxJfBP57kocZPObllxnxq3tJkqbM9pf89yWZZPCAywC/VFW7uo5MkvSeNuvLXC1QDBVJ0qyc0OP6JUl6NwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC66BUySTUn2JXluqHZu
kq1JXmzvC4f23ZRkd5IXkqwaql+ZZEfbd0eStPqZSR5q9aeTLB1qs659x4tJ1vU6R0nSsfWcwdwDrJ5WuxF4oqqWAU+0zyS5FJgALmttvpLkjNbmTmADsKy9pvpcDxysqkuA24HbWl/nAjcDHwNWADcPB5kkaX50C5iq+hZwYFp5DXBv274XuGao/mBVvV1VLwG7gRVJLgTOrqqnqqqA+6a1merrYeDqNrtZBWytqgNVdRDYytFBJ0nqbL7vwVxQVXsB2vsHW30x8MrQcXtabXHbnl4/ok1VHQJeB86boa+jJNmQZDLJ5P79+/8apyVJmu5kucmfEbWaoX6ibY4sVt1VVcuravmiRYtmNVBJ0uzMd8C81i570d73tfoe4KKh45YAr7b6khH1I9okWQCcw+CS3LH6kiTNo/kOmC3A1KqudcCjQ/WJtjLsYgY3859pl9HeTHJVu7+ydlqbqb6uBZ5s92keB1YmWdhu7q9sNUnSPFrQq+MkDwCfAM5PsofByq7fBjYnWQ98F7gOoKp2JtkM7AIOATdU1eHW1fUMVqSdBTzWXgB3A/cn2c1g5jLR+jqQ5Fbg2XbcLVU1fbGBJKmzbgFTVZ86xq6rj3H8RmDjiPokcPmI+lu0gBqxbxOwadaDlSTNuZPlJr8k6RRjwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldGDCSpC4MGElSFwaMJKkLA0aS1IUBI0nqwoCRJHVhwEiSujBgJEldjCVgkrycZEeS7UkmW+3cJFuTvNjeFw4df1OS3UleSLJqqH5l62d3kjuSpNXPTPJQqz+dZOl8n6Mkne7GOYP5haq6oqqWt883Ak9U1TLgifaZJJcCE8BlwGrgK0nOaG3uBDYAy9prdauvBw5W1SXA7cBt83A+kqQhJ9MlsjXAvW37XuCaofqDVfV2Vb0E7AZWJLkQOLuqnqqqAu6b1maqr4eBq6dmN5Kk+TGugCngj5NsS7Kh1S6oqr0A7f2Drb4YeGWo7Z5WW9y2p9ePaFNVh4DXgfOmDyLJhiSTSSb3798/JycmSRpYMKbv/XhVvZrkg8DWJP9zhmNHzTxqhvpMbY4sVN0F3AWwfPnyo/ZLkk7cWGYwVfVqe98HfB1YAbzWLnvR3ve1w/cAFw01XwK82upLRtSPaJNkAXAOcKDHuUiSRpv3gEnyN5J8YGobWAk8B2wB1rXD1gGPtu0twERbGXYxg5v5z7TLaG8muardX1k7rc1UX9cCT7b7NJKkeTKOS2QXAF9v99wXAP+5qv4oybPA5iTrge8C1wFU1c4km4FdwCHghqo63Pq6HrgHOAt4rL0A7gbuT7KbwcxlYj5OTJL0jnkPmKr6DvCzI+p/CVx9jDYbgY0j6pPA5SPqb9ECSpI0HifTMmVJ0inEgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLgwYSVIXBowkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV2c0gGTZHWSF5LsTnLjuMcjSaeTUzZgkpwB/B7wT4BLgU8luXS8o5Kk08eCcQ+goxXA7qr6DkCSB4E1wK6xjkoak+/e8nfHPQSdhP7Wv9vRre9TOWAWA68Mfd4DfGz4gCQbgA3t418leWGexnY6OB/4/rgHcTLIF9aNewg6mv8+p9ycv24PHz7WjlM5YEb9V6sjPlTdBdw1P8M5vSSZrKrl4x6HNIr/PufHKXsPhsGM5aKhz0uAV8c0Fkk67ZzKAfMssCzJxUl+DJgAtox5TJJ02jhlL5FV1aEknwEeB84ANlXVzjEP63TipUedzPz3OQ9SVe9+lCRJx+lUvkQmSRoj
A0aS1IUBoznnI3p0MkqyKcm+JM+NeyynCwNGc8pH9Ogkdg+wetyDOJ0YMJprP3pET1X9P2DqET3SWFXVt4AD4x7H6cSA0Vwb9YiexWMai6QxMmA01971ET2STg8GjOaaj+iRBBgwmns+okcSYMBojlXVIWDqET3PA5t9RI9OBkkeAJ4CfibJniTrxz2mU52PipEkdeEMRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMNIYJPmpJA8m+V9JdiX5RpKP+KRfnUpO2T+ZLJ2skgT4OnBvVU202hXABWMdmDTHnMFI8+8XgB9U1X+cKlTVdoYeEppkaZL/luTb7fXzrX5hkm8l2Z7kuST/IMkZSe5pn3ck+Y35PyXpaM5gpPl3ObDtXY7ZB/zjqnoryTLgAWA58C+Ax6tqY/vbOz8BXAEsrqrLAZL8zX5Dl2bPgJFOTu8HvtwunR0GPtLqzwKbkrwf+MOq2p7kO8DfTvIl4L8CfzyWEUvTeIlMmn87gSvf5ZjfAF4DfpbBzOXH4Ed/NOsfAt8D7k+ytqoOtuP+FLgB+E99hi0dHwNGmn9PAmcm+ddThSR/H/jw0DHnAHur6ofAp4Ez2nEfBvZV1VeBu4GfS3I+8L6qegT4t8DPzc9pSDPzEpk0z6qqknwS+A9JbgTeAl4GPjt02FeAR5JcB3wT+L+t/gngN5P8APgrYC2Dvxj6tSRT/8N4U/eTkGbBpylLkrrwEpkkqQsDRpLUhQEjSerCgJEkdWHASJK6MGAkSV0YMJKkLv4/ceRZXQx4oy0AAAAASUVORK5CYII=\n",
249 | "text/plain": [
250 | ""
251 | ]
252 | },
253 | "metadata": {
254 | "needs_background": "light"
255 | },
256 | "output_type": "display_data"
257 | }
258 | ],
259 | "source": [
260 | "#plot how many records fall in each value of the target column 'Class'\n",
261 | "sns.countplot(x='Class', data=dataset)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 4,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "#separating positive and negative classes\n",
271 | "positiveDataset = dataset.loc[dataset['Class'] == 1]\n",
272 | "negativeDataset = dataset.loc[dataset['Class'] == 0]"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "#split positives 4:1 and negatives 1:1 into train/test with a fixed random_state on both splits so the partition is reproducible; pd.concat is used because DataFrame.append was removed in pandas 2.0\n",
282 | "positiveTrain, positiveTest = tts(positiveDataset, test_size=0.2, random_state=21)\n",
283 | "negativeTrain, negativeTest = tts(negativeDataset, test_size=0.5, random_state=21)\n",
284 | "trainDataset = pd.concat([positiveTrain, negativeTrain])\n",
285 | "testDataset = pd.concat([positiveTest, negativeTest])"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": [
294 | "#create the regression object, separate the features from the 'Class' target and standardize the features; the scaler is fitted on the training features only and then reused on the test features so both sets share the same scaling\n",
295 | "lr = LinearRegression()\n",
296 | "yTrain = trainDataset['Class']\n",
297 | "yTest = testDataset['Class']\n",
298 | "xTrain = trainDataset.drop(columns=['Class'])\n",
299 | "xTest = testDataset.drop(columns=['Class'])\n",
300 | "scaler = StandardScaler()\n",
301 | "xTrain = scaler.fit_transform(xTrain)\n",
302 | "xTest = scaler.transform(xTest)"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 7,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "LinearRegression()"
314 | ]
315 | },
316 | "execution_count": 7,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#fit the regression model to the training data\n",
323 | "lr.fit(xTrain, yTrain)"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 8,
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "#predict on the training data with the fitted model\n",
333 | "predTrain = lr.predict(xTrain)"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 9,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "name": "stdout",
343 | "output_type": "stream",
344 | "text": [
345 | " precision recall f1-score support\n",
346 | "\n",
347 | " 0 1.00 1.00 1.00 142157\n",
348 | " 1 0.88 0.45 0.60 393\n",
349 | "\n",
350 | " accuracy 1.00 142550\n",
351 | " macro avg 0.94 0.73 0.80 142550\n",
352 | "weighted avg 1.00 1.00 1.00 142550\n",
353 | "\n",
354 | "ROC AUC Score: 0.7263751733761158\n"
355 | ]
356 | }
357 | ],
358 | "source": [
359 | "#label each training record as class 1 when the regression output is at least 0.5, otherwise 0 (vectorized, labels stay floats)\n",
360 | "#note: roc() receives the thresholded labels here, so the printed value reflects this single threshold (it equals the mean of the two per-class recalls) rather than a threshold-free AUC\n",
361 | "predTrain = np.where(predTrain >= 0.5, 1.0, 0.0)\n",
362 | "print(classification_report(yTrain, predTrain))\n",
363 | "print('ROC AUC Score: ',roc(yTrain, predTrain))"
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": 10,
372 | "metadata": {},
373 | "outputs": [
374 | {
375 | "name": "stdout",
376 | "output_type": "stream",
377 | "text": [
378 | " precision recall f1-score support\n",
379 | "\n",
380 | " 0 1.00 1.00 1.00 142158\n",
381 | " 1 0.76 0.51 0.61 99\n",
382 | "\n",
383 | " accuracy 1.00 142257\n",
384 | " macro avg 0.88 0.75 0.80 142257\n",
385 | "weighted avg 1.00 1.00 1.00 142257\n",
386 | "\n",
387 | "ROC AUC Score: 0.752468977113387\n"
388 | ]
389 | }
390 | ],
391 | "source": [
392 | "#predict on the test features and label a record as class 1 when the output is at least 0.5, otherwise 0 (vectorized, labels stay floats)\n",
393 | "#note: roc() receives the thresholded labels here, so the printed value reflects this single threshold (it equals the mean of the two per-class recalls) rather than a threshold-free AUC\n",
394 | "predTest = np.where(lr.predict(xTest) >= 0.5, 1.0, 0.0)\n",
395 | "print(classification_report(yTest, predTest))\n",
396 | "print('ROC AUC Score: ',roc(yTest, predTest))"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 11,
406 | "metadata": {},
407 | "outputs": [
408 | {
409 | "name": "stdout",
410 | "output_type": "stream",
411 | "text": [
412 | "Threshold set at: 0.1\n",
413 | " precision recall f1-score support\n",
414 | "\n",
415 | " 0 1.00 1.00 1.00 142158\n",
416 | " 1 0.66 0.80 0.72 99\n",
417 | "\n",
418 | " accuracy 1.00 142257\n",
419 | " macro avg 0.83 0.90 0.86 142257\n",
420 | "weighted avg 1.00 1.00 1.00 142257\n",
421 | "\n",
422 | "ROC AUC Score: 0.8988492104602348\n",
423 | "Threshold set at: 0.12\n",
424 | " precision recall f1-score support\n",
425 | "\n",
426 | " 0 1.00 1.00 1.00 142158\n",
427 | " 1 0.67 0.80 0.73 99\n",
428 | "\n",
429 | " accuracy 1.00 142257\n",
430 | " macro avg 0.83 0.90 0.86 142257\n",
431 | "weighted avg 1.00 1.00 1.00 142257\n",
432 | "\n",
433 | "ROC AUC Score: 0.8988527276734765\n",
434 | "Threshold set at: 0.14\n",
435 | " precision recall f1-score support\n",
436 | "\n",
437 | " 0 1.00 1.00 1.00 142158\n",
438 | " 1 0.67 0.80 0.73 99\n",
439 | "\n",
440 | " accuracy 1.00 142257\n",
441 | " macro avg 0.83 0.90 0.86 142257\n",
442 | "weighted avg 1.00 1.00 1.00 142257\n",
443 | "\n",
444 | "ROC AUC Score: 0.8988527276734765\n",
445 | "Threshold set at: 0.16\n",
446 | " precision recall f1-score support\n",
447 | "\n",
448 | " 0 1.00 1.00 1.00 142158\n",
449 | " 1 0.67 0.80 0.73 99\n",
450 | "\n",
451 | " accuracy 1.00 142257\n",
452 | " macro avg 0.83 0.90 0.86 142257\n",
453 | "weighted avg 1.00 1.00 1.00 142257\n",
454 | "\n",
455 | "ROC AUC Score: 0.8988527276734765\n",
456 | "Threshold set at: 0.18\n",
457 | " precision recall f1-score support\n",
458 | "\n",
459 | " 0 1.00 1.00 1.00 142158\n",
460 | " 1 0.67 0.80 0.73 99\n",
461 | "\n",
462 | " accuracy 1.00 142257\n",
463 | " macro avg 0.83 0.90 0.86 142257\n",
464 | "weighted avg 1.00 1.00 1.00 142257\n",
465 | "\n",
466 | "ROC AUC Score: 0.8988527276734765\n",
467 | "Threshold set at: 0.2\n",
468 | " precision recall f1-score support\n",
469 | "\n",
470 | " 0 1.00 1.00 1.00 142158\n",
471 | " 1 0.67 0.80 0.73 99\n",
472 | "\n",
473 | " accuracy 1.00 142257\n",
474 | " macro avg 0.83 0.90 0.86 142257\n",
475 | "weighted avg 1.00 1.00 1.00 142257\n",
476 | "\n",
477 | "ROC AUC Score: 0.8988527276734765\n",
478 | "Threshold set at: 0.22\n",
479 | " precision recall f1-score support\n",
480 | "\n",
481 | " 0 1.00 1.00 1.00 142158\n",
482 | " 1 0.69 0.80 0.74 99\n",
483 | "\n",
484 | " accuracy 1.00 142257\n",
485 | " macro avg 0.84 0.90 0.87 142257\n",
486 | "weighted avg 1.00 1.00 1.00 142257\n",
487 | "\n",
488 | "ROC AUC Score: 0.8988632793132013\n",
489 | "Threshold set at: 0.24\n",
490 | " precision recall f1-score support\n",
491 | "\n",
492 | " 0 1.00 1.00 1.00 142158\n",
493 | " 1 0.69 0.79 0.74 99\n",
494 | "\n",
495 | " accuracy 1.00 142257\n",
496 | " macro avg 0.85 0.89 0.87 142257\n",
497 | "weighted avg 1.00 1.00 1.00 142257\n",
498 | "\n",
499 | "ROC AUC Score: 0.8938162914759378\n",
500 | "Threshold set at: 0.26\n",
501 | " precision recall f1-score support\n",
502 | "\n",
503 | " 0 1.00 1.00 1.00 142158\n",
504 | " 1 0.71 0.79 0.75 99\n",
505 | "\n",
506 | " accuracy 1.00 142257\n",
507 | " macro avg 0.85 0.89 0.87 142257\n",
508 | "weighted avg 1.00 1.00 1.00 142257\n",
509 | "\n",
510 | "ROC AUC Score: 0.8938268431156625\n",
511 | "Threshold set at: 0.28\n",
512 | " precision recall f1-score support\n",
513 | "\n",
514 | " 0 1.00 1.00 1.00 142158\n",
515 | " 1 0.74 0.79 0.76 99\n",
516 | "\n",
517 | " accuracy 1.00 142257\n",
518 | " macro avg 0.87 0.89 0.88 142257\n",
519 | "weighted avg 1.00 1.00 1.00 142257\n",
520 | "\n",
521 | "ROC AUC Score: 0.893840911968629\n",
522 | "Threshold set at: 0.3\n",
523 | " precision recall f1-score support\n",
524 | "\n",
525 | " 0 1.00 1.00 1.00 142158\n",
526 | " 1 0.75 0.79 0.77 99\n",
527 | "\n",
528 | " accuracy 1.00 142257\n",
529 | " macro avg 0.87 0.89 0.88 142257\n",
530 | "weighted avg 1.00 1.00 1.00 142257\n",
531 | "\n",
532 | "ROC AUC Score: 0.8938479463951122\n",
533 | "Threshold set at: 0.32\n",
534 | " precision recall f1-score support\n",
535 | "\n",
536 | " 0 1.00 1.00 1.00 142158\n",
537 | " 1 0.73 0.73 0.73 99\n",
538 | "\n",
539 | " accuracy 1.00 142257\n",
540 | " macro avg 0.87 0.86 0.87 142257\n",
541 | "weighted avg 1.00 1.00 1.00 142257\n",
542 | "\n",
543 | "ROC AUC Score: 0.863544916092082\n",
544 | "Threshold set at: 0.34\n",
545 | " precision recall f1-score support\n",
546 | "\n",
547 | " 0 1.00 1.00 1.00 142158\n",
548 | " 1 0.75 0.72 0.73 99\n",
549 | "\n",
550 | " accuracy 1.00 142257\n",
551 | " macro avg 0.87 0.86 0.87 142257\n",
552 | "weighted avg 1.00 1.00 1.00 142257\n",
553 | "\n",
554 | "ROC AUC Score: 0.8585014454680601\n",
555 | "Threshold set at: 0.36\n",
556 | " precision recall f1-score support\n",
557 | "\n",
558 | " 0 1.00 1.00 1.00 142158\n",
559 | " 1 0.76 0.64 0.69 99\n",
560 | "\n",
561 | " accuracy 1.00 142257\n",
562 | " macro avg 0.88 0.82 0.85 142257\n",
563 | "weighted avg 1.00 1.00 1.00 142257\n",
564 | "\n",
565 | "ROC AUC Score: 0.818111473916986\n",
566 | "Threshold set at: 0.38\n",
567 | " precision recall f1-score support\n",
568 | "\n",
569 | " 0 1.00 1.00 1.00 142158\n",
570 | " 1 0.78 0.62 0.69 99\n",
571 | "\n",
572 | " accuracy 1.00 142257\n",
573 | " macro avg 0.89 0.81 0.84 142257\n",
574 | "weighted avg 1.00 1.00 1.00 142257\n",
575 | "\n",
576 | "ROC AUC Score: 0.8080210154557008\n",
577 | "Threshold set at: 0.4\n",
578 | " precision recall f1-score support\n",
579 | "\n",
580 | " 0 1.00 1.00 1.00 142158\n",
581 | " 1 0.79 0.62 0.69 99\n",
582 | "\n",
583 | " accuracy 1.00 142257\n",
584 | " macro avg 0.90 0.81 0.85 142257\n",
585 | "weighted avg 1.00 1.00 1.00 142257\n",
586 | "\n",
587 | "ROC AUC Score: 0.8080245326689425\n",
588 | "Threshold set at: 0.42\n",
589 | " precision recall f1-score support\n",
590 | "\n",
591 | " 0 1.00 1.00 1.00 142158\n",
592 | " 1 0.79 0.60 0.68 99\n",
593 | "\n",
594 | " accuracy 1.00 142257\n",
595 | " macro avg 0.89 0.80 0.84 142257\n",
596 | "weighted avg 1.00 1.00 1.00 142257\n",
597 | "\n",
598 | "ROC AUC Score: 0.7979235225679323\n",
599 | "Threshold set at: 0.44\n",
600 | " precision recall f1-score support\n",
601 | "\n",
602 | " 0 1.00 1.00 1.00 142158\n",
603 | " 1 0.78 0.59 0.67 99\n",
604 | "\n",
605 | " accuracy 1.00 142257\n",
606 | " macro avg 0.89 0.79 0.84 142257\n",
607 | "weighted avg 1.00 1.00 1.00 142257\n",
608 | "\n",
609 | "ROC AUC Score: 0.7928730175174272\n",
610 | "Threshold set at: 0.46\n",
611 | " precision recall f1-score support\n",
612 | "\n",
613 | " 0 1.00 1.00 1.00 142158\n",
614 | " 1 0.78 0.59 0.67 99\n",
615 | "\n",
616 | " accuracy 1.00 142257\n",
617 | " macro avg 0.89 0.79 0.84 142257\n",
618 | "weighted avg 1.00 1.00 1.00 142257\n",
619 | "\n",
620 | "ROC AUC Score: 0.7928730175174272\n",
621 | "Threshold set at: 0.48\n",
622 | " precision recall f1-score support\n",
623 | "\n",
624 | " 0 1.00 1.00 1.00 142158\n",
625 | " 1 0.78 0.58 0.66 99\n",
626 | "\n",
627 | " accuracy 1.00 142257\n",
628 | " macro avg 0.89 0.79 0.83 142257\n",
629 | "weighted avg 1.00 1.00 1.00 142257\n",
630 | "\n",
631 | "ROC AUC Score: 0.7878225124669223\n",
632 | "Threshold set at: 0.5\n",
633 | " precision recall f1-score support\n",
634 | "\n",
635 | " 0 1.00 1.00 1.00 142158\n",
636 | " 1 0.76 0.51 0.61 99\n",
637 | "\n",
638 | " accuracy 1.00 142257\n",
639 | " macro avg 0.88 0.75 0.80 142257\n",
640 | "weighted avg 1.00 1.00 1.00 142257\n",
641 | "\n",
642 | "ROC AUC Score: 0.752468977113387\n"
643 | ]
644 | }
645 | ],
646 | "source": [
647 | "#since the classes are highly imbalanced, vary the threshold from 0.1 to 0.5 in steps of 0.02 to find the optimal threshold\n",
648 | "#the raw regression outputs do not depend on the threshold, so predict once and re-threshold the same scores on every iteration\n",
649 | "scoreTest = lr.predict(xTest)\n",
650 | "for x in range(10, 51, 2):\n",
651 | "    print('Threshold set at: ',x*0.01)\n",
652 | "    predTest = np.where(scoreTest >= (x*0.01), 1.0, 0.0)\n",
653 | "    print(classification_report(yTest, predTest))\n",
654 | "    print('ROC AUC Score: ',roc(yTest, predTest))"
658 | ]
659 | }
660 | ],
661 | "metadata": {
662 | "kernelspec": {
663 | "display_name": "Python 3",
664 | "language": "python",
665 | "name": "python3"
666 | },
667 | "language_info": {
668 | "codemirror_mode": {
669 | "name": "ipython",
670 | "version": 3
671 | },
672 | "file_extension": ".py",
673 | "mimetype": "text/x-python",
674 | "name": "python",
675 | "nbconvert_exporter": "python",
676 | "pygments_lexer": "ipython3",
677 | "version": "3.8.3"
678 | }
679 | },
680 | "nbformat": 4,
681 | "nbformat_minor": 4
682 | }
683 |
--------------------------------------------------------------------------------