├── .gitattributes
├── README.md
├── .gitignore
└── credit_card_fraud_detector.py

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# CreditCardFraudDetection
An attempt to detect fraud in online transactions with a feed-forward neural network, using PyTorch.

This is a feed-forward neural network built with PyTorch to detect fraudulent credit card transactions. The dataset is not included in this repo (the download link is given below).

credit_card_fraud_detector.py loads the dataset, splits it into train and test sets, and feeds it to the neural network. The one problem here is that the dataset is highly skewed, so plain accuracy is a misleading way to measure the model; you need a different metric such as the F1 score, which is not included in this repo. Feel free to add it (a sketch of one way to do so follows this file).

Enjoy!!

Here's the dataset download link: https://www.kaggle.com/mlg-ulb/creditcardfraud/downloads/creditcardfraud.zip/3
--------------------------------------------------------------------------------
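
The README leaves the F1 evaluation as an exercise. A minimal sketch of what it could look like, assuming the `model` and `test_loader` objects defined in credit_card_fraud_detector.py below, and using sklearn's standard `f1_score` helper:

import torch
from sklearn.metrics import f1_score

# Collect predictions and true labels over the whole test set.
all_preds, all_labels = [], []
with torch.no_grad():
    for ins, l in test_loader:
        outs = model(ins)
        _, predicted = torch.max(outs, 1)
        all_preds.extend(predicted.tolist())
        all_labels.extend(l.tolist())

# F1 balances precision and recall for the positive (fraud) class.
print("F1 score:", f1_score(all_labels, all_preds))

Because only about 0.17% of the rows in this dataset are fraud, a classifier that always predicts "not fraud" already scores above 99% accuracy; the fraud-class F1 exposes that failure where plain accuracy hides it.
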
/.gitignore:
--------------------------------------------------------------------------------
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk

# =========================
# Operating System Files
# =========================

# OSX
# =========================

.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
--------------------------------------------------------------------------------

/credit_card_fraud_detector.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# The Kaggle credit card fraud dataset: 30 feature columns
# (Time, V1..V28, Amount) followed by the Class label (0 = genuine, 1 = fraud).
data = pd.read_csv("creditcard.csv")
data = np.array(data)

X_data = data[:, 0:-1]
y_data = data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.33, random_state=42)


class CreditCardTrainDataLoader(Dataset):
    def __init__(self):
        # .type() returns a new tensor rather than casting in place,
        # so the result has to be assigned back.
        self.x_data = torch.from_numpy(X_train).type(torch.FloatTensor)
        self.y_data = torch.from_numpy(y_train).type(torch.LongTensor)
        self.len = X_train.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len


class CreditCardTestDataLoader(Dataset):
    def __init__(self):
        self.x_data = torch.from_numpy(X_test).type(torch.FloatTensor)
        self.y_data = torch.from_numpy(y_test).type(torch.LongTensor)
        self.len = X_test.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len


batch_size = 50

data_set = CreditCardTrainDataLoader()
train_loader = DataLoader(
    dataset=data_set,
    batch_size=batch_size,
    shuffle=True
)

test_data_set = CreditCardTestDataLoader()
test_loader = DataLoader(
    dataset=test_data_set,
    batch_size=1,
    shuffle=False
)


class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()
        self.layer2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.layer1(x)
        out = self.sigmoid(out)
        # Return raw logits; CrossEntropyLoss applies log-softmax itself.
        out = self.layer2(out)
        return out


input_dim = 30
hidden_dim = 32
num_of_classes = 2

model = Model(input_dim, hidden_dim, num_of_classes)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
epochs = 5

for epoch in range(epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # Periodically evaluate accuracy on the held-out test set.
            total = 0
            correct = 0
            with torch.no_grad():
                for ins, l in test_loader:
                    outs = model(ins)
                    _, predicted = torch.max(outs, 1)
                    total += l.size(0)
                    correct += (predicted == l).sum().item()

            accuracy = 100 * correct / total
            print("iteration ", i + 1, " in Epoch ", epoch,
                  " Loss : ", loss.item(), " Accuracy: ", accuracy)
--------------------------------------------------------------------------------
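
Besides measuring with F1, the skew can also be addressed at training time. One standard option, not part of the script above, is to pass per-class weights to `nn.CrossEntropyLoss` so that mistakes on the rare fraud class cost more. A minimal sketch, assuming the `y_train` array and training loop from credit_card_fraud_detector.py:

import numpy as np
import torch
import torch.nn as nn

# Inverse-frequency weights: the rare fraud class gets a proportionally
# larger weight than the majority class.
counts = np.bincount(y_train.astype(int))            # [n_genuine, n_fraud]
weights = torch.tensor(counts.sum() / counts, dtype=torch.float)

# Drop-in replacement for `loss_func` in the training loop above.
loss_func = nn.CrossEntropyLoss(weight=weights)

Reweighting changes what the optimizer treats as a good trade-off, so overall accuracy will typically drop while recall on the fraud class rises.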