├── .gitattributes
├── README.md
├── .gitignore
└── credit_card_fraud_detector.py

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto

# Custom for Visual Studio
*.cs diff=csharp

# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# CreditCardFraudDetection
An attempt to detect fraud in online transactions with a feed-forward neural network, using PyTorch.

This is a feed-forward neural network built with PyTorch to detect fraudulent credit card transactions. The dataset is not included in this repo (the download link is given below).

credit_card_fraud_detector.py loads the dataset, splits it into train and test sets, and feeds it to the neural network. The one problem here is that the dataset is highly skewed, so plain accuracy is a misleading way to measure the model; you need a different metric such as the F1 score, which is not included in this repo. Feel free to add it (a sketch of one way to do so follows this file).

Enjoy!!

Here's the dataset download link: https://www.kaggle.com/mlg-ulb/creditcardfraud/downloads/creditcardfraud.zip/3
--------------------------------------------------------------------------------
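
The README leaves the F1 evaluation as an exercise. A minimal sketch of what it could look like, assuming the `model` and `test_loader` objects defined in credit_card_fraud_detector.py below, and using sklearn's standard `f1_score` helper:

import torch
from sklearn.metrics import f1_score

# Collect predictions and true labels over the whole test set.
all_preds, all_labels = [], []
with torch.no_grad():
    for ins, l in test_loader:
        outs = model(ins)
        _, predicted = torch.max(outs, 1)
        all_preds.extend(predicted.tolist())
        all_labels.extend(l.tolist())

# F1 balances precision and recall for the positive (fraud) class.
print("F1 score:", f1_score(all_labels, all_preds))

Because only about 0.17% of the rows in this dataset are fraud, a classifier that always predicts "not fraud" already scores above 99% accuracy; the fraud-class F1 exposes that failure where plain accuracy hides it.
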
/.gitignore:
--------------------------------------------------------------------------------
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk

# =========================
# Operating System Files
# =========================

# OSX
# =========================

.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
--------------------------------------------------------------------------------

/credit_card_fraud_detector.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# The Kaggle credit card fraud dataset: 30 feature columns
# (Time, V1..V28, Amount) followed by the Class label (0 = genuine, 1 = fraud).
data = pd.read_csv("creditcard.csv")
data = np.array(data)

X_data = data[:, 0:-1]
y_data = data[:, -1]
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.33, random_state=42)


class CreditCardTrainDataLoader(Dataset):
    def __init__(self):
        # .type() returns a new tensor rather than casting in place,
        # so the result has to be assigned back.
        self.x_data = torch.from_numpy(X_train).type(torch.FloatTensor)
        self.y_data = torch.from_numpy(y_train).type(torch.LongTensor)
        self.len = X_train.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len


class CreditCardTestDataLoader(Dataset):
    def __init__(self):
        self.x_data = torch.from_numpy(X_test).type(torch.FloatTensor)
        self.y_data = torch.from_numpy(y_test).type(torch.LongTensor)
        self.len = X_test.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len


batch_size = 50

data_set = CreditCardTrainDataLoader()
train_loader = DataLoader(
    dataset=data_set,
    batch_size=batch_size,
    shuffle=True
)

test_data_set = CreditCardTestDataLoader()
test_loader = DataLoader(
    dataset=test_data_set,
    batch_size=1,
    shuffle=False
)


class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()
        self.layer2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.layer1(x)
        out = self.sigmoid(out)
        # Return raw logits; CrossEntropyLoss applies log-softmax itself.
        out = self.layer2(out)
        return out


input_dim = 30
hidden_dim = 32
num_of_classes = 2

model = Model(input_dim, hidden_dim, num_of_classes)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
epochs = 5

for epoch in range(epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # Periodically evaluate accuracy on the held-out test set.
            total = 0
            correct = 0
            with torch.no_grad():
                for ins, l in test_loader:
                    outs = model(ins)
                    _, predicted = torch.max(outs, 1)
                    total += l.size(0)
                    correct += (predicted == l).sum().item()

            accuracy = 100 * correct / total
            print("iteration ", i + 1, " in Epoch ", epoch,
                  " Loss : ", loss.item(), " Accuracy: ", accuracy)
--------------------------------------------------------------------------------
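
Besides measuring with F1, the skew can also be addressed at training time. One standard option, not part of the script above, is to pass per-class weights to `nn.CrossEntropyLoss` so that mistakes on the rare fraud class cost more. A minimal sketch, assuming the `y_train` array and training loop from credit_card_fraud_detector.py:

import numpy as np
import torch
import torch.nn as nn

# Inverse-frequency weights: the rare fraud class gets a proportionally
# larger weight than the majority class.
counts = np.bincount(y_train.astype(int))            # [n_genuine, n_fraud]
weights = torch.tensor(counts.sum() / counts, dtype=torch.float)

# Drop-in replacement for `loss_func` in the training loop above.
loss_func = nn.CrossEntropyLoss(weight=weights)

Reweighting changes what the optimizer treats as a good trade-off, so overall accuracy will typically drop while recall on the fraud class rises.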