├── .gitignore
├── Deep_feature_extractors
│   ├── extractor.py
│   ├── googlenet.py
│   ├── resnet18.py
│   └── vgg19.py
├── Genetic_Algorithm
│   ├── GA.py
│   └── GA_functions.py
├── LICENSE
├── README.md
├── main.py
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/Deep_feature_extractors/extractor.py:
--------------------------------------------------------------------------------
from Deep_feature_extractors import googlenet, resnet18, vgg19
import numpy as np

def feature_extractor(folder_path, ext, out_classes):

    # Dispatching to the chosen backbone; each returns a training and a validation
    # dataframe of deep features with the labels appended as the final column
    if ext == 'googlenet':
        df5, df6 = googlenet.model(folder_path, out_classes)
    elif ext == 'vgg':
        df5, df6 = vgg19.model(folder_path, out_classes)
    else:
        df5, df6 = resnet18.model(folder_path, out_classes)

    array_train = np.asarray(df5)
    array_val = np.asarray(df6)
    return array_train, array_val
--------------------------------------------------------------------------------
/Deep_feature_extractors/googlenet.py:
--------------------------------------------------------------------------------
import torch
import torchvision
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transf
import numpy as np
import pandas as pd
import torch.nn as nn
import time


def model(folder_path, out_classes):

    # Configuring the device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyperparameters

    epoch_no = 5
    l_rate = 0.0001
    batch_size_tr = 50
    batch_size_val = 30

    # Transforming the data

    train_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    val_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    # Loading the dataset

    train_ds = ImageFolder(folder_path + '/train', transform=train_transform)
    val_ds = ImageFolder(folder_path + '/val', transform=val_transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size_tr, shuffle=True, num_workers=2, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size_val, shuffle=True, num_workers=2, drop_last=True)

    # Specifying the reference model

    old_model = torchvision.models.googlenet(pretrained=True)

    # Changing the model for deep feature extraction

    class whole_cnn(nn.Module):
        def __init__(self):
            super(whole_cnn, self).__init__()

            # Removing the final fully-connected layer of GoogLeNet; the deep features are extracted just before it

            self.remv_linear = torch.nn.Sequential(*(list(old_model.children())[:-1]))

            # Re-attaching a fresh linear layer on top of the 1024-d deep features to track the change in accuracy
            self.flatten = nn.Flatten()
            self.add_linear = torch.nn.Sequential(nn.Linear(1024, out_classes, bias=True))

        def forward(self, x):
            output = self.remv_linear(x)
            output = self.flatten(output)
            x_deep = output
            output_new = self.add_linear(output)
            return x_deep, output_new

    # Specifying the model, the loss criterion and the optimizer

    model = whole_cnn()
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=l_rate)

    # Training and validating with our CNN model

    def train_model(model, criterion, optim, epoch_no):
        best_deep_featr_train = []
        best_labels_train = []
        best_deep_featr_val = []
        best_labels_val = []
        since = time.time()
        best_acc = 0.0
        for epoch in range(epoch_no):
            # One batch worth of zero-filled placeholder rows; they are sliced off again before the dataframes are built
            train_features = np.zeros((50, 1024))
            train_labels = []
            running_loss = 0.0
            running_acc = 0.0
            model.train()
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(True):
                    deep_featr, outputs = model(images)
                    # Collecting the deep features and labels of every training batch
                    train_features = np.append(train_features, deep_featr.detach().cpu().numpy(), axis=0)
                    train_labels = np.append(train_labels, labels.cpu().detach().numpy(), axis=0)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optim.step()
                    optim.zero_grad()

                # Accumulating the running statistics

                running_loss += loss.item()*batch_size_tr
                running_acc += torch.sum(preds == labels)

            # Validating once per epoch and printing all statistics

            running_val_loss, running_val_acc, val_features, val_labels = model_val(model, criterion, optim)
            epoch_train_loss = running_loss/len(train_ds)
            epoch_train_acc = running_acc.double()/len(train_ds)
            print("Epoch: {}".format(epoch+1))
            print('-'*10)
            print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch_train_loss, epoch_train_acc))
            epoch_val_loss = running_val_loss/len(val_ds)
            epoch_val_acc = running_val_acc.double()/len(val_ds)
            print('Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_val_loss, epoch_val_acc))
            print()
            # Keeping the features extracted in the epoch with the best validation accuracy
            if epoch_val_acc > best_acc:
                best_acc = epoch_val_acc
                best_deep_featr_train = train_features
                best_labels_train = train_labels
                best_deep_featr_val = val_features
                best_labels_val = val_labels

        # Printing the time elapsed and the best validation accuracy

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print("Best model has validation accuracy: {}".format(best_acc))
        return best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val


    def model_val(model, criterion, optim):
        model.eval()
        running_val_loss = 0.0
        running_val_acc = 0.0
        # One batch worth of zero-filled placeholder rows, sliced off later
        val_features = np.zeros((30, 1024))
        val_labels = []
        with torch.no_grad():  # gradients are not needed during validation
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                deep_featr, outputs = model(images)
                val_features = np.append(val_features, deep_featr.detach().cpu().numpy(), axis=0)
                val_labels = np.append(val_labels, labels.cpu().detach().numpy(), axis=0)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()*batch_size_val
                running_val_acc += torch.sum(preds == labels)
        return running_val_loss, running_val_acc, val_features, val_labels

    # Calling the function to train our CNN model

    best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val = train_model(model, criterion, optim, epoch_no)

    # Creating the dataframes of extracted deep features (dropping the zero placeholder rows)

    df1 = pd.DataFrame(best_deep_featr_train[50:, :])
    df2 = pd.DataFrame(best_labels_train)
    df3 = pd.DataFrame(best_deep_featr_val[30:, :])
    df4 = pd.DataFrame(best_labels_val)
    print(df1.shape, df2.shape, df3.shape, df4.shape)
    # Appending the labels as the final column of each feature matrix
    df5 = pd.concat([df1, df2], axis=1)
    df6 = pd.concat([df3, df4], axis=1)

    return df5, df6
--------------------------------------------------------------------------------
/Deep_feature_extractors/resnet18.py:
--------------------------------------------------------------------------------
import torch
import torchvision
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transf
import numpy as np
import pandas as pd
import torch.nn as nn
import time


def model(folder_path, out_classes):

    # Configuring the device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyperparameters

    epoch_no = 5
    l_rate = 0.0001
    batch_size_tr = 50
    batch_size_val = 30

    # Transforming the data

    train_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    val_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    # Loading the dataset

    train_ds = ImageFolder(folder_path + '/train', transform=train_transform)
    val_ds = ImageFolder(folder_path + '/val', transform=val_transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size_tr, shuffle=True, num_workers=2, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size_val, shuffle=True, num_workers=2, drop_last=True)

    # Specifying the reference model

    old_model = torchvision.models.resnet18(pretrained=True)

    # Changing the model for deep feature extraction

    class whole_cnn(nn.Module):
        def __init__(self):
            super(whole_cnn, self).__init__()

            # Removing the final fully-connected layer of ResNet-18; the deep features are extracted just before it

            self.remv_linear = torch.nn.Sequential(*(list(old_model.children())[:-1]))

            # Re-attaching a fresh linear layer on top of the 512-d deep features to track the change in accuracy

            self.flatten = nn.Flatten()
            self.add_linear = torch.nn.Sequential(nn.Linear(512, out_classes, bias=True))

        def forward(self, x):
            output = self.remv_linear(x)
            output = self.flatten(output)
            x_deep = output
            output_new = self.add_linear(output)
            return x_deep, output_new

    # Specifying the model, the loss criterion and the optimizer

    model = whole_cnn()
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=l_rate)

    # Training and validating with our CNN model

    def train_model(model, criterion, optim, epoch_no):
        best_deep_featr_train = []
        best_labels_train = []
        best_deep_featr_val = []
        best_labels_val = []
        since = time.time()
        best_acc = 0.0
        for epoch in range(epoch_no):
            # One batch worth of zero-filled placeholder rows; they are sliced off again before the dataframes are built
            train_features = np.zeros((50, 512))
            train_labels = []
            running_loss = 0.0
            running_acc = 0.0
            model.train()
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(True):
                    deep_featr, outputs = model(images)
                    # Collecting the deep features and labels of every training batch
                    train_features = np.append(train_features, deep_featr.detach().cpu().numpy(), axis=0)
                    train_labels = np.append(train_labels, labels.cpu().detach().numpy(), axis=0)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optim.step()
                    optim.zero_grad()

                # Accumulating the running statistics

                running_loss += loss.item()*batch_size_tr
                running_acc += torch.sum(preds == labels)

            # Validating once per epoch and printing all statistics

            running_val_loss, running_val_acc, val_features, val_labels = model_val(model, criterion, optim)
            epoch_train_loss = running_loss/len(train_ds)
            epoch_train_acc = running_acc.double()/len(train_ds)
            print("Epoch: {}".format(epoch+1))
            print('-'*10)
            print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch_train_loss, epoch_train_acc))
            epoch_val_loss = running_val_loss/len(val_ds)
            epoch_val_acc = running_val_acc.double()/len(val_ds)
            print('Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_val_loss, epoch_val_acc))
            print()
            # Keeping the features extracted in the epoch with the best validation accuracy
            if epoch_val_acc > best_acc:
                best_acc = epoch_val_acc
                best_deep_featr_train = train_features
                best_labels_train = train_labels
                best_deep_featr_val = val_features
                best_labels_val = val_labels

        # Printing the time elapsed and the best validation accuracy

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print("Best model has validation accuracy: {}".format(best_acc))
        return best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val


    def model_val(model, criterion, optim):
        model.eval()
        running_val_loss = 0.0
        running_val_acc = 0.0
        # One batch worth of zero-filled placeholder rows, sliced off later
        val_features = np.zeros((30, 512))
        val_labels = []
        with torch.no_grad():  # gradients are not needed during validation
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                deep_featr, outputs = model(images)
                val_features = np.append(val_features, deep_featr.detach().cpu().numpy(), axis=0)
                val_labels = np.append(val_labels, labels.cpu().detach().numpy(), axis=0)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()*batch_size_val
                running_val_acc += torch.sum(preds == labels)
        return running_val_loss, running_val_acc, val_features, val_labels

    # Calling the function to train our CNN model

    best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val = train_model(model, criterion, optim, epoch_no)

    # Creating the dataframes of extracted deep features (dropping the zero placeholder rows)

    df1 = pd.DataFrame(best_deep_featr_train[50:, :])
    df2 = pd.DataFrame(best_labels_train)
    df3 = pd.DataFrame(best_deep_featr_val[30:, :])
    df4 = pd.DataFrame(best_labels_val)
    print(df1.shape, df2.shape, df3.shape, df4.shape)
    # Appending the labels as the final column of each feature matrix
    df5 = pd.concat([df1, df2], axis=1)
    df6 = pd.concat([df3, df4], axis=1)

    return df5, df6
--------------------------------------------------------------------------------
/Deep_feature_extractors/vgg19.py:
--------------------------------------------------------------------------------
import torch
import torchvision
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transf
import numpy as np
import pandas as pd
import torch.nn as nn
import time


def model(folder_path, out_classes):

    # Configuring the device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyperparameters

    epoch_no = 5
    l_rate = 0.0001
    batch_size_tr = 50
    batch_size_val = 30

    # Transforming the data

    train_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    val_transform = transf.Compose([
        transf.Resize((224, 224)),
        transf.ToTensor()
    ])

    # Loading the dataset

    train_ds = ImageFolder(folder_path + '/train', transform=train_transform)
    val_ds = ImageFolder(folder_path + '/val', transform=val_transform)
    train_loader = DataLoader(train_ds, batch_size=batch_size_tr, shuffle=True, num_workers=2, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size_val, shuffle=True, num_workers=2, drop_last=True)

    # Specifying the reference model

    old_model = torchvision.models.vgg19(pretrained=True)

    # Changing the model for deep feature extraction

    class whole_cnn(nn.Module):
        def __init__(self):
            super(whole_cnn, self).__init__()

            # Removing the classifier block of VGG-19; the deep features are extracted just before it

            self.remv_classifier = torch.nn.Sequential(*(list(old_model.children())[:-1]))

            # Re-attaching a fresh classifier on top of the 25088-d deep features to track the change in accuracy

            self.flatten = nn.Flatten()
            self.add_classifier = torch.nn.Sequential(nn.Linear(25088, 4096, bias=True),
                                                      nn.ReLU(),
                                                      nn.Dropout(0.5),
                                                      nn.Linear(4096, 4096, bias=True),
                                                      nn.ReLU(),
                                                      nn.Dropout(0.5),
                                                      nn.Linear(4096, out_classes))

        def forward(self, x):
            output = self.remv_classifier(x)
            output = self.flatten(output)
            x_deep = output
            output_new = self.add_classifier(output)
            return x_deep, output_new

    # Specifying the model, the loss criterion and the optimizer

    model = whole_cnn()
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=l_rate)

    # Training and validating with our CNN model

    def train_model(model, criterion, optim, epoch_no):
        best_deep_featr_train = []
        best_labels_train = []
        best_deep_featr_val = []
        best_labels_val = []
        since = time.time()
        best_acc = 0.0
        for epoch in range(epoch_no):
            # One batch worth of zero-filled placeholder rows; they are sliced off again before the dataframes are built
            train_features = np.zeros((50, 25088))
            train_labels = []
            running_loss = 0.0
            running_acc = 0.0
            model.train()
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(True):
                    deep_featr, outputs = model(images)
                    # Collecting the deep features and labels of every training batch
                    train_features = np.append(train_features, deep_featr.detach().cpu().numpy(), axis=0)
                    train_labels = np.append(train_labels, labels.cpu().detach().numpy(), axis=0)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optim.step()
                    optim.zero_grad()

                # Accumulating the running statistics

                running_loss += loss.item()*batch_size_tr
                running_acc += torch.sum(preds == labels)

            # Validating once per epoch and printing all statistics

            running_val_loss, running_val_acc, val_features, val_labels = model_val(model, criterion, optim)
            epoch_train_loss = running_loss/len(train_ds)
            epoch_train_acc = running_acc.double()/len(train_ds)
            print("Epoch: {}".format(epoch+1))
            print('-'*10)
            print('Train Loss: {:.4f} Train Acc: {:.4f}'.format(epoch_train_loss, epoch_train_acc))
            epoch_val_loss = running_val_loss/len(val_ds)
            epoch_val_acc = running_val_acc.double()/len(val_ds)
            print('Val Loss: {:.4f} Val Acc: {:.4f}'.format(epoch_val_loss, epoch_val_acc))
            print()
            # Keeping the features extracted in the epoch with the best validation accuracy
            if epoch_val_acc > best_acc:
                best_acc = epoch_val_acc
                best_deep_featr_train = train_features
                best_labels_train = train_labels
                best_deep_featr_val = val_features
                best_labels_val = val_labels

        # Printing the time elapsed and the best validation accuracy

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        print("Best model has validation accuracy: {}".format(best_acc))
        return best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val


    def model_val(model, criterion, optim):
        model.eval()
        running_val_loss = 0.0
        running_val_acc = 0.0
        # One batch worth of zero-filled placeholder rows, sliced off later
        val_features = np.zeros((30, 25088))
        val_labels = []
        with torch.no_grad():  # gradients are not needed during validation
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                deep_featr, outputs = model(images)
                val_features = np.append(val_features, deep_featr.detach().cpu().numpy(), axis=0)
                val_labels = np.append(val_labels, labels.cpu().detach().numpy(), axis=0)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()*batch_size_val
                running_val_acc += torch.sum(preds == labels)
        return running_val_loss, running_val_acc, val_features, val_labels

    # Calling the function to train our CNN model

    best_deep_featr_train, best_labels_train, best_deep_featr_val, best_labels_val = train_model(model, criterion, optim, epoch_no)

    # Creating the dataframes of extracted deep features (dropping the zero placeholder rows)

    df1 = pd.DataFrame(best_deep_featr_train[50:, :])
    df2 = pd.DataFrame(best_labels_train)
    df3 = pd.DataFrame(best_deep_featr_val[30:, :])
    df4 = pd.DataFrame(best_labels_val)
    print(df1.shape, df2.shape, df3.shape, df4.shape)
    # Appending the labels as the final column of each feature matrix
    df5 = pd.concat([df1, df2], axis=1)
    df6 = pd.concat([df3, df4], axis=1)

    return df5, df6
--------------------------------------------------------------------------------
/Genetic_Algorithm/GA.py:
--------------------------------------------------------------------------------
from Genetic_Algorithm import GA_functions
import numpy as np
import matplotlib.pyplot as plt


def algorithm(array_train, array_val, classif):

    # Loading the necessary variables for applying GA

    pop_size = 50                            # Population size
    num_parents_mating = int(pop_size*0.5)   # Number of parents inside the mating pool
    num_mutations = 3                        # Number of genes to mutate
    num_generations = 10                     # Number of generations
    classifier = classif

    num_feature_elements_train = array_train.shape[1]-1   # Excluding the final label column


    # Applying the Genetic Algorithm (GA)

    # Defining the population shape.
    pop_shape = (pop_size, num_feature_elements_train)

    # Creating the initial population (each gene is 0 or 1: feature excluded or included).
    new_population = np.random.randint(low=0, high=2, size=pop_shape)
    print(new_population.shape)

    best_outputs = []
    best_solution = None
    best_fitness = -1.0
    for generation in range(num_generations):
        print("Generation : ", (generation+1))
        # Measuring the fitness of each chromosome in the population.
        fitness = GA_functions.cal_pop_fitness(new_population, array_train, array_val, classifier)

        best_outputs.append(np.max(fitness))
        # The best result in the current iteration.
        print("Best result : ", best_outputs[-1])

        # Keeping a copy of the best chromosome seen so far (the population is overwritten each generation).
        if best_outputs[-1] > best_fitness:
            best_fitness = best_outputs[-1]
            best_solution = new_population[np.argmax(fitness), :].copy()

        # Selecting the best parents in the population for mating.
        parents = GA_functions.select_mating_pool(new_population, fitness, num_parents_mating)

        # Generating the next generation using crossover.
        crossed_offsprings = GA_functions.crossover(parents, offspring_size=(pop_shape[0]-parents.shape[0], num_feature_elements_train))

        # Adding some variation to the offspring using mutation.
        mutated_offsprings = GA_functions.mutation(crossed_offsprings, num_mutations)

        # Creating the new population from the parents and the offspring.
        new_population[:parents.shape[0], :] = parents
        new_population[parents.shape[0]:, :] = mutated_offsprings


    # Getting the best solution after finishing all generations.

    best_acc = best_fitness*100.0
    best_solution_indices = np.where(best_solution == 1)[0]
    best_solution_num_elements = best_solution_indices.shape[0]

    # Printing the required statistics

    print("The accuracy of the best candidate solution is {:.4f}".format(best_acc))
    print("Selected feature indices by GA : ", best_solution_indices)
    print("Number of selected features by GA : ", best_solution_num_elements)

    # Plotting the 'Accuracy' vs 'Generation' curve

    plt.plot(range(num_generations), best_outputs, 'b')
    plt.xlabel('Generations')
    plt.ylabel('Accuracy')
    plt.show()
--------------------------------------------------------------------------------
/Genetic_Algorithm/GA_functions.py:
--------------------------------------------------------------------------------
import numpy as np

# Defining the necessary functions for GA


# Feature reduction to eliminate the redundant features

def reduce_features(solution, array_train, array_val):
    # Appending a 1 so that the final (label) column is always kept
    solution = np.append(solution, [1])
    selected_elements_indices = np.where(solution == 1)[0]  # Selecting the elements whose genes have a value of 1
    reduced_train_features = array_train[:, selected_elements_indices]
    reduced_val_features = array_val[:, selected_elements_indices]
    return reduced_train_features, reduced_val_features


# Calculating and returning the classification accuracy

def classification_accuracy(labels, val_predictions):
    correct = np.where(labels == val_predictions)
    accuracy = correct[0].shape[0]/val_predictions.shape[0]
    return accuracy


# Calculating the population fitness using the SVM, KNN or MLP classifier
def cal_pop_fitness(pop, array_train, array_val, classifier):
    accuracies = np.zeros(pop.shape[0])
    idx = 0  # Counter variable for filling the accuracies array

    for curr_solution in pop:  # curr_solution is one chromosome; pop is the whole population
        reduced_train_features, reduced_val_features = reduce_features(curr_solution, array_train, array_val)
        X = reduced_train_features[:, :-1]  # Taking all the feature columns
        y = reduced_train_features[:, -1]   # Taking the label column
        reduced_validation_features = reduced_val_features[:, :-1]
        validation_labels = reduced_val_features[:, -1]

        # Choosing the classifier

        if classifier == 'SVM':
            ## SVM CLASSIFIER ##
            from sklearn.svm import SVC
            SVM_classifier = SVC(kernel='rbf')
            SVM_classifier.fit(X, y)
            val_predictions = SVM_classifier.predict(reduced_validation_features)

        elif classifier == 'MLP':
            ## MLP CLASSIFIER ##
            from sklearn.neural_network import MLPClassifier
            MLP_classifier = MLPClassifier()
            MLP_classifier.fit(X, y)
            val_predictions = MLP_classifier.predict(reduced_validation_features)

        else:
            ## KNN CLASSIFIER ##
            from sklearn.neighbors import KNeighborsClassifier
            KNN_classifier = KNeighborsClassifier(n_neighbors=2)
            KNN_classifier.fit(X, y)
            val_predictions = KNN_classifier.predict(reduced_validation_features)

        accuracies[idx] = classification_accuracy(validation_labels, val_predictions)
        idx = idx + 1
    return accuracies


# Selecting the best individuals in the current generation as parents for producing the offspring of the next generation

def select_mating_pool(pop, fitness, num_parents):
    parents = np.empty((num_parents, pop.shape[1]))
    for parent_num in range(num_parents):
        max_fitness_index = np.where(fitness == np.max(fitness))[0]
        max_fitness_index = max_fitness_index[0]
        parents[parent_num, :] = pop[max_fitness_index, :]
        # Marking the chosen individual so that it is not selected again
        fitness[max_fitness_index] = -99999999999
    return parents


# Applying one-point crossover

def crossover(parents, offspring_size):
    crossed_offsprings = np.empty(offspring_size)
    # The point at which crossover takes place between two parents. Usually, it is at the center.
    crossover_point = offspring_size[1]//2

    for k in range(offspring_size[0]):
        # Index of the first parent to mate.
        parent1_index = k%parents.shape[0]

        # Index of the second parent to mate.
        parent2_index = (k+1)%parents.shape[0]

        # The new offspring takes the first half of its genes from the first parent.
        crossed_offsprings[k, 0:crossover_point] = parents[parent1_index, 0:crossover_point]

        # The new offspring takes the second half of its genes from the second parent.
        crossed_offsprings[k, crossover_point:] = parents[parent2_index, crossover_point:]
    return crossed_offsprings


# Performing mutation of randomly selected genes by flipping their values

def mutation(crossed_offsprings, num_mutations):
    # Choosing the gene positions to mutate (the same positions are used for every offspring in this call)
    mutation_index = np.random.randint(low=0, high=crossed_offsprings.shape[1], size=num_mutations)
    for index in range(crossed_offsprings.shape[0]):
        # Flipping the selected genes (0 -> 1, 1 -> 0).
        crossed_offsprings[index, mutation_index] = 1 - crossed_offsprings[index, mutation_index]
    return crossed_offsprings
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Agnish Bhattacharya

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep-feature-extraction-from-CNNs-followed-by-optimization-with-GA

## Project Description
This is a Python-based project that classifies breast tumor tissue as benign or malignant through deep analysis of histopathological image samples from the popular `BreakHis` dataset. One of three popular `pre-trained` and `fine-tuned` Convolutional Neural Networks (`GoogLeNet`, `ResNet-18` or `VGG-19`) is applied at a time, after which the deep features are extracted from the CNN's `pre-final` layer and optimized (redundant features removed) using the Genetic Algorithm (`GA`) for improved accuracy. The images are finally classified using any one of the `SVM`, `KNN` or `MLP` classifiers.

## Dataset description
The Breast Cancer Histopathological Image Classification dataset (BreakHis) is composed of 9,109 microscopic images of breast tumor tissue collected from 82 patients using different magnifying factors (40X, 100X, 200X, and 400X). To date, it contains 2,480 benign and 5,429 malignant samples (700×460 pixels, 3-channel RGB, 8-bit depth in each channel, PNG format). This database has been built in collaboration with the P&D Laboratory – Pathological Anatomy and Cytopathology, Parana, Brazil (http://www.prevencaoediagnose.com.br).
The dataset is available at:
https://www.kaggle.com/ambarish/breakhis

## Classes of Division
In this project, we have used histopathological image samples of human breast tissue, which have been classified into two categories, namely:
- `Benign tissue`
- `Malignant tissue`

## Convolutional Neural Network models used
Three CNN models have been applied to the dataset, namely:
- `GoogLeNet`
- `ResNet-18`
- `Visual Geometry Group (VGG-19)`

## Classifier models used inside the Genetic Algorithm (GA)
Three classifier models have been used, namely:
- `Support Vector Machines (SVM) (RBF kernel)`
- `K-Nearest Neighbors (KNN) (K=2 used)`
- `Multi-Layer Perceptron (MLP)`

## 'Accuracy' vs 'Generation' plots
The following plots show how the accuracy on our validation dataset varies with the number of generations of the Genetic Algorithm (GA) for each of the three Convolutional Neural Networks. The `KNN` classifier has been used in every case.
No. of Epochs: `5`, No. of Generations: `10`
- GoogLeNet using KNN classifier
![image](https://user-images.githubusercontent.com/84792746/154838869-35486f58-74c8-46b3-9b15-75333ddd2eef.png)
- ResNet-18 using KNN classifier
![image](https://user-images.githubusercontent.com/84792746/154838818-d415193f-1736-4272-a9c3-90ce013bfc37.png)
- VGG-19 using KNN classifier
![image](https://user-images.githubusercontent.com/84792746/154838834-71b65cea-cc2b-4527-b6ad-0d01e2c532c5.png)

## Flow diagram of the Genetic Algorithm


![image](https://user-images.githubusercontent.com/84792746/154852688-200dd978-ec4a-47c1-b073-0d3249e3d89c.png)


## Dependencies
Since the entire project is based on the `Python` programming language, it is necessary to have Python installed in the system. It is recommended to use Python with version `>=3.6`.
The Python packages used in this project are `matplotlib`, `numpy`, `pandas`, `scikit-learn`, `torch` and `torchvision`. All of these dependencies can be installed with a single command:
- `pip install -r requirements.txt`

## Code implementation
- ### Data paths :

        Current directory
        └── data
            ├── train
            │   ├── class_1
            │   ├── class_2
            │   ├── ...
            │   └── class_n
            └── val
                ├── class_1
                ├── class_2
                ├── ...
                └── class_n

- Here the folders `train` and `val` contain the folders `benign` and `malignant`, which hold the original histopathological images of the respective type of human breast tumor tissue in `.jpg`/`.png` format. A helper sketch for building this layout is given below.
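
- ### Preparing the data folders (optional) :

  A minimal sketch, not part of the original codebase, for building the layout above from a flat folder of class-labelled images. The `raw_images` source folder name, its `raw_images/<class_name>/<image>` layout and the 80/20 train/val split are assumptions to adapt as needed.

        import os
        import random
        import shutil

        src = 'raw_images'  # assumed source layout: raw_images/<class_name>/<image files>
        random.seed(0)      # reproducible split
        for class_name in os.listdir(src):
            images = os.listdir(os.path.join(src, class_name))
            random.shuffle(images)
            split = int(0.8*len(images))  # assumed 80/20 train/val split
            for subset, subset_images in [('train', images[:split]), ('val', images[split:])]:
                dest = os.path.join('data', subset, class_name)
                os.makedirs(dest, exist_ok=True)
                for image in subset_images:
                    shutil.copy(os.path.join(src, class_name, image), dest)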

- ### Training and Evaluation :

        usage: main.py [-h] [-data DATA_FOLDER] [-classes NUM_CLASSES]
                       [-ext EXTRACTOR_TYPE] [-classif CLASSIFIER_TYPE]

        Application of Genetic Algorithm

        optional arguments:
          -h, --help            show this help message and exit
          -data DATA_FOLDER, --data_folder DATA_FOLDER
                                Path to data
          -classes NUM_CLASSES, --num_classes NUM_CLASSES
                                Number of data classes
          -ext EXTRACTOR_TYPE, --extractor_type EXTRACTOR_TYPE
                                Choice of deep feature extractor
          -classif CLASSIFIER_TYPE, --classifier_type CLASSIFIER_TYPE
                                Choice of classifier for GA

- ### Run the following for training and validation :

  `python main.py -data data -classes n -ext resnet -classif KNN`

- ### Specific tokens :

        GoogLeNet: 'googlenet'
        ResNet-18: 'resnet'
        VGG-19: 'vgg'
        SVM Classifier: 'SVM'
        KNN Classifier: 'KNN'
        MLP Classifier: 'MLP'
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from Deep_feature_extractors import extractor
from Genetic_Algorithm import GA
import argparse


parser = argparse.ArgumentParser(description='Application of Genetic Algorithm')
# Command-line options
parser.add_argument('-data', '--data_folder', type=str,
                    default='data',
                    help='Path to data')
parser.add_argument('-classes', '--num_classes', type=int,
                    default=2,
                    help='Number of data classes')
parser.add_argument('-ext', '--extractor_type', type=str,
                    default='resnet',
                    help='Choice of deep feature extractor')
parser.add_argument('-classif', '--classifier_type', type=str,
                    default='KNN',
                    help='Choice of classifier for GA')


args = parser.parse_args()
folder_path = args.data_folder
out_classes = args.num_classes
ext = args.extractor_type
classif = args.classifier_type


print("Extracting deep features...")
print('\n'*2)
array_train, array_val = extractor.feature_extractor(folder_path, ext, out_classes)
print("Deep features extracted.")
print('\n'*2)
print('Applying Genetic Algorithm...')
print('\n'*2)
GA.algorithm(array_train, array_val, classif)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
-f https://download.pytorch.org/whl/cu102/torch_stable.html
torch==1.10.0+cu102
torchvision==0.11.1+cu102
torchaudio===0.10.0+cu102
matplotlib==3.2.1
numpy==1.18.3
pandas==1.0.3
scikit_learn==0.24.2
--------------------------------------------------------------------------------
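
As a quick sanity check of the GA stage without downloading BreakHis, the algorithm can be fed random feature arrays. This snippet is an added illustration, not a file from the repository; the array layout (features with the labels appended as the final column) mirrors what `extractor.feature_extractor` returns.

    import numpy as np
    from Genetic_Algorithm import GA

    rng = np.random.default_rng(0)
    n_features = 64  # stand-in for the 512/1024/25088-d deep features

    # Random "deep features" with binary labels in the final column
    array_train = np.hstack([rng.normal(size=(60, n_features)), rng.integers(0, 2, size=(60, 1))])
    array_val = np.hstack([rng.normal(size=(30, n_features)), rng.integers(0, 2, size=(30, 1))])

    GA.algorithm(array_train, array_val, 'KNN')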