├── dataset
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── ImgLoader.cpython-36.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── RGBImgLoader.cpython-36.pyc
│   │   └── TestImgLoader.cpython-36.pyc
│   ├── ImgLoader.py
│   ├── RGBImgLoader.py
│   └── TestImgLoader.py
├── resnet34.png
├── models
│   ├── __pycache__
│   │   ├── vgg.cpython-36.pyc
│   │   └── resnet.cpython-36.pyc
│   ├── vgg.py
│   ├── squeezenet.py
│   ├── mobilenetv2.py
│   ├── preactresnet.py
│   ├── resnext.py
│   ├── googlenet.py
│   ├── shufflenetv2.py
│   ├── densenet.py
│   ├── senet.py
│   ├── mobilenet.py
│   ├── xception.py
│   ├── rir.py
│   ├── shufflenet.py
│   ├── nasnet.py
│   ├── inceptionv3.py
│   ├── attention.py
│   └── resnet.py
├── conf
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   └── global_settings.cpython-36.pyc
│   ├── __init__.py
│   └── global_settings.py
├── README.md
├── result_combine.py
├── roc.py
├── imbalanced_dataset_sampler.py
├── dataset.py
├── test_cefa.py
├── class_balanced_loss.py
├── lr_finder.py
├── test.py
├── utils.py
├── train.py
├── train_wo_CB.py
├── train_wo_CB_weight.py
└── train_cefa.py

/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/resnet34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/resnet34.png
--------------------------------------------------------------------------------
/models/__pycache__/vgg.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/models/__pycache__/vgg.cpython-36.pyc
--------------------------------------------------------------------------------
/conf/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/conf/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/models/__pycache__/resnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/models/__pycache__/resnet.cpython-36.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/ImgLoader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/dataset/__pycache__/ImgLoader.cpython-36.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/dataset/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/conf/__pycache__/global_settings.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/conf/__pycache__/global_settings.cpython-36.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/RGBImgLoader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/dataset/__pycache__/RGBImgLoader.cpython-36.pyc
--------------------------------------------------------------------------------
/dataset/__pycache__/TestImgLoader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgqtmac/cvprw2020/HEAD/dataset/__pycache__/TestImgLoader.cpython-36.pyc
--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
1 | """ dynamically load settings
2 | 
3 | author baiyu
4 | """
5 | import conf.global_settings as settings
6 | 
7 | class Settings:
8 |     def __init__(self, settings):
9 | 
10 |         for attr in dir(settings):
11 |             if attr.isupper():
12 |                 setattr(self, attr, getattr(settings, attr))
13 | 
14 | settings = Settings(settings)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CVPRW2020
2 | 
3 | 
4 | ## Requirements
5 | 
6 | This is my experiment environment; PyTorch 0.4 should also be fine
7 | - python3.6
8 | - pytorch1.0
9 | - cuda8.0
10 | 
11 | 
12 | ## Usage
13 | 
14 | ### 1. train the model
15 | 
16 | ```bash
17 | $ python train_cefa.py
18 | ```
19 | 
20 | 
21 | ### 2. test the model
22 | Test the model using test_cefa.py
23 | ```bash
24 | $ python test_cefa.py
25 | ```
26 | 
27 | ### 3. combine the results
28 | Combine the per-frame scores using result_combine.py
29 | ```bash
30 | $ python result_combine.py
31 | ```
32 | 
33 | 
34 | 
35 | 
36 | 
--------------------------------------------------------------------------------
/conf/global_settings.py:
--------------------------------------------------------------------------------
1 | """ configurations for this project
2 | 
3 | author baiyu
4 | """
5 | import os
6 | from datetime import datetime
7 | 
8 | #CIFAR100 dataset path (python version)
9 | #CIFAR100_PATH = '/nfs/private/cifar100/cifar-100-python'
10 | 
11 | #mean and std of cifar100 dataset
12 | CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
13 | CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)
14 | 
15 | #CIFAR100_TEST_MEAN = (0.5088964127604166, 0.48739301317401956, 0.44194221124387256)
16 | #CIFAR100_TEST_STD = (0.2682515741720801, 0.2573637364478126, 0.2770957707973042)
17 | 
18 | #directory to save weights file
19 | CHECKPOINT_PATH = 'checkpoint'
20 | 
21 | #total training epochs
22 | EPOCH = 100
23 | MILESTONES = [20, 40, 60]
24 | 
25 | #initial learning rate
26 | #INIT_LR = 0.1
27 | 
28 | #time when we run the script
29 | TIME_NOW = datetime.now().isoformat()
30 | 
31 | #tensorboard log dir
32 | LOG_DIR = 'runs'
33 | 
34 | #save weights file per SAVE_EPOCH epoch
35 | SAVE_EPOCH = 10
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
--------------------------------------------------------------------------------
/result_combine.py:
--------------------------------------------------------------------------------
1 | # import pandas
2 | import os
3 | 
4 | file_path = '4@all_test_res.txt'
5 | result_path = 'result@all.txt'
6 | root_folder = '/home/gqwang/Spoof_Croped/CASIA_CeFA'
7 | f_dev = open(os.path.join(root_folder, file_path))
8 | dev_items = [line.rstrip('\n').split(' ') for line in open(os.path.join(root_folder, file_path)).readlines()]
9 | # dev_lines = f_dev.readlines()
10 | f_res = open(os.path.join(root_folder, result_path))
11 | 
12 | res_items = [line.rstrip('\n').split(' ') for line in open(os.path.join(root_folder, result_path))]
13 | 
14 | # 
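# What the loop below computes, as a minimal sketch (hypothetical helper name,
# not part of the original script): for every video prefix in the dev list it
# averages the per-frame spoof scores whose paths start with that prefix.
#
#   def average_scores(dev_items, res_items):
#       combined = {}
#       for dev in dev_items:
#           prefix = dev[0]
#           scores = [float(s) for path, s in res_items if path.startswith(prefix)]
#           combined[prefix] = sum(scores) / len(scores)  # mean frame score
#       return combined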
dev_item_total = 0. 15 | # dev_item_cnt = 0 16 | 17 | fp = open("./result_total_0227.txt", 'a+') 18 | 19 | 20 | for i in range(len(dev_items)): 21 | print(dev_items[i][0]) 22 | dev_item_total = 0. 23 | dev_item_cnt = 0 24 | for j in range(len(res_items)): 25 | if res_items[j][0].find(dev_items[i][0]) == 0: 26 | dev_item_cnt += 1 27 | dev_item_total += float(res_items[j][1]) 28 | dev_item_avg = float(dev_item_total / dev_item_cnt) 29 | newline = dev_items[i][0] + ' ' + "{:.8f}".format(dev_item_avg) + "\n" 30 | fp.write(newline) 31 | print(newline) 32 | 33 | # print('\n') 34 | 35 | fp.close() 36 | -------------------------------------------------------------------------------- /roc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy import interpolate 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from sklearn.metrics import roc_curve 8 | from sklearn.metrics import roc_auc_score 9 | 10 | 11 | def cal_metric(groundTruth, predicted, save): 12 | groundTruth = np.array(groundTruth) 13 | predicted = np.array(predicted) 14 | fpr, tpr, thresholds = roc_curve(groundTruth, predicted) 15 | y = (tpr) 16 | x = (fpr) 17 | z = tpr +fpr 18 | tpr = tpr.reshape((tpr.shape[0],1)) 19 | fpr = fpr.reshape((fpr.shape[0],1)) 20 | xnew = np.arange(0, 1, 0.0000001) 21 | func = interpolate.interp1d(x, y) 22 | 23 | ynew = func(xnew) 24 | 25 | znew = abs(xnew + ynew-1) 26 | 27 | eer=xnew[np.argmin(znew)] 28 | 29 | # print('EER',eer) 30 | 31 | FPR = {"TPR(1.%)": 0.01, "TPR(.5%)": 0.005} 32 | 33 | TPRs = {"TPR(1.%)": 0.01, "TPR(.5%)": 0.005} 34 | for i, (key, value) in enumerate(FPR.items()): 35 | index = np.argwhere(xnew == value) 36 | 37 | score = ynew[index] 38 | 39 | TPRs[key] = float(np.squeeze(score)) 40 | # print(key, score) 41 | if save: 42 | plt.plot(xnew, ynew) 43 | plt.savefig('./Roc.png') 44 | auc = roc_auc_score(groundTruth, predicted) 45 | return eer,TPRs, auc, {'x':xnew, 'y':ynew} -------------------------------------------------------------------------------- /dataset/ImgLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | def default_loader(path): 7 | return Image.open(path).convert('RGB') 8 | 9 | def YCbCr_loader(path): 10 | return Image.open(path).convert('YCbCr') 11 | 12 | class ImgLoader(data.Dataset): 13 | def __init__(self, root_folder, list_file, transform=None, loader1=default_loader, loader2=YCbCr_loader, stage='Train'): 14 | self.root_folder = root_folder 15 | self.loader1 = loader1 16 | self.loader2 = YCbCr_loader 17 | self.transform = transform 18 | 19 | items = [] 20 | 21 | if stage == 'Train': 22 | fp_items = [line.rstrip('\n').split(' ') for line in open(list_file)] 23 | for file_name, label in fp_items: 24 | if os.path.isfile(os.path.join(root_folder, file_name)): 25 | tup = (file_name, int(label)) 26 | items.append(tup) 27 | else: 28 | 29 | f = open(list_file) 30 | lines = f.readlines() 31 | 32 | for line in lines: 33 | line = line.strip().split(' ') 34 | items.append(line[0]) 35 | 36 | 37 | # fp_items = [line.rstrip('\n').split(' ') for line in open(list_file)] 38 | # for file_name in fp_items: 39 | # if os.path.isfile(os.path.join(root_folder, file_name)): 40 | # # tup = (file_name) 41 | # items.append(file_name) 42 | 43 | self.items = items 44 | print('\nStage: ' + stage) 45 | print('The number of samples: {}'.format(len(items))) 46 | 47 | def __getitem__(self, index): 48 
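# Format assumed by the parsing code above: in the 'Train' stage the list file
# holds one "relative/path label" pair per line, while any other stage expects
# bare paths. A hypothetical two-line training list:
#
#   real/000001/profile/0001.jpg 1
#   fake/000002/profile/0001.jpg 0
#
# so self.items holds (file_name, int(label)) tuples for training and plain
# path strings otherwise, which is what __getitem__ unpacks below.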
| image, label = self.items[index] 49 | img = self.loader1(os.path.join(self.root_folder, image)) 50 | # img2 = self.loader2(os.path.join(self.root_folder, image)) 51 | if self.transform is not None: 52 | img = self.transform(img) 53 | # img2 = self.transform(img2) 54 | 55 | return img, label 56 | 57 | def __len__(self): 58 | return len(self.items) 59 | -------------------------------------------------------------------------------- /dataset/RGBImgLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data as data 4 | from PIL import Image 5 | import torchvision.transforms as transforms 6 | 7 | 8 | def default_loader(path): 9 | return Image.open(path).convert('RGB') 10 | 11 | 12 | class RGBImgLoader(data.Dataset): 13 | def __init__(self, root_folder, list_file, transform=None, loader=default_loader, stage='Train'): 14 | self.root_folder = root_folder 15 | self.loader = loader 16 | self.transform = transform 17 | 18 | items = [] 19 | 20 | fp_items = [line.rstrip('\n').split(' ') for line in open(list_file)] 21 | for file_color, file_depth, file_ir, label in fp_items: 22 | if os.path.isfile(os.path.join(root_folder, file_color)): 23 | tup = (file_color, int(label)) 24 | items.append(tup) 25 | # if os.path.isfile(os.path.join(root_folder, file_depth)): 26 | # tup = (file_depth, int(label)) 27 | # if os.path.isfile(os.path.join(root_folder, file_ir)): 28 | # tup = (file_ir, int(label)) 29 | self.items = items 30 | print('\nStage: ' + stage) 31 | print('The number of samples: {}'.format(len(items))) 32 | 33 | def __getitem__(self, index): 34 | image, label = self.items[index] 35 | img = self.loader(os.path.join(self.root_folder, image)) 36 | if self.transform is not None: 37 | img = self.transform(img) 38 | 39 | return img, label 40 | 41 | def __len__(self): 42 | return len(self.items) 43 | 44 | 45 | # if __name__ == '__main__': 46 | # print torch.__version__ 47 | # src_dataset = ColorImgLoader(params.root_folder, os.path.join(params.root_folder, params.src_train_list), 48 | # transforms.Compose([ 49 | # transforms.Resize(256), 50 | # transforms.RandomCrop(248), 51 | # transforms.RandomHorizontalFlip(), 52 | # transforms.ToTensor() 53 | # ])) 54 | 55 | -------------------------------------------------------------------------------- /imbalanced_dataset_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import torchvision 4 | 5 | 6 | class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler): 7 | """Samples elements randomly from a given list of indices for imbalanced dataset 8 | Arguments: 9 | indices (list, optional): a list of indices 10 | num_samples (int, optional): number of samples to draw 11 | """ 12 | 13 | def __init__(self, dataset, indices=None, num_samples=None): 14 | 15 | # if indices is not provided, 16 | # all elements in the dataset will be considered 17 | self.indices = list(range(len(dataset))) \ 18 | if indices is None else indices 19 | 20 | # if num_samples is not provided, 21 | # draw `len(indices)` samples in each iteration 22 | self.num_samples = len(self.indices) \ 23 | if num_samples is None else num_samples 24 | 25 | # distribution of classes in the dataset 26 | label_to_count = {} 27 | for idx in self.indices: 28 | label = self._get_label(dataset, idx) 29 | if label in label_to_count: 30 | label_to_count[label] += 1 31 | else: 32 | label_to_count[label] = 1 33 | 34 | # weight for each 
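# Usage sketch for ImbalancedDatasetSampler (assuming `train_dataset` is a
# loader whose .items hold (path, label) tuples, as _get_label below expects):
#
#   from torch.utils.data import DataLoader
#   train_loader = DataLoader(train_dataset,
#                             sampler=ImbalancedDatasetSampler(train_dataset),
#                             batch_size=64)
#
# Each index is drawn with probability proportional to 1 / count(its class),
# so the minority class is oversampled toward balanced batches.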
sample 35 | weights = [1.0 / label_to_count[self._get_label(dataset, idx)] 36 | for idx in self.indices] 37 | self.weights = torch.DoubleTensor(weights) 38 | 39 | def _get_label(self, dataset, idx): 40 | dataset_type = type(dataset) 41 | return dataset.items[idx][1] 42 | # if dataset_type is torchvision.datasets.MNIST: 43 | # return dataset.train_labels[idx].item() 44 | # elif dataset_type is torchvision.datasets.ImageFolder: 45 | # return dataset.imgs[idx][1] 46 | # else: 47 | # raise NotImplementedError 48 | 49 | def __iter__(self): 50 | return (self.indices[i] for i in torch.multinomial( 51 | self.weights, self.num_samples, replacement=True)) 52 | 53 | def __len__(self): 54 | return self.num_samples 55 | 56 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | """ train and test dataset 2 | 3 | author baiyu 4 | """ 5 | import os 6 | import sys 7 | import pickle 8 | 9 | from skimage import io 10 | import matplotlib.pyplot as plt 11 | import numpy 12 | import torch 13 | from torch.utils.data import Dataset 14 | 15 | class CIFAR100Train(Dataset): 16 | """cifar100 test dataset, derived from 17 | torch.utils.data.DataSet 18 | """ 19 | 20 | def __init__(self, path, transform=None): 21 | #if transform is given, we transoform data using 22 | with open(os.path.join(path, 'train'), 'rb') as cifar100: 23 | self.data = pickle.load(cifar100, encoding='bytes') 24 | self.transform = transform 25 | 26 | def __len__(self): 27 | return len(self.data['fine_labels'.encode()]) 28 | 29 | def __getitem__(self, index): 30 | label = self.data['fine_labels'.encode()][index] 31 | r = self.data['data'.encode()][index, :1024].reshape(32, 32) 32 | g = self.data['data'.encode()][index, 1024:2048].reshape(32, 32) 33 | b = self.data['data'.encode()][index, 2048:].reshape(32, 32) 34 | image = numpy.dstack((r, g, b)) 35 | 36 | if self.transform: 37 | image = self.transform(image) 38 | return label, image 39 | 40 | class CIFAR100Test(Dataset): 41 | """cifar100 test dataset, derived from 42 | torch.utils.data.DataSet 43 | """ 44 | 45 | def __init__(self, path, transform=None): 46 | with open(os.path.join(path, 'test'), 'rb') as cifar100: 47 | self.data = pickle.load(cifar100, encoding='bytes') 48 | self.transform = transform 49 | 50 | def __len__(self): 51 | return len(self.data['data'.encode()]) 52 | 53 | def __getitem__(self, index): 54 | label = self.data['fine_labels'.encode()][index] 55 | r = self.data['data'.encode()][index, :1024].reshape(32, 32) 56 | g = self.data['data'.encode()][index, 1024:2048].reshape(32, 32) 57 | b = self.data['data'.encode()][index, 2048:].reshape(32, 32) 58 | image = numpy.dstack((r, g, b)) 59 | 60 | if self.transform: 61 | image = self.transform(image) 62 | return label, image 63 | 64 | -------------------------------------------------------------------------------- /dataset/TestImgLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data as data 4 | from PIL import Image 5 | 6 | 7 | def default_loader(path): 8 | return Image.open(path).convert('RGB') 9 | 10 | 11 | def YCbCr_loader(path): 12 | return Image.open(path).convert('YCbCr') 13 | 14 | 15 | class ImgLoader(data.Dataset): 16 | def __init__(self, root_folder, list_file, transform=None, loader1=default_loader, loader2=YCbCr_loader, 17 | stage='Train'): 18 | self.root_folder = root_folder 19 | self.loader1 = loader1 20 
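# How the CIFAR100 datasets in dataset.py above decode one record: each pickled
# row is a flat 3072-value vector laid out as 1024 red, then 1024 green, then
# 1024 blue entries. A worked sketch (stand-in data, not the real pickle):
#
#   import numpy as np
#   row = np.zeros(3072, dtype=np.uint8)   # plays the role of data[index]
#   r = row[:1024].reshape(32, 32)
#   g = row[1024:2048].reshape(32, 32)
#   b = row[2048:].reshape(32, 32)
#   image = np.dstack((r, g, b))           # HxWxC array of shape (32, 32, 3)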
| self.loader2 = YCbCr_loader 21 | self.transform = transform 22 | 23 | items = [] 24 | 25 | if stage == 'Train': 26 | fp_items = [line.rstrip('\n').split(' ') for line in open(list_file)] 27 | for file_name, label in fp_items: 28 | if os.path.isfile(os.path.join(root_folder, file_name)): 29 | tup = (file_name, int(label)) 30 | items.append(tup) 31 | elif stage == 'Test': 32 | 33 | f = open(list_file) 34 | lines = f.readlines() 35 | 36 | for line in lines: 37 | line = line.strip().split(' ') 38 | test_dir = os.listdir(os.path.join(root_folder,line[0],'profile')) 39 | for i in range(len(test_dir)): 40 | items.append(os.path.join(line[0], 'profile', test_dir[i])) 41 | else: 42 | 43 | f = open(list_file) 44 | lines = f.readlines() 45 | 46 | for line in lines: 47 | line = line.strip().split(' ') 48 | items.append(line[0]) 49 | 50 | # fp_items = [line.rstrip('\n').split(' ') for line in open(list_file)] 51 | # for file_name in fp_items: 52 | # if os.path.isfile(os.path.join(root_folder, file_name)): 53 | # # tup = (file_name) 54 | # items.append(file_name) 55 | 56 | self.items = items 57 | print('\nStage: ' + stage) 58 | print('The number of samples: {}'.format(len(items))) 59 | 60 | def __getitem__(self, index): 61 | image = self.items[index] 62 | img = self.loader1(os.path.join(self.root_folder, image)) 63 | # img2 = self.loader2(os.path.join(self.root_folder, image)) 64 | if self.transform is not None: 65 | img = self.transform(img) 66 | # img2 = self.transform(img2) 67 | 68 | return img, image 69 | 70 | def __len__(self): 71 | return len(self.items) 72 | -------------------------------------------------------------------------------- /models/vgg.py: -------------------------------------------------------------------------------- 1 | """vgg in pytorch 2 | [1] Karen Simonyan, Andrew Zisserman 3 | Very Deep Convolutional Networks for Large-Scale Image Recognition. 
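# Reading the cfg tables below: integers are conv output channels and 'M' is a
# 2x2 max-pool, so make_layers(cfg['A']) builds the 8-conv VGG-11 stack. With
# the 248x248 crops used elsewhere in this repo, five pools leave
# floor(248 / 2**5) = 7, which is why the classifier expects 512*7*7 features:
#
#   net = vgg11_bn()                    # num_class defaults to 2 (live/spoof)
#   out = net(torch.randn(2, 3, 248, 248))
#   assert out.shape == (2, 2)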
4 | https://arxiv.org/abs/1409.1556v6 5 | """ 6 | '''VGG11/13/16/19 in Pytorch.''' 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | cfg = { 12 | 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 13 | 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 14 | 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 15 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'] 16 | } 17 | 18 | 19 | class VGG(nn.Module): 20 | 21 | def __init__(self, features, num_class=2): 22 | super().__init__() 23 | self.features = features 24 | 25 | self.classifier = nn.Sequential( 26 | nn.Linear(512*7*7, 4096), 27 | nn.ReLU(inplace=True), 28 | nn.Dropout(), 29 | nn.Linear(4096, 4096), 30 | nn.ReLU(inplace=True), 31 | nn.Dropout(), 32 | nn.Linear(4096, num_class) 33 | ) 34 | 35 | def forward(self, x): 36 | output = self.features(x) 37 | output = output.view(output.size()[0], -1) 38 | output = self.classifier(output) 39 | 40 | return output 41 | 42 | 43 | def make_layers(cfg, batch_norm=False): 44 | layers = [] 45 | 46 | input_channel = 3 47 | for l in cfg: 48 | if l == 'M': 49 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 50 | continue 51 | 52 | layers += [nn.Conv2d(input_channel, l, kernel_size=3, padding=1)] 53 | 54 | if batch_norm: 55 | layers += [nn.BatchNorm2d(l)] 56 | 57 | layers += [nn.ReLU(inplace=True)] 58 | input_channel = l 59 | 60 | return nn.Sequential(*layers) 61 | 62 | 63 | def vgg11_bn(): 64 | return VGG(make_layers(cfg['A'], batch_norm=True)) 65 | 66 | 67 | def vgg13_bn(): 68 | return VGG(make_layers(cfg['B'], batch_norm=True)) 69 | 70 | 71 | def vgg16_bn(): 72 | return VGG(make_layers(cfg['D'], batch_norm=True)) 73 | 74 | 75 | def vgg19_bn(): 76 | return VGG(make_layers(cfg['E'], batch_norm=True)) 77 | 78 | if __name__ == '__main__': 79 | # 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19' 80 | # Example 81 | net11 = vgg11_bn() 82 | 83 | x = torch.autograd.Variable(torch.Tensor(2, 3, 248, 248)) 84 | # model = resnet80(num_classes=41857) 85 | print(net11) 86 | out = net11(x) 87 | print(out.shape) -------------------------------------------------------------------------------- /models/squeezenet.py: -------------------------------------------------------------------------------- 1 | """squeezenet in pytorch 2 | 3 | 4 | 5 | [1] Song Han, Jeff Pool, John Tran, William J. 
Dally 6 | 7 | squeezenet: Learning both Weights and Connections for Efficient Neural Networks 8 | https://arxiv.org/abs/1506.02626 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class Fire(nn.Module): 16 | 17 | def __init__(self, in_channel, out_channel, squzee_channel): 18 | 19 | super().__init__() 20 | self.squeeze = nn.Sequential( 21 | nn.Conv2d(in_channel, squzee_channel, 1), 22 | nn.BatchNorm2d(squzee_channel), 23 | nn.ReLU(inplace=True) 24 | ) 25 | 26 | self.expand_1x1 = nn.Sequential( 27 | nn.Conv2d(squzee_channel, int(out_channel / 2), 1), 28 | nn.BatchNorm2d(int(out_channel / 2)), 29 | nn.ReLU(inplace=True) 30 | ) 31 | 32 | self.expand_3x3 = nn.Sequential( 33 | nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), 34 | nn.BatchNorm2d(int(out_channel / 2)), 35 | nn.ReLU(inplace=True) 36 | ) 37 | 38 | def forward(self, x): 39 | 40 | x = self.squeeze(x) 41 | x = torch.cat([ 42 | self.expand_1x1(x), 43 | self.expand_3x3(x) 44 | ], 1) 45 | 46 | return x 47 | 48 | class SqueezeNet(nn.Module): 49 | 50 | """mobile net with simple bypass""" 51 | def __init__(self, class_num=100): 52 | 53 | super().__init__() 54 | self.stem = nn.Sequential( 55 | nn.Conv2d(3, 96, 3, padding=1), 56 | nn.BatchNorm2d(96), 57 | nn.ReLU(inplace=True), 58 | nn.MaxPool2d(2, 2) 59 | ) 60 | 61 | self.fire2 = Fire(96, 128, 16) 62 | self.fire3 = Fire(128, 128, 16) 63 | self.fire4 = Fire(128, 256, 32) 64 | self.fire5 = Fire(256, 256, 32) 65 | self.fire6 = Fire(256, 384, 48) 66 | self.fire7 = Fire(384, 384, 48) 67 | self.fire8 = Fire(384, 512, 64) 68 | self.fire9 = Fire(512, 512, 64) 69 | 70 | self.conv10 = nn.Conv2d(512, class_num, 1) 71 | self.avg = nn.AdaptiveAvgPool2d(1) 72 | self.maxpool = nn.MaxPool2d(2, 2) 73 | 74 | def forward(self, x): 75 | x = self.stem(x) 76 | 77 | f2 = self.fire2(x) 78 | f3 = self.fire3(f2) + f2 79 | f4 = self.fire4(f3) 80 | f4 = self.maxpool(f4) 81 | 82 | f5 = self.fire5(f4) + f4 83 | f6 = self.fire6(f5) 84 | f7 = self.fire7(f6) + f6 85 | f8 = self.fire8(f7) 86 | f8 = self.maxpool(f8) 87 | 88 | f9 = self.fire9(f8) 89 | c10 = self.conv10(f9) 90 | 91 | x = self.avg(c10) 92 | x = x.view(x.size(0), -1) 93 | 94 | return x 95 | 96 | def squeezenet(class_num=100): 97 | return SqueezeNet(class_num=class_num) 98 | -------------------------------------------------------------------------------- /models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | """mobilenetv2 in pytorch 2 | 3 | 4 | 5 | [1] Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen 6 | 7 | MobileNetV2: Inverted Residuals and Linear Bottlenecks 8 | https://arxiv.org/abs/1801.04381 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | class LinearBottleNeck(nn.Module): 17 | 18 | def __init__(self, in_channels, out_channels, stride, t=6, class_num=100): 19 | super().__init__() 20 | 21 | self.residual = nn.Sequential( 22 | nn.Conv2d(in_channels, in_channels * t, 1), 23 | nn.BatchNorm2d(in_channels * t), 24 | nn.ReLU6(inplace=True), 25 | 26 | nn.Conv2d(in_channels * t, in_channels * t, 3, stride=stride, padding=1, groups=in_channels * t), 27 | nn.BatchNorm2d(in_channels * t), 28 | nn.ReLU6(inplace=True), 29 | 30 | nn.Conv2d(in_channels * t, out_channels, 1), 31 | nn.BatchNorm2d(out_channels) 32 | ) 33 | 34 | self.stride = stride 35 | self.in_channels = in_channels 36 | self.out_channels = out_channels 37 | 38 | def forward(self, x): 39 | 40 | residual = self.residual(x) 41 | 
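# The branch below adds MobileNetV2's inverted-residual skip connection: the
# residual path expands by factor t (1x1), filters depthwise (3x3,
# groups=channels), then projects back down (1x1), and the input is added
# only when the block keeps both resolution and width. Shape sketch
# (hypothetical sizes):
#
#   block = LinearBottleNeck(32, 32, stride=1, t=6)
#   block(torch.randn(1, 32, 56, 56)).shape    # (1, 32, 56, 56), skip added
#   block = LinearBottleNeck(32, 64, stride=2, t=6)
#   block(torch.randn(1, 32, 56, 56)).shape    # (1, 64, 28, 28), no skip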
42 | if self.stride == 1 and self.in_channels == self.out_channels: 43 | residual += x 44 | 45 | return residual 46 | 47 | class MobileNetV2(nn.Module): 48 | 49 | def __init__(self, class_num=100): 50 | super().__init__() 51 | 52 | self.pre = nn.Sequential( 53 | nn.Conv2d(3, 32, 1, padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ReLU6(inplace=True) 56 | ) 57 | 58 | self.stage1 = LinearBottleNeck(32, 16, 1, 1) 59 | self.stage2 = self._make_stage(2, 16, 24, 2, 6) 60 | self.stage3 = self._make_stage(3, 24, 32, 2, 6) 61 | self.stage4 = self._make_stage(4, 32, 64, 2, 6) 62 | self.stage5 = self._make_stage(3, 64, 96, 1, 6) 63 | self.stage6 = self._make_stage(3, 96, 160, 1, 6) 64 | self.stage7 = LinearBottleNeck(160, 320, 1, 6) 65 | 66 | self.conv1 = nn.Sequential( 67 | nn.Conv2d(320, 1280, 1), 68 | nn.BatchNorm2d(1280), 69 | nn.ReLU6(inplace=True) 70 | ) 71 | 72 | self.conv2 = nn.Conv2d(1280, class_num, 1) 73 | 74 | def forward(self, x): 75 | x = self.pre(x) 76 | x = self.stage1(x) 77 | x = self.stage2(x) 78 | x = self.stage3(x) 79 | x = self.stage4(x) 80 | x = self.stage5(x) 81 | x = self.stage6(x) 82 | x = self.stage7(x) 83 | x = self.conv1(x) 84 | x = F.adaptive_avg_pool2d(x, 1) 85 | x = self.conv2(x) 86 | x = x.view(x.size(0), -1) 87 | 88 | return x 89 | 90 | def _make_stage(self, repeat, in_channels, out_channels, stride, t): 91 | 92 | layers = [] 93 | layers.append(LinearBottleNeck(in_channels, out_channels, stride, t)) 94 | 95 | while repeat - 1: 96 | layers.append(LinearBottleNeck(out_channels, out_channels, 1, t)) 97 | repeat -= 1 98 | 99 | return nn.Sequential(*layers) 100 | 101 | def mobilenetv2(): 102 | return MobileNetV2() -------------------------------------------------------------------------------- /test_cefa.py: -------------------------------------------------------------------------------- 1 | #test.py 2 | #!/usr/bin/env python3 3 | 4 | """ test neuron network performace 5 | print top1 and top5 err on test dataset 6 | of a model 7 | 8 | author baiyu 9 | """ 10 | 11 | import argparse 12 | #from dataset import * 13 | 14 | #from skimage import io 15 | from matplotlib import pyplot as plt 16 | 17 | import torch 18 | import torchvision.transforms as transforms 19 | from torch.utils.data import DataLoader 20 | from torch.autograd import Variable 21 | # from dataset.ImgLoader import ImgLoader 22 | from dataset.TestImgLoader import ImgLoader 23 | import os 24 | import roc 25 | import numpy as np 26 | 27 | from conf import settings 28 | from utils import get_network, get_test_dataloader 29 | 30 | if __name__ == '__main__': 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('-net', type=str, default='resnet18', help='net type') 34 | parser.add_argument('-weights', type=str, default='./checkpoint/resnet18/2020-02-26T23:06:01.904184/resnet18-42-best.pth', help='the weights file you want to test') 35 | parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not') 36 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 37 | parser.add_argument('-b', type=int, default=1, help='batch size for dataloader') 38 | parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset') 39 | parser.add_argument('-test_list', type=str, default='4@all_test_res.txt', help='initial learning rate') 40 | parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped/CASIA_CeFA', help='initial learning rate') 41 | args = parser.parse_args() 42 | 43 | net = get_network(args) 44 | 45 | test_dataset = 
ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list), 46 | transforms.Compose([ 47 | transforms.Resize(248), 48 | # transforms.RandomAffine(10), 49 | transforms.CenterCrop(248), 50 | transforms.RandomRotation(15), 51 | transforms.ToTensor() 52 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 53 | 54 | ]), stage='Test') 55 | test_loader = torch.utils.data.DataLoader(test_dataset, 56 | batch_size=args.b, 57 | num_workers=2, 58 | pin_memory=True) 59 | 60 | net.load_state_dict(torch.load(args.weights), args.gpu) 61 | print(net) 62 | net.eval() 63 | 64 | correct = 0.0 65 | total = 0 66 | 67 | result_list = [] 68 | label_list = [] 69 | 70 | fp = open("./result@all.txt", 'a+') 71 | 72 | for n_iter, (image, image_name) in enumerate(test_loader): 73 | print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(test_loader))) 74 | 75 | image = Variable(image).cuda() 76 | outputs = net(image) 77 | 78 | probability = torch.nn.functional.softmax(outputs, dim=1)[:, 1].detach().tolist() 79 | probability_value = np.array(probability) 80 | 81 | newline = image_name[0] + ' ' + "{:.8f}".format(probability_value[0]) + "\n" 82 | fp.write(newline) 83 | print(newline) 84 | 85 | fp.close() 86 | 87 | print() 88 | -------------------------------------------------------------------------------- /class_balanced_loss.py: -------------------------------------------------------------------------------- 1 | """Pytorch implementation of Class-Balanced-Loss 2 | Reference: "Class-Balanced Loss Based on Effective Number of Samples" 3 | Authors: Yin Cui and 4 | Menglin Jia and 5 | Tsung Yi Lin and 6 | Yang Song and 7 | Serge J. Belongie 8 | https://arxiv.org/abs/1901.05555, CVPR'19. 9 | """ 10 | 11 | 12 | import numpy as np 13 | import torch 14 | import torch.nn.functional as F 15 | 16 | 17 | 18 | def focal_loss(labels, logits, alpha, gamma): 19 | """Compute the focal loss between `logits` and the ground truth `labels`. 20 | 21 | Focal loss = -alpha_t * (1-pt)^gamma * log(pt) 22 | where pt is the probability of being classified to the true class. 23 | pt = p (if true class), otherwise pt = 1 - p. p = sigmoid(logit). 24 | 25 | Args: 26 | labels: A float tensor of size [batch, num_classes]. 27 | logits: A float tensor of size [batch, num_classes]. 28 | alpha: A float tensor of size [batch_size] 29 | specifying per-example weight for balanced cross entropy. 30 | gamma: A float scalar modulating loss from hard and easy examples. 31 | 32 | Returns: 33 | focal_loss: A float32 scalar representing normalized total loss. 34 | """ 35 | BCLoss = F.binary_cross_entropy_with_logits(input = logits, target = labels,reduction = "none") 36 | 37 | if gamma == 0.0: 38 | modulator = 1.0 39 | else: 40 | modulator = torch.exp(-gamma * labels * logits - gamma * torch.log(1 + 41 | torch.exp(-1.0 * logits))) 42 | 43 | loss = modulator * BCLoss 44 | 45 | weighted_loss = alpha * loss 46 | focal_loss = torch.sum(weighted_loss) 47 | 48 | focal_loss /= torch.sum(labels) 49 | return focal_loss 50 | 51 | 52 | 53 | def CB_loss(labels, logits, samples_per_cls, no_of_classes, loss_type, beta, gamma): 54 | """Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 55 | 56 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 57 | where Loss is one of the standard losses used for Neural Networks. 58 | 59 | Args: 60 | labels: A int tensor of size [batch]. 61 | logits: A float tensor of size [batch, no_of_classes]. 62 | samples_per_cls: A python list of size [no_of_classes]. 
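# Worked example of the class-balanced weights computed below, using the
# paper's effective number E_n = (1 - beta**n) / (1 - beta). With beta = 0.9999
# and hypothetical counts samples_per_cls = [4975, 25]:
#
#   effective_num = 1.0 - np.power(0.9999, [4975, 25])   # ~[0.392, 0.0025]
#   weights = (1.0 - 0.9999) / effective_num             # rare class ~157x larger
#   weights = weights / np.sum(weights) * 2              # rescaled to sum to C
#
# so each minority-class sample contributes far more to the loss.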
63 | no_of_classes: total number of classes. int 64 | loss_type: string. One of "sigmoid", "focal", "softmax". 65 | beta: float. Hyperparameter for Class balanced loss. 66 | gamma: float. Hyperparameter for Focal loss. 67 | 68 | Returns: 69 | cb_loss: A float tensor representing class balanced loss 70 | """ 71 | effective_num = 1.0 - np.power(beta, samples_per_cls) 72 | weights = (1.0 - beta) / np.array(effective_num) 73 | weights = weights / np.sum(weights) * no_of_classes 74 | 75 | labels_one_hot = F.one_hot(labels, no_of_classes).float() 76 | 77 | weights = torch.tensor(weights).float().cuda() 78 | weights = weights.unsqueeze(0) 79 | weights = weights.repeat(labels_one_hot.shape[0],1) * labels_one_hot 80 | weights = weights.sum(1) 81 | weights = weights.unsqueeze(1) 82 | weights = weights.repeat(1,no_of_classes) 83 | 84 | if loss_type == "focal": 85 | cb_loss = focal_loss(labels_one_hot, logits, weights, gamma) 86 | elif loss_type == "sigmoid": 87 | cb_loss = F.binary_cross_entropy_with_logits(input = logits,target = labels_one_hot, weights = weights) 88 | elif loss_type == "softmax": 89 | pred = logits.softmax(dim = 1) 90 | cb_loss = F.binary_cross_entropy(input = pred, target = labels_one_hot, weight = weights) 91 | return cb_loss 92 | 93 | 94 | 95 | if __name__ == '__main__': 96 | no_of_classes = 5 97 | logits = torch.rand(10,no_of_classes).float() 98 | labels = torch.randint(0,no_of_classes, size = (10,)) 99 | beta = 0.9999 100 | gamma = 2.0 101 | samples_per_cls = [2,3,1,2,2] 102 | loss_type = "focal" 103 | cb_loss = CB_loss(labels, logits, samples_per_cls, no_of_classes,loss_type, beta, gamma) 104 | print(cb_loss) 105 | -------------------------------------------------------------------------------- /lr_finder.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import glob 4 | import os 5 | 6 | import cv2 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from torch.utils.data import DataLoader 11 | import numpy as np 12 | 13 | #from PIL import Image 14 | #import transforms 15 | from torchvision import transforms 16 | #from tensorboardX import SummaryWriter 17 | from conf import settings 18 | from utils import * 19 | 20 | import matplotlib 21 | matplotlib.use('Agg') 22 | import matplotlib.pyplot as plt 23 | 24 | 25 | from torch.optim.lr_scheduler import _LRScheduler 26 | 27 | 28 | class FindLR(_LRScheduler): 29 | """exponentially increasing learning rate 30 | 31 | Args: 32 | optimizer: optimzier(e.g. 
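# The schedule implemented by get_lr below sweeps the learning rate
# exponentially from base_lr up to max_lr over num_iter steps:
#
#   lr_t = base_lr * (max_lr / base_lr) ** (t / num_iter)
#
# e.g. with base_lr = 1e-7, max_lr = 10, num_iter = 100 (the script defaults):
# lr_0 = 1e-7, lr_50 = 1e-7 * (1e8 ** 0.5) = 1e-3, lr_100 = 10. The resulting
# loss-vs-lr plot is then inspected for a good initial learning rate.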
SGD) 33 | num_iter: totoal_iters 34 | max_lr: maximum learning rate 35 | """ 36 | def __init__(self, optimizer, max_lr=10, num_iter=100, last_epoch=-1): 37 | 38 | self.total_iters = num_iter 39 | self.max_lr = max_lr 40 | super().__init__(optimizer, last_epoch) 41 | 42 | def get_lr(self): 43 | 44 | return [base_lr * (self.max_lr / base_lr) ** (self.last_epoch / (self.total_iters + 1e-32)) for base_lr in self.base_lrs] 45 | 46 | if __name__ == '__main__': 47 | parser = argparse.ArgumentParser() 48 | parser.add_argument('-net', type=str, required=True, help='net type') 49 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 50 | parser.add_argument('-b', type=int, default=64, help='batch size for dataloader') 51 | parser.add_argument('-base_lr', type=float, default=1e-7, help='min learning rate') 52 | parser.add_argument('-max_lr', type=float, default=10, help='max learning rate') 53 | parser.add_argument('-num_iter', type=int, default=100, help='num of iteration') 54 | parser.add_argument('-gpus', nargs='+', type=int, default=0, help='gpu device') 55 | args = parser.parse_args() 56 | 57 | cifar100_training_loader = get_training_dataloader( 58 | settings.CIFAR100_TRAIN_MEAN, 59 | settings.CIFAR100_TRAIN_STD, 60 | num_workers=args.w, 61 | batch_size=args.b, 62 | ) 63 | 64 | net = get_network(args) 65 | 66 | loss_function = nn.CrossEntropyLoss() 67 | optimizer = optim.SGD(net.parameters(), lr=args.base_lr, momentum=0.9, weight_decay=1e-4, nesterov=True) 68 | 69 | #set up warmup phase learning rate scheduler 70 | lr_scheduler = FindLR(optimizer, max_lr=args.max_lr, num_iter=args.num_iter) 71 | epoches = int(args.num_iter / len(cifar100_training_loader)) + 1 72 | 73 | n = 0 74 | 75 | learning_rate = [] 76 | losses = [] 77 | for epoch in range(epoches): 78 | 79 | #training procedure 80 | net.train() 81 | 82 | for batch_index, (images, labels) in enumerate(cifar100_training_loader): 83 | if n > args.num_iter: 84 | break 85 | 86 | lr_scheduler.step() 87 | 88 | images = images.cuda() 89 | labels = labels.cuda() 90 | 91 | optimizer.zero_grad() 92 | predicts = net(images) 93 | loss = loss_function(predicts, labels) 94 | if torch.isnan(loss).any(): 95 | n += 1e8 96 | break 97 | loss.backward() 98 | optimizer.step() 99 | 100 | print('Iterations: {iter_num} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.8f}'.format( 101 | loss.item(), 102 | optimizer.param_groups[0]['lr'], 103 | iter_num=n, 104 | trained_samples=batch_index * args.b + len(images), 105 | total_samples=len(cifar100_training_loader.dataset), 106 | )) 107 | 108 | learning_rate.append(optimizer.param_groups[0]['lr']) 109 | losses.append(loss.item()) 110 | n += 1 111 | 112 | learning_rate = learning_rate[10:-5] 113 | losses = losses[10:-5] 114 | 115 | fig, ax = plt.subplots(1,1) 116 | ax.plot(learning_rate, losses) 117 | ax.set_xlabel('learning rate') 118 | ax.set_ylabel('losses') 119 | ax.set_xscale('log') 120 | ax.xaxis.set_major_formatter(plt.FormatStrFormatter('%.0e')) 121 | 122 | fig.savefig('result.jpg') 123 | -------------------------------------------------------------------------------- /models/preactresnet.py: -------------------------------------------------------------------------------- 1 | """preactresnet in pytorch 2 | 3 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 4 | 5 | Identity Mappings in Deep Residual Networks 6 | https://arxiv.org/abs/1603.05027 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | class 
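# The residual blocks in this file use the pre-activation ordering from the
# paper cited above: BatchNorm -> ReLU -> Conv on the residual path, with the
# raw input carried by the shortcut, so `res + shortcut` stays an identity
# mapping. Shape sketch (hypothetical input):
#
#   block = PreActBasic(64, 128, stride=2)
#   block(torch.randn(1, 64, 32, 32)).shape   # torch.Size([1, 128, 16, 16])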
PreActBasic(nn.Module): 14 | 15 | expansion = 1 16 | def __init__(self, in_channels, out_channels, stride): 17 | super().__init__() 18 | self.residual = nn.Sequential( 19 | nn.BatchNorm2d(in_channels), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1), 22 | nn.BatchNorm2d(out_channels), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(out_channels, out_channels * PreActBasic.expansion, kernel_size=3, padding=1) 25 | ) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride != 1 or in_channels != out_channels * PreActBasic.expansion: 29 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBasic.expansion, 1, stride=stride) 30 | 31 | def forward(self, x): 32 | 33 | res = self.residual(x) 34 | shortcut = self.shortcut(x) 35 | 36 | return res + shortcut 37 | 38 | 39 | class PreActBottleNeck(nn.Module): 40 | 41 | expansion = 4 42 | def __init__(self, in_channels, out_channels, stride): 43 | super().__init__() 44 | 45 | self.residual = nn.Sequential( 46 | nn.BatchNorm2d(in_channels), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(in_channels, out_channels, 1, stride=stride), 49 | 50 | nn.BatchNorm2d(out_channels), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(out_channels, out_channels, 3, padding=1), 53 | 54 | nn.BatchNorm2d(out_channels), 55 | nn.ReLU(inplace=True), 56 | nn.Conv2d(out_channels, out_channels * PreActBottleNeck.expansion, 1) 57 | ) 58 | 59 | self.shortcut = nn.Sequential() 60 | 61 | if stride != 1 or in_channels != out_channels * PreActBottleNeck.expansion: 62 | self.shortcut = nn.Conv2d(in_channels, out_channels * PreActBottleNeck.expansion, 1, stride=stride) 63 | 64 | def forward(self, x): 65 | 66 | res = self.residual(x) 67 | shortcut = self.shortcut(x) 68 | 69 | return res + shortcut 70 | 71 | class PreActResNet(nn.Module): 72 | 73 | def __init__(self, block, num_block, class_num=100): 74 | super().__init__() 75 | self.input_channels = 64 76 | 77 | self.pre = nn.Sequential( 78 | nn.Conv2d(3, 64, 3, padding=1), 79 | nn.BatchNorm2d(64), 80 | nn.ReLU(inplace=True) 81 | ) 82 | 83 | self.stage1 = self._make_layers(block, num_block[0], 64, 1) 84 | self.stage2 = self._make_layers(block, num_block[1], 128, 2) 85 | self.stage3 = self._make_layers(block, num_block[2], 256, 2) 86 | self.stage4 = self._make_layers(block, num_block[3], 512, 2) 87 | 88 | self.linear = nn.Linear(self.input_channels, class_num) 89 | 90 | def _make_layers(self, block, block_num, out_channels, stride): 91 | layers = [] 92 | 93 | layers.append(block(self.input_channels, out_channels, stride)) 94 | self.input_channels = out_channels * block.expansion 95 | 96 | while block_num - 1: 97 | layers.append(block(self.input_channels, out_channels, 1)) 98 | self.input_channels = out_channels * block.expansion 99 | block_num -= 1 100 | 101 | return nn.Sequential(*layers) 102 | 103 | def forward(self, x): 104 | x = self.pre(x) 105 | 106 | x = self.stage1(x) 107 | x = self.stage2(x) 108 | x = self.stage3(x) 109 | x = self.stage4(x) 110 | 111 | x = F.adaptive_avg_pool2d(x, 1) 112 | x = x.view(x.size(0), -1) 113 | x = self.linear(x) 114 | 115 | return x 116 | 117 | def preactresnet18(): 118 | return PreActResNet(PreActBasic, [2, 2, 2, 2]) 119 | 120 | def preactresnet34(): 121 | return PreActResNet(PreActBasic, [3, 4, 6, 3]) 122 | 123 | def preactresnet50(): 124 | return PreActResNet(PreActBottleNeck, [3, 4, 6, 3]) 125 | 126 | def preactresnet101(): 127 | return PreActResNet(PreActBottleNeck, [3, 4, 23, 3]) 128 | 129 | def preactresnet152(): 130 | return 
PreActResNet(PreActBottleNeck, [3, 8, 36, 3]) 131 | 132 | -------------------------------------------------------------------------------- /models/resnext.py: -------------------------------------------------------------------------------- 1 | """resnext in pytorch 2 | 3 | 4 | 5 | [1] Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, Kaiming He. 6 | 7 | Aggregated Residual Transformations for Deep Neural Networks 8 | https://arxiv.org/abs/1611.05431 9 | """ 10 | 11 | import math 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | #only implements ResNext bottleneck c 17 | 18 | 19 | #"""This strategy exposes a new dimension, which we call “cardinality” 20 | #(the size of the set of transformations), as an essential factor 21 | #in addition to the dimensions of depth and width.""" 22 | CARDINALITY = 32 23 | DEPTH = 4 24 | BASEWIDTH = 64 25 | 26 | #"""The grouped convolutional layer in Fig. 3(c) performs 32 groups 27 | #of convolutions whose input and output channels are 4-dimensional. 28 | #The grouped convolutional layer concatenates them as the outputs 29 | #of the layer.""" 30 | 31 | class ResNextBottleNeckC(nn.Module): 32 | 33 | def __init__(self, in_channels, out_channels, stride): 34 | super().__init__() 35 | 36 | C = CARDINALITY #How many groups a feature map was splitted into 37 | 38 | #"""We note that the input/output width of the template is fixed as 39 | #256-d (Fig. 3), We note that the input/output width of the template 40 | #is fixed as 256-d (Fig. 3), and all widths are dou- bled each time 41 | #when the feature map is subsampled (see Table 1).""" 42 | D = int(DEPTH * out_channels / BASEWIDTH) #number of channels per group 43 | self.split_transforms = nn.Sequential( 44 | nn.Conv2d(in_channels, C * D, kernel_size=1, groups=C, bias=False), 45 | nn.BatchNorm2d(C * D), 46 | nn.ReLU(inplace=True), 47 | nn.Conv2d(C * D, C * D, kernel_size=3, stride=stride, groups=C, padding=1, bias=False), 48 | nn.BatchNorm2d(C * D), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(C * D, out_channels * 4, kernel_size=1, bias=False), 51 | nn.BatchNorm2d(out_channels * 4), 52 | ) 53 | 54 | self.shortcut = nn.Sequential() 55 | 56 | if stride != 1 or in_channels != out_channels * 4: 57 | self.shortcut = nn.Sequential( 58 | nn.Conv2d(in_channels, out_channels * 4, stride=stride, kernel_size=1, bias=False), 59 | nn.BatchNorm2d(out_channels * 4) 60 | ) 61 | 62 | def forward(self, x): 63 | return F.relu(self.split_transforms(x) + self.shortcut(x)) 64 | 65 | class ResNext(nn.Module): 66 | 67 | def __init__(self, block, num_blocks, class_names=100): 68 | super().__init__() 69 | self.in_channels = 64 70 | 71 | self.conv1 = nn.Sequential( 72 | nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False), 73 | nn.BatchNorm2d(64), 74 | nn.ReLU(inplace=True) 75 | ) 76 | 77 | self.conv2 = self._make_layer(block, num_blocks[0], 64, 1) 78 | self.conv3 = self._make_layer(block, num_blocks[1], 128, 2) 79 | self.conv4 = self._make_layer(block, num_blocks[2], 256, 2) 80 | self.conv5 = self._make_layer(block, num_blocks[3], 512, 2) 81 | self.avg = nn.AdaptiveAvgPool2d((1, 1)) 82 | self.fc = nn.Linear(512 * 4, 100) 83 | 84 | def forward(self, x): 85 | x = self.conv1(x) 86 | x = self.conv2(x) 87 | x = self.conv3(x) 88 | x = self.conv4(x) 89 | x = self.conv5(x) 90 | x = self.avg(x) 91 | x = x.view(x.size(0), -1) 92 | x = self.fc(x) 93 | return x 94 | 95 | def _make_layer(self, block, num_block, out_channels, stride): 96 | """Building resnext block 97 | Args: 98 | block: block type(default 
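# Group-width arithmetic used by ResNextBottleNeckC above: with CARDINALITY
# C = 32, DEPTH = 4 and BASEWIDTH = 64, a block with out_channels = 64 gets
# D = int(4 * 64 / 64) = 4 channels per group, i.e. a 32x4d grouped 3x3 conv
# over C * D = 128 channels, followed by a 1x1 expansion to out_channels * 4.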
resnext bottleneck c) 99 | num_block: number of blocks per layer 100 | out_channels: output channels per block 101 | stride: block stride 102 | 103 | Returns: 104 | a resnext layer 105 | """ 106 | strides = [stride] + [1] * (num_block - 1) 107 | layers = [] 108 | for stride in strides: 109 | layers.append(block(self.in_channels, out_channels, stride)) 110 | self.in_channels = out_channels * 4 111 | 112 | return nn.Sequential(*layers) 113 | 114 | def resnext50(): 115 | """ return a resnext50(c32x4d) network 116 | """ 117 | return ResNext(ResNextBottleNeckC, [3, 4, 6, 3]) 118 | 119 | def resnext101(): 120 | """ return a resnext101(c32x4d) network 121 | """ 122 | return ResNext(ResNextBottleNeckC, [3, 4, 23, 3]) 123 | 124 | def resnext152(): 125 | """ return a resnext101(c32x4d) network 126 | """ 127 | return ResNext(ResNextBottleNeckC, [3, 4, 36, 3]) 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #test.py 2 | #!/usr/bin/env python3 3 | 4 | """ test neuron network performace 5 | print top1 and top5 err on test dataset 6 | of a model 7 | 8 | author baiyu 9 | """ 10 | 11 | import argparse 12 | #from dataset import * 13 | 14 | #from skimage import io 15 | from matplotlib import pyplot as plt 16 | 17 | import torch 18 | import torchvision.transforms as transforms 19 | from torch.utils.data import DataLoader 20 | from torch.autograd import Variable 21 | from dataset.ImgLoader import ImgLoader 22 | import os 23 | import roc 24 | 25 | from conf import settings 26 | from utils import get_network, get_test_dataloader 27 | 28 | if __name__ == '__main__': 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('-net', type=str, default='resnet18', help='net type') 32 | parser.add_argument('-weights', type=str, default='./checkpoint/resnet18/2019-12-30T20:04:53.202543/resnet18-20-regular.pth', help='the weights file you want to test') 33 | parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not') 34 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 35 | parser.add_argument('-b', type=int, default=16, help='batch size for dataloader') 36 | parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset') 37 | parser.add_argument('-test_list', type=str, default='4test_list.txt', help='initial learning rate') 38 | parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped', help='initial learning rate') 39 | args = parser.parse_args() 40 | 41 | net = get_network(args) 42 | 43 | test_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list), 44 | transforms.Compose([ 45 | transforms.Resize(248), 46 | # transforms.RandomAffine(10), 47 | transforms.CenterCrop(248), 48 | transforms.RandomRotation(15), 49 | transforms.ToTensor() 50 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 51 | 52 | ])) 53 | test_loader = torch.utils.data.DataLoader(test_dataset, 54 | batch_size=args.b, 55 | num_workers=2, 56 | pin_memory=True) 57 | 58 | net.load_state_dict(torch.load(args.weights), args.gpu) 59 | print(net) 60 | net.eval() 61 | 62 | correct = 0.0 63 | total = 0 64 | 65 | result_list = [] 66 | label_list = [] 67 | TP = 0. 68 | TN = 0. 69 | FP = 0. 70 | FN = 0. 
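# Metric bookkeeping for the evaluation loop below: roc.cal_metric returns the
# tuple (eer, TPRs, auc, curve), and HTER is derived from the confusion counts
# as HTER = 1 - (TP_rate + TN_rate) / 2. Worked example with hypothetical
# counts TP=90, FN=10, TN=80, FP=20:
#
#   TP_rate = 90 / (90 + 10)        # 0.90 (true positive rate)
#   TN_rate = 80 / (80 + 20)        # 0.80 (true negative rate)
#   HTER = 1 - (0.90 + 0.80) / 2    # 0.15, the mean of the two error rates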
71 | 72 | for n_iter, (image, label) in enumerate(test_loader): 73 | print("iteration: {}\ttotal {} iterations".format(n_iter + 1, len(test_loader))) 74 | image = Variable(image).cuda() 75 | labels = Variable(label).cuda() 76 | outputs = net(image) 77 | _, preds = outputs.max(1) 78 | correct += preds.eq(labels).sum() 79 | 80 | for i in range(len(preds)): 81 | if labels[i] == 1 and preds[i] == 1: 82 | TP += 1 83 | elif labels[i] == 0 and preds[i] == 0: 84 | TN += 1 85 | elif labels[i] == 1 and preds[i] == 0: 86 | FN += 1 87 | elif labels[i] == 0 and preds[i] == 1: 88 | FP += 1 89 | 90 | outputs = torch.softmax(outputs, dim=-1) 91 | preds_prob = outputs.to('cpu').detach().numpy() 92 | labels = labels.to('cpu').detach().numpy() 93 | for i_batch in range(preds.shape[0]): 94 | result_list.append(preds_prob[i_batch, 1]) 95 | label_list.append(labels[i_batch]) 96 | 97 | TP_rate = float(TP / (TP + FN)) 98 | TN_rate = float(TN / (TN + FP)) 99 | 100 | HTER = 1 - (TP_rate + TN_rate) / 2 101 | metric = roc.cal_metric(label_list, result_list, True) 102 | 103 | print('Test set: Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 104 | correct.float() / len(test_loader.dataset), metric[0], HTER 105 | )) 106 | print() 107 | # _, pred = output.topk(1, 1, largest=True, sorted=True) 108 | 109 | # label = label.view(label.size(0), -1).expand_as(pred) 110 | # correct = pred.eq(label).float() 111 | # 112 | # # #compute top 5 113 | # # correct_5 += correct[:, :5].sum() 114 | # 115 | # #compute top1 116 | # correct_1 += correct[:, :1].sum() 117 | 118 | 119 | # print() 120 | # print("Top 1 err: ", 1 - correct_1 / len(test_loader.dataset)) 121 | # # print("Top 5 err: ", 1 - correct_5 / len(test_loader.dataset)) 122 | # print("Parameter numbers: {}".format(sum(p.numel() for p in net.parameters()))) -------------------------------------------------------------------------------- /models/googlenet.py: -------------------------------------------------------------------------------- 1 | """google net in pytorch 2 | 3 | 4 | 5 | [1] Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, 6 | Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. 
7 | 8 | Going Deeper with Convolutions 9 | https://arxiv.org/abs/1409.4842v1 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | class Inception(nn.Module): 16 | def __init__(self, input_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj): 17 | super().__init__() 18 | 19 | #1x1conv branch 20 | self.b1 = nn.Sequential( 21 | nn.Conv2d(input_channels, n1x1, kernel_size=1), 22 | nn.BatchNorm2d(n1x1), 23 | nn.ReLU(inplace=True) 24 | ) 25 | 26 | #1x1conv -> 3x3conv branch 27 | self.b2 = nn.Sequential( 28 | nn.Conv2d(input_channels, n3x3_reduce, kernel_size=1), 29 | nn.BatchNorm2d(n3x3_reduce), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(n3x3_reduce, n3x3, kernel_size=3, padding=1), 32 | nn.BatchNorm2d(n3x3), 33 | nn.ReLU(inplace=True) 34 | ) 35 | 36 | #1x1conv -> 5x5conv branch 37 | #we use 2 3x3 conv filters stacked instead 38 | #of 1 5x5 filters to obtain the same receptive 39 | #field with fewer parameters 40 | self.b3 = nn.Sequential( 41 | nn.Conv2d(input_channels, n5x5_reduce, kernel_size=1), 42 | nn.BatchNorm2d(n5x5_reduce), 43 | nn.ReLU(inplace=True), 44 | nn.Conv2d(n5x5_reduce, n5x5, kernel_size=3, padding=1), 45 | nn.BatchNorm2d(n5x5, n5x5), 46 | nn.ReLU(inplace=True), 47 | nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), 48 | nn.BatchNorm2d(n5x5), 49 | nn.ReLU(inplace=True) 50 | ) 51 | 52 | #3x3pooling -> 1x1conv 53 | #same conv 54 | self.b4 = nn.Sequential( 55 | nn.MaxPool2d(3, stride=1, padding=1), 56 | nn.Conv2d(input_channels, pool_proj, kernel_size=1), 57 | nn.BatchNorm2d(pool_proj), 58 | nn.ReLU(inplace=True) 59 | ) 60 | 61 | def forward(self, x): 62 | return torch.cat([self.b1(x), self.b2(x), self.b3(x), self.b4(x)], dim=1) 63 | 64 | 65 | class GoogleNet(nn.Module): 66 | 67 | def __init__(self, num_class=100): 68 | super().__init__() 69 | self.prelayer = nn.Sequential( 70 | nn.Conv2d(3, 192, kernel_size=3, padding=1), 71 | nn.BatchNorm2d(192), 72 | nn.ReLU(inplace=True) 73 | ) 74 | 75 | #although we only use 1 conv layer as prelayer, 76 | #we still use name a3, b3....... 
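# Channel bookkeeping for the Inception stages below: each module outputs
# n1x1 + n3x3 + n5x5 + pool_proj channels, since forward() concatenates the
# four branches along dim=1. For example:
#
#   a3 = Inception(192, 64, 96, 128, 16, 32, 32)    # 64+128+32+32  = 256 out
#   b3 = Inception(256, 128, 128, 192, 32, 96, 64)  # 128+192+96+64 = 480 out
#
# which is why a4 below takes 480 input channels.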
77 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 78 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 79 | 80 | #"""In general, an Inception network is a network consisting of 81 | #modules of the above type stacked upon each other, with occasional 82 | #max-pooling layers with stride 2 to halve the resolution of the 83 | #grid""" 84 | self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) 85 | 86 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) 87 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 88 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) 89 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 90 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 91 | 92 | self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) 93 | self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) 94 | 95 | #input feature size: 8*8*1024 96 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 97 | self.dropout = nn.Dropout2d(p=0.4) 98 | self.linear = nn.Linear(1024, num_class) 99 | 100 | def forward(self, x): 101 | output = self.prelayer(x) 102 | output = self.a3(output) 103 | output = self.b3(output) 104 | 105 | output = self.maxpool(output) 106 | 107 | output = self.a4(output) 108 | output = self.b4(output) 109 | output = self.c4(output) 110 | output = self.d4(output) 111 | output = self.e4(output) 112 | 113 | output = self.maxpool(output) 114 | 115 | output = self.a5(output) 116 | output = self.b5(output) 117 | 118 | #"""It was found that a move from fully connected layers to 119 | #average pooling improved the top-1 accuracy by about 0.6%, 120 | #however the use of dropout remained essential even after 121 | #removing the fully connected layers.""" 122 | output = self.avgpool(output) 123 | output = self.dropout(output) 124 | output = output.view(output.size()[0], -1) 125 | output = self.linear(output) 126 | 127 | return output 128 | 129 | def googlenet(): 130 | return GoogleNet() 131 | 132 | 133 | -------------------------------------------------------------------------------- /models/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | """shufflenetv2 in pytorch 2 | 3 | 4 | 5 | [1] Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun 6 | 7 | ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design 8 | https://arxiv.org/abs/1807.11164 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | def channel_split(x, split): 17 | """split a tensor into two pieces along channel dimension 18 | Args: 19 | x: input tensor 20 | split:(int) channel size for each pieces 21 | """ 22 | assert x.size(1) == split * 2 23 | return torch.split(x, split, dim=1) 24 | 25 | def channel_shuffle(x, groups): 26 | """channel shuffle operation 27 | Args: 28 | x: input tensor 29 | groups: input branch number 30 | """ 31 | 32 | batch_size, channels, height, width = x.size() 33 | channels_per_group = int(channels / groups) 34 | 35 | x = x.view(batch_size, groups, channels_per_group, height, width) 36 | x = x.transpose(1, 2).contiguous() 37 | x = x.view(batch_size, -1, height, width) 38 | 39 | return x 40 | 41 | class ShuffleUnit(nn.Module): 42 | 43 | def __init__(self, in_channels, out_channels, stride): 44 | super().__init__() 45 | 46 | self.stride = stride 47 | self.in_channels = in_channels 48 | self.out_channels = out_channels 49 | 50 | if stride != 1 or in_channels != out_channels: 51 | self.residual = nn.Sequential( 52 | nn.Conv2d(in_channels, in_channels, 1), 53 | nn.BatchNorm2d(in_channels), 54 | 
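# What channel_shuffle(x, 2) above does after the two branches are
# concatenated: reshape C channels to (groups, C/groups), transpose, and
# flatten, interleaving the halves. Tiny worked example on channel indices:
#
#   x = torch.arange(4).view(1, 4, 1, 1)        # channels [0, 1, 2, 3]
#   channel_shuffle(x, 2).flatten().tolist()    # -> [0, 2, 1, 3]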
nn.ReLU(inplace=True),
55 |                 nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
56 |                 nn.BatchNorm2d(in_channels),
57 |                 nn.Conv2d(in_channels, int(out_channels / 2), 1),
58 |                 nn.BatchNorm2d(int(out_channels / 2)),
59 |                 nn.ReLU(inplace=True)
60 |             )
61 | 
62 |             self.shortcut = nn.Sequential(
63 |                 nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
64 |                 nn.BatchNorm2d(in_channels),
65 |                 nn.Conv2d(in_channels, int(out_channels / 2), 1),
66 |                 nn.BatchNorm2d(int(out_channels / 2)),
67 |                 nn.ReLU(inplace=True)
68 |             )
69 |         else:
70 |             self.shortcut = nn.Sequential()
71 | 
72 |             in_channels = int(in_channels / 2)
73 |             self.residual = nn.Sequential(
74 |                 nn.Conv2d(in_channels, in_channels, 1),
75 |                 nn.BatchNorm2d(in_channels),
76 |                 nn.ReLU(inplace=True),
77 |                 nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels),
78 |                 nn.BatchNorm2d(in_channels),
79 |                 nn.Conv2d(in_channels, in_channels, 1),
80 |                 nn.BatchNorm2d(in_channels),
81 |                 nn.ReLU(inplace=True)
82 |             )
83 | 
84 | 
85 |     def forward(self, x):
86 | 
87 |         if self.stride == 1 and self.out_channels == self.in_channels:
88 |             shortcut, residual = channel_split(x, int(self.in_channels / 2))
89 |         else:
90 |             shortcut = x
91 |             residual = x
92 | 
93 |         shortcut = self.shortcut(shortcut)
94 |         residual = self.residual(residual)
95 |         x = torch.cat([shortcut, residual], dim=1)
96 |         x = channel_shuffle(x, 2)
97 | 
98 |         return x
99 | 
100 | class ShuffleNetV2(nn.Module):
101 | 
102 |     def __init__(self, ratio=1, class_num=100):
103 |         super().__init__()
104 |         if ratio == 0.5:
105 |             out_channels = [48, 96, 192, 1024]
106 |         elif ratio == 1:
107 |             out_channels = [116, 232, 464, 1024]
108 |         elif ratio == 1.5:
109 |             out_channels = [176, 352, 704, 1024]
110 |         elif ratio == 2:
111 |             out_channels = [244, 488, 976, 2048]
112 |         else:
113 |             raise ValueError('unsupported ratio number')
114 | 
115 |         self.pre = nn.Sequential(
116 |             nn.Conv2d(3, 24, 3, padding=1),
117 |             nn.BatchNorm2d(24)
118 |         )
119 | 
120 |         self.stage2 = self._make_stage(24, out_channels[0], 3)
121 |         self.stage3 = self._make_stage(out_channels[0], out_channels[1], 7)
122 |         self.stage4 = self._make_stage(out_channels[1], out_channels[2], 3)
123 |         self.conv5 = nn.Sequential(
124 |             nn.Conv2d(out_channels[2], out_channels[3], 1),
125 |             nn.BatchNorm2d(out_channels[3]),
126 |             nn.ReLU(inplace=True)
127 |         )
128 | 
129 |         self.fc = nn.Linear(out_channels[3], class_num)
130 | 
131 |     def forward(self, x):
132 |         x = self.pre(x)
133 |         x = self.stage2(x)
134 |         x = self.stage3(x)
135 |         x = self.stage4(x)
136 |         x = self.conv5(x)
137 |         x = F.adaptive_avg_pool2d(x, 1)
138 |         x = x.view(x.size(0), -1)
139 |         x = self.fc(x)
140 | 
141 |         return x
142 | 
143 |     def _make_stage(self, in_channels, out_channels, repeat):
144 |         layers = []
145 |         layers.append(ShuffleUnit(in_channels, out_channels, 2))
146 | 
147 |         while repeat:
148 |             layers.append(ShuffleUnit(out_channels, out_channels, 1))
149 |             repeat -= 1
150 | 
151 |         return nn.Sequential(*layers)
152 | 
153 | def shufflenetv2():
154 |     return ShuffleNetV2()
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
-------------------------------------------------------------------------------- /models/densenet.py: --------------------------------------------------------------------------------
1 | """dense net in pytorch
2 | 
3 | 
4 | 
5 | [1] Gao Huang, Zhuang Liu, Laurens van der Maaten, Kilian Q. Weinberger.
6 | 7 | Densely Connected Convolutional Networks 8 | https://arxiv.org/abs/1608.06993v5 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | 16 | #"""Bottleneck layers. Although each layer only produces k 17 | #output feature-maps, it typically has many more inputs. It 18 | #has been noted in [37, 11] that a 1×1 convolution can be in- 19 | #troduced as bottleneck layer before each 3×3 convolution 20 | #to reduce the number of input feature-maps, and thus to 21 | #improve computational efficiency.""" 22 | class Bottleneck(nn.Module): 23 | def __init__(self, in_channels, growth_rate): 24 | super().__init__() 25 | #"""In our experiments, we let each 1×1 convolution 26 | #produce 4k feature-maps.""" 27 | inner_channel = 4 * growth_rate 28 | 29 | #"""We find this design especially effective for DenseNet and 30 | #we refer to our network with such a bottleneck layer, i.e., 31 | #to the BN-ReLU-Conv(1×1)-BN-ReLU-Conv(3×3) version of H ` , 32 | #as DenseNet-B.""" 33 | self.bottle_neck = nn.Sequential( 34 | nn.BatchNorm2d(in_channels), 35 | nn.ReLU(inplace=True), 36 | nn.Conv2d(in_channels, inner_channel, kernel_size=1, bias=False), 37 | nn.BatchNorm2d(inner_channel), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(inner_channel, growth_rate, kernel_size=3, padding=1, bias=False) 40 | ) 41 | 42 | def forward(self, x): 43 | return torch.cat([x, self.bottle_neck(x)], 1) 44 | 45 | #"""We refer to layers between blocks as transition 46 | #layers, which do convolution and pooling.""" 47 | class Transition(nn.Module): 48 | def __init__(self, in_channels, out_channels): 49 | super().__init__() 50 | #"""The transition layers used in our experiments 51 | #consist of a batch normalization layer and an 1×1 52 | #convolutional layer followed by a 2×2 average pooling 53 | #layer""". 54 | self.down_sample = nn.Sequential( 55 | nn.BatchNorm2d(in_channels), 56 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 57 | nn.AvgPool2d(2, stride=2) 58 | ) 59 | 60 | def forward(self, x): 61 | return self.down_sample(x) 62 | 63 | #DesneNet-BC 64 | #B stands for bottleneck layer(BN-RELU-CONV(1x1)-BN-RELU-CONV(3x3)) 65 | #C stands for compression factor(0<=theta<=1) 66 | class DenseNet(nn.Module): 67 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_class=100): 68 | super().__init__() 69 | self.growth_rate = growth_rate 70 | 71 | #"""Before entering the first dense block, a convolution 72 | #with 16 (or twice the growth rate for DenseNet-BC) 73 | #output channels is performed on the input images.""" 74 | inner_channels = 2 * growth_rate 75 | 76 | #For convolutional layers with kernel size 3×3, each 77 | #side of the inputs is zero-padded by one pixel to keep 78 | #the feature-map size fixed. 79 | self.conv1 = nn.Conv2d(3, inner_channels, kernel_size=3, padding=1, bias=False) 80 | 81 | self.features = nn.Sequential() 82 | 83 | for index in range(len(nblocks) - 1): 84 | self.features.add_module("dense_block_layer_{}".format(index), self._make_dense_layers(block, inner_channels, nblocks[index])) 85 | inner_channels += growth_rate * nblocks[index] 86 | 87 | #"""If a dense block contains m feature-maps, we let the 88 | #following transition layer generate θm output feature- 89 | #maps, where 0 < θ ≤ 1 is referred to as the compression 90 | #fac-tor. 
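        # worked example (densenet121, growth_rate=32): the first dense block
        # receives 2 * 32 = 64 channels and its 6 bottlenecks add 6 * 32 = 192,
        # giving m = 256 feature maps; with the default reduction of 0.5 the
        # transition layer below then emits int(0.5 * 256) = 128 channels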
91 | out_channels = int(reduction * inner_channels) # int() will automatic floor the value 92 | self.features.add_module("transition_layer_{}".format(index), Transition(inner_channels, out_channels)) 93 | inner_channels = out_channels 94 | 95 | self.features.add_module("dense_block{}".format(len(nblocks) - 1), self._make_dense_layers(block, inner_channels, nblocks[len(nblocks)-1])) 96 | inner_channels += growth_rate * nblocks[len(nblocks) - 1] 97 | self.features.add_module('bn', nn.BatchNorm2d(inner_channels)) 98 | self.features.add_module('relu', nn.ReLU(inplace=True)) 99 | 100 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 101 | 102 | self.linear = nn.Linear(inner_channels, num_class) 103 | 104 | def forward(self, x): 105 | output = self.conv1(x) 106 | output = self.features(output) 107 | output = self.avgpool(output) 108 | output = output.view(output.size()[0], -1) 109 | output = self.linear(output) 110 | return output 111 | 112 | def _make_dense_layers(self, block, in_channels, nblocks): 113 | dense_block = nn.Sequential() 114 | for index in range(nblocks): 115 | dense_block.add_module('bottle_neck_layer_{}'.format(index), block(in_channels, self.growth_rate)) 116 | in_channels += self.growth_rate 117 | return dense_block 118 | 119 | def densenet121(): 120 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) 121 | 122 | def densenet169(): 123 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) 124 | 125 | def densenet201(): 126 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) 127 | 128 | def densenet161(): 129 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) 130 | 131 | -------------------------------------------------------------------------------- /models/senet.py: -------------------------------------------------------------------------------- 1 | """senet in pytorch 2 | 3 | 4 | 5 | [1] Jie Hu, Li Shen, Samuel Albanie, Gang Sun, Enhua Wu 6 | 7 | Squeeze-and-Excitation Networks 8 | https://arxiv.org/abs/1709.01507 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | class BasicResidualSEBlock(nn.Module): 16 | 17 | expansion = 1 18 | 19 | def __init__(self, in_channels, out_channels, stride, r=16): 20 | super().__init__() 21 | 22 | self.residual = nn.Sequential( 23 | nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1), 24 | nn.BatchNorm2d(out_channels), 25 | nn.ReLU(inplace=True), 26 | 27 | nn.Conv2d(out_channels, out_channels * self.expansion, 3, padding=1), 28 | nn.BatchNorm2d(out_channels * self.expansion), 29 | nn.ReLU(inplace=True) 30 | ) 31 | 32 | self.shortcut = nn.Sequential() 33 | if stride != 1 or in_channels != out_channels * self.expansion: 34 | self.shortcut = nn.Sequential( 35 | nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), 36 | nn.BatchNorm2d(out_channels * self.expansion) 37 | ) 38 | 39 | self.squeeze = nn.AdaptiveAvgPool2d(1) 40 | self.excitation = nn.Sequential( 41 | nn.Linear(out_channels * self.expansion, out_channels * self.expansion // r), 42 | nn.ReLU(inplace=True), 43 | nn.Linear(out_channels * self.expansion // r, out_channels * self.expansion), 44 | nn.Sigmoid() 45 | ) 46 | 47 | def forward(self, x): 48 | shortcut = self.shortcut(x) 49 | residual = self.residual(x) 50 | 51 | squeeze = self.squeeze(residual) 52 | squeeze = squeeze.view(squeeze.size(0), -1) 53 | excitation = self.excitation(squeeze) 54 | excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) 55 | 56 | x = residual * excitation.expand_as(residual) + 
shortcut 57 | 58 | return F.relu(x) 59 | 60 | class BottleneckResidualSEBlock(nn.Module): 61 | 62 | expansion = 4 63 | 64 | def __init__(self, in_channels, out_channels, stride, r=16): 65 | super().__init__() 66 | 67 | self.residual = nn.Sequential( 68 | nn.Conv2d(in_channels, out_channels, 1), 69 | nn.BatchNorm2d(out_channels), 70 | nn.ReLU(inplace=True), 71 | 72 | nn.Conv2d(out_channels, out_channels, 3, stride=stride, padding=1), 73 | nn.BatchNorm2d(out_channels), 74 | nn.ReLU(inplace=True), 75 | 76 | nn.Conv2d(out_channels, out_channels * self.expansion, 1), 77 | nn.BatchNorm2d(out_channels * self.expansion), 78 | nn.ReLU(inplace=True) 79 | ) 80 | 81 | self.squeeze = nn.AdaptiveAvgPool2d(1) 82 | self.excitation = nn.Sequential( 83 | nn.Linear(out_channels * self.expansion, out_channels * self.expansion // r), 84 | nn.ReLU(inplace=True), 85 | nn.Linear(out_channels * self.expansion // r, out_channels * self.expansion), 86 | nn.Sigmoid() 87 | ) 88 | 89 | self.shortcut = nn.Sequential() 90 | if stride != 1 or in_channels != out_channels * self.expansion: 91 | self.shortcut = nn.Sequential( 92 | nn.Conv2d(in_channels, out_channels * self.expansion, 1, stride=stride), 93 | nn.BatchNorm2d(out_channels * self.expansion) 94 | ) 95 | 96 | def forward(self, x): 97 | 98 | shortcut = self.shortcut(x) 99 | 100 | residual = self.residual(x) 101 | squeeze = self.squeeze(residual) 102 | squeeze = squeeze.view(squeeze.size(0), -1) 103 | excitation = self.excitation(squeeze) 104 | excitation = excitation.view(residual.size(0), residual.size(1), 1, 1) 105 | 106 | x = residual * excitation.expand_as(residual) + shortcut 107 | 108 | return F.relu(x) 109 | 110 | class SEResNet(nn.Module): 111 | 112 | def __init__(self, block, block_num, class_num=100): 113 | super().__init__() 114 | 115 | self.in_channels = 64 116 | 117 | self.pre = nn.Sequential( 118 | nn.Conv2d(3, 64, 3, padding=1), 119 | nn.BatchNorm2d(64), 120 | nn.ReLU(inplace=True) 121 | ) 122 | 123 | self.stage1 = self._make_stage(block, block_num[0], 64, 1) 124 | self.stage2 = self._make_stage(block, block_num[1], 128, 2) 125 | self.stage3 = self._make_stage(block, block_num[2], 256, 2) 126 | self.stage4 = self._make_stage(block, block_num[3], 516, 2) 127 | 128 | self.linear = nn.Linear(self.in_channels, class_num) 129 | 130 | def forward(self, x): 131 | x = self.pre(x) 132 | 133 | x = self.stage1(x) 134 | x = self.stage2(x) 135 | x = self.stage3(x) 136 | x = self.stage4(x) 137 | 138 | x = F.adaptive_avg_pool2d(x, 1) 139 | x = x.view(x.size(0), -1) 140 | 141 | x = self.linear(x) 142 | 143 | return x 144 | 145 | 146 | def _make_stage(self, block, num, out_channels, stride): 147 | 148 | layers = [] 149 | layers.append(block(self.in_channels, out_channels, stride)) 150 | self.in_channels = out_channels * block.expansion 151 | 152 | while num - 1: 153 | layers.append(block(self.in_channels, out_channels, 1)) 154 | num -= 1 155 | 156 | return nn.Sequential(*layers) 157 | 158 | def seresnet18(): 159 | return SEResNet(BasicResidualSEBlock, [2, 2, 2, 2]) 160 | 161 | def seresnet34(): 162 | return SEResNet(BasicResidualSEBlock, [3, 4, 6, 3]) 163 | 164 | def seresnet50(): 165 | return SEResNet(BottleneckResidualSEBlock, [3, 4, 6, 3]) 166 | 167 | def seresnet101(): 168 | return SEResNet(BottleneckResidualSEBlock, [3, 4, 23, 3]) 169 | 170 | def seresnet152(): 171 | return SEResNet(BottleneckResidualSEBlock, [3, 8, 36, 3]) -------------------------------------------------------------------------------- /models/mobilenet.py: 
-------------------------------------------------------------------------------- 1 | """mobilenet in pytorch 2 | 3 | 4 | 5 | [1] Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam 6 | 7 | MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications 8 | https://arxiv.org/abs/1704.04861 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | 15 | class DepthSeperabelConv2d(nn.Module): 16 | 17 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 18 | super().__init__() 19 | self.depthwise = nn.Sequential( 20 | nn.Conv2d( 21 | input_channels, 22 | input_channels, 23 | kernel_size, 24 | groups=input_channels, 25 | **kwargs), 26 | nn.BatchNorm2d(input_channels), 27 | nn.ReLU(inplace=True) 28 | ) 29 | 30 | self.pointwise = nn.Sequential( 31 | nn.Conv2d(input_channels, output_channels, 1), 32 | nn.BatchNorm2d(output_channels), 33 | nn.ReLU(inplace=True) 34 | ) 35 | 36 | def forward(self, x): 37 | x = self.depthwise(x) 38 | x = self.pointwise(x) 39 | 40 | return x 41 | 42 | 43 | class BasicConv2d(nn.Module): 44 | 45 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 46 | 47 | super().__init__() 48 | self.conv = nn.Conv2d( 49 | input_channels, output_channels, kernel_size, **kwargs) 50 | self.bn = nn.BatchNorm2d(output_channels) 51 | self.relu = nn.ReLU(inplace=True) 52 | 53 | def forward(self, x): 54 | x = self.conv(x) 55 | x = self.bn(x) 56 | x = self.relu(x) 57 | 58 | return x 59 | 60 | 61 | class MobileNet(nn.Module): 62 | 63 | """ 64 | Args: 65 | width multipler: The role of the width multiplier α is to thin 66 | a network uniformly at each layer. For a given 67 | layer and width multiplier α, the number of 68 | input channels M becomes αM and the number of 69 | output channels N becomes αN. 
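                          For example, with α = 0.5 a layer that maps
                          64 input channels to 128 output channels shrinks
                          to 32 -> 64, so the cost of its pointwise (1x1)
                          convolution drops by roughly a factor of α² = 4.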
70 | """ 71 | 72 | def __init__(self, width_multiplier=1, class_num=100): 73 | super().__init__() 74 | 75 | alpha = width_multiplier 76 | self.stem = nn.Sequential( 77 | BasicConv2d(3, int(32 * alpha), 3, padding=1, bias=False), 78 | DepthSeperabelConv2d( 79 | int(32 * alpha), 80 | int(64 * alpha), 81 | 3, 82 | padding=1, 83 | bias=False 84 | ) 85 | ) 86 | 87 | #downsample 88 | self.conv1 = nn.Sequential( 89 | DepthSeperabelConv2d( 90 | int(64 * alpha), 91 | int(128 * alpha), 92 | 3, 93 | stride=2, 94 | padding=1, 95 | bias=False 96 | ), 97 | DepthSeperabelConv2d( 98 | int(128 * alpha), 99 | int(128 * alpha), 100 | 3, 101 | padding=1, 102 | bias=False 103 | ) 104 | ) 105 | 106 | #downsample 107 | self.conv2 = nn.Sequential( 108 | DepthSeperabelConv2d( 109 | int(128 * alpha), 110 | int(256 * alpha), 111 | 3, 112 | stride=2, 113 | padding=1, 114 | bias=False 115 | ), 116 | DepthSeperabelConv2d( 117 | int(256 * alpha), 118 | int(256 * alpha), 119 | 3, 120 | padding=1, 121 | bias=False 122 | ) 123 | ) 124 | 125 | #downsample 126 | self.conv3 = nn.Sequential( 127 | DepthSeperabelConv2d( 128 | int(256 * alpha), 129 | int(512 * alpha), 130 | 3, 131 | stride=2, 132 | padding=1, 133 | bias=False 134 | ), 135 | 136 | DepthSeperabelConv2d( 137 | int(512 * alpha), 138 | int(512 * alpha), 139 | 3, 140 | padding=1, 141 | bias=False 142 | ), 143 | DepthSeperabelConv2d( 144 | int(512 * alpha), 145 | int(512 * alpha), 146 | 3, 147 | padding=1, 148 | bias=False 149 | ), 150 | DepthSeperabelConv2d( 151 | int(512 * alpha), 152 | int(512 * alpha), 153 | 3, 154 | padding=1, 155 | bias=False 156 | ), 157 | DepthSeperabelConv2d( 158 | int(512 * alpha), 159 | int(512 * alpha), 160 | 3, 161 | padding=1, 162 | bias=False 163 | ), 164 | DepthSeperabelConv2d( 165 | int(512 * alpha), 166 | int(512 * alpha), 167 | 3, 168 | padding=1, 169 | bias=False 170 | ) 171 | ) 172 | 173 | #downsample 174 | self.conv4 = nn.Sequential( 175 | DepthSeperabelConv2d( 176 | int(512 * alpha), 177 | int(1024 * alpha), 178 | 3, 179 | stride=2, 180 | padding=1, 181 | bias=False 182 | ), 183 | DepthSeperabelConv2d( 184 | int(1024 * alpha), 185 | int(1024 * alpha), 186 | 3, 187 | padding=1, 188 | bias=False 189 | ) 190 | ) 191 | 192 | self.fc = nn.Linear(int(1024 * alpha), class_num) 193 | self.avg = nn.AdaptiveAvgPool2d(1) 194 | 195 | def forward(self, x): 196 | x = self.stem(x) 197 | 198 | x = self.conv1(x) 199 | x = self.conv2(x) 200 | x = self.conv3(x) 201 | x = self.conv4(x) 202 | 203 | x = self.avg(x) 204 | x = x.view(x.size(0), -1) 205 | x = self.fc(x) 206 | return x 207 | 208 | 209 | def mobilenet(alpha=1, class_num=100): 210 | return MobileNet(alpha, class_num) 211 | 212 | -------------------------------------------------------------------------------- /models/xception.py: -------------------------------------------------------------------------------- 1 | """xception in pytorch 2 | 3 | 4 | [1] François Chollet 5 | 6 | Xception: Deep Learning with Depthwise Separable Convolutions 7 | https://arxiv.org/abs/1610.02357 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | class SeperableConv2d(nn.Module): 14 | 15 | #***Figure 4. 
An “extreme” version of our Inception module, 16 | #with one spatial convolution per output channel of the 1x1 17 | #convolution.""" 18 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 19 | 20 | super().__init__() 21 | self.depthwise = nn.Conv2d( 22 | input_channels, 23 | input_channels, 24 | kernel_size, 25 | groups=input_channels, 26 | bias=False, 27 | **kwargs 28 | ) 29 | 30 | self.pointwise = nn.Conv2d(input_channels, output_channels, 1, bias=False) 31 | 32 | def forward(self, x): 33 | x = self.depthwise(x) 34 | x = self.pointwise(x) 35 | 36 | return x 37 | 38 | class EntryFlow(nn.Module): 39 | 40 | def __init__(self): 41 | 42 | super().__init__() 43 | self.conv1 = nn.Sequential( 44 | nn.Conv2d(3, 32, 3, padding=1, bias=False), 45 | nn.BatchNorm2d(32), 46 | nn.ReLU(inplace=True) 47 | ) 48 | 49 | self.conv2 = nn.Sequential( 50 | nn.Conv2d(32, 64, 3, padding=1, bias=False), 51 | nn.BatchNorm2d(64), 52 | nn.ReLU(inplace=True) 53 | ) 54 | 55 | self.conv3_residual = nn.Sequential( 56 | SeperableConv2d(64, 128, 3, padding=1), 57 | nn.BatchNorm2d(128), 58 | nn.ReLU(inplace=True), 59 | SeperableConv2d(128, 128, 3, padding=1), 60 | nn.BatchNorm2d(128), 61 | nn.MaxPool2d(3, stride=2, padding=1), 62 | ) 63 | 64 | self.conv3_shortcut = nn.Sequential( 65 | nn.Conv2d(64, 128, 1, stride=2), 66 | nn.BatchNorm2d(128), 67 | ) 68 | 69 | self.conv4_residual = nn.Sequential( 70 | nn.ReLU(inplace=True), 71 | SeperableConv2d(128, 256, 3, padding=1), 72 | nn.BatchNorm2d(256), 73 | nn.ReLU(inplace=True), 74 | SeperableConv2d(256, 256, 3, padding=1), 75 | nn.BatchNorm2d(256), 76 | nn.MaxPool2d(3, stride=2, padding=1) 77 | ) 78 | 79 | self.conv4_shortcut = nn.Sequential( 80 | nn.Conv2d(128, 256, 1, stride=2), 81 | nn.BatchNorm2d(256), 82 | ) 83 | 84 | #no downsampling 85 | self.conv5_residual = nn.Sequential( 86 | nn.ReLU(inplace=True), 87 | SeperableConv2d(256, 728, 3, padding=1), 88 | nn.BatchNorm2d(728), 89 | nn.ReLU(inplace=True), 90 | SeperableConv2d(728, 728, 3, padding=1), 91 | nn.BatchNorm2d(728), 92 | nn.MaxPool2d(3, 1, padding=1) 93 | ) 94 | 95 | #no downsampling 96 | self.conv5_shortcut = nn.Sequential( 97 | nn.Conv2d(256, 728, 1), 98 | nn.BatchNorm2d(728) 99 | ) 100 | 101 | def forward(self, x): 102 | x = self.conv1(x) 103 | x = self.conv2(x) 104 | residual = self.conv3_residual(x) 105 | shortcut = self.conv3_shortcut(x) 106 | x = residual + shortcut 107 | residual = self.conv4_residual(x) 108 | shortcut = self.conv4_shortcut(x) 109 | x = residual + shortcut 110 | residual = self.conv5_residual(x) 111 | shortcut = self.conv5_shortcut(x) 112 | x = residual + shortcut 113 | 114 | return x 115 | 116 | class MiddleFLowBlock(nn.Module): 117 | 118 | def __init__(self): 119 | super().__init__() 120 | 121 | self.shortcut = nn.Sequential() 122 | self.conv1 = nn.Sequential( 123 | nn.ReLU(inplace=True), 124 | SeperableConv2d(728, 728, 3, padding=1), 125 | nn.BatchNorm2d(728) 126 | ) 127 | self.conv2 = nn.Sequential( 128 | nn.ReLU(inplace=True), 129 | SeperableConv2d(728, 728, 3, padding=1), 130 | nn.BatchNorm2d(728) 131 | ) 132 | self.conv3 = nn.Sequential( 133 | nn.ReLU(inplace=True), 134 | SeperableConv2d(728, 728, 3, padding=1), 135 | nn.BatchNorm2d(728) 136 | ) 137 | 138 | def forward(self, x): 139 | residual = self.conv1(x) 140 | residual = self.conv2(residual) 141 | residual = self.conv3(residual) 142 | 143 | shortcut = self.shortcut(x) 144 | 145 | return shortcut + residual 146 | 147 | class MiddleFlow(nn.Module): 148 | def __init__(self, block): 149 | super().__init__() 150 
| 151 | #"""then through the middle flow which is repeated eight times""" 152 | self.middel_block = self._make_flow(block, 8) 153 | 154 | def forward(self, x): 155 | x = self.middel_block(x) 156 | return x 157 | 158 | def _make_flow(self, block, times): 159 | flows = [] 160 | for i in range(times): 161 | flows.append(block()) 162 | 163 | return nn.Sequential(*flows) 164 | 165 | 166 | class ExitFLow(nn.Module): 167 | 168 | def __init__(self): 169 | super().__init__() 170 | self.residual = nn.Sequential( 171 | nn.ReLU(), 172 | SeperableConv2d(728, 728, 3, padding=1), 173 | nn.BatchNorm2d(728), 174 | nn.ReLU(), 175 | SeperableConv2d(728, 1024, 3, padding=1), 176 | nn.BatchNorm2d(1024), 177 | nn.MaxPool2d(3, stride=2, padding=1) 178 | ) 179 | 180 | self.shortcut = nn.Sequential( 181 | nn.Conv2d(728, 1024, 1, stride=2), 182 | nn.BatchNorm2d(1024) 183 | ) 184 | 185 | self.conv = nn.Sequential( 186 | SeperableConv2d(1024, 1536, 3, padding=1), 187 | nn.BatchNorm2d(1536), 188 | nn.ReLU(inplace=True), 189 | SeperableConv2d(1536, 2048, 3, padding=1), 190 | nn.BatchNorm2d(2048), 191 | nn.ReLU(inplace=True) 192 | ) 193 | 194 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 195 | 196 | def forward(self, x): 197 | shortcut = self.shortcut(x) 198 | residual = self.residual(x) 199 | output = shortcut + residual 200 | output = self.conv(output) 201 | output = self.avgpool(output) 202 | 203 | return output 204 | 205 | class Xception(nn.Module): 206 | 207 | def __init__(self, block, num_class=100): 208 | super().__init__() 209 | self.entry_flow = EntryFlow() 210 | self.middel_flow = MiddleFlow(block) 211 | self.exit_flow = ExitFLow() 212 | 213 | self.fc = nn.Linear(2048, num_class) 214 | 215 | def forward(self, x): 216 | x = self.entry_flow(x) 217 | x = self.middel_flow(x) 218 | x = self.exit_flow(x) 219 | x = x.view(x.size(0), -1) 220 | x = self.fc(x) 221 | 222 | return x 223 | 224 | def xception(): 225 | return Xception(MiddleFLowBlock) 226 | 227 | 228 | -------------------------------------------------------------------------------- /models/rir.py: -------------------------------------------------------------------------------- 1 | """resnet in resnet in pytorch 2 | 3 | 4 | 5 | [1] Sasha Targ, Diogo Almeida, Kevin Lyman. 
6 | 7 | Resnet in Resnet: Generalizing Residual Architectures 8 | https://arxiv.org/abs/1603.08029v1 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | #geralized 15 | class ResnetInit(nn.Module): 16 | def __init__(self, in_channel, out_channel, stride): 17 | super().__init__() 18 | 19 | #"""The modular unit of the generalized residual network architecture is a 20 | #generalized residual block consisting of parallel states for a residual stream, 21 | #r, which contains identity shortcut connections and is similar to the structure 22 | #of a residual block from the original ResNet with a single convolutional layer 23 | #(parameters W l,r→r ) 24 | self.residual_stream_conv = nn.Conv2d(in_channel, out_channel, 3, padding=1, stride=stride) 25 | 26 | #"""and a transient stream, t, which is a standard convolutional layer 27 | #(W l,t→t ).""" 28 | self.transient_stream_conv = nn.Conv2d(in_channel, out_channel, 3, padding=1, stride=stride) 29 | 30 | #"""Two additional sets of convolutional filters in each block (W l,r→t , W l,t→r ) 31 | #also transfer information across streams.""" 32 | self.residual_stream_conv_across = nn.Conv2d(in_channel, out_channel, 3, padding=1, stride=stride) 33 | 34 | #"""We use equal numbers of filters for the residual and transient streams of the 35 | #generalized residual network, but optimizing this hyperparameter could lead to 36 | #further potential improvements.""" 37 | self.transient_stream_conv_across = nn.Conv2d(in_channel, out_channel, 3, padding=1, stride=stride) 38 | 39 | self.residual_bn_relu = nn.Sequential( 40 | nn.BatchNorm2d(out_channel), 41 | nn.ReLU(inplace=True) 42 | ) 43 | 44 | self.transient_bn_relu = nn.Sequential( 45 | nn.BatchNorm2d(out_channel), 46 | nn.ReLU(inplace=True) 47 | ) 48 | 49 | #"""The form of the shortcut connection can be an identity function with 50 | #the appropriate padding or a projection as in He et al. 
(2015b).""" 51 | self.short_cut = nn.Sequential() 52 | if in_channel != out_channel or stride != 1: 53 | self.short_cut = nn.Sequential( 54 | nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride) 55 | ) 56 | 57 | 58 | def forward(self, x): 59 | x_residual, x_transient = x 60 | residual_r_r = self.residual_stream_conv(x_residual) 61 | residual_r_t = self.residual_stream_conv_across(x_residual) 62 | residual_shortcut = self.short_cut(x_residual) 63 | 64 | transient_t_t = self.transient_stream_conv(x_transient) 65 | transient_t_r = self.transient_stream_conv_across(x_transient) 66 | 67 | #transient_t_t = self.transient_stream_conv(x_residual) 68 | #transient_t_r = self.transient_stream_conv_across(x_residual) 69 | #"""Same-stream and cross-stream activations are summed (along with the 70 | #shortcut connection for the residual stream) before applying batch 71 | #normalization and ReLU nonlinearities (together σ) to get the output 72 | #states of the block (Equation 1) (Ioffe & Szegedy, 2015).""" 73 | x_residual = self.residual_bn_relu(residual_r_r + transient_t_r + residual_shortcut) 74 | x_transient = self.transient_bn_relu(residual_r_t + transient_t_t) 75 | 76 | return x_residual, x_transient 77 | 78 | 79 | 80 | class RiRBlock(nn.Module): 81 | def __init__(self, in_channel, out_channel, layer_num, stride, layer=ResnetInit): 82 | super().__init__() 83 | self.resnetinit = self._make_layers(in_channel, out_channel, layer_num, stride) 84 | 85 | #self.short_cut = nn.Sequential() 86 | #if stride != 1 or in_channel != out_channel: 87 | # self.short_cut = nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride) 88 | 89 | def forward(self, x): 90 | x_residual, x_transient = self.resnetinit(x) 91 | #x_residual = x_residual + self.short_cut(x[0]) 92 | #x_transient = x_transient + self.short_cut(x[1]) 93 | 94 | return (x_residual, x_transient) 95 | 96 | #"""Replacing each of the convolutional layers within a residual 97 | #block from the original ResNet (Figure 1a) with a generalized residual block 98 | #(Figure 1b) leads us to a new architecture we call ResNet in ResNet (RiR) 99 | #(Figure 1d).""" 100 | def _make_layers(self, in_channel, out_channel, layer_num, stride, layer=ResnetInit): 101 | strides = [stride] + [1] * (layer_num - 1) 102 | layers = nn.Sequential() 103 | for index, s in enumerate(strides): 104 | layers.add_module("generalized layers{}".format(index), layer(in_channel, out_channel, s)) 105 | in_channel = out_channel 106 | 107 | return layers 108 | 109 | class ResnetInResneet(nn.Module): 110 | def __init__(self, num_classes=100): 111 | super().__init__() 112 | base = int(96 / 2) 113 | self.residual_pre_conv = nn.Sequential( 114 | nn.Conv2d(3, base, 3, padding=1), 115 | nn.BatchNorm2d(base), 116 | nn.ReLU(inplace=True) 117 | ) 118 | self.transient_pre_conv = nn.Sequential( 119 | nn.Conv2d(3, base, 3, padding=1), 120 | nn.BatchNorm2d(base), 121 | nn.ReLU(inplace=True) 122 | ) 123 | 124 | self.rir1 = RiRBlock(base, base, 2, 1) 125 | self.rir2 = RiRBlock(base, base, 2, 1) 126 | self.rir3 = RiRBlock(base, base * 2, 2, 2) 127 | self.rir4 = RiRBlock(base * 2, base * 2, 2, 1) 128 | self.rir5 = RiRBlock(base * 2, base * 2, 2, 1) 129 | self.rir6 = RiRBlock(base * 2, base * 4, 2, 2) 130 | self.rir7 = RiRBlock(base * 4, base * 4, 2, 1) 131 | self.rir8 = RiRBlock(base * 4, base * 4, 2, 1) 132 | 133 | self.conv1 = nn.Sequential( 134 | nn.Conv2d(384, num_classes, kernel_size=3, stride=2), #without this convolution, loss will soon be nan 135 | nn.BatchNorm2d(num_classes), 136 | 
nn.ReLU(inplace=True), 137 | ) 138 | 139 | self.classifier = nn.Sequential( 140 | nn.Linear(900, 450), 141 | nn.ReLU(), 142 | nn.Dropout(), 143 | nn.Linear(450, 100), 144 | ) 145 | 146 | self._weight_init() 147 | 148 | def forward(self, x): 149 | x_residual = self.residual_pre_conv(x) 150 | x_transient = self.transient_pre_conv(x) 151 | 152 | x_residual, x_transient = self.rir1((x_residual, x_transient)) 153 | x_residual, x_transient = self.rir2((x_residual, x_transient)) 154 | x_residual, x_transient = self.rir3((x_residual, x_transient)) 155 | x_residual, x_transient = self.rir4((x_residual, x_transient)) 156 | x_residual, x_transient = self.rir5((x_residual, x_transient)) 157 | x_residual, x_transient = self.rir6((x_residual, x_transient)) 158 | x_residual, x_transient = self.rir7((x_residual, x_transient)) 159 | x_residual, x_transient = self.rir8((x_residual, x_transient)) 160 | h = torch.cat([x_residual, x_transient], 1) 161 | h = self.conv1(h) 162 | h = h.view(h.size()[0], -1) 163 | h = self.classifier(h) 164 | 165 | return h 166 | 167 | def _weight_init(self): 168 | for m in self.modules(): 169 | if isinstance(m, nn.Conv2d): 170 | torch.nn.init.kaiming_normal(m.weight) 171 | m.bias.data.fill_(0.01) 172 | 173 | 174 | def resnet_in_resnet(): 175 | return ResnetInResneet() 176 | 177 | #from torch.autograd import Variable 178 | # 179 | #net = resnet_in_resnet() 180 | #print(net(Variable(torch.randn(3, 3, 32, 32))).shape) 181 | -------------------------------------------------------------------------------- /models/shufflenet.py: -------------------------------------------------------------------------------- 1 | """shufflenet in pytorch 2 | 3 | 4 | 5 | [1] Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun. 6 | 7 | ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices 8 | https://arxiv.org/abs/1707.01083v2 9 | """ 10 | 11 | from functools import partial 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class BasicConv2d(nn.Module): 18 | 19 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 20 | super().__init__() 21 | self.conv = nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs) 22 | self.bn = nn.BatchNorm2d(output_channels) 23 | self.relu = nn.ReLU(inplace=True) 24 | 25 | def forward(self, x): 26 | x = self.conv(x) 27 | x = self.bn(x) 28 | x = self.relu(x) 29 | return x 30 | 31 | class ChannelShuffle(nn.Module): 32 | 33 | def __init__(self, groups): 34 | super().__init__() 35 | self.groups = groups 36 | 37 | def forward(self, x): 38 | batchsize, channels, height, width = x.data.size() 39 | channels_per_group = int(channels / self.groups) 40 | 41 | #"""suppose a convolutional layer with g groups whose output has 42 | #g x n channels; we first reshape the output channel dimension 43 | #into (g, n)""" 44 | x = x.view(batchsize, self.groups, channels_per_group, height, width) 45 | 46 | #"""transposing and then flattening it back as the input of next layer.""" 47 | x = x.transpose(1, 2).contiguous() 48 | x = x.view(batchsize, -1, height, width) 49 | 50 | return x 51 | 52 | class DepthwiseConv2d(nn.Module): 53 | 54 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 55 | super().__init__() 56 | self.depthwise = nn.Sequential( 57 | nn.Conv2d(input_channels, output_channels, kernel_size, **kwargs), 58 | nn.BatchNorm2d(output_channels) 59 | ) 60 | 61 | def forward(self, x): 62 | return self.depthwise(x) 63 | 64 | class PointwiseConv2d(nn.Module): 65 | def __init__(self, 
input_channels, output_channels, **kwargs):
66 |         super().__init__()
67 |         self.pointwise = nn.Sequential(
68 |             nn.Conv2d(input_channels, output_channels, 1, **kwargs),
69 |             nn.BatchNorm2d(output_channels)
70 |         )
71 | 
72 |     def forward(self, x):
73 |         return self.pointwise(x)
74 | 
75 | class ShuffleNetUnit(nn.Module):
76 | 
77 |     def __init__(self, input_channels, output_channels, stage, stride, groups):
78 |         super().__init__()
79 | 
80 |         #"""Similar to [9], we set the number of bottleneck channels to 1/4
81 |         #of the output channels for each ShuffleNet unit."""
82 |         self.bottleneck = nn.Sequential(
83 |             PointwiseConv2d(
84 |                 input_channels,
85 |                 int(output_channels / 4),
86 |                 groups=groups
87 |             ),
88 |             nn.ReLU(inplace=True)
89 |         )
90 | 
91 |         #"""Note that for Stage 2, we do not apply group convolution on the first pointwise
92 |         #layer because the number of input channels is relatively small."""
93 |         if stage == 2:
94 |             self.bottleneck = nn.Sequential(
95 |                 PointwiseConv2d(
96 |                     input_channels,
97 |                     int(output_channels / 4),
98 |                     groups=1
99 |                 ),
100 |                 nn.ReLU(inplace=True)
101 |             )
102 | 
103 |         self.channel_shuffle = ChannelShuffle(groups)
104 | 
105 |         self.depthwise = DepthwiseConv2d(
106 |             int(output_channels / 4),
107 |             int(output_channels / 4),
108 |             3,
109 |             groups=int(output_channels / 4),
110 |             stride=stride,
111 |             padding=1
112 |         )
113 | 
114 |         self.expand = PointwiseConv2d(
115 |             int(output_channels / 4),
116 |             output_channels,
117 |             groups=groups
118 |         )
119 | 
120 |         self.relu = nn.ReLU(inplace=True)
121 |         self.fusion = self._add
122 |         self.shortcut = nn.Sequential()
123 | 
124 |         #"""As for the case where ShuffleNet is applied with stride,
125 |         #we simply make two modifications (see Fig 2 (c)):
126 |         #(i) add a 3 × 3 average pooling on the shortcut path;
127 |         #(ii) replace the element-wise addition with channel concatenation,
128 |         #which makes it easy to enlarge channel dimension with little extra
129 |         #computation cost.
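        # channel bookkeeping for the concatenation case below: the expand conv
        # emits output_channels - input_channels and the pooled shortcut keeps
        # input_channels, so the fused output has exactly output_channels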
130 |         if stride != 1 or input_channels != output_channels:
131 |             self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)
132 | 
133 |             self.expand = PointwiseConv2d(
134 |                 int(output_channels / 4),
135 |                 output_channels - input_channels,
136 |                 groups=groups
137 |             )
138 | 
139 |             self.fusion = self._cat
140 | 
141 |     def _add(self, x, y):
142 |         return torch.add(x, y)
143 | 
144 |     def _cat(self, x, y):
145 |         return torch.cat([x, y], dim=1)
146 | 
147 |     def forward(self, x):
148 |         shortcut = self.shortcut(x)
149 | 
150 |         shuffled = self.bottleneck(x)
151 |         shuffled = self.channel_shuffle(shuffled)
152 |         shuffled = self.depthwise(shuffled)
153 |         shuffled = self.expand(shuffled)
154 | 
155 |         output = self.fusion(shortcut, shuffled)
156 |         output = self.relu(output)
157 | 
158 |         return output
159 | 
160 | class ShuffleNet(nn.Module):
161 | 
162 |     def __init__(self, num_blocks, num_classes=100, groups=3):
163 |         super().__init__()
164 | 
165 |         if groups == 1:
166 |             out_channels = [24, 144, 288, 567]
167 |         elif groups == 2:
168 |             out_channels = [24, 200, 400, 800]
169 |         elif groups == 3:
170 |             out_channels = [24, 240, 480, 960]
171 |         elif groups == 4:
172 |             out_channels = [24, 272, 544, 1088]
173 |         elif groups == 8:
174 |             out_channels = [24, 384, 768, 1536]
175 | 
176 |         self.conv1 = BasicConv2d(3, out_channels[0], 3, padding=1, stride=1)
177 |         self.input_channels = out_channels[0]
178 | 
179 |         self.stage2 = self._make_stage(
180 |             ShuffleNetUnit,
181 |             num_blocks[0],
182 |             out_channels[1],
183 |             stride=2,
184 |             stage=2,
185 |             groups=groups
186 |         )
187 | 
188 |         self.stage3 = self._make_stage(
189 |             ShuffleNetUnit,
190 |             num_blocks[1],
191 |             out_channels[2],
192 |             stride=2,
193 |             stage=3,
194 |             groups=groups
195 |         )
196 | 
197 |         self.stage4 = self._make_stage(
198 |             ShuffleNetUnit,
199 |             num_blocks[2],
200 |             out_channels[3],
201 |             stride=2,
202 |             stage=4,
203 |             groups=groups
204 |         )
205 | 
206 |         self.avg = nn.AdaptiveAvgPool2d((1, 1))
207 |         self.fc = nn.Linear(out_channels[3], num_classes)
208 | 
209 |     def forward(self, x):
210 |         x = self.conv1(x)
211 |         x = self.stage2(x)
212 |         x = self.stage3(x)
213 |         x = self.stage4(x)
214 |         x = self.avg(x)
215 |         x = x.view(x.size(0), -1)
216 |         x = self.fc(x)
217 | 
218 |         return x
219 | 
220 |     def _make_stage(self, block, num_blocks, output_channels, stride, stage, groups):
221 |         """make shufflenet stage
222 | 
223 |         Args:
224 |             block: block type, shuffle unit
225 |             output_channels: output depth channel number of this stage
226 |             num_blocks: how many blocks per stage
227 |             stride: the stride of the first block of this stage
228 |             stage: stage index
229 |             groups: group number of group convolution
230 |         Return:
231 |             return a shuffle net stage
232 |         """
233 |         strides = [stride] + [1] * (num_blocks - 1)
234 | 
235 |         layers = []
236 | 
237 |         for stride in strides:
238 |             layers.append(
239 |                 block(
240 |                     self.input_channels,
241 |                     output_channels,
242 |                     stride=stride,
243 |                     stage=stage,
244 |                     groups=groups
245 |                 )
246 |             )
247 |             self.input_channels = output_channels
248 | 
249 |         return nn.Sequential(*layers)
250 | 
251 | def shufflenet():
252 |     return ShuffleNet([4, 8, 4])
253 | 
254 | 
255 | 
256 | 
257 | 
-------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
1 | """ helper function
2 | 
3 | author baiyu
4 | """
5 | 
6 | import sys
7 | 
8 | import numpy
9 | 
10 | import torch
11 | from torch.optim.lr_scheduler import _LRScheduler
12 | import torchvision
13 | import torchvision.transforms as
transforms 14 | from torch.utils.data import DataLoader 15 | 16 | #from dataset import CIFAR100Train, CIFAR100Test 17 | 18 | def get_network(args, use_gpu=True): 19 | """ return given network 20 | """ 21 | 22 | if args.net == 'vgg16': 23 | from models.vgg import vgg16_bn 24 | net = vgg16_bn() 25 | elif args.net == 'vgg13': 26 | from models.vgg import vgg13_bn 27 | net = vgg13_bn() 28 | elif args.net == 'vgg11': 29 | from models.vgg import vgg11_bn 30 | net = vgg11_bn() 31 | elif args.net == 'vgg19': 32 | from models.vgg import vgg19_bn 33 | net = vgg19_bn() 34 | elif args.net == 'densenet121': 35 | from models.densenet import densenet121 36 | net = densenet121() 37 | elif args.net == 'densenet161': 38 | from models.densenet import densenet161 39 | net = densenet161() 40 | elif args.net == 'densenet169': 41 | from models.densenet import densenet169 42 | net = densenet169() 43 | elif args.net == 'densenet201': 44 | from models.densenet import densenet201 45 | net = densenet201() 46 | elif args.net == 'googlenet': 47 | from models.googlenet import googlenet 48 | net = googlenet() 49 | elif args.net == 'inceptionv3': 50 | from models.inceptionv3 import inceptionv3 51 | net = inceptionv3() 52 | elif args.net == 'inceptionv4': 53 | from models.inceptionv4 import inceptionv4 54 | net = inceptionv4() 55 | elif args.net == 'inceptionresnetv2': 56 | from models.inceptionv4 import inception_resnet_v2 57 | net = inception_resnet_v2() 58 | elif args.net == 'xception': 59 | from models.xception import xception 60 | net = xception() 61 | elif args.net == 'resnet18': 62 | from models.resnet import resnet18 63 | net = resnet18() 64 | elif args.net == 'resnet34': 65 | from models.resnet import resnet34 66 | net = resnet34() 67 | elif args.net == 'resnet50': 68 | from models.resnet import resnet50 69 | net = resnet50() 70 | elif args.net == 'resnet101': 71 | from models.resnet import resnet101 72 | net = resnet101() 73 | elif args.net == 'resnet152': 74 | from models.resnet import resnet152 75 | net = resnet152() 76 | elif args.net == 'preactresnet18': 77 | from models.preactresnet import preactresnet18 78 | net = preactresnet18() 79 | elif args.net == 'preactresnet34': 80 | from models.preactresnet import preactresnet34 81 | net = preactresnet34() 82 | elif args.net == 'preactresnet50': 83 | from models.preactresnet import preactresnet50 84 | net = preactresnet50() 85 | elif args.net == 'preactresnet101': 86 | from models.preactresnet import preactresnet101 87 | net = preactresnet101() 88 | elif args.net == 'preactresnet152': 89 | from models.preactresnet import preactresnet152 90 | net = preactresnet152() 91 | elif args.net == 'resnext50': 92 | from models.resnext import resnext50 93 | net = resnext50() 94 | elif args.net == 'resnext101': 95 | from models.resnext import resnext101 96 | net = resnext101() 97 | elif args.net == 'resnext152': 98 | from models.resnext import resnext152 99 | net = resnext152() 100 | elif args.net == 'shufflenet': 101 | from models.shufflenet import shufflenet 102 | net = shufflenet() 103 | elif args.net == 'shufflenetv2': 104 | from models.shufflenetv2 import shufflenetv2 105 | net = shufflenetv2() 106 | elif args.net == 'squeezenet': 107 | from models.squeezenet import squeezenet 108 | net = squeezenet() 109 | elif args.net == 'mobilenet': 110 | from models.mobilenet import mobilenet 111 | net = mobilenet() 112 | elif args.net == 'mobilenetv2': 113 | from models.mobilenetv2 import mobilenetv2 114 | net = mobilenetv2() 115 | elif args.net == 'nasnet': 116 | from models.nasnet 
import nasnet
117 |         net = nasnet()
118 |     elif args.net == 'attention56':
119 |         from models.attention import attention56
120 |         net = attention56()
121 |     elif args.net == 'attention92':
122 |         from models.attention import attention92
123 |         net = attention92()
124 |     elif args.net == 'seresnet18':
125 |         from models.senet import seresnet18
126 |         net = seresnet18()
127 |     elif args.net == 'seresnet34':
128 |         from models.senet import seresnet34
129 |         net = seresnet34()
130 |     elif args.net == 'seresnet50':
131 |         from models.senet import seresnet50
132 |         net = seresnet50()
133 |     elif args.net == 'seresnet101':
134 |         from models.senet import seresnet101
135 |         net = seresnet101()
136 |     elif args.net == 'seresnet152':
137 |         from models.senet import seresnet152
138 |         net = seresnet152()
139 |     elif args.net == 'efficientnet':
140 |         from efficientnet_pytorch import EfficientNet
141 |         net = EfficientNet.from_pretrained('efficientnet-b1', num_classes=2)
142 |     elif args.net == 'efficientnet-b2':
143 |         from efficientnet_pytorch import EfficientNet
144 |         net = EfficientNet.from_pretrained('efficientnet-b2', num_classes=2)
145 | 
146 |     else:
147 |         print('the network name you have entered is not supported yet')
148 |         sys.exit()
149 | 
150 |     if use_gpu:
151 |         net = net.cuda()
152 | 
153 |     return net
154 | 
155 | 
156 | def get_training_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
157 |     """ return training dataloader
158 |     Args:
159 |         mean: mean of cifar100 training dataset
160 |         std: std of cifar100 training dataset
161 |         path: path to cifar100 training python dataset
162 |         batch_size: dataloader batchsize
163 |         num_workers: dataloader num_workers
164 |         shuffle: whether to shuffle
165 |     Returns: train_data_loader: torch dataloader object
166 |     """
167 | 
168 |     transform_train = transforms.Compose([
169 |         #transforms.ToPILImage(),
170 |         transforms.RandomCrop(32, padding=4),
171 |         transforms.RandomHorizontalFlip(),
172 |         transforms.RandomRotation(15),
173 |         transforms.ToTensor(),
174 |         transforms.Normalize(mean, std)
175 |     ])
176 |     #cifar100_training = CIFAR100Train(path, transform=transform_train)
177 |     cifar100_training = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
178 |     cifar100_training_loader = DataLoader(
179 |         cifar100_training, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)
180 | 
181 |     return cifar100_training_loader
182 | 
183 | def get_test_dataloader(mean, std, batch_size=16, num_workers=2, shuffle=True):
184 |     """ return test dataloader
185 |     Args:
186 |         mean: mean of cifar100 test dataset
187 |         std: std of cifar100 test dataset
188 |         path: path to cifar100 test python dataset
189 |         batch_size: dataloader batchsize
190 |         num_workers: dataloader num_workers
191 |         shuffle: whether to shuffle
192 |     Returns: cifar100_test_loader: torch dataloader object
193 |     """
194 | 
195 |     transform_test = transforms.Compose([
196 |         transforms.ToTensor(),
197 |         transforms.Normalize(mean, std)
198 |     ])
199 |     #cifar100_test = CIFAR100Test(path, transform=transform_test)
200 |     cifar100_test = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)
201 |     cifar100_test_loader = DataLoader(
202 |         cifar100_test, shuffle=shuffle, num_workers=num_workers, batch_size=batch_size)
203 | 
204 |     return cifar100_test_loader
205 | 
206 | def compute_mean_std(cifar100_dataset):
207 |     """compute the mean and std of cifar100 dataset
208 |     Args:
209 |         cifar100_training_dataset or cifar100_test_dataset
210 |         which
derives from torch.utils.data.Dataset
211 | 
212 |     Returns:
213 |         a tuple containing the mean and std of the entire dataset
214 |     """
215 | 
216 |     data_r = numpy.dstack([cifar100_dataset[i][1][:, :, 0] for i in range(len(cifar100_dataset))])
217 |     data_g = numpy.dstack([cifar100_dataset[i][1][:, :, 1] for i in range(len(cifar100_dataset))])
218 |     data_b = numpy.dstack([cifar100_dataset[i][1][:, :, 2] for i in range(len(cifar100_dataset))])
219 |     mean = numpy.mean(data_r), numpy.mean(data_g), numpy.mean(data_b)
220 |     std = numpy.std(data_r), numpy.std(data_g), numpy.std(data_b)
221 | 
222 |     return mean, std
223 | 
224 | class WarmUpLR(_LRScheduler):
225 |     """warmup_training learning rate scheduler
226 |     Args:
227 |         optimizer: optimizer (e.g. SGD)
228 |         total_iters: total iterations of the warmup phase
229 |     """
230 |     def __init__(self, optimizer, total_iters, last_epoch=-1):
231 | 
232 |         self.total_iters = total_iters
233 |         super().__init__(optimizer, last_epoch)
234 | 
235 |     def get_lr(self):
236 |         """we will use the first m batches, and set the learning
237 |         rate to base_lr * m / total_iters
238 |         """
239 |         return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]
240 | 
241 | 
242 | 
243 | 
244 | class Logger(object):
245 |     def __init__(self):
246 |         self.terminal = sys.stdout  #stdout
247 |         self.file = None
248 | 
249 |     def open(self, file, mode=None):
250 |         if mode is None: mode = 'w'
251 |         self.file = open(file, mode)
252 | 
253 |     def write(self, message, is_terminal=1, is_file=1):
254 |         if '\r' in message: is_file = 0
255 | 
256 |         if is_terminal == 1:
257 |             self.terminal.write(message)
258 |             self.terminal.flush()
259 |             #time.sleep(1)
260 | 
261 |         if is_file == 1:
262 |             self.file.write(message)
263 |             self.file.flush()
264 | 
265 |     def flush(self):
266 |         # this flush method is needed for python 3 compatibility.
267 |         # this handles the flush command by doing nothing.
268 |         # you might want to specify some extra behavior here.
269 |         pass
270 | 
-------------------------------------------------------------------------------- /models/nasnet.py: --------------------------------------------------------------------------------
1 | """nasnet in pytorch
2 | 
3 | 
4 | 
5 | [1] Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc V.
Le 6 | 7 | Learning Transferable Architectures for Scalable Image Recognition 8 | https://arxiv.org/abs/1707.07012 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | class SeperableConv2d(nn.Module): 15 | 16 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 17 | 18 | super().__init__() 19 | self.depthwise = nn.Conv2d( 20 | input_channels, 21 | input_channels, 22 | kernel_size, 23 | groups=input_channels, 24 | **kwargs 25 | ) 26 | 27 | self.pointwise = nn.Conv2d( 28 | input_channels, 29 | output_channels, 30 | 1 31 | ) 32 | def forward(self, x): 33 | x = self.depthwise(x) 34 | x = self.pointwise(x) 35 | 36 | return x 37 | 38 | class SeperableBranch(nn.Module): 39 | 40 | def __init__(self, input_channels, output_channels, kernel_size, **kwargs): 41 | """Adds 2 blocks of [relu-separable conv-batchnorm].""" 42 | super().__init__() 43 | self.block1 = nn.Sequential( 44 | nn.ReLU(), 45 | SeperableConv2d(input_channels, output_channels, kernel_size, **kwargs), 46 | nn.BatchNorm2d(output_channels) 47 | ) 48 | 49 | self.block2 = nn.Sequential( 50 | nn.ReLU(), 51 | SeperableConv2d(output_channels, output_channels, kernel_size, stride=1, padding=int(kernel_size / 2)), 52 | nn.BatchNorm2d(output_channels) 53 | ) 54 | 55 | def forward(self, x): 56 | x = self.block1(x) 57 | x = self.block2(x) 58 | 59 | return x 60 | 61 | class Fit(nn.Module): 62 | """Make the cell outputs compatible 63 | 64 | Args: 65 | prev_filters: filter number of tensor prev, needs to be modified 66 | filters: filter number of normal cell branch output filters 67 | """ 68 | 69 | def __init__(self, prev_filters, filters): 70 | super().__init__() 71 | self.relu = nn.ReLU() 72 | 73 | self.p1 = nn.Sequential( 74 | nn.AvgPool2d(1, stride=2), 75 | nn.Conv2d(prev_filters, int(filters / 2), 1) 76 | ) 77 | 78 | #make sure there is no information loss 79 | self.p2 = nn.Sequential( 80 | nn.ConstantPad2d((0, 1, 0, 1), 0), 81 | nn.ConstantPad2d((-1, 0, -1, 0), 0), #cropping 82 | nn.AvgPool2d(1, stride=2), 83 | nn.Conv2d(prev_filters, int(filters / 2), 1) 84 | ) 85 | 86 | self.bn = nn.BatchNorm2d(filters) 87 | 88 | self.dim_reduce = nn.Sequential( 89 | nn.ReLU(), 90 | nn.Conv2d(prev_filters, filters, 1), 91 | nn.BatchNorm2d(filters) 92 | ) 93 | 94 | self.filters = filters 95 | 96 | def forward(self, inputs): 97 | x, prev = inputs 98 | if prev is None: 99 | return x 100 | 101 | #image size does not match 102 | elif x.size(2) != prev.size(2): 103 | prev = self.relu(prev) 104 | p1 = self.p1(prev) 105 | p2 = self.p2(prev) 106 | prev = torch.cat([p1, p2], 1) 107 | prev = self.bn(prev) 108 | 109 | elif prev.size(1) != self.filters: 110 | prev = self.dim_reduce(prev) 111 | 112 | return prev 113 | 114 | 115 | class NormalCell(nn.Module): 116 | 117 | def __init__(self, x_in, prev_in, output_channels): 118 | super().__init__() 119 | 120 | self.dem_reduce = nn.Sequential( 121 | nn.ReLU(), 122 | nn.Conv2d(x_in, output_channels, 1, bias=False), 123 | nn.BatchNorm2d(output_channels) 124 | ) 125 | 126 | self.block1_left = SeperableBranch( 127 | output_channels, 128 | output_channels, 129 | kernel_size=3, 130 | padding=1, 131 | bias=False 132 | ) 133 | self.block1_right = nn.Sequential() 134 | 135 | self.block2_left = SeperableBranch( 136 | output_channels, 137 | output_channels, 138 | kernel_size=3, 139 | padding=1, 140 | bias=False 141 | ) 142 | self.block2_right = SeperableBranch( 143 | output_channels, 144 | output_channels, 145 | kernel_size=5, 146 | padding=2, 147 | bias=False 148 | ) 149 | 150 | self.block3_left 
= nn.AvgPool2d(3, stride=1, padding=1) 151 | self.block3_right = nn.Sequential() 152 | 153 | self.block4_left = nn.AvgPool2d(3, stride=1, padding=1) 154 | self.block4_right = nn.AvgPool2d(3, stride=1, padding=1) 155 | 156 | self.block5_left = SeperableBranch( 157 | output_channels, 158 | output_channels, 159 | kernel_size=5, 160 | padding=2, 161 | bias=False 162 | ) 163 | self.block5_right = SeperableBranch( 164 | output_channels, 165 | output_channels, 166 | kernel_size=3, 167 | padding=1, 168 | bias=False 169 | ) 170 | 171 | self.fit = Fit(prev_in, output_channels) 172 | 173 | def forward(self, x): 174 | x, prev = x 175 | 176 | #return transformed x as new x, and original x as prev 177 | #only prev tensor needs to be modified 178 | prev = self.fit((x, prev)) 179 | 180 | h = self.dem_reduce(x) 181 | 182 | x1 = self.block1_left(h) + self.block1_right(h) 183 | x2 = self.block2_left(prev) + self.block2_right(h) 184 | x3 = self.block3_left(h) + self.block3_right(h) 185 | x4 = self.block4_left(prev) + self.block4_right(prev) 186 | x5 = self.block5_left(prev) + self.block5_right(prev) 187 | 188 | return torch.cat([prev, x1, x2, x3, x4, x5], 1), x 189 | 190 | class ReductionCell(nn.Module): 191 | 192 | def __init__(self, x_in, prev_in, output_channels): 193 | super().__init__() 194 | 195 | self.dim_reduce = nn.Sequential( 196 | nn.ReLU(), 197 | nn.Conv2d(x_in, output_channels, 1), 198 | nn.BatchNorm2d(output_channels) 199 | ) 200 | 201 | #block1 202 | self.layer1block1_left = SeperableBranch(output_channels, output_channels, 7, stride=2, padding=3) 203 | self.layer1block1_right = SeperableBranch(output_channels, output_channels, 5, stride=2, padding=2) 204 | 205 | #block2 206 | self.layer1block2_left = nn.MaxPool2d(3, stride=2, padding=1) 207 | self.layer1block2_right = SeperableBranch(output_channels, output_channels, 7, stride=2, padding=3) 208 | 209 | #block3 210 | self.layer1block3_left = nn.AvgPool2d(3, 2, 1) 211 | self.layer1block3_right = SeperableBranch(output_channels, output_channels, 5, stride=2, padding=2) 212 | 213 | #block5 214 | self.layer2block1_left = nn.MaxPool2d(3, 2, 1) 215 | self.layer2block1_right = SeperableBranch(output_channels, output_channels, 3, stride=1, padding=1) 216 | 217 | #block4 218 | self.layer2block2_left = nn.AvgPool2d(3, 1, 1) 219 | self.layer2block2_right = nn.Sequential() 220 | 221 | self.fit = Fit(prev_in, output_channels) 222 | 223 | def forward(self, x): 224 | x, prev = x 225 | prev = self.fit((x, prev)) 226 | 227 | h = self.dim_reduce(x) 228 | 229 | layer1block1 = self.layer1block1_left(prev) + self.layer1block1_right(h) 230 | layer1block2 = self.layer1block2_left(h) + self.layer1block2_right(prev) 231 | layer1block3 = self.layer1block3_left(h) + self.layer1block3_right(prev) 232 | layer2block1 = self.layer2block1_left(h) + self.layer2block1_right(layer1block1) 233 | layer2block2 = self.layer2block2_left(layer1block1) + self.layer2block2_right(layer1block2) 234 | 235 | return torch.cat([ 236 | layer1block2, #https://github.com/keras-team/keras-applications/blob/master/keras_applications/nasnet.py line 739 237 | layer1block3, 238 | layer2block1, 239 | layer2block2 240 | ], 1), x 241 | 242 | 243 | class NasNetA(nn.Module): 244 | 245 | def __init__(self, repeat_cell_num, reduction_num, filters, stemfilter, class_num=100): 246 | super().__init__() 247 | 248 | self.stem = nn.Sequential( 249 | nn.Conv2d(3, stemfilter, 3, padding=1, bias=False), 250 | nn.BatchNorm2d(stemfilter) 251 | ) 252 | 253 | self.prev_filters = stemfilter 254 | self.x_filters = 
stemfilter 255 | self.filters = filters 256 | 257 | self.cell_layers = self._make_layers(repeat_cell_num, reduction_num) 258 | 259 | self.relu = nn.ReLU() 260 | self.avg = nn.AdaptiveAvgPool2d(1) 261 | self.fc = nn.Linear(self.filters * 6, class_num) 262 | 263 | 264 | def _make_normal(self, block, repeat, output): 265 | """make normal cell 266 | Args: 267 | block: cell type 268 | repeat: number of repeated normal cell 269 | output: output filters for each branch in normal cell 270 | Returns: 271 | stacked normal cells 272 | """ 273 | 274 | layers = [] 275 | for r in range(repeat): 276 | layers.append(block(self.x_filters, self.prev_filters, output)) 277 | self.prev_filters = self.x_filters 278 | self.x_filters = output * 6 #concatenate 6 branches 279 | 280 | return layers 281 | 282 | def _make_reduction(self, block, output): 283 | """make normal cell 284 | Args: 285 | block: cell type 286 | output: output filters for each branch in reduction cell 287 | Returns: 288 | reduction cell 289 | """ 290 | 291 | reduction = block(self.x_filters, self.prev_filters, output) 292 | self.prev_filters = self.x_filters 293 | self.x_filters = output * 4 #stack for 4 branches 294 | 295 | return reduction 296 | 297 | def _make_layers(self, repeat_cell_num, reduction_num): 298 | 299 | layers = [] 300 | for i in range(reduction_num): 301 | 302 | layers.extend(self._make_normal(NormalCell, repeat_cell_num, self.filters)) 303 | self.filters *= 2 304 | layers.append(self._make_reduction(ReductionCell, self.filters)) 305 | 306 | layers.extend(self._make_normal(NormalCell, repeat_cell_num, self.filters)) 307 | 308 | return nn.Sequential(*layers) 309 | 310 | 311 | def forward(self, x): 312 | 313 | x = self.stem(x) 314 | prev = None 315 | x, prev = self.cell_layers((x, prev)) 316 | x = self.relu(x) 317 | x = self.avg(x) 318 | x = x.view(x.size(0), -1) 319 | x = self.fc(x) 320 | 321 | return x 322 | 323 | 324 | def nasnet(): 325 | 326 | #stem filters must be 44, it's a pytorch workaround, cant change to other number 327 | return NasNetA(4, 2, 44, 44) 328 | 329 | -------------------------------------------------------------------------------- /models/inceptionv3.py: -------------------------------------------------------------------------------- 1 | """ inceptionv3 in pytorch 2 | 3 | 4 | [1] Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna 5 | 6 | Rethinking the Inception Architecture for Computer Vision 7 | https://arxiv.org/abs/1512.00567v3 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | class BasicConv2d(nn.Module): 15 | 16 | def __init__(self, input_channels, output_channels, **kwargs): 17 | super().__init__() 18 | self.conv = nn.Conv2d(input_channels, output_channels, bias=False, **kwargs) 19 | self.bn = nn.BatchNorm2d(output_channels) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | x = self.conv(x) 24 | x = self.bn(x) 25 | x = self.relu(x) 26 | 27 | return x 28 | 29 | #same naive inception module 30 | class InceptionA(nn.Module): 31 | 32 | def __init__(self, input_channels, pool_features): 33 | super().__init__() 34 | self.branch1x1 = BasicConv2d(input_channels, 64, kernel_size=1) 35 | 36 | self.branch5x5 = nn.Sequential( 37 | BasicConv2d(input_channels, 48, kernel_size=1), 38 | BasicConv2d(48, 64, kernel_size=5, padding=2) 39 | ) 40 | 41 | self.branch3x3 = nn.Sequential( 42 | BasicConv2d(input_channels, 64, kernel_size=1), 43 | BasicConv2d(64, 96, kernel_size=3, padding=1), 44 | BasicConv2d(96, 96, kernel_size=3, padding=1) 45 
| ) 46 | 47 | self.branchpool = nn.Sequential( 48 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 49 | BasicConv2d(input_channels, pool_features, kernel_size=3, padding=1) 50 | ) 51 | 52 | def forward(self, x): 53 | 54 | #x -> 1x1(same) 55 | branch1x1 = self.branch1x1(x) 56 | 57 | #x -> 1x1 -> 5x5(same) 58 | branch5x5 = self.branch5x5(x) 59 | #branch5x5 = self.branch5x5_2(branch5x5) 60 | 61 | #x -> 1x1 -> 3x3 -> 3x3(same) 62 | branch3x3 = self.branch3x3(x) 63 | 64 | #x -> pool -> 1x1(same) 65 | branchpool = self.branchpool(x) 66 | 67 | outputs = [branch1x1, branch5x5, branch3x3, branchpool] 68 | 69 | return torch.cat(outputs, 1) 70 | 71 | #downsample 72 | #Factorization into smaller convolutions 73 | class InceptionB(nn.Module): 74 | 75 | def __init__(self, input_channels): 76 | super().__init__() 77 | 78 | self.branch3x3 = BasicConv2d(input_channels, 384, kernel_size=3, stride=2) 79 | 80 | self.branch3x3stack = nn.Sequential( 81 | BasicConv2d(input_channels, 64, kernel_size=1), 82 | BasicConv2d(64, 96, kernel_size=3, padding=1), 83 | BasicConv2d(96, 96, kernel_size=3, stride=2) 84 | ) 85 | 86 | self.branchpool = nn.MaxPool2d(kernel_size=3, stride=2) 87 | 88 | def forward(self, x): 89 | 90 | #x - > 3x3(downsample) 91 | branch3x3 = self.branch3x3(x) 92 | 93 | #x -> 3x3 -> 3x3(downsample) 94 | branch3x3stack = self.branch3x3stack(x) 95 | 96 | #x -> avgpool(downsample) 97 | branchpool = self.branchpool(x) 98 | 99 | #"""We can use two parallel stride 2 blocks: P and C. P is a pooling 100 | #layer (either average or maximum pooling) the activation, both of 101 | #them are stride 2 the filter banks of which are concatenated as in 102 | #figure 10.""" 103 | outputs = [branch3x3, branch3x3stack, branchpool] 104 | 105 | return torch.cat(outputs, 1) 106 | 107 | #Factorizing Convolutions with Large Filter Size 108 | class InceptionC(nn.Module): 109 | def __init__(self, input_channels, channels_7x7): 110 | super().__init__() 111 | self.branch1x1 = BasicConv2d(input_channels, 192, kernel_size=1) 112 | 113 | c7 = channels_7x7 114 | 115 | #In theory, we could go even further and argue that one can replace any n × n 116 | #convolution by a 1 × n convolution followed by a n × 1 convolution and the 117 | #computational cost saving increases dramatically as n grows (see figure 6). 
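# A rough back-of-the-envelope count (not from the paper, just to make the
# saving concrete): a full 7x7 conv mapping C channels to C channels costs
# 7*7*C*C = 49*C^2 weights, while a factorized 7x1 + 1x7 pair like the one
# built below costs 7*C*C + 7*C*C = 14*C^2 -- about 3.5x fewer parameters
# for the same receptive field.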
118 | self.branch7x7 = nn.Sequential( 119 | BasicConv2d(input_channels, c7, kernel_size=1), 120 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 121 | BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) 122 | ) 123 | 124 | self.branch7x7stack = nn.Sequential( 125 | BasicConv2d(input_channels, c7, kernel_size=1), 126 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 127 | BasicConv2d(c7, c7, kernel_size=(1, 7), padding=(0, 3)), 128 | BasicConv2d(c7, c7, kernel_size=(7, 1), padding=(3, 0)), 129 | BasicConv2d(c7, 192, kernel_size=(1, 7), padding=(0, 3)) 130 | ) 131 | 132 | self.branch_pool = nn.Sequential( 133 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 134 | BasicConv2d(input_channels, 192, kernel_size=1), 135 | ) 136 | 137 | def forward(self, x): 138 | 139 | #x -> 1x1(same) 140 | branch1x1 = self.branch1x1(x) 141 | 142 | #x -> 1layer 1*7 and 7*1 (same) 143 | branch7x7 = self.branch7x7(x) 144 | 145 | #x-> 2layer 1*7 and 7*1(same) 146 | branch7x7stack = self.branch7x7stack(x) 147 | 148 | #x-> avgpool (same) 149 | branchpool = self.branch_pool(x) 150 | 151 | outputs = [branch1x1, branch7x7, branch7x7stack, branchpool] 152 | 153 | return torch.cat(outputs, 1) 154 | 155 | class InceptionD(nn.Module): 156 | 157 | def __init__(self, input_channels): 158 | super().__init__() 159 | 160 | self.branch3x3 = nn.Sequential( 161 | BasicConv2d(input_channels, 192, kernel_size=1), 162 | BasicConv2d(192, 320, kernel_size=3, stride=2) 163 | ) 164 | 165 | self.branch7x7 = nn.Sequential( 166 | BasicConv2d(input_channels, 192, kernel_size=1), 167 | BasicConv2d(192, 192, kernel_size=(1, 7), padding=(0, 3)), 168 | BasicConv2d(192, 192, kernel_size=(7, 1), padding=(3, 0)), 169 | BasicConv2d(192, 192, kernel_size=3, stride=2) 170 | ) 171 | 172 | self.branchpool = nn.AvgPool2d(kernel_size=3, stride=2) 173 | 174 | def forward(self, x): 175 | 176 | #x -> 1x1 -> 3x3(downsample) 177 | branch3x3 = self.branch3x3(x) 178 | 179 | #x -> 1x1 -> 1x7 -> 7x1 -> 3x3 (downsample) 180 | branch7x7 = self.branch7x7(x) 181 | 182 | #x -> avgpool (downsample) 183 | branchpool = self.branchpool(x) 184 | 185 | outputs = [branch3x3, branch7x7, branchpool] 186 | 187 | return torch.cat(outputs, 1) 188 | 189 | 190 | #same 191 | class InceptionE(nn.Module): 192 | def __init__(self, input_channels): 193 | super().__init__() 194 | self.branch1x1 = BasicConv2d(input_channels, 320, kernel_size=1) 195 | 196 | self.branch3x3_1 = BasicConv2d(input_channels, 384, kernel_size=1) 197 | self.branch3x3_2a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 198 | self.branch3x3_2b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 199 | 200 | self.branch3x3stack_1 = BasicConv2d(input_channels, 448, kernel_size=1) 201 | self.branch3x3stack_2 = BasicConv2d(448, 384, kernel_size=3, padding=1) 202 | self.branch3x3stack_3a = BasicConv2d(384, 384, kernel_size=(1, 3), padding=(0, 1)) 203 | self.branch3x3stack_3b = BasicConv2d(384, 384, kernel_size=(3, 1), padding=(1, 0)) 204 | 205 | self.branch_pool = nn.Sequential( 206 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 207 | BasicConv2d(input_channels, 192, kernel_size=1) 208 | ) 209 | 210 | def forward(self, x): 211 | 212 | #x -> 1x1 (same) 213 | branch1x1 = self.branch1x1(x) 214 | 215 | # x -> 1x1 -> 3x1 216 | # x -> 1x1 -> 1x3 217 | # concatenate(3x1, 1x3) 218 | #"""7. Inception modules with expanded the filter bank outputs. 
219 | #This architecture is used on the coarsest (8 × 8) grids to promote 220 | #high dimensional representations, as suggested by principle 221 | #2 of Section 2.""" 222 | branch3x3 = self.branch3x3_1(x) 223 | branch3x3 = [ 224 | self.branch3x3_2a(branch3x3), 225 | self.branch3x3_2b(branch3x3) 226 | ] 227 | branch3x3 = torch.cat(branch3x3, 1) 228 | 229 | # x -> 1x1 -> 3x3 -> 1x3 230 | # x -> 1x1 -> 3x3 -> 3x1 231 | #concatenate(1x3, 3x1) 232 | branch3x3stack = self.branch3x3stack_1(x) 233 | branch3x3stack = self.branch3x3stack_2(branch3x3stack) 234 | branch3x3stack = [ 235 | self.branch3x3stack_3a(branch3x3stack), 236 | self.branch3x3stack_3b(branch3x3stack) 237 | ] 238 | branch3x3stack = torch.cat(branch3x3stack, 1) 239 | 240 | branchpool = self.branch_pool(x) 241 | 242 | outputs = [branch1x1, branch3x3, branch3x3stack, branchpool] 243 | 244 | return torch.cat(outputs, 1) 245 | 246 | class InceptionV3(nn.Module): 247 | 248 | def __init__(self, num_classes=100): 249 | super().__init__() 250 | self.Conv2d_1a_3x3 = BasicConv2d(3, 32, kernel_size=3, padding=1) 251 | self.Conv2d_2a_3x3 = BasicConv2d(32, 32, kernel_size=3, padding=1) 252 | self.Conv2d_2b_3x3 = BasicConv2d(32, 64, kernel_size=3, padding=1) 253 | self.Conv2d_3b_1x1 = BasicConv2d(64, 80, kernel_size=1) 254 | self.Conv2d_4a_3x3 = BasicConv2d(80, 192, kernel_size=3) 255 | 256 | #naive inception module 257 | self.Mixed_5b = InceptionA(192, pool_features=32) 258 | self.Mixed_5c = InceptionA(256, pool_features=64) 259 | self.Mixed_5d = InceptionA(288, pool_features=64) 260 | 261 | #downsample 262 | self.Mixed_6a = InceptionB(288) 263 | 264 | self.Mixed_6b = InceptionC(768, channels_7x7=128) 265 | self.Mixed_6c = InceptionC(768, channels_7x7=160) 266 | self.Mixed_6d = InceptionC(768, channels_7x7=160) 267 | self.Mixed_6e = InceptionC(768, channels_7x7=192) 268 | 269 | #downsample 270 | self.Mixed_7a = InceptionD(768) 271 | 272 | self.Mixed_7b = InceptionE(1280) 273 | self.Mixed_7c = InceptionE(2048) 274 | 275 | #6*6 feature size 276 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 277 | self.dropout = nn.Dropout2d() 278 | self.linear = nn.Linear(2048, num_classes) 279 | 280 | def forward(self, x): 281 | 282 | #32 -> 30 283 | x = self.Conv2d_1a_3x3(x) 284 | x = self.Conv2d_2a_3x3(x) 285 | x = self.Conv2d_2b_3x3(x) 286 | x = self.Conv2d_3b_1x1(x) 287 | x = self.Conv2d_4a_3x3(x) 288 | 289 | #30 -> 30 290 | x = self.Mixed_5b(x) 291 | x = self.Mixed_5c(x) 292 | x = self.Mixed_5d(x) 293 | 294 | #30 -> 14 295 | #Efficient Grid Size Reduction to avoid representation 296 | #bottleneck 297 | x = self.Mixed_6a(x) 298 | 299 | #14 -> 14 300 | #"""In practice, we have found that employing this factorization does not 301 | #work well on early layers, but it gives very good results on medium 302 | #grid-sizes (On m × m feature maps, where m ranges between 12 and 20). 
303 | #On that level, very good results can be achieved by using 1 × 7 convolutions 304 | #followed by 7 × 1 convolutions.""" 305 | x = self.Mixed_6b(x) 306 | x = self.Mixed_6c(x) 307 | x = self.Mixed_6d(x) 308 | x = self.Mixed_6e(x) 309 | 310 | #14 -> 6 311 | #Efficient Grid Size Reduction 312 | x = self.Mixed_7a(x) 313 | 314 | #6 -> 6 315 | #"""We are using this solution only on the coarsest grid, 316 | #since that is the place where producing high dimensional 317 | #sparse representation is the most critical as the ratio of 318 | #local processing (by 1 × 1 convolutions) is increased compared 319 | #to the spatial aggregation.""" 320 | x = self.Mixed_7b(x) 321 | x = self.Mixed_7c(x) 322 | 323 | #6 -> 1 324 | x = self.avgpool(x) 325 | x = self.dropout(x) 326 | x = x.view(x.size(0), -1) 327 | x = self.linear(x) 328 | return x 329 | 330 | 331 | def inceptionv3(): 332 | return InceptionV3() 333 | 334 | 335 | 336 | -------------------------------------------------------------------------------- /models/attention.py: -------------------------------------------------------------------------------- 1 | """residual attention network in pytorch 2 | 3 | 4 | 5 | [1] Fei Wang, Mengqing Jiang, Chen Qian, Shuo Yang, Cheng Li, Honggang Zhang, Xiaogang Wang, Xiaoou Tang 6 | 7 | Residual Attention Network for Image Classification 8 | https://arxiv.org/abs/1704.06904 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | #"""The Attention Module is built by pre-activation Residual Unit [11] with the 16 | #number of channels in each stage is the same as ResNet [10].""" 17 | 18 | class PreActResidualUnit(nn.Module): 19 | """PreAct Residual Unit 20 | Args: 21 | in_channels: residual unit input channel number 22 | out_channels: residual unit output channel number 23 | stride: stride of the residual unit; when stride = 2, the feature map is downsampled 24 | """ 25 | 26 | def __init__(self, in_channels, out_channels, stride): 27 | super().__init__() 28 | 29 | bottleneck_channels = int(out_channels / 4) 30 | self.residual_function = nn.Sequential( 31 | #1x1 conv 32 | nn.BatchNorm2d(in_channels), 33 | nn.ReLU(inplace=True), 34 | nn.Conv2d(in_channels, bottleneck_channels, 1, stride), 35 | 36 | #3x3 conv 37 | nn.BatchNorm2d(bottleneck_channels), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(bottleneck_channels, bottleneck_channels, 3, padding=1), 40 | 41 | #1x1 conv 42 | nn.BatchNorm2d(bottleneck_channels), 43 | nn.ReLU(inplace=True), 44 | nn.Conv2d(bottleneck_channels, out_channels, 1) 45 | ) 46 | 47 | self.shortcut = nn.Sequential() 48 | if stride != 1 or (in_channels != out_channels): 49 | self.shortcut = nn.Conv2d(in_channels, out_channels, 1, stride=stride) 50 | 51 | def forward(self, x): 52 | 53 | res = self.residual_function(x) 54 | shortcut = self.shortcut(x) 55 | 56 | return res + shortcut 57 | 58 | class AttentionModule1(nn.Module): 59 | 60 | def __init__(self, in_channels, out_channels, p=1, t=2, r=1): 61 | super().__init__() 62 | #"""The hyperparameter p denotes the number of preprocessing Residual 63 | #Units before splitting into trunk branch and mask branch. t denotes 64 | #the number of Residual Units in trunk branch. 
r denotes the number of 65 | #Residual Units between adjacent pooling layer in the mask branch.""" 66 | assert in_channels == out_channels 67 | 68 | self.pre = self._make_residual(in_channels, out_channels, p) 69 | self.trunk = self._make_residual(in_channels, out_channels, t) 70 | self.soft_resdown1 = self._make_residual(in_channels, out_channels, r) 71 | self.soft_resdown2 = self._make_residual(in_channels, out_channels, r) 72 | self.soft_resdown3 = self._make_residual(in_channels, out_channels, r) 73 | self.soft_resdown4 = self._make_residual(in_channels, out_channels, r) 74 | 75 | self.soft_resup1 = self._make_residual(in_channels, out_channels, r) 76 | self.soft_resup2 = self._make_residual(in_channels, out_channels, r) 77 | self.soft_resup3 = self._make_residual(in_channels, out_channels, r) 78 | self.soft_resup4 = self._make_residual(in_channels, out_channels, r) 79 | 80 | self.shortcut_short = PreActResidualUnit(in_channels, out_channels, 1) 81 | self.shortcut_long = PreActResidualUnit(in_channels, out_channels, 1) 82 | 83 | self.sigmoid = nn.Sequential( 84 | nn.BatchNorm2d(out_channels), 85 | nn.ReLU(inplace=True), 86 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 87 | nn.BatchNorm2d(out_channels), 88 | nn.ReLU(inplace=True), 89 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 90 | nn.Sigmoid() 91 | ) 92 | 93 | self.last = self._make_residual(in_channels, out_channels, p) 94 | 95 | def forward(self, x): 96 | ###We make the size of the smallest output map in each mask branch 7*7 to be consistent 97 | #with the smallest trunk output map size. 98 | ###Thus 3,2,1 max-pooling layers are used in mask branch with input size 56 * 56, 28 * 28, 14 * 14 respectively. 99 | x = self.pre(x) 100 | input_size = (x.size(2), x.size(3)) 101 | 102 | x_t = self.trunk(x) 103 | 104 | #first downsample out 28 105 | x_s = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 106 | x_s = self.soft_resdown1(x_s) 107 | 108 | #28 shortcut 109 | shape1 = (x_s.size(2), x_s.size(3)) 110 | shortcut_long = self.shortcut_long(x_s) 111 | 112 | #second downsample out 14 113 | x_s = F.max_pool2d(x_s, kernel_size=3, stride=2, padding=1) 114 | x_s = self.soft_resdown2(x_s) 115 | 116 | #14 shortcut 117 | shape2 = (x_s.size(2), x_s.size(3)) 118 | shortcut_short = self.shortcut_short(x_s) 119 | 120 | #third downsample out 7 121 | x_s = F.max_pool2d(x_s, kernel_size=3, stride=2, padding=1) 122 | x_s = self.soft_resdown3(x_s) 123 | 124 | #mid 125 | x_s = self.soft_resdown4(x_s) 126 | x_s = self.soft_resup1(x_s) 127 | 128 | #first upsample out 14 129 | x_s = self.soft_resup2(x_s) 130 | x_s = F.interpolate(x_s, size=shape2) 131 | x_s += shortcut_short 132 | 133 | #second upsample out 28 134 | x_s = self.soft_resup3(x_s) 135 | x_s = F.interpolate(x_s, size=shape1) 136 | x_s += shortcut_long 137 | 138 | #third upsample out 56 139 | x_s = self.soft_resup4(x_s) 140 | x_s = F.interpolate(x_s, size=input_size) 141 | 142 | x_s = self.sigmoid(x_s) 143 | x = (1 + x_s) * x_t 144 | x = self.last(x) 145 | 146 | return x 147 | 148 | def _make_residual(self, in_channels, out_channels, p): 149 | 150 | layers = [] 151 | for _ in range(p): 152 | layers.append(PreActResidualUnit(in_channels, out_channels, 1)) 153 | 154 | return nn.Sequential(*layers) 155 | 156 | class AttentionModule2(nn.Module): 157 | 158 | def __init__(self, in_channels, out_channels, p=1, t=2, r=1): 159 | super().__init__() 160 | #"""The hyperparameter p denotes the number of preprocessing Residual 161 | #Units before splitting into trunk branch and mask branch. 
t denotes 162 | #the number of Residual Units in trunk branch. r denotes the number of 163 | #Residual Units between adjacent pooling layer in the mask branch.""" 164 | assert in_channels == out_channels 165 | 166 | self.pre = self._make_residual(in_channels, out_channels, p) 167 | self.trunk = self._make_residual(in_channels, out_channels, t) 168 | self.soft_resdown1 = self._make_residual(in_channels, out_channels, r) 169 | self.soft_resdown2 = self._make_residual(in_channels, out_channels, r) 170 | self.soft_resdown3 = self._make_residual(in_channels, out_channels, r) 171 | 172 | self.soft_resup1 = self._make_residual(in_channels, out_channels, r) 173 | self.soft_resup2 = self._make_residual(in_channels, out_channels, r) 174 | self.soft_resup3 = self._make_residual(in_channels, out_channels, r) 175 | 176 | self.shortcut = PreActResidualUnit(in_channels, out_channels, 1) 177 | 178 | self.sigmoid = nn.Sequential( 179 | nn.BatchNorm2d(out_channels), 180 | nn.ReLU(inplace=True), 181 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 182 | nn.BatchNorm2d(out_channels), 183 | nn.ReLU(inplace=True), 184 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 185 | nn.Sigmoid() 186 | ) 187 | 188 | self.last = self._make_residual(in_channels, out_channels, p) 189 | 190 | def forward(self, x): 191 | x = self.pre(x) 192 | input_size = (x.size(2), x.size(3)) 193 | 194 | x_t = self.trunk(x) 195 | 196 | #first downsample out 14 197 | x_s = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 198 | x_s = self.soft_resdown1(x_s) 199 | 200 | #14 shortcut 201 | shape1 = (x_s.size(2), x_s.size(3)) 202 | shortcut = self.shortcut(x_s) 203 | 204 | #second downsample out 7 205 | x_s = F.max_pool2d(x_s, kernel_size=3, stride=2, padding=1) 206 | x_s = self.soft_resdown2(x_s) 207 | 208 | #mid 209 | x_s = self.soft_resdown3(x_s) 210 | x_s = self.soft_resup1(x_s) 211 | 212 | #first upsample out 14 213 | x_s = self.soft_resup2(x_s) 214 | x_s = F.interpolate(x_s, size=shape1) 215 | x_s += shortcut 216 | 217 | #second upsample out 28 218 | x_s = self.soft_resup3(x_s) 219 | x_s = F.interpolate(x_s, size=input_size) 220 | 221 | x_s = self.sigmoid(x_s) 222 | x = (1 + x_s) * x_t 223 | x = self.last(x) 224 | 225 | return x 226 | 227 | def _make_residual(self, in_channels, out_channels, p): 228 | 229 | layers = [] 230 | for _ in range(p): 231 | layers.append(PreActResidualUnit(in_channels, out_channels, 1)) 232 | 233 | return nn.Sequential(*layers) 234 | 235 | class AttentionModule3(nn.Module): 236 | 237 | def __init__(self, in_channels, out_channels, p=1, t=2, r=1): 238 | super().__init__() 239 | 240 | assert in_channels == out_channels 241 | 242 | self.pre = self._make_residual(in_channels, out_channels, p) 243 | self.trunk = self._make_residual(in_channels, out_channels, t) 244 | self.soft_resdown1 = self._make_residual(in_channels, out_channels, r) 245 | self.soft_resdown2 = self._make_residual(in_channels, out_channels, r) 246 | 247 | self.soft_resup1 = self._make_residual(in_channels, out_channels, r) 248 | self.soft_resup2 = self._make_residual(in_channels, out_channels, r) 249 | 250 | self.shortcut = PreActResidualUnit(in_channels, out_channels, 1) 251 | 252 | self.sigmoid = nn.Sequential( 253 | nn.BatchNorm2d(out_channels), 254 | nn.ReLU(inplace=True), 255 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 256 | nn.BatchNorm2d(out_channels), 257 | nn.ReLU(inplace=True), 258 | nn.Conv2d(out_channels, out_channels, kernel_size=1), 259 | nn.Sigmoid() 260 | ) 261 | 262 | self.last = 
self._make_residual(in_channels, out_channels, p) 263 | 264 | def forward(self, x): 265 | x = self.pre(x) 266 | input_size = (x.size(2), x.size(3)) 267 | 268 | x_t = self.trunk(x) 269 | 270 | #first downsample out 14 271 | x_s = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) 272 | x_s = self.soft_resdown1(x_s) 273 | 274 | #mid 275 | x_s = self.soft_resdown2(x_s) 276 | x_s = self.soft_resup1(x_s) 277 | 278 | #first upsample out 14 279 | x_s = self.soft_resup2(x_s) 280 | x_s = F.interpolate(x_s, size=input_size) 281 | 282 | x_s = self.sigmoid(x_s) 283 | x = (1 + x_s) * x_t 284 | x = self.last(x) 285 | 286 | return x 287 | 288 | def _make_residual(self, in_channels, out_channels, p): 289 | 290 | layers = [] 291 | for _ in range(p): 292 | layers.append(PreActResidualUnit(in_channels, out_channels, 1)) 293 | 294 | return nn.Sequential(*layers) 295 | 296 | class Attention(nn.Module): 297 | """residual attention network 298 | Args: 299 | block_num: attention module number for each stage 300 | """ 301 | 302 | def __init__(self, block_num, class_num=100): 303 | 304 | super().__init__() 305 | self.pre_conv = nn.Sequential( 306 | nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1), 307 | nn.BatchNorm2d(64), 308 | nn.ReLU(inplace=True) 309 | ) 310 | 311 | self.stage1 = self._make_stage(64, 256, block_num[0], AttentionModule1) 312 | self.stage2 = self._make_stage(256, 512, block_num[1], AttentionModule2) 313 | self.stage3 = self._make_stage(512, 1024, block_num[2], AttentionModule3) 314 | self.stage4 = nn.Sequential( 315 | PreActResidualUnit(1024, 2048, 2), 316 | PreActResidualUnit(2048, 2048, 1), 317 | PreActResidualUnit(2048, 2048, 1) 318 | ) 319 | self.avg = nn.AdaptiveAvgPool2d(1) 320 | self.linear = nn.Linear(2048, class_num) 321 | 322 | def forward(self, x): 323 | x = self.pre_conv(x) 324 | x = self.stage1(x) 325 | x = self.stage2(x) 326 | x = self.stage3(x) 327 | x = self.stage4(x) 328 | x = self.avg(x) 329 | x = x.view(x.size(0), -1) 330 | x = self.linear(x) 331 | 332 | return x 333 | 334 | def _make_stage(self, in_channels, out_channels, num, block): 335 | 336 | layers = [] 337 | layers.append(PreActResidualUnit(in_channels, out_channels, 2)) 338 | 339 | for _ in range(num): 340 | layers.append(block(out_channels, out_channels)) 341 | 342 | return nn.Sequential(*layers) 343 | 344 | def attention56(): 345 | return Attention([1, 1, 1]) 346 | 347 | def attention92(): 348 | return Attention([1, 2, 3]) 349 | 350 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | #!/usr/bin/env python3 3 | 4 | """ train network using pytorch 5 | 6 | author baiyu 7 | """ 8 | 9 | import os 10 | import sys 11 | import argparse 12 | from datetime import datetime 13 | 14 | import numpy as np 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | import torchvision 19 | import torchvision.transforms as transforms 20 | 21 | from torch.utils.data import DataLoader 22 | #from dataset import * 23 | from torch.autograd import Variable 24 | 25 | from tensorboardX import SummaryWriter 26 | 27 | from dataset.ImgLoader import ImgLoader 28 | from class_balanced_loss import CB_loss 29 | from imbalanced_dataset_sampler import ImbalancedDatasetSampler 30 | from collections import Counter 31 | from conf import settings 32 | from utils import get_network, Logger, WarmUpLR 33 | import roc 34 | 35 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 36 | 37 | def train(epoch): 38 
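    # (Sketch of what the CB_loss call below computes, per Cui et al., CVPR 2019,
    # "Class-Balanced Loss Based on Effective Number of Samples": class y is
    # re-weighted by (1 - beta) / (1 - beta^n_y), where n_y is the per-class
    # count passed in via samples_per_cls; with beta = 0.9999 and small n_y this
    # is roughly proportional to 1 / n_y. Note that samples_per_cls is rebuilt
    # from each batch's Counter, so the weights track per-batch, not
    # whole-dataset, class frequencies.)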
| 39 | net.train() 40 | for batch_index, (images, labels) in enumerate(train_loader): 41 | if epoch <= args.warm: 42 | warmup_scheduler.step() 43 | 44 | cnt = Counter(np.array(labels)) 45 | samples_per_cls = [] 46 | samples_per_cls.append(cnt[0]) 47 | samples_per_cls.append(cnt[1]) 48 | 49 | images = Variable(images) 50 | labels = Variable(labels) 51 | 52 | labels = labels.cuda() 53 | images = images.cuda() 54 | 55 | optimizer.zero_grad() 56 | outputs = net(images) 57 | # loss = loss_function(outputs, labels) 58 | loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 59 | loss.backward() 60 | optimizer.step() 61 | 62 | n_iter = (epoch - 1) * len(train_loader) + batch_index + 1 63 | 64 | last_layer = list(net.children())[-1] 65 | # for name, para in last_layer.named_parameters(): 66 | # if 'weight' in name: 67 | # writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter) 68 | # if 'bias' in name: 69 | # writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter) 70 | 71 | print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format( 72 | loss.item(), 73 | optimizer.param_groups[0]['lr'], 74 | epoch=epoch, 75 | trained_samples=batch_index * args.tb + len(images), 76 | total_samples=len(train_loader.dataset) 77 | )) 78 | 79 | #update training loss for each iteration 80 | writer.add_scalar('Train/loss', loss.item(), n_iter) 81 | 82 | for name, param in net.named_parameters(): 83 | layer, attr = os.path.splitext(name) 84 | attr = attr[1:] 85 | writer.add_histogram("{}/{}".format(layer, attr), param, epoch) 86 | 87 | def eval_training(epoch): 88 | net.eval() 89 | 90 | test_loss = 0.0 # cost function error 91 | correct = 0.0 92 | 93 | result_list = [] 94 | label_list = [] 95 | TP = 0. 96 | TN = 0. 97 | FP = 0. 98 | FN = 0. 
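    # (The HTER computed below is the mean of the two error rates:
    # 1 - (TP_rate + TN_rate)/2 = ((1 - TPR) + (1 - TNR))/2 = (FNR + FPR)/2.
    # Worked example: TP=90, FN=10, TN=80, FP=20 gives TPR=0.9, TNR=0.8,
    # so HTER = 1 - (0.9 + 0.8)/2 = 0.15.)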
99 | 100 | 101 | for (images, labels) in test_loader: 102 | images = Variable(images) 103 | labels = Variable(labels) 104 | 105 | images = images.cuda() 106 | labels = labels.cuda() 107 | 108 | outputs = net(images) 109 | loss = loss_function(outputs, labels) 110 | # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 111 | test_loss += loss.item() 112 | _, preds = outputs.max(1) 113 | correct += preds.eq(labels).sum() 114 | 115 | for i in range(len(preds)): 116 | if labels[i] == 1 and preds[i] == 1: 117 | TP += 1 118 | elif labels[i] == 0 and preds[i] == 0: 119 | TN += 1 120 | elif labels[i] == 1 and preds[i] == 0: 121 | FN += 1 122 | elif labels[i] == 0 and preds[i] == 1: 123 | FP += 1 124 | 125 | 126 | 127 | outputs = torch.softmax(outputs, dim=-1) 128 | preds_prob = outputs.to('cpu').detach().numpy() 129 | labels = labels.to('cpu').detach().numpy() 130 | for i_batch in range(preds.shape[0]): 131 | result_list.append(preds_prob[i_batch, 1]) 132 | label_list.append(labels[i_batch]) 133 | 134 | TP_rate = float(TP / (TP + FN)) 135 | TN_rate = float(TN / (TN + FP)) 136 | 137 | HTER = 1 - (TP_rate + TN_rate) / 2 138 | metric = roc.cal_metric(label_list, result_list, False) 139 | 140 | # print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 141 | # test_loss / len(test_loader.dataset), 142 | # correct.float() / len(test_loader.dataset), 143 | # metric[2], HTER 144 | # )) 145 | log.write('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 146 | test_loss / len(test_loader.dataset), 147 | correct.float() / len(test_loader.dataset), 148 | metric[2], HTER 149 | )) 150 | print() 151 | 152 | #add informations to tensorboard 153 | writer.add_scalar('Test/Average loss', test_loss / len(test_loader.dataset), epoch) 154 | writer.add_scalar('Test/Accuracy', correct.float() / len(test_loader.dataset), epoch) 155 | 156 | return correct.float() / len(test_loader.dataset) 157 | 158 | def eval_turn(model, dataloader, epoch): 159 | 160 | model.train(False) 161 | 162 | TP = 0. 163 | TN = 0. 164 | FP = 0. 165 | FN = 0. 
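    # (eval_turn repeats eval_training's confusion-matrix bookkeeping without
    # the loss/AUC part; model.train(False) above is equivalent to model.eval().
    # As written, the TP_rate / TN_rate divisions further down assume both
    # classes occur in the loader; a guarded form would be, e.g.:
    #   TP_rate = TP / (TP + FN) if TP + FN > 0 else 0.0)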
166 | 167 | val_corrects = 0 168 | item_count = len(dataloader.dataset) 169 | with torch.no_grad(): 170 | for cnt, data in enumerate(dataloader, 0): 171 | 172 | img, label = data 173 | batch_size = img.size(0) 174 | 175 | 176 | preds = model(img.cuda()) 177 | 178 | preds_ = preds.data.max(1)[1] 179 | batch_correct = preds_.eq(label.cuda().data).cpu().sum() 180 | 181 | val_corrects += batch_correct 182 | 183 | for i in range(len(preds_)): 184 | if label[i] == 1 and preds_[i] == 1: 185 | TP += 1 186 | elif label[i] == 0 and preds_[i] == 0: 187 | TN += 1 188 | elif label[i] == 1 and preds_[i] == 0: 189 | FN += 1 190 | elif label[i] == 0 and preds_[i] == 1: 191 | FP += 1 192 | 193 | TP_rate = float(TP / (TP + FN)) 194 | TN_rate = float(TN / (TN + FP)) 195 | 196 | HTER = 1 - (TP_rate + TN_rate) / 2 197 | 198 | # print('total eval item {:d}'.format(item_count)) 199 | val_acc = float(float(val_corrects) / (item_count)) 200 | 201 | # print('acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f' % (val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 202 | print('epoch: %d acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f \n' % (epoch, val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 203 | 204 | return val_acc 205 | 206 | 207 | if __name__ == '__main__': 208 | 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('-net', type=str, default='resnet18', help='net type') 211 | parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not') 212 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 213 | parser.add_argument('-tb', type=int, default=64, help='batch size for train dataloader') 214 | parser.add_argument('-vb', type=int, default=32, help='batch size for val dataloader') 215 | parser.add_argument('-s', type=bool, default=True, help='whether to shuffle the dataset') 216 | parser.add_argument('-warm', type=int, default=1, help='warm up training phase') 217 | parser.add_argument('-lr', type=float, default=0.01, help='initial learning rate') 218 | parser.add_argument('-train_list', type=str, default='4train_list.txt', help='training list file') 219 | parser.add_argument('-test_list', type=str, default='2test_list.txt', help='test list file') 220 | parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped', help='root folder of the dataset') 221 | args = parser.parse_args() 222 | 223 | net = get_network(args, use_gpu=args.gpu) 224 | 225 | out_dir = './logs' 226 | log = Logger() 227 | log.open(os.path.join(out_dir, args.net + '4to5.txt'), mode='a') 228 | 229 | #data preprocessing: 230 | # cifar100_training_loader = get_training_dataloader( 231 | # settings.CIFAR100_TRAIN_MEAN, 232 | # settings.CIFAR100_TRAIN_STD, 233 | # num_workers=args.w, 234 | # batch_size=args.b, 235 | # shuffle=args.s 236 | # ) 237 | # 238 | # cifar100_test_loader = get_test_dataloader( 239 | # settings.CIFAR100_TRAIN_MEAN, 240 | # settings.CIFAR100_TRAIN_STD, 241 | # num_workers=args.w, 242 | # batch_size=args.b, 243 | # shuffle=args.s 244 | # ) 245 | 246 | train_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.train_list), 247 | transforms.Compose([ 248 | transforms.Resize(248), 249 | # transforms.RandomAffine(10), 250 | transforms.CenterCrop(248), 251 | transforms.RandomHorizontalFlip(), 252 | transforms.RandomRotation(15), 253 | transforms.ToTensor() 254 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 255 
| # transforms.RandomRotation(15), 256 | # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), 257 | 258 | ])) 259 | 260 | weights = [3 if label == 1 else 1 for data, label in train_dataset.items] 261 | from torch.utils.data.sampler import WeightedRandomSampler 262 | 263 | sampler = WeightedRandomSampler(weights, 264 | num_samples=len(train_dataset.items), 265 | replacement=True) 266 | train_loader = torch.utils.data.DataLoader(train_dataset, 267 | batch_size=args.tb, 268 | num_workers=2, 269 | # shuffle=True, 270 | sampler=ImbalancedDatasetSampler(train_dataset), 271 | pin_memory=True) 272 | 273 | test_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list), 274 | transforms.Compose([ 275 | transforms.Resize(248), 276 | # transforms.RandomAffine(10), 277 | transforms.CenterCrop(248), 278 | transforms.RandomRotation(15), 279 | transforms.ToTensor() 280 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 281 | 282 | ])) 283 | test_loader = torch.utils.data.DataLoader(test_dataset, 284 | batch_size=args.vb, 285 | num_workers=2, 286 | pin_memory=True) 287 | 288 | loss_function = nn.CrossEntropyLoss() 289 | # loss_function = CB_loss() 290 | optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) 291 | train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay 292 | iter_per_epoch = len(train_loader) 293 | warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) 294 | checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW) 295 | 296 | # use tensorboard 297 | if not os.path.exists(settings.LOG_DIR): 298 | os.mkdir(settings.LOG_DIR) 299 | writer = SummaryWriter(log_dir=os.path.join( 300 | settings.LOG_DIR, args.net, settings.TIME_NOW)) 301 | input_tensor = torch.Tensor(12, 3, 248, 248).cuda() 302 | writer.add_graph(net, Variable(input_tensor, requires_grad=True)) 303 | 304 | #create checkpoint folder to save model 305 | if not os.path.exists(checkpoint_path): 306 | os.makedirs(checkpoint_path) 307 | checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth') 308 | 309 | best_acc = 0.0 310 | for epoch in range(1, settings.EPOCH): 311 | if epoch > args.warm: 312 | train_scheduler.step(epoch) 313 | 314 | train(epoch) 315 | acc = eval_training(epoch) 316 | 317 | #start to save best performance model after learning rate decay to 0.01 318 | if epoch > settings.MILESTONES[1] and best_acc < acc: 319 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best')) 320 | best_acc = acc 321 | continue 322 | 323 | if not epoch % settings.SAVE_EPOCH: 324 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular')) 325 | 326 | writer.close() 327 | -------------------------------------------------------------------------------- /train_wo_CB.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | #!/usr/bin/env python3 3 | 4 | """ train network using pytorch 5 | 6 | author baiyu 7 | """ 8 | 9 | import os 10 | import sys 11 | import argparse 12 | from datetime import datetime 13 | 14 | import numpy as np 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | import torchvision 19 | import torchvision.transforms as transforms 20 | 21 | from torch.utils.data import DataLoader 22 | #from dataset import * 23 | from torch.autograd import Variable 24 | 25 | 
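# (Variant note, from comparing the three training scripts in this repo:
# train.py trains with CB_loss plus an ImbalancedDatasetSampler; this file,
# train_wo_CB.py, keeps the ImbalancedDatasetSampler but uses plain
# CrossEntropyLoss; train_wo_CB_weight.py uses CrossEntropyLoss with
# shuffle=True and the sampler commented out. In all three, a
# WeightedRandomSampler is built from 3:1 class weights but never passed
# to the DataLoader.)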
from tensorboardX import SummaryWriter 26 | 27 | from dataset.ImgLoader import ImgLoader 28 | from class_balanced_loss import CB_loss 29 | from imbalanced_dataset_sampler import ImbalancedDatasetSampler 30 | from collections import Counter 31 | from conf import settings 32 | from utils import get_network, Logger, WarmUpLR 33 | import roc 34 | 35 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 36 | 37 | def train(epoch): 38 | 39 | net.train() 40 | for batch_index, (images, labels) in enumerate(train_loader): 41 | if epoch <= args.warm: 42 | warmup_scheduler.step() 43 | 44 | cnt = Counter(np.array(labels)) 45 | samples_per_cls = [] 46 | samples_per_cls.append(cnt[0]) 47 | samples_per_cls.append(cnt[1]) 48 | 49 | images = Variable(images) 50 | labels = Variable(labels) 51 | 52 | labels = labels.cuda() 53 | images = images.cuda() 54 | 55 | optimizer.zero_grad() 56 | outputs = net(images) 57 | loss = loss_function(outputs, labels) 58 | # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 59 | loss.backward() 60 | optimizer.step() 61 | 62 | n_iter = (epoch - 1) * len(train_loader) + batch_index + 1 63 | 64 | last_layer = list(net.children())[-1] 65 | # for name, para in last_layer.named_parameters(): 66 | # if 'weight' in name: 67 | # writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter) 68 | # if 'bias' in name: 69 | # writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter) 70 | 71 | print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format( 72 | loss.item(), 73 | optimizer.param_groups[0]['lr'], 74 | epoch=epoch, 75 | trained_samples=batch_index * args.tb + len(images), 76 | total_samples=len(train_loader.dataset) 77 | )) 78 | 79 | #update training loss for each iteration 80 | writer.add_scalar('Train/loss', loss.item(), n_iter) 81 | 82 | for name, param in net.named_parameters(): 83 | layer, attr = os.path.splitext(name) 84 | attr = attr[1:] 85 | writer.add_histogram("{}/{}".format(layer, attr), param, epoch) 86 | 87 | def eval_training(epoch): 88 | net.eval() 89 | 90 | test_loss = 0.0 # cost function error 91 | correct = 0.0 92 | 93 | result_list = [] 94 | label_list = [] 95 | TP = 0. 96 | TN = 0. 97 | FP = 0. 98 | FN = 0. 
99 | 100 | 101 | for (images, labels) in test_loader: 102 | images = Variable(images) 103 | labels = Variable(labels) 104 | 105 | images = images.cuda() 106 | labels = labels.cuda() 107 | 108 | outputs = net(images) 109 | loss = loss_function(outputs, labels) 110 | # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 111 | test_loss += loss.item() 112 | _, preds = outputs.max(1) 113 | correct += preds.eq(labels).sum() 114 | 115 | for i in range(len(preds)): 116 | if labels[i] == 1 and preds[i] == 1: 117 | TP += 1 118 | elif labels[i] == 0 and preds[i] == 0: 119 | TN += 1 120 | elif labels[i] == 1 and preds[i] == 0: 121 | FN += 1 122 | elif labels[i] == 0 and preds[i] == 1: 123 | FP += 1 124 | 125 | 126 | 127 | outputs = torch.softmax(outputs, dim=-1) 128 | preds_prob = outputs.to('cpu').detach().numpy() 129 | labels = labels.to('cpu').detach().numpy() 130 | for i_batch in range(preds.shape[0]): 131 | result_list.append(preds_prob[i_batch, 1]) 132 | label_list.append(labels[i_batch]) 133 | 134 | TP_rate = float(TP / (TP + FN)) 135 | TN_rate = float(TN / (TN + FP)) 136 | 137 | HTER = 1 - (TP_rate + TN_rate) / 2 138 | metric = roc.cal_metric(label_list, result_list, False) 139 | 140 | # print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 141 | # test_loss / len(test_loader.dataset), 142 | # correct.float() / len(test_loader.dataset), 143 | # metric[2], HTER 144 | # )) 145 | log.write('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 146 | test_loss / len(test_loader.dataset), 147 | correct.float() / len(test_loader.dataset), 148 | metric[2], HTER 149 | )) 150 | print() 151 | 152 | #add informations to tensorboard 153 | writer.add_scalar('Test/Average loss', test_loss / len(test_loader.dataset), epoch) 154 | writer.add_scalar('Test/Accuracy', correct.float() / len(test_loader.dataset), epoch) 155 | 156 | return correct.float() / len(test_loader.dataset) 157 | 158 | def eval_turn(model, dataloader, epoch): 159 | 160 | model.train(False) 161 | 162 | TP = 0. 163 | TN = 0. 164 | FP = 0. 165 | FN = 0. 
166 | 167 | val_corrects = 0 168 | item_count = len(dataloader.dataset) 169 | with torch.no_grad(): 170 | for cnt, data in enumerate(dataloader, 0): 171 | 172 | img, label = data 173 | batch_size = img.size(0) 174 | 175 | 176 | preds = model(img.cuda()) 177 | 178 | preds_ = preds.data.max(1)[1] 179 | batch_correct = preds_.eq(label.cuda().data).cpu().sum() 180 | 181 | val_corrects += batch_correct 182 | 183 | for i in range(len(preds_)): 184 | if label[i] == 1 and preds_[i] == 1: 185 | TP += 1 186 | elif label[i] == 0 and preds_[i] == 0: 187 | TN += 1 188 | elif label[i] == 1 and preds_[i] == 0: 189 | FN += 1 190 | elif label[i] == 0 and preds_[i] == 1: 191 | FP += 1 192 | 193 | TP_rate = float(TP / (TP + FN)) 194 | TN_rate = float(TN / (TN + FP)) 195 | 196 | HTER = 1 - (TP_rate + TN_rate) / 2 197 | 198 | # print('total eval item {:d}'.format(item_count)) 199 | val_acc = float(float(val_corrects) / (item_count)) 200 | 201 | # print('acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f' % (val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 202 | print('epoch: %d acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f \n' % (epoch, val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 203 | 204 | return val_acc 205 | 206 | 207 | if __name__ == '__main__': 208 | 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('-net', type=str, default='resnet18', help='net type') 211 | parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not') 212 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 213 | parser.add_argument('-tb', type=int, default=64, help='batch size for train dataloader') 214 | parser.add_argument('-vb', type=int, default=32, help='batch size for val dataloader') 215 | parser.add_argument('-s', type=bool, default=True, help='whether to shuffle the dataset') 216 | parser.add_argument('-warm', type=int, default=1, help='warm up training phase') 217 | parser.add_argument('-lr', type=float, default=0.01, help='initial learning rate') 218 | parser.add_argument('-train_list', type=str, default='4train_list.txt', help='training list file') 219 | parser.add_argument('-test_list', type=str, default='2test_list.txt', help='test list file') 220 | parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped', help='root folder of the dataset') 221 | args = parser.parse_args() 222 | 223 | net = get_network(args, use_gpu=args.gpu) 224 | 225 | out_dir = './logs' 226 | log = Logger() 227 | log.open(os.path.join(out_dir, args.net + '4to5_wo_CB.txt'), mode='a') 228 | 229 | #data preprocessing: 230 | # cifar100_training_loader = get_training_dataloader( 231 | # settings.CIFAR100_TRAIN_MEAN, 232 | # settings.CIFAR100_TRAIN_STD, 233 | # num_workers=args.w, 234 | # batch_size=args.b, 235 | # shuffle=args.s 236 | # ) 237 | # 238 | # cifar100_test_loader = get_test_dataloader( 239 | # settings.CIFAR100_TRAIN_MEAN, 240 | # settings.CIFAR100_TRAIN_STD, 241 | # num_workers=args.w, 242 | # batch_size=args.b, 243 | # shuffle=args.s 244 | # ) 245 | 246 | train_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.train_list), 247 | transforms.Compose([ 248 | transforms.Resize(248), 249 | # transforms.RandomAffine(10), 250 | transforms.CenterCrop(248), 251 | transforms.RandomHorizontalFlip(), 252 | transforms.RandomRotation(15), 253 | transforms.ToTensor() 254 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 
0.225]) 255 | # transforms.RandomRotation(15), 256 | # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), 257 | 258 | ])) 259 | 260 | weights = [3 if label == 1 else 1 for data, label in train_dataset.items] 261 | from torch.utils.data.sampler import WeightedRandomSampler 262 | 263 | sampler = WeightedRandomSampler(weights, 264 | num_samples=len(train_dataset.items), 265 | replacement=True) 266 | train_loader = torch.utils.data.DataLoader(train_dataset, 267 | batch_size=args.tb, 268 | num_workers=2, 269 | # shuffle=True, 270 | sampler=ImbalancedDatasetSampler(train_dataset), 271 | pin_memory=True) 272 | 273 | test_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list), 274 | transforms.Compose([ 275 | transforms.Resize(248), 276 | # transforms.RandomAffine(10), 277 | transforms.CenterCrop(248), 278 | transforms.RandomRotation(15), 279 | transforms.ToTensor() 280 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 281 | 282 | ])) 283 | test_loader = torch.utils.data.DataLoader(test_dataset, 284 | batch_size=args.vb, 285 | num_workers=2, 286 | pin_memory=True) 287 | 288 | loss_function = nn.CrossEntropyLoss() 289 | # loss_function = CB_loss() 290 | optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) 291 | train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay 292 | iter_per_epoch = len(train_loader) 293 | warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) 294 | checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW) 295 | 296 | # use tensorboard 297 | if not os.path.exists(settings.LOG_DIR): 298 | os.mkdir(settings.LOG_DIR) 299 | writer = SummaryWriter(log_dir=os.path.join( 300 | settings.LOG_DIR, args.net, settings.TIME_NOW)) 301 | input_tensor = torch.Tensor(12, 3, 248, 248).cuda() 302 | writer.add_graph(net, Variable(input_tensor, requires_grad=True)) 303 | 304 | #create checkpoint folder to save model 305 | if not os.path.exists(checkpoint_path): 306 | os.makedirs(checkpoint_path) 307 | checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth') 308 | 309 | best_acc = 0.0 310 | for epoch in range(1, settings.EPOCH): 311 | if epoch > args.warm: 312 | train_scheduler.step(epoch) 313 | 314 | train(epoch) 315 | acc = eval_training(epoch) 316 | 317 | #start to save best performance model after learning rate decay to 0.01 318 | if epoch > settings.MILESTONES[1] and best_acc < acc: 319 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best')) 320 | best_acc = acc 321 | continue 322 | 323 | if not epoch % settings.SAVE_EPOCH: 324 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular')) 325 | 326 | writer.close() 327 | -------------------------------------------------------------------------------- /train_wo_CB_weight.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | #!/usr/bin/env python3 3 | 4 | """ train network using pytorch 5 | 6 | author baiyu 7 | """ 8 | 9 | import os 10 | import sys 11 | import argparse 12 | from datetime import datetime 13 | 14 | import numpy as np 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | import torchvision 19 | import torchvision.transforms as transforms 20 | 21 | from torch.utils.data import DataLoader 22 | #from dataset import * 23 | from torch.autograd import 
Variable 24 | 25 | from tensorboardX import SummaryWriter 26 | 27 | from dataset.ImgLoader import ImgLoader 28 | from class_balanced_loss import CB_loss 29 | from imbalanced_dataset_sampler import ImbalancedDatasetSampler 30 | from collections import Counter 31 | from conf import settings 32 | from utils import get_network, Logger, WarmUpLR 33 | import roc 34 | 35 | os.environ["CUDA_VISIBLE_DEVICES"] = "2" 36 | 37 | def train(epoch): 38 | 39 | net.train() 40 | for batch_index, (images, labels) in enumerate(train_loader): 41 | if epoch <= args.warm: 42 | warmup_scheduler.step() 43 | 44 | cnt = Counter(np.array(labels)) 45 | samples_per_cls = [] 46 | samples_per_cls.append(cnt[0]) 47 | samples_per_cls.append(cnt[1]) 48 | 49 | images = Variable(images) 50 | labels = Variable(labels) 51 | 52 | labels = labels.cuda() 53 | images = images.cuda() 54 | 55 | optimizer.zero_grad() 56 | outputs = net(images) 57 | loss = loss_function(outputs, labels) 58 | # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 59 | loss.backward() 60 | optimizer.step() 61 | 62 | n_iter = (epoch - 1) * len(train_loader) + batch_index + 1 63 | 64 | last_layer = list(net.children())[-1] 65 | # for name, para in last_layer.named_parameters(): 66 | # if 'weight' in name: 67 | # writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter) 68 | # if 'bias' in name: 69 | # writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter) 70 | 71 | print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format( 72 | loss.item(), 73 | optimizer.param_groups[0]['lr'], 74 | epoch=epoch, 75 | trained_samples=batch_index * args.tb + len(images), 76 | total_samples=len(train_loader.dataset) 77 | )) 78 | 79 | #update training loss for each iteration 80 | writer.add_scalar('Train/loss', loss.item(), n_iter) 81 | 82 | for name, param in net.named_parameters(): 83 | layer, attr = os.path.splitext(name) 84 | attr = attr[1:] 85 | writer.add_histogram("{}/{}".format(layer, attr), param, epoch) 86 | 87 | def eval_training(epoch): 88 | net.eval() 89 | 90 | test_loss = 0.0 # cost function error 91 | correct = 0.0 92 | 93 | result_list = [] 94 | label_list = [] 95 | TP = 0. 96 | TN = 0. 97 | FP = 0. 98 | FN = 0. 
99 | 100 | 101 | for (images, labels) in test_loader: 102 | images = Variable(images) 103 | labels = Variable(labels) 104 | 105 | images = images.cuda() 106 | labels = labels.cuda() 107 | 108 | outputs = net(images) 109 | loss = loss_function(outputs, labels) 110 | # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0) 111 | test_loss += loss.item() 112 | _, preds = outputs.max(1) 113 | correct += preds.eq(labels).sum() 114 | 115 | for i in range(len(preds)): 116 | if labels[i] == 1 and preds[i] == 1: 117 | TP += 1 118 | elif labels[i] == 0 and preds[i] == 0: 119 | TN += 1 120 | elif labels[i] == 1 and preds[i] == 0: 121 | FN += 1 122 | elif labels[i] == 0 and preds[i] == 1: 123 | FP += 1 124 | 125 | 126 | 127 | outputs = torch.softmax(outputs, dim=-1) 128 | preds_prob = outputs.to('cpu').detach().numpy() 129 | labels = labels.to('cpu').detach().numpy() 130 | for i_batch in range(preds.shape[0]): 131 | result_list.append(preds_prob[i_batch, 1]) 132 | label_list.append(labels[i_batch]) 133 | 134 | TP_rate = float(TP / (TP + FN)) 135 | TN_rate = float(TN / (TN + FP)) 136 | 137 | HTER = 1 - (TP_rate + TN_rate) / 2 138 | metric = roc.cal_metric(label_list, result_list, False) 139 | 140 | # print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 141 | # test_loss / len(test_loader.dataset), 142 | # correct.float() / len(test_loader.dataset), 143 | # metric[2], HTER 144 | # )) 145 | log.write('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format( 146 | test_loss / len(test_loader.dataset), 147 | correct.float() / len(test_loader.dataset), 148 | metric[2], HTER 149 | )) 150 | print() 151 | 152 | #add informations to tensorboard 153 | writer.add_scalar('Test/Average loss', test_loss / len(test_loader.dataset), epoch) 154 | writer.add_scalar('Test/Accuracy', correct.float() / len(test_loader.dataset), epoch) 155 | 156 | return correct.float() / len(test_loader.dataset) 157 | 158 | def eval_turn(model, dataloader, epoch): 159 | 160 | model.train(False) 161 | 162 | TP = 0. 163 | TN = 0. 164 | FP = 0. 165 | FN = 0. 
166 | 167 | val_corrects = 0 168 | item_count = len(dataloader.dataset) 169 | with torch.no_grad(): 170 | for cnt, data in enumerate(dataloader, 0): 171 | 172 | img, label = data 173 | batch_size = img.size(0) 174 | 175 | 176 | preds = model(img.cuda()) 177 | 178 | preds_ = preds.data.max(1)[1] 179 | batch_correct = preds_.eq(label.cuda().data).cpu().sum() 180 | 181 | val_corrects += batch_correct 182 | 183 | for i in range(len(preds_)): 184 | if label[i] == 1 and preds_[i] == 1: 185 | TP += 1 186 | elif label[i] == 0 and preds_[i] == 0: 187 | TN += 1 188 | elif label[i] == 1 and preds_[i] == 0: 189 | FN += 1 190 | elif label[i] == 0 and preds_[i] == 1: 191 | FP += 1 192 | 193 | TP_rate = float(TP / (TP + FN)) 194 | TN_rate = float(TN / (TN + FP)) 195 | 196 | HTER = 1 - (TP_rate + TN_rate) / 2 197 | 198 | # print('total eval item {:d}'.format(item_count)) 199 | val_acc = float(float(val_corrects) / (item_count)) 200 | 201 | # print('acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f' % (val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 202 | print('epoch: %d acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f \n' % (epoch, val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER)) 203 | 204 | return val_acc 205 | 206 | 207 | if __name__ == '__main__': 208 | 209 | parser = argparse.ArgumentParser() 210 | parser.add_argument('-net', type=str, default='resnet18', help='net type') 211 | parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not') 212 | parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader') 213 | parser.add_argument('-tb', type=int, default=64, help='batch size for train dataloader') 214 | parser.add_argument('-vb', type=int, default=32, help='batch size for val dataloader') 215 | parser.add_argument('-s', type=bool, default=True, help='whether to shuffle the dataset') 216 | parser.add_argument('-warm', type=int, default=1, help='warm up training phase') 217 | parser.add_argument('-lr', type=float, default=0.01, help='initial learning rate') 218 | parser.add_argument('-train_list', type=str, default='4train_list.txt', help='training list file') 219 | parser.add_argument('-test_list', type=str, default='2test_list.txt', help='test list file') 220 | parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped', help='root folder of the dataset') 221 | args = parser.parse_args() 222 | 223 | net = get_network(args, use_gpu=args.gpu) 224 | 225 | out_dir = './logs' 226 | log = Logger() 227 | log.open(os.path.join(out_dir, args.net + '4to5_wo_CB_weight.txt'), mode='a') 228 | 229 | #data preprocessing: 230 | # cifar100_training_loader = get_training_dataloader( 231 | # settings.CIFAR100_TRAIN_MEAN, 232 | # settings.CIFAR100_TRAIN_STD, 233 | # num_workers=args.w, 234 | # batch_size=args.b, 235 | # shuffle=args.s 236 | # ) 237 | # 238 | # cifar100_test_loader = get_test_dataloader( 239 | # settings.CIFAR100_TRAIN_MEAN, 240 | # settings.CIFAR100_TRAIN_STD, 241 | # num_workers=args.w, 242 | # batch_size=args.b, 243 | # shuffle=args.s 244 | # ) 245 | 246 | train_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.train_list), 247 | transforms.Compose([ 248 | transforms.Resize(248), 249 | # transforms.RandomAffine(10), 250 | transforms.CenterCrop(248), 251 | transforms.RandomHorizontalFlip(), 252 | transforms.RandomRotation(15), 253 | transforms.ToTensor() 254 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 
0.224, 0.225]) 255 | # transforms.RandomRotation(15), 256 | # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), 257 | 258 | ])) 259 | 260 | weights = [3 if label == 1 else 1 for data, label in train_dataset.items] 261 | from torch.utils.data.sampler import WeightedRandomSampler 262 | 263 | sampler = WeightedRandomSampler(weights, 264 | num_samples=len(train_dataset.items), 265 | replacement=True) 266 | train_loader = torch.utils.data.DataLoader(train_dataset, 267 | batch_size=args.tb, 268 | num_workers=2, 269 | shuffle=True, 270 | # sampler=ImbalancedDatasetSampler(train_dataset), 271 | pin_memory=True) 272 | 273 | test_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list), 274 | transforms.Compose([ 275 | transforms.Resize(248), 276 | # transforms.RandomAffine(10), 277 | transforms.CenterCrop(248), 278 | transforms.RandomRotation(15), 279 | transforms.ToTensor() 280 | # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 281 | 282 | ])) 283 | test_loader = torch.utils.data.DataLoader(test_dataset, 284 | batch_size=args.vb, 285 | num_workers=2, 286 | pin_memory=True) 287 | 288 | loss_function = nn.CrossEntropyLoss() 289 | # loss_function = CB_loss() 290 | optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) 291 | train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay 292 | iter_per_epoch = len(train_loader) 293 | warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) 294 | checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW) 295 | 296 | # use tensorboard 297 | if not os.path.exists(settings.LOG_DIR): 298 | os.mkdir(settings.LOG_DIR) 299 | writer = SummaryWriter(log_dir=os.path.join( 300 | settings.LOG_DIR, args.net, settings.TIME_NOW)) 301 | input_tensor = torch.Tensor(12, 3, 248, 248).cuda() 302 | writer.add_graph(net, Variable(input_tensor, requires_grad=True)) 303 | 304 | #create checkpoint folder to save model 305 | if not os.path.exists(checkpoint_path): 306 | os.makedirs(checkpoint_path) 307 | checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth') 308 | 309 | best_acc = 0.0 310 | for epoch in range(1, settings.EPOCH): 311 | if epoch > args.warm: 312 | train_scheduler.step(epoch) 313 | 314 | train(epoch) 315 | acc = eval_training(epoch) 316 | 317 | #start to save best performance model after learning rate decay to 0.01 318 | if epoch > settings.MILESTONES[1] and best_acc < acc: 319 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best')) 320 | best_acc = acc 321 | continue 322 | 323 | if not epoch % settings.SAVE_EPOCH: 324 | torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular')) 325 | 326 | writer.close() 327 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | # """resnet in pytorch 2 | # 3 | # 4 | # 5 | # [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. 
6 | # 7 | # Deep Residual Learning for Image Recognition 8 | # https://arxiv.org/abs/1512.03385v1 9 | # """ 10 | # 11 | # import torch 12 | # import torch.nn as nn 13 | # 14 | # class BasicBlock(nn.Module): 15 | # """Basic Block for resnet 18 and resnet 34 16 | # 17 | # """ 18 | # 19 | # #BasicBlock and BottleNeck block 20 | # #have different output size 21 | # #we use class attribute expansion 22 | # #to distinct 23 | # expansion = 1 24 | # 25 | # def __init__(self, in_channels, out_channels, stride=1): 26 | # super().__init__() 27 | # 28 | # #residual function 29 | # self.residual_function = nn.Sequential( 30 | # nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False), 31 | # nn.BatchNorm2d(out_channels), 32 | # nn.ReLU(inplace=True), 33 | # nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False), 34 | # nn.BatchNorm2d(out_channels * BasicBlock.expansion) 35 | # ) 36 | # 37 | # #shortcut 38 | # self.shortcut = nn.Sequential() 39 | # 40 | # #the shortcut output dimension is not the same with residual function 41 | # #use 1*1 convolution to match the dimension 42 | # if stride != 1 or in_channels != BasicBlock.expansion * out_channels: 43 | # self.shortcut = nn.Sequential( 44 | # nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False), 45 | # nn.BatchNorm2d(out_channels * BasicBlock.expansion) 46 | # ) 47 | # 48 | # def forward(self, x): 49 | # return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x)) 50 | # 51 | # class BottleNeck(nn.Module): 52 | # """Residual block for resnet over 50 layers 53 | # 54 | # """ 55 | # expansion = 4 56 | # def __init__(self, in_channels, out_channels, stride=1): 57 | # super().__init__() 58 | # self.residual_function = nn.Sequential( 59 | # nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 60 | # nn.BatchNorm2d(out_channels), 61 | # nn.ReLU(inplace=True), 62 | # nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False), 63 | # nn.BatchNorm2d(out_channels), 64 | # nn.ReLU(inplace=True), 65 | # nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False), 66 | # nn.BatchNorm2d(out_channels * BottleNeck.expansion), 67 | # ) 68 | # 69 | # self.shortcut = nn.Sequential() 70 | # 71 | # if stride != 1 or in_channels != out_channels * BottleNeck.expansion: 72 | # self.shortcut = nn.Sequential( 73 | # nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False), 74 | # nn.BatchNorm2d(out_channels * BottleNeck.expansion) 75 | # ) 76 | # 77 | # def forward(self, x): 78 | # return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x)) 79 | # 80 | # class ResNet(nn.Module): 81 | # 82 | # def __init__(self, block, num_block, num_classes=2): 83 | # super().__init__() 84 | # 85 | # self.in_channels = 64 86 | # 87 | # self.conv1 = nn.Sequential( 88 | # nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False), 89 | # nn.BatchNorm2d(64), 90 | # nn.ReLU(inplace=True)) 91 | # #we use a different inputsize than the original paper 92 | # #so conv2_x's stride is 1 93 | # self.conv2_x = self._make_layer(block, 64, num_block[0], 1) 94 | # self.conv3_x = self._make_layer(block, 128, num_block[1], 2) 95 | # self.conv4_x = self._make_layer(block, 256, num_block[2], 2) 96 | # self.conv5_x = self._make_layer(block, 512, num_block[3], 2) 97 | # self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 98 | # self.fc = nn.Linear(512 * 
#
#     def _make_layer(self, block, out_channels, num_blocks, stride):
#         """make resnet layers (by "layer" we do not mean a single network
#         layer such as a conv layer); one layer here may contain more than
#         one residual block
#
#         Args:
#             block: block type, basic block or bottle neck block
#             out_channels: output depth channel number of this layer
#             num_blocks: how many blocks per layer
#             stride: the stride of the first block of this layer
#
#         Return:
#             return a resnet layer
#         """
#
#         # we have num_blocks blocks per layer; the first block's stride
#         # can be 1 or 2, the other blocks always use stride 1
#         strides = [stride] + [1] * (num_blocks - 1)
#         layers = []
#         for stride in strides:
#             layers.append(block(self.in_channels, out_channels, stride))
#             self.in_channels = out_channels * block.expansion
#
#         return nn.Sequential(*layers)
#
#     def forward(self, x):
#         output = self.conv1(x)
#         output = self.conv2_x(output)
#         output = self.conv3_x(output)
#         output = self.conv4_x(output)
#         output = self.conv5_x(output)
#         output = self.avg_pool(output)
#         output = output.view(output.size(0), -1)
#         output = self.fc(output)
#
#         return output
#
# def resnet18():
#     """ return a ResNet 18 object
#     """
#     return ResNet(BasicBlock, [2, 2, 2, 2])
#
# def resnet34():
#     """ return a ResNet 34 object
#     """
#     return ResNet(BasicBlock, [3, 4, 6, 3])
#
# def resnet50():
#     """ return a ResNet 50 object
#     """
#     return ResNet(BottleNeck, [3, 4, 6, 3])
#
# def resnet101():
#     """ return a ResNet 101 object
#     """
#     return ResNet(BottleNeck, [3, 4, 23, 3])
#
# def resnet152():
#     """ return a ResNet 152 object
#     """
#     return ResNet(BottleNeck, [3, 8, 36, 3])
#
#
#


import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


def _make_deconv(inplanes, outplanes, kernel_size, stride, padding, bias=True):
    return nn.Sequential(
        nn.BatchNorm2d(inplanes),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(inplanes, outplanes, kernel_size=kernel_size,
                           stride=stride, padding=padding, bias=bias))


def _make_conv(inplanes, outplanes, kernel_size=1, stride=1, padding=0, bias=True):
    return nn.Sequential(
        nn.BatchNorm2d(inplanes),
        nn.ReLU(inplace=True),
        nn.Conv2d(inplanes, outplanes, kernel_size=kernel_size,
                  stride=stride, padding=padding, bias=bias))

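# --- editorial sketch (not part of the original file) -------------------------
# conv1x1 above is what _make_layer (further below) uses to build the
# downsample shortcut when spatial resolution or channel count changes; the
# helper name and shapes here are illustrative only. _make_deconv/_make_conv
# are pre-activation (BN -> ReLU -> conv) helpers that are not referenced
# elsewhere in this file.
def _example_downsample(in_planes=64, out_planes=128, stride=2):
    # halves the spatial resolution and doubles the channels so the shortcut
    # matches the residual branch and the two paths can be summed
    return nn.Sequential(
        conv1x1(in_planes, out_planes, stride),
        nn.BatchNorm2d(out_planes),
    )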
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):

    # binary live/spoof classification by default
    def __init__(self, block, layers, num_classes=2):
        super(ResNet, self).__init__()

        self.restored = False
        self.inplanes = 64
        num_feats = 256
        self.bias = False
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # self.fc = nn.Sequential(nn.Linear(512 * block.expansion, 256),
        #                         nn.ReLU(),
        #                         nn.Linear(256, 128)
        #                         )
        # self.fc = nn.Sequential(nn.Linear(512 * block.expansion, 256)
        #                         )




        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

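    # --- editorial note (not part of the original source) ---------------------
    # Example: resnet34's layer2 is _make_layer(BasicBlock, 128, 4, stride=2).
    # Only the first block gets stride 2 plus a conv1x1 downsample shortcut
    # (64 -> 128 channels); the remaining three blocks use stride 1 with
    # identity shortcuts, i.e. the per-block strides are [2, 1, 1, 1].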
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        x = self.avgpool(x4)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x




def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): accepted for API compatibility; this function does
            not load any pre-trained weights
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def resnet34(pretrained=False, **kwargs):
    """ return a ResNet 34 object
    """
    return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)

def resnet50(pretrained=False, **kwargs):
    """ return a ResNet 50 object
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)

def resnet101(pretrained=False, **kwargs):
    """ return a ResNet 101 object
    """
    return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)

def resnet152(pretrained=False, **kwargs):
    """ return a ResNet 152 object
    """
    return ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)


if __name__ == '__main__':
    print(torch.__version__)
    x = torch.randn(2, 3, 256, 256)
    # model = resnet80(num_classes=41857)
    model = resnet34()
    print(model)
    out = model(x)
    print(out.shape)
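# --- editorial sketch (not part of the original file) -------------------------
# model_zoo is imported above but never used, and the `pretrained` flag is
# ignored by every factory function. If ImageNet initialisation were desired,
# it could look roughly like this. The URL is torchvision's standard ResNet-18
# checkpoint; strict=False is needed because the fc head here has 2 outputs
# instead of 1000, so the pre-trained fc weights are filtered out.
def _load_imagenet_resnet18():
    model = resnet18()
    state = model_zoo.load_url('https://download.pytorch.org/models/resnet18-5c106cde.pth')
    # drop the 1000-way classifier weights that do not fit the 2-way head
    state = {k: v for k, v in state.items() if not k.startswith('fc.')}
    model.load_state_dict(state, strict=False)
    return model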
--------------------------------------------------------------------------------
/train_cefa.py:
--------------------------------------------------------------------------------
# train_cefa.py
# !/usr/bin/env python3

""" train network using pytorch

author baiyu
"""

import os
import sys
import argparse
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
# from dataset import *
from torch.autograd import Variable

from tensorboardX import SummaryWriter

from dataset.ImgLoader import ImgLoader
from class_balanced_loss import CB_loss
from imbalanced_dataset_sampler import ImbalancedDatasetSampler
from collections import Counter
from conf import settings
from utils import get_network, Logger, WarmUpLR
from dataset.RGBImgLoader import RGBImgLoader
import roc

os.environ["CUDA_VISIBLE_DEVICES"] = "2"


def train(epoch):
    net.train()
    for batch_index, (images, labels) in enumerate(train_loader):
        if epoch <= args.warm:
            warmup_scheduler.step()

        cnt = Counter(np.array(labels))
        # samples_per_cls = []
        # samples_per_cls.append(cnt[0])
        # samples_per_cls.append(cnt[1])

        images = Variable(images)
        labels = Variable(labels)

        labels = labels.cuda()
        images = images.cuda()

        optimizer.zero_grad()
        outputs = net(images)
        loss = loss_function(outputs, labels)
        # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0)
        loss.backward()
        optimizer.step()

        n_iter = (epoch - 1) * len(train_loader) + batch_index + 1

        last_layer = list(net.children())[-1]
        # for name, para in last_layer.named_parameters():
        #     if 'weight' in name:
        #         writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
        #     if 'bias' in name:
        #         writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)

        print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            loss.item(),
            optimizer.param_groups[0]['lr'],
            epoch=epoch,
            trained_samples=batch_index * args.tb + len(images),
            total_samples=len(train_loader.dataset)
        ))

        # update training loss for each iteration
        # writer.add_scalar('Train/loss', loss.item(), n_iter)

    # for name, param in net.named_parameters():
    #     layer, attr = os.path.splitext(name)
    #     attr = attr[1:]
    #     writer.add_histogram("{}/{}".format(layer, attr), param, epoch)


def eval_training(epoch):
    net.eval()

    test_loss = 0.0  # cost function error
    correct = 0.0

    result_list = []
    label_list = []
    TP = 0.
    TN = 0.
    FP = 0.
    FN = 0.

    for (images, labels) in val_loader:
        images = Variable(images)
        labels = Variable(labels)

        images = images.cuda()
        labels = labels.cuda()

        outputs = net(images)
        loss = loss_function(outputs, labels)
        # loss = CB_loss(labels, outputs, samples_per_cls, 2, 'softmax', 0.9999, 2.0)
        test_loss += loss.item()
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum()

        for i in range(len(preds)):
            if labels[i] == 1 and preds[i] == 1:
                TP += 1
            elif labels[i] == 0 and preds[i] == 0:
                TN += 1
            elif labels[i] == 1 and preds[i] == 0:
                FN += 1
            elif labels[i] == 0 and preds[i] == 1:
                FP += 1

        outputs = torch.softmax(outputs, dim=-1)
        preds_prob = outputs.to('cpu').detach().numpy()
        labels = labels.to('cpu').detach().numpy()
        for i_batch in range(preds.shape[0]):
            result_list.append(preds_prob[i_batch, 1])
            label_list.append(labels[i_batch])

    TP_rate = float(TP / (TP + FN))
    TN_rate = float(TN / (TN + FP))

    HTER = 1 - (TP_rate + TN_rate) / 2
    metric = roc.cal_metric(label_list, result_list, False)

    # print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format(
    #     test_loss / len(test_loader.dataset),
    #     correct.float() / len(test_loader.dataset),
    #     metric[2], HTER
    # ))
    log.write('Test set: Average loss: {:.4f}, Accuracy: {:.4f}, Auc: {:.4f}, HTER: {:.4f}'.format(
        test_loss / len(val_loader.dataset),
        correct.float() / len(val_loader.dataset),
        metric[2], HTER
    ))
    print()

    # # add information to tensorboard
    # writer.add_scalar('Test/Average loss', test_loss / len(test_loader.dataset), epoch)
    # writer.add_scalar('Test/Accuracy', correct.float() / len(test_loader.dataset), epoch)

    # this function evaluates val_loader, so normalise by the val set size
    return correct.float() / len(val_loader.dataset)

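# --- editorial sketch (not part of the original repository) -------------------
# eval_training above reports HTER = 1 - (TPR + TNR) / 2, the mean of the
# false-rejection and false-acceptance rates. The same computation as a
# standalone helper, with a tiny worked example: TP=40, FN=10, TN=45, FP=5
# gives TPR=0.8, TNR=0.9, HTER=0.15.
def compute_hter(tp, tn, fp, fn):
    tpr = tp / (tp + fn)  # true positive rate (recall on the positive class)
    tnr = tn / (tn + fp)  # true negative rate (recall on the negative class)
    return 1 - (tpr + tnr) / 2
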
def eval_turn(model, dataloader, epoch):
    model.train(False)

    TP = 0.
    TN = 0.
    FP = 0.
    FN = 0.

    val_corrects = 0
    item_count = len(dataloader.dataset)
    with torch.no_grad():
        for cnt, data in enumerate(dataloader, 0):

            img, label = data
            batch_size = img.size(0)

            preds = model(img.cuda())

            preds_ = preds.data.max(1)[1]
            batch_correct = preds_.eq(label.cuda().data).cpu().sum()

            val_corrects += batch_correct

            for i in range(len(preds_)):
                if label[i] == 1 and preds_[i] == 1:
                    TP += 1
                elif label[i] == 0 and preds_[i] == 0:
                    TN += 1
                elif label[i] == 1 and preds_[i] == 0:
                    FN += 1
                elif label[i] == 0 and preds_[i] == 1:
                    FP += 1

    TP_rate = float(TP / (TP + FN))
    TN_rate = float(TN / (TN + FP))

    HTER = 1 - (TP_rate + TN_rate) / 2

    # print('total eval item {:d}'.format(item_count))
    val_acc = float(float(val_corrects) / (item_count))

    # print('acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f' % (val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER))
    print('epoch: %d acc: %.4f, total item: %d, correct item: %d, TP rate: %.4f, TN rate: %.4f, HTER : %.4f \n' % (
        epoch, val_acc, item_count, val_corrects, TP_rate, TN_rate, HTER))

    return val_acc

def generate_result(model, dataloader):
    data_write = open("./result.txt", 'a+')

    item_count = len(dataloader.dataset)
    with torch.no_grad():
        for cnt, data in enumerate(dataloader, 0):

            img, label = data
            batch_size = img.size(0)

            preds = model(img.cuda())

            # one live-class probability per sample in the batch
            probability = torch.nn.functional.softmax(preds, dim=1)[:, 1].detach().cpu().tolist()

            for prob in probability:
                data_write.write("{:.8f}\n".format(prob))

    data_write.close()

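# --- editorial sketch (not part of the original repository) -------------------
# The commented-out CB_loss calls in train()/eval_training() refer to the
# class-balanced loss of Cui et al. (CVPR 2019), imported above from
# class_balanced_loss.py. Its per-class weights come from the "effective
# number of samples": w_c = (1 - beta) / (1 - beta ** n_c). A minimal sketch;
# e.g. samples_per_cls=[900, 100] with beta=0.9999 yields roughly [0.21, 1.79].
def class_balanced_weights(samples_per_cls, beta=0.9999):
    import numpy as np
    effective_num = 1.0 - np.power(beta, samples_per_cls)
    weights = (1.0 - beta) / effective_num
    # normalise so the weights sum to the number of classes
    return weights / weights.sum() * len(samples_per_cls)
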
if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-net', type=str, default='resnet18', help='net type')
    parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
    parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader')
    parser.add_argument('-tb', type=int, default=64, help='batch size for train dataloader')
    parser.add_argument('-vb', type=int, default=32, help='batch size for val dataloader')
    parser.add_argument('-s', type=bool, default=True, help='whether to shuffle the dataset')
    parser.add_argument('-warm', type=int, default=1, help='warm up training phase')
    parser.add_argument('-lr', type=float, default=0.01, help='initial learning rate')
    # parser.add_argument('-train_list', type=str, default='4train_list.txt', help='training list file')
    # parser.add_argument('-test_list', type=str, default='4test_list.txt', help='validation list file')
    # parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped', help='root folder of the dataset')
    parser.add_argument('-train_list', type=str, default='4@all_train.txt', help='training list file')
    parser.add_argument('-test_list', type=str, default='4@all_dev.txt', help='validation list file')
    parser.add_argument('-root_folder', type=str, default='/home/gqwang/Spoof_Croped/CASIA_CeFA', help='root folder of the dataset')
    args = parser.parse_args()

    net = get_network(args, use_gpu=args.gpu)

    out_dir = './logs'
    log = Logger()
    # log.open(os.path.join(out_dir, args.net + 'CeFA@3.txt'), mode='a')
    log.open(os.path.join(out_dir, args.net + 'CASIA@4.txt'), mode='a')

    # data preprocessing:
    # cifar100_training_loader = get_training_dataloader(
    #     settings.CIFAR100_TRAIN_MEAN,
    #     settings.CIFAR100_TRAIN_STD,
    #     num_workers=args.w,
    #     batch_size=args.b,
    #     shuffle=args.s
    # )
    #
    # cifar100_test_loader = get_test_dataloader(
    #     settings.CIFAR100_TRAIN_MEAN,
    #     settings.CIFAR100_TRAIN_STD,
    #     num_workers=args.w,
    #     batch_size=args.b,
    #     shuffle=args.s
    # )

    train_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.train_list),
                              transforms.Compose([
                                  transforms.Resize(248),
                                  # transforms.RandomAffine(10),
                                  transforms.CenterCrop(248),
                                  transforms.RandomHorizontalFlip(),
                                  transforms.RandomRotation(15),
                                  transforms.ToTensor()
                                  # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                  # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),

                              ]))

    # validation preprocessing is deterministic (no random augmentation)
    val_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list),
                            transforms.Compose([
                                transforms.Resize(248),
                                # transforms.RandomAffine(10),
                                transforms.CenterCrop(248),
                                # transforms.RandomHorizontalFlip(),
                                # transforms.RandomRotation(15),
                                transforms.ToTensor()
                                # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),

                            ]))

    # train_size = int(0.8 * len(train_dataset))
    # test_size = len(train_dataset) - train_size
    # train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, test_size])

    # weights = [3 if label == 1 else 1 for data, label in train_dataset.items]
    # from torch.utils.data.sampler import WeightedRandomSampler
    #
    # sampler = WeightedRandomSampler(weights,
    #                                 num_samples=len(train_dataset.items),
    #                                 replacement=True)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.tb,
                                               num_workers=2,
                                               shuffle=True,
                                               # sampler=ImbalancedDatasetSampler(train_dataset),
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.vb,
                                             num_workers=2,
                                             pin_memory=True)

    # test preprocessing is deterministic as well
    test_dataset = ImgLoader(args.root_folder, os.path.join(args.root_folder, args.test_list),
                             transforms.Compose([
                                 transforms.Resize(248),
                                 # transforms.RandomAffine(10),
                                 transforms.CenterCrop(248),
                                 # transforms.RandomRotation(15),
                                 transforms.ToTensor()
                                 # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

                             ]), stage='Test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.vb,
                                              num_workers=2,
                                              pin_memory=True)

    loss_function = nn.CrossEntropyLoss()
    # loss_function = CB_loss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES,
                                                     gamma=0.2)  # learning rate decay
    iter_per_epoch = len(train_loader)
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
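    # --- editorial note (not part of the original source) ---------------------
    # WarmUpLR is defined in utils.py (not shown here). A linear warm-up
    # scheduler of this kind typically scales the base learning rate by
    # t / total_warm_iters for the first `args.warm` epochs, i.e. roughly
    #
    #     lr_t = args.lr * t / (iter_per_epoch * args.warm)
    #
    # so with lr=0.01 and one warm-up epoch, the learning rate ramps from ~0
    # to 0.01 over the first epoch before MultiStepLR takes over.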
    checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW)

    # use tensorboard
    if not os.path.exists(settings.LOG_DIR):
        os.mkdir(settings.LOG_DIR)
    # writer = SummaryWriter(log_dir=os.path.join(
    #     settings.LOG_DIR, args.net, settings.TIME_NOW))
    # input_tensor = torch.Tensor(12, 3, 248, 248).cuda()
    # writer.add_graph(net, Variable(input_tensor, requires_grad=True))

    # create checkpoint folder to save model
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')

    best_acc = 0.0
    for epoch in range(1, settings.EPOCH + 1):
        if epoch > args.warm:
            train_scheduler.step(epoch)

        train(epoch)
        acc = eval_training(epoch)

        # start to save the best-performing model after the second learning-rate decay
        if epoch > settings.MILESTONES[1] and best_acc < acc:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best'))
            best_acc = acc
            continue

        if not epoch % settings.SAVE_EPOCH:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular'))

    # writer.close()
--------------------------------------------------------------------------------
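# --- editorial usage sketch (not part of the original repository) -------------
# Reloading a saved checkpoint for evaluation. The file name follows the
# '{net}-{epoch}-{type}.pth' pattern used in train_cefa.py; the concrete path
# below (including the <TIME_NOW> placeholder) is only an example.
import torch
from models.resnet import resnet18

net = resnet18().cuda()
state = torch.load('checkpoint/resnet18/<TIME_NOW>/resnet18-60-best.pth')
net.load_state_dict(state)
net.eval()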