├── data
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   └── idrnd.py
│   └── transform.py
├── models
│   ├── __init__.py
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── densenet.py
│   │   ├── mobilenet.py
│   │   ├── resnet.py
│   │   └── senet.py
│   ├── blocks.py
│   └── encoders.py
├── utils
│   ├── __init__.py
│   ├── handlers.py
│   └── storage.py
├── metrics
│   ├── __init__.py
│   └── classification.py
├── optimizers
│   ├── __init__.py
│   ├── lr_scheduler.py
│   ├── sgdw.py
│   └── adamw.py
├── __init__.py
├── losses
│   ├── __init__.py
│   ├── bce_loss.py
│   └── focal_loss.py
├── meta.json
├── tmp
│   ├── meta.json
│   └── test.py
├── config
│   ├── se_resnext50_bce.yaml
│   ├── densenet121_bce.yaml
│   ├── densenet121_focal.yaml
│   ├── se_resnext101_focal.yaml
│   └── se_resnext50_focal.yaml
├── README.md
├── LICENSE
├── test.py
└── train.py

/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | 
3 | 
4 | cv2.setNumThreads(0)
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .bce_loss import *
2 | from .focal_loss import *
--------------------------------------------------------------------------------
/meta.json:
--------------------------------------------------------------------------------
1 | {
2 |     "image": "ksanvatds/idrnd-antispoof",
3 |     "entrypoint": "python3 test.py --path-images-csv $PATH_INPUT/meta.csv --path-test-dir $PATH_INPUT --path-submission-csv $PATH_OUTPUT/solution.csv"
4 | }
5 | 
--------------------------------------------------------------------------------
/tmp/meta.json:
--------------------------------------------------------------------------------
1 | {
2 |     "image": "ksanvatds/idrnd-antispoof",
3 |     "entrypoint": "python3 test.py --config config/se_resnext50_focal.yaml --path-images-csv $PATH_INPUT/meta.csv --path-test-dir $PATH_INPUT --path-submission-csv $PATH_OUTPUT/solution.csv"
4 | }
5 | 
--------------------------------------------------------------------------------
/config/se_resnext50_bce.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'se_resnext50_bce'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 | 
6 | encoder: 'se_resnext50' 7 | input_size: 720 8 | out_features: 1 9 | pretrained: yes 10 | 11 | loss: 'bce' 12 | optimizer: 'Adam' 13 | 14 | learning_rate: 0.0001 15 | weight_decay: 0.0001 16 | momentum: 0.9 17 | batch_size: 8 18 | step: 4 19 | num_epochs: 20 20 | thresholds: 50 21 | tta: 1 22 | 23 | frames: [1, 5] 24 | 25 | train: 26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/' 27 | 28 | snapshot: 29 | use: no 30 | epoch: 20 -------------------------------------------------------------------------------- /config/densenet121_bce.yaml: -------------------------------------------------------------------------------- 1 | prefix: 'densenet121_bce' 2 | parallel: yes 3 | save_freq: 1 4 | num_workers: 4 5 | 6 | encoder: 'densenet121' 7 | input_size: 1080 8 | out_features: 1 9 | pretrained: yes 10 | 11 | loss: 'bce' 12 | optimizer: 'Adam' 13 | 14 | learning_rate: 0.0001 15 | weight_decay: 0.0001 16 | momentum: 0.9 17 | batch_size: 8 18 | step: 4 19 | num_epochs: 20 20 | thresholds: 50 21 | tta: 1 22 | 23 | frames: [2, 4] 24 | 25 | train: 26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/' 27 | 28 | snapshot: 29 | use: no 30 | epoch: 4 31 | -------------------------------------------------------------------------------- /config/densenet121_focal.yaml: -------------------------------------------------------------------------------- 1 | prefix: 'densenet121_focal' 2 | parallel: yes 3 | save_freq: 1 4 | num_workers: 4 5 | 6 | encoder: 'densenet121' 7 | input_size: 800 8 | out_features: 1 9 | pretrained: yes 10 | 11 | loss: 'focal' 12 | optimizer: 'Adam' 13 | 14 | learning_rate: 0.0001 15 | weight_decay: 0.0001 16 | momentum: 0.9 17 | batch_size: 8 18 | step: 4 19 | num_epochs: 20 20 | thresholds: 50 21 | tta: 1 22 | 23 | frames: [2, 3, 4] 24 | 25 | train: 26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/' 27 | 28 | snapshot: 29 | use: no 30 | epoch: 4 -------------------------------------------------------------------------------- /config/se_resnext101_focal.yaml: -------------------------------------------------------------------------------- 1 | prefix: 'se_resnext101_focal' 2 | parallel: yes 3 | save_freq: 1 4 | num_workers: 4 5 | 6 | encoder: 'se_resnext101' 7 | input_size: 720 8 | out_features: 1 9 | pretrained: yes 10 | 11 | loss: 'focal' 12 | optimizer: 'Adam' 13 | 14 | learning_rate: 0.0001 15 | weight_decay: 0.0001 16 | momentum: 0.9 17 | batch_size: 8 18 | step: 4 19 | num_epochs: 20 20 | thresholds: 50 21 | tta: 1 22 | 23 | frames: [2, 3, 4] 24 | 25 | train: 26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/' 27 | 28 | snapshot: 29 | use: no 30 | epoch: 20 -------------------------------------------------------------------------------- /config/se_resnext50_focal.yaml: -------------------------------------------------------------------------------- 1 | prefix: 'se_resnext50_focal' 2 | parallel: yes 3 | save_freq: 1 4 | num_workers: 4 5 | 6 | encoder: 'se_resnext50' 7 | input_size: 800 8 | out_features: 1 9 | pretrained: yes 10 | 11 | loss: 'focal' 12 | optimizer: 'Adam' 13 | 14 | learning_rate: 0.0001 15 | weight_decay: 0.0001 16 | momentum: 0.9 17 | batch_size: 8 18 | step: 4 19 | num_epochs: 20 20 | thresholds: 50 21 | tta: 1 22 | 23 | frames: [1, 3, 5] 24 | 25 | train: 26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/' 27 | 28 | snapshot: 29 | use: no 30 | epoch: 20 31 | -------------------------------------------------------------------------------- /utils/handlers.py: 
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class AverageMeter(object):
5 |     def __init__(self):
6 |         self.reset()
7 | 
8 |     def reset(self):
9 |         self.val = 0
10 |         self.avg = 0
11 |         self.sum = 0
12 |         self.count = 0
13 | 
14 |     def update(self, val, n=1):
15 |         self.val = val
16 |         self.sum += val * n
17 |         self.count += n
18 |         self.avg = self.sum / self.count
19 | 
20 | 
21 | class MetaData(object):
22 |     def __init__(self):
23 |         self.reset()
24 | 
25 |     def reset(self):
26 |         self.loss = np.inf
27 |         self.score = 0
28 | 
29 |     def update(self, loss, score):
30 |         self.loss = loss
31 |         self.score = score
--------------------------------------------------------------------------------
/losses/bce_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | class BCELoss(nn.Module):
7 |     def __init__(self, logits=True, reduce=True):
8 |         super(BCELoss, self).__init__()
9 |         self.logits = logits
10 |         self.reduce = reduce
11 | 
12 |     def forward(self, inputs, targets):
13 |         if self.logits:
14 |             bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
15 |         else:
16 |             bce = F.binary_cross_entropy(inputs, targets, reduction='none')
17 | 
18 |         if self.reduce:
19 |             return torch.mean(bce)
20 |         else:
21 |             return bce
22 | 
23 | 
24 | def bce(*argv, **kwargs):
25 |     return BCELoss(*argv, **kwargs)
--------------------------------------------------------------------------------
/metrics/classification.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def accuracy(output, target):
5 |     """Computes binary accuracy: the fraction of predictions that match the target."""
6 |     with torch.no_grad():
7 |         res = torch.sum(output == target)
8 |         return res.float() / target.size(0)
9 | 
10 | 
11 | def min_c(output, target):
12 |     # Weighted error: FP/(FP+TN) + 19*FN/(FN+TP), i.e. FPR + 19 * FNR
13 |     with torch.no_grad():
14 |         eps = 1e-9
15 | 
16 |         TP = (output & target).sum().float()
17 |         TN = (~output & ~target).sum().float()
18 |         FP = (output & ~target).sum().float()
19 |         FN = (~output & target).sum().float()
20 | 
21 |         res = FP / (FP + TN + eps) + 19 * FN / (FN + TP + eps)
22 |         return res / target.size(0)
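# Worked example (hypothetical counts): with 100 bona fide samples of which 2
# are flagged as spoof (FPR = 0.02) and 100 spoof samples of which 1 slips
# through (FNR = 0.01), the weighted error is 0.02 + 19 * 0.01 = 0.21,
# heavily penalising missed spoofs.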
--------------------------------------------------------------------------------
/utils/storage.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | 
4 | def load_weights(model, prefix, model_type, epoch):
5 |     file = os.path.join('snapshots',
6 |                         '{}_{}_epoch_{}.pth'.format(prefix,
7 |                                                     model_type,
8 |                                                     epoch))
9 |     checkpoint = torch.load(file)
10 |     model.load_state_dict(checkpoint['state_dict'])
11 | 
12 | 
13 | def save_weights(model, prefix, model_type, epoch, parallel=True):
14 |     file = os.path.join('snapshots',
15 |                         '{}_{}_epoch_{}.pth'.format(prefix,
16 |                                                     model_type,
17 |                                                     epoch))
18 |     if torch.cuda.is_available() and parallel:
19 |         state_dict = model.module.state_dict()
20 |     else:
21 |         state_dict = model.state_dict()
22 | 
23 |     torch.save({'state_dict': state_dict}, file)
--------------------------------------------------------------------------------
/losses/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | 
6 | class FocalLoss(nn.Module):
7 |     def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
8 |         super(FocalLoss, self).__init__()
9 |         self.alpha = alpha
10 |         self.gamma = gamma
11 |         self.logits = logits
12 |         self.reduce = reduce
13 | 
14 |     def forward(self, inputs, targets):
15 |         if self.logits:
16 |             bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
17 |         else:
18 |             bce = F.binary_cross_entropy(inputs, targets, reduction='none')
19 | 
20 |         # exp(-bce) recovers p_t, the model's probability for the true class, so
21 |         # (1 - p_t)**gamma down-weights easy examples (https://arxiv.org/abs/1708.02002)
22 |         p = torch.exp(-bce)
23 |         f_loss = self.alpha * (1 - p)**self.gamma * bce
24 | 
25 |         if self.reduce:
26 |             return torch.mean(f_loss)
27 |         else:
28 |             return f_loss
29 | 
30 | 
31 | def focal(*argv, **kwargs):
32 |     return FocalLoss(*argv, **kwargs)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ID R&D Anti-spoofing Challenge 1st Place Solution ([link](https://datasouls.com/c/idrnd-antispoof/leaderboard))
2 | 
3 | Train a model: `python train.py --config <configuration file>`, e.g. `python train.py --config config/se_resnext50_focal.yaml`
4 | 
5 | Predict: `python test.py --path-images-csv <annotation> --path-test-dir <path to images> --path-submission-csv <submission>`
6 | 
7 | The final solution is an ensemble of two models: se_resnext50 and densenet121.
8 | 
9 | The following configs were used to train these models: [se_resnext50_focal.yaml](https://github.com/romavlasov/idrnd-anti-spoofing-challenge/blob/master/config/se_resnext50_focal.yaml) and [densenet121_focal.yaml](https://github.com/romavlasov/idrnd-anti-spoofing-challenge/blob/master/config/densenet121_focal.yaml)
10 | 
11 | Pretrained models: [se_resnext50_focal.pth](https://www.dropbox.com/s/o0mpw0ep7ntamzv/se_resnext50_focal_model_epoch_best.pth?dl=0) and [densenet121_focal.pth](https://www.dropbox.com/s/i5utd1nooulyh7z/densenet121_focal_model_epoch_best.pth?dl=0)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 romavlasov
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /models/blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SELayer(nn.Module): 6 | def __init__(self, inplanes, squeeze_ratio=8, activation=nn.PReLU, size=None): 7 | super(SELayer, self).__init__() 8 | if size is not None: 9 | self.global_avgpool = nn.AvgPool2d(size) 10 | else: 11 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 12 | self.conv1 = nn.Conv2d(inplanes, int(inplanes / squeeze_ratio), kernel_size=1, stride=1) 13 | self.conv2 = nn.Conv2d(int(inplanes / squeeze_ratio), inplanes, kernel_size=1, stride=1) 14 | self.relu = nn.ReLU(inplace=True) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | out = self.global_avgpool(x) 19 | out = self.conv1(out) 20 | out = self.relu(out) 21 | out = self.conv2(out) 22 | out = self.sigmoid(out) 23 | return x * out 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, in_channels, out_channels, stride, expand_ratio, outp_size=None): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | assert stride in [1, 2] 31 | 32 | self.use_res_connect = self.stride == 1 and in_channels == out_channels 33 | 34 | self.inv_block = nn.Sequential( 35 | nn.Conv2d(in_channels, in_channels * expand_ratio, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(in_channels * expand_ratio), 37 | nn.ReLU(), 38 | 39 | nn.Conv2d(in_channels * expand_ratio, in_channels * expand_ratio, 3, stride, 1, 40 | groups=in_channels * expand_ratio, bias=False), 41 | nn.BatchNorm2d(in_channels * expand_ratio), 42 | nn.ReLU(), 43 | 44 | nn.Conv2d(in_channels * expand_ratio, out_channels, 1, 1, 0, bias=False), 45 | nn.BatchNorm2d(out_channels), 46 | SELayer(out_channels, 8, nn.ReLU, outp_size) 47 | ) 48 | 49 | def forward(self, x): 50 | if self.use_res_connect: 51 | return x + self.inv_block(x) 52 | 53 | return self.inv_block(x) 54 | 55 | 56 | def build_layers(in_channel): 57 | setting = [ 58 | # t, c, n, s 59 | [2, in_channel, 2, 2], 60 | [2, in_channel, 2, 2], 61 | ] 62 | layers = [] 63 | for t, c, n, s in setting: 64 | out_channel = c 65 | for i in range(n): 66 | if i == 0: 67 | layers.append(InvertedResidual(in_channel, out_channel, s, t)) 68 | else: 69 | layers.append(InvertedResidual(in_channel, out_channel, 1, t)) 70 | in_channel = out_channel 71 | 72 | return nn.Sequential(*layers) -------------------------------------------------------------------------------- /tmp/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import pandas as pd 5 | import torch 6 | 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader 9 | 10 | from models import encoders 11 | 12 | from data.datasets import idrnd 13 | from data.transform import Transforms 14 | 15 | from utils.storage import load_weights 16 | 17 | 18 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 19 | 20 | 21 | if __name__ == '__main__': 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--config', type=str, required=True) 25 | parser.add_argument('--path-images-csv', type=str, required=True) 26 | parser.add_argument('--path-test-dir', type=str, required=True) 27 | parser.add_argument('--path-submission-csv', type=str, required=True) 28 | args = parser.parse_args() 29 | 30 | # prepare image paths 31 | config = 
yaml.load(open(args.config), Loader=yaml.FullLoader) 32 | test_dataset_paths = pd.read_csv(args.path_images_csv) 33 | path_test_dir = args.path_test_dir 34 | 35 | paths = [ 36 | { 37 | 'id': row.id, 38 | 'frame': row.frame, 39 | 'path': os.path.join(path_test_dir, row.path) 40 | } for _, row in test_dataset_paths.iterrows() if int(row.frame) in config['frames']] 41 | test_df = pd.DataFrame(paths) 42 | 43 | test_loader = DataLoader(idrnd.TestAntispoofDataset( 44 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']), 45 | batch_size=config['batch_size'], 46 | num_workers=config['num_workers'], 47 | shuffle=False) 48 | 49 | model = getattr(encoders, config['encoder'])(device=device, 50 | out_features=1, 51 | pretrained=False) 52 | load_weights(model, config['prefix'], 'model', 'best') 53 | model.eval() 54 | 55 | samples, frames, probabilities = [], [], [] 56 | 57 | with torch.no_grad(): 58 | for batch, video, frame in test_loader: 59 | batch = batch.to(device) 60 | probability = torch.sigmoid(model(batch).view(-1)) 61 | 62 | samples.extend(video) 63 | frames.extend(frame.numpy()) 64 | probabilities.extend(probability.cpu().numpy()) 65 | 66 | # save 67 | predictions = pd.DataFrame.from_dict({ 68 | 'id': samples, 69 | 'frame': frames, 70 | 'probability': probabilities}) 71 | 72 | predictions = predictions.groupby('id').probability.mean().reset_index() 73 | predictions['prediction'] = predictions.probability 74 | predictions[['id', 'prediction']].to_csv( 75 | args.path_submission_csv, index=False) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import pandas as pd 5 | import torch 6 | 7 | from sklearn.model_selection import train_test_split 8 | from torch.utils.data import DataLoader 9 | 10 | from models import encoders 11 | 12 | from data.datasets import idrnd 13 | from data.transform import Transforms 14 | 15 | from utils.storage import load_weights 16 | 17 | 18 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 19 | 20 | 21 | if __name__ == '__main__': 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--path-images-csv', type=str, required=True) 25 | parser.add_argument('--path-test-dir', type=str, required=True) 26 | parser.add_argument('--path-submission-csv', type=str, required=True) 27 | args = parser.parse_args() 28 | 29 | configs = ['config/densenet121_bce.yaml', 'config/se_resnext50_bce.yaml'] 30 | 31 | test_dataset_paths = pd.read_csv(args.path_images_csv) 32 | path_test_dir = args.path_test_dir 33 | 34 | samples, frames, probabilities = [], [], [] 35 | 36 | for c in configs: 37 | config = yaml.load(open(c), Loader=yaml.FullLoader) 38 | paths = [ 39 | { 40 | 'id': row.id, 41 | 'frame': row.frame, 42 | 'path': os.path.join(path_test_dir, row.path) 43 | } for _, row in test_dataset_paths.iterrows() if int(row.frame) in config['frames']] 44 | test_df = pd.DataFrame(paths) 45 | 46 | test_loader = DataLoader(idrnd.TestAntispoofDataset( 47 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']), 48 | batch_size=config['batch_size'], 49 | num_workers=config['num_workers'], 50 | shuffle=False) 51 | 52 | model = getattr(encoders, config['encoder'])(device=device, 53 | out_features=1, 54 | pretrained=False) 55 | load_weights(model, config['prefix'], 'model', 'best') 56 | model.eval() 57 | 58 | with torch.no_grad(): 59 | 
for batch, video, frame in test_loader:
60 |                 batch = batch.to(device)
61 |                 probability = torch.sigmoid(model(batch).view(-1))
62 | 
63 |                 samples.extend(video)
64 |                 frames.extend(frame.numpy())
65 |                 probabilities.extend(probability.cpu().numpy())
66 | 
67 |     # save: the lists above pool per-model and per-frame probabilities, so the per-id mean below is the ensemble prediction
68 |     predictions = pd.DataFrame.from_dict({
69 |         'id': samples,
70 |         'frame': frames,
71 |         'probability': probabilities})
72 | 
73 |     predictions = predictions.groupby('id').probability.mean().reset_index()
74 | 
75 |     predictions['prediction'] = predictions.probability
76 |     predictions[['id', 'prediction']].to_csv(
77 |         args.path_submission_csv, index=False)
--------------------------------------------------------------------------------
/optimizers/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | 
5 | 
6 | class CosineWithRestarts(torch.optim.lr_scheduler._LRScheduler):  # pylint: disable=protected-access
7 |     """
8 |     Cosine annealing with restarts.
9 |     This is described in the paper https://arxiv.org/abs/1608.03983.
10 |     Parameters
11 |     ----------
12 |     optimizer : ``torch.optim.Optimizer``
13 |     t_max : ``int``
14 |         The maximum number of iterations within the first cycle.
15 |     eta_min : ``float``, optional (default=0)
16 |         The minimum learning rate.
17 |     last_epoch : ``int``, optional (default=-1)
18 |         The index of the last epoch. This is used when restarting.
19 |     factor : ``float``, optional (default=1)
20 |         The factor by which the cycle length (``T_max``) increases after each restart.
21 |     """
22 | 
23 |     def __init__(self,
24 |                  optimizer,
25 |                  t_max,
26 |                  eta_min=0.,
27 |                  last_epoch=-1,
28 |                  factor=1.):
29 |         assert t_max > 0
30 |         assert eta_min >= 0
31 |         if t_max == 1 and factor == 1:
32 |             print("Cosine annealing scheduler will have no effect on the learning "
33 |                   "rate since T_max = 1 and factor = 1.")
34 |         self.t_max = t_max
35 |         self.eta_min = eta_min
36 |         self.factor = factor
37 |         self._last_restart = 0
38 |         self._cycle_counter = 0
39 |         self._cycle_factor = 1.
40 |         self._updated_cycle_len = t_max
41 |         self._initialized = False
42 |         super(CosineWithRestarts, self).__init__(optimizer, last_epoch)
43 | 
44 |     def get_lr(self):
45 |         """Get updated learning rate."""
46 |         # HACK: We need to check if this is the first time ``self.get_lr()`` was called,
47 |         # since ``torch.optim.lr_scheduler._LRScheduler`` will call ``self.get_lr()``
48 |         # when first initialized, but the learning rate should remain unchanged
49 |         # for the first epoch.
50 |         if not self._initialized:
51 |             self._initialized = True
52 |             return self.base_lrs
53 | 
54 |         step = self.last_epoch + 1
55 |         self._cycle_counter = step - self._last_restart
56 | 
57 |         lrs = [
58 |             self.eta_min + ((lr - self.eta_min) / 2) * (
59 |                 np.cos(
60 |                     np.pi *
61 |                     (self._cycle_counter % self._updated_cycle_len) /
62 |                     self._updated_cycle_len
63 |                 ) + 1
64 |             )
65 |             for lr in self.base_lrs
66 |         ]
67 | 
68 |         if self._cycle_counter % self._updated_cycle_len == 0:
69 |             # Adjust the cycle length.
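            # Each restart multiplies the cycle length by ``factor``: cycles run
            # for t_max, int(t_max * factor), int(t_max * factor**2), ... steps,
            # as in SGDR (https://arxiv.org/abs/1608.03983).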
70 | self._cycle_factor *= self.factor 71 | self._cycle_counter = 0 72 | self._updated_cycle_len = int(self._cycle_factor * self.t_max) 73 | self._last_restart = step 74 | 75 | return lrs 76 | 77 | 78 | if __name__ == '__main__': 79 | lin = nn.Linear(128, 256) 80 | 81 | optim = torch.optim.SGD(lin.parameters(), lr=0.002) 82 | scheduler = CosineWithRestarts(optim, t_max=5, eta_min=0.0001, factor=np.sqrt(1.6)) 83 | 84 | for _ in range(100): 85 | cur_lr = scheduler.get_lr() 86 | print(cur_lr[0]) 87 | scheduler.step() 88 | -------------------------------------------------------------------------------- /data/datasets/idrnd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import datetime 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | import cv2 9 | import torch 10 | 11 | import torch.utils.data as data 12 | 13 | from sklearn.model_selection import train_test_split 14 | 15 | 16 | class TrainAntispoofDataset(data.Dataset): 17 | def __init__(self, df, transform=None, tta=1): 18 | self.df = df 19 | self.tta = tta 20 | self.transform = transform 21 | 22 | def __getitem__(self, index): 23 | item = self.df.iloc[index % len(self.df)] 24 | 25 | image = self._load_image(item['path']) 26 | if self.transform is not None: 27 | image = self.transform(image) 28 | 29 | return image, torch.tensor(item['label']).float() 30 | 31 | def __len__(self): 32 | return len(self.df) * self.tta 33 | 34 | def _load_image(self, path): 35 | image = cv2.imread(path, cv2.IMREAD_COLOR) 36 | return image / 255. 37 | 38 | 39 | class TestAntispoofDataset(data.Dataset): 40 | def __init__(self, df, transform=None, tta=4): 41 | self.df = df 42 | self.tta = tta 43 | self.transform = transform 44 | 45 | def __getitem__(self, index): 46 | item = self.df.iloc[index % len(self.df)] 47 | 48 | image = self._load_image(item['path']) 49 | if self.transform is not None: 50 | image = self.transform(image) 51 | 52 | return image, item['id'], item['frame'] 53 | 54 | def __len__(self): 55 | return len(self.df) * self.tta 56 | 57 | def _load_image(self, path): 58 | image = cv2.imread(path, cv2.IMREAD_COLOR) 59 | return image / 255. 
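# Usage sketch (hypothetical values; mirrors how test.py builds its loader):
# with tta > 1, __len__ is len(df) * tta, so every frame is yielded tta times
# and the random test-time transforms produce tta augmented views per frame,
# whose probabilities are later averaged per video id:
#
#     dataset = TestAntispoofDataset(test_df, transform, tta=4)
#     loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False)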
60 | 61 | 62 | def load_dataset(path_data, test_size=0.1): 63 | path_images = [] 64 | 65 | for label in ['2dmask', 'real', 'printed', 'replay']: 66 | videos = os.listdir(os.path.join(path_data, label)) 67 | for video in videos: 68 | frames = os.listdir(os.path.join(path_data, label, video)) 69 | for frame in frames: 70 | path_images.append({ 71 | 'path': os.path.join(path_data, label, video, frame), 72 | 'label': int(label != 'real'), 73 | 'video': video}) 74 | 75 | videos = list(set(x['video'] for x in path_images)) 76 | videos_tr, videos_ts = train_test_split(videos, test_size=test_size, random_state=123) 77 | 78 | train_path_images = pd.DataFrame([x for x in path_images if x['video'] in videos_tr]) 79 | test_path_images = pd.DataFrame([x for x in path_images if x['video'] in videos_ts]) 80 | 81 | return train_path_images, test_path_images 82 | 83 | 84 | def load_test_dataset(path_data): 85 | path_images = [] 86 | 87 | for label in ['live', 'spoof']: 88 | videos = os.listdir(os.path.join(path_data, label)) 89 | for video in videos: 90 | frames = os.listdir(os.path.join(path_data, label, video)) 91 | for frame in frames: 92 | if frame.endswith('_120.jpg'): 93 | path_images.append({ 94 | 'path': os.path.join(path_data, label, video, frame), 95 | 'label': int(label != 'live'), 96 | 'video': video, 97 | 'frame': frame}) 98 | 99 | return pd.DataFrame(path_images) -------------------------------------------------------------------------------- /optimizers/sgdw.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.optim.optimizer import Optimizer, required 3 | 4 | 5 | class SGDW(Optimizer): 6 | r"""Implements stochastic gradient descent (optionally with momentum). 7 | 8 | Nesterov momentum is based on the formula from 9 | `On the importance of initialization and momentum in deep learning`__. 10 | 11 | Args: 12 | params (iterable): iterable of parameters to optimize or dicts defining 13 | parameter groups 14 | lr (float): learning rate 15 | momentum (float, optional): momentum factor (default: 0) 16 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 17 | dampening (float, optional): dampening for momentum (default: 0) 18 | nesterov (bool, optional): enables Nesterov momentum (default: False) 19 | 20 | Example: 21 | >>> optimizer = torch.optim.SGDW(model.parameters(), lr=0.1, momentum=0.9) 22 | >>> optimizer.zero_grad() 23 | >>> loss_fn(model(input), target).backward() 24 | >>> optimizer.step() 25 | 26 | __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf 27 | 28 | .. note:: 29 | The implementation of SGD with Momentum/Nesterov subtly differs from 30 | Sutskever et. al. and implementations in some other frameworks. 31 | 32 | Considering the specific case of Momentum, the update can be written as 33 | 34 | .. math:: 35 | v = \rho * v + g \\ 36 | p = p - lr * v 37 | 38 | where p, g, v and :math:`\rho` denote the parameters, gradient, 39 | velocity, and momentum respectively. 40 | 41 | This is in contrast to Sutskever et. al. and 42 | other frameworks which employ an update of the form 43 | 44 | .. math:: 45 | v = \rho * v + lr * g \\ 46 | p = p - v 47 | 48 | The Nesterov version is analogously modified. 
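    Unlike plain SGD, the weight decay here is *decoupled* from the gradient
    update (https://arxiv.org/abs/1711.05101): ``step`` applies
    ``p = p - lr * weight_decay * p`` directly to the parameters instead of
    adding ``weight_decay * p`` to the gradient, so the decay never enters
    the momentum buffer.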
49 | """ 50 | 51 | def __init__(self, params, lr=required, momentum=0.9, dampening=0, 52 | weight_decay=0, nesterov=False): 53 | if lr is not required and lr < 0.0: 54 | raise ValueError("Invalid learning rate: {}".format(lr)) 55 | if momentum < 0.0: 56 | raise ValueError("Invalid momentum value: {}".format(momentum)) 57 | if weight_decay < 0.0: 58 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) 59 | 60 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening, 61 | weight_decay=weight_decay, nesterov=nesterov) 62 | if nesterov and (momentum <= 0 or dampening != 0): 63 | raise ValueError("Nesterov momentum requires a momentum and zero dampening") 64 | super(SGDW, self).__init__(params, defaults) 65 | 66 | def __setstate__(self, state): 67 | super(SGDW, self).__setstate__(state) 68 | for group in self.param_groups: 69 | group.setdefault('nesterov', False) 70 | 71 | def step(self, closure=None): 72 | """Performs a single optimization step. 73 | 74 | Arguments: 75 | closure (callable, optional): A closure that reevaluates the model 76 | and returns the loss. 77 | """ 78 | loss = None 79 | if closure is not None: 80 | loss = closure() 81 | 82 | for group in self.param_groups: 83 | weight_decay = group['weight_decay'] 84 | momentum = group['momentum'] 85 | dampening = group['dampening'] 86 | nesterov = group['nesterov'] 87 | 88 | for p in group['params']: 89 | if p.grad is None: 90 | continue 91 | d_p = p.grad.data 92 | 93 | if momentum != 0: 94 | param_state = self.state[p] 95 | if 'momentum_buffer' not in param_state: 96 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data) 97 | buf.mul_(momentum).add_(d_p) 98 | else: 99 | buf = param_state['momentum_buffer'] 100 | buf.mul_(momentum).add_(1 - dampening, d_p) 101 | if nesterov: 102 | d_p = d_p.add(momentum, buf) 103 | else: 104 | d_p = buf 105 | 106 | p.data.add_(-group['lr'] * weight_decay, p.data) 107 | p.data.add_(-group['lr'], d_p) 108 | 109 | return loss 110 | -------------------------------------------------------------------------------- /optimizers/adamw.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim.optimizer import Optimizer 4 | 5 | # https://github.com/egg-west/AdamW-pytorch 6 | # https://github.com/anandsaha/fastai.part1.v2/commit/159e1712e60f299e11c42caab35c726f367bcd61 7 | # https://forums.fast.ai/t/challenge-for-advanced-students-implement-adamw-and-sgdw/8004/8 8 | 9 | 10 | class AdamW(Optimizer): 11 | """Implements Adam algorithm. 12 | It has been proposed in `Adam: A Method for Stochastic Optimization`_. 13 | Arguments: 14 | params (iterable): iterable of parameters to optimize or dicts defining 15 | parameter groups 16 | lr (float, optional): learning rate (default: 1e-3) 17 | betas (Tuple[float, float], optional): coefficients used for computing 18 | running averages of gradient and its square (default: (0.9, 0.999)) 19 | eps (float, optional): term added to the denominator to improve 20 | numerical stability (default: 1e-8) 21 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 22 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 23 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 24 | .. _Adam\: A Method for Stochastic Optimization: 25 | https://arxiv.org/abs/1412.6980 26 | .. 
_On the Convergence of Adam and Beyond: 27 | https://openreview.net/forum?id=ryQu7f-RZ 28 | """ 29 | 30 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 31 | weight_decay=0, amsgrad=False): 32 | if not 0.0 <= lr: 33 | raise ValueError("Invalid learning rate: {}".format(lr)) 34 | if not 0.0 <= eps: 35 | raise ValueError("Invalid epsilon value: {}".format(eps)) 36 | if not 0.0 <= betas[0] < 1.0: 37 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 38 | if not 0.0 <= betas[1] < 1.0: 39 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 40 | defaults = dict(lr=lr, betas=betas, eps=eps, 41 | weight_decay=weight_decay, amsgrad=amsgrad) 42 | super(AdamW, self).__init__(params, defaults) 43 | 44 | def __setstate__(self, state): 45 | super(AdamW, self).__setstate__(state) 46 | for group in self.param_groups: 47 | group.setdefault('amsgrad', False) 48 | 49 | def step(self, closure=None): 50 | """Performs a single optimization step. 51 | Arguments: 52 | closure (callable, optional): A closure that reevaluates the model 53 | and returns the loss. 54 | """ 55 | loss = None 56 | if closure is not None: 57 | loss = closure() 58 | 59 | for group in self.param_groups: 60 | for p in group['params']: 61 | if p.grad is None: 62 | continue 63 | grad = p.grad.data 64 | if grad.is_sparse: 65 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 66 | amsgrad = group['amsgrad'] 67 | 68 | state = self.state[p] 69 | 70 | # State initialization 71 | if len(state) == 0: 72 | state['step'] = 0 73 | # Exponential moving average of gradient values 74 | state['exp_avg'] = torch.zeros_like(p.data) 75 | # Exponential moving average of squared gradient values 76 | state['exp_avg_sq'] = torch.zeros_like(p.data) 77 | if amsgrad: 78 | # Maintains max of all exp. moving avg. of sq. grad. values 79 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 80 | 81 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 82 | if amsgrad: 83 | max_exp_avg_sq = state['max_exp_avg_sq'] 84 | beta1, beta2 = group['betas'] 85 | 86 | state['step'] += 1 87 | 88 | # if group['weight_decay'] != 0: 89 | # grad = grad.add(group['weight_decay'], p.data) 90 | 91 | # Decay the first and second moment running average coefficient 92 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 93 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 94 | if amsgrad: 95 | # Maintains the maximum of all 2nd moment running avg. till now 96 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 97 | # Use the max. for normalizing running avg. 
of gradient 98 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 99 | else: 100 | denom = exp_avg_sq.sqrt().add_(group['eps']) 101 | 102 | bias_correction1 = 1 - beta1 ** state['step'] 103 | bias_correction2 = 1 - beta2 ** state['step'] 104 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 105 | 106 | p.data.add_(-group['lr'] * group['weight_decay'], p.data) 107 | p.data.addcdiv_(-step_size, exp_avg, denom) 108 | 109 | return loss 110 | -------------------------------------------------------------------------------- /models/backbones/densenet.py: -------------------------------------------------------------------------------- 1 | import re 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from collections import OrderedDict 6 | 7 | 8 | class _DenseLayer(nn.Sequential): 9 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): 10 | super(_DenseLayer, self).__init__() 11 | self.add_module('norm1', nn.BatchNorm2d(num_input_features)), 12 | self.add_module('relu1', nn.ReLU(inplace=True)), 13 | self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * 14 | growth_rate, kernel_size=1, stride=1, 15 | bias=False)), 16 | self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), 17 | self.add_module('relu2', nn.ReLU(inplace=True)), 18 | self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, 19 | kernel_size=3, stride=1, padding=1, 20 | bias=False)), 21 | self.drop_rate = drop_rate 22 | 23 | def forward(self, x): 24 | new_features = super(_DenseLayer, self).forward(x) 25 | if self.drop_rate > 0: 26 | new_features = F.dropout(new_features, p=self.drop_rate, 27 | training=self.training) 28 | return torch.cat([x, new_features], 1) 29 | 30 | 31 | class _DenseBlock(nn.Sequential): 32 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): 33 | super(_DenseBlock, self).__init__() 34 | for i in range(num_layers): 35 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, 36 | bn_size, drop_rate) 37 | self.add_module('denselayer%d' % (i + 1), layer) 38 | 39 | 40 | class _Transition(nn.Sequential): 41 | def __init__(self, num_input_features, num_output_features): 42 | super(_Transition, self).__init__() 43 | self.add_module('norm', nn.BatchNorm2d(num_input_features)) 44 | self.add_module('relu', nn.ReLU(inplace=True)) 45 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, 46 | kernel_size=1, stride=1, bias=False)) 47 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) 48 | 49 | 50 | class DenseNet(nn.Module): 51 | r"""Densenet-BC model class, based on 52 | `"Densely Connected Convolutional Networks" `_ 53 | Args: 54 | growth_rate (int) - how many filters to add each layer (`k` in paper) 55 | block_config (list of 4 ints) - how many layers in each pooling block 56 | num_init_features (int) - the number of filters to learn in the first convolution layer 57 | bn_size (int) - multiplicative factor for number of bottle neck layers 58 | (i.e. 
bn_size * k features in the bottleneck layer) 59 | drop_rate (float) - dropout rate after each dense layer 60 | num_classes (int) - number of classification classes 61 | """ 62 | 63 | def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), 64 | num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000): 65 | 66 | super(DenseNet, self).__init__() 67 | 68 | # First convolution 69 | self.features = nn.Sequential(OrderedDict([ 70 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, 71 | padding=3, bias=False)), 72 | ('norm0', nn.BatchNorm2d(num_init_features)), 73 | ('relu0', nn.ReLU(inplace=True)), 74 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 75 | ])) 76 | 77 | # Each denseblock 78 | num_features = num_init_features 79 | for i, num_layers in enumerate(block_config): 80 | block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, 81 | bn_size=bn_size, growth_rate=growth_rate, 82 | drop_rate=drop_rate) 83 | self.features.add_module('denseblock%d' % (i + 1), block) 84 | num_features = num_features + num_layers * growth_rate 85 | if i != len(block_config) - 1: 86 | trans = _Transition(num_input_features=num_features, 87 | num_output_features=num_features // 2) 88 | self.features.add_module('transition%d' % (i + 1), trans) 89 | num_features = num_features // 2 90 | 91 | # Final batch norm 92 | self.features.add_module('norm5', nn.BatchNorm2d(num_features)) 93 | 94 | # Linear layer 95 | self.classifier = nn.Linear(num_features, num_classes) 96 | 97 | # Official init from torch repo. 98 | for m in self.modules(): 99 | if isinstance(m, nn.Conv2d): 100 | nn.init.kaiming_normal_(m.weight) 101 | elif isinstance(m, nn.BatchNorm2d): 102 | nn.init.constant_(m.weight, 1) 103 | nn.init.constant_(m.bias, 0) 104 | elif isinstance(m, nn.Linear): 105 | nn.init.constant_(m.bias, 0) 106 | 107 | def linear_params(self): 108 | return self.classifier.parameters() 109 | 110 | def forward(self, x): 111 | features = self.features(x) 112 | out = F.relu(features, inplace=True) 113 | out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1) 114 | out = self.classifier(out) 115 | return out 116 | -------------------------------------------------------------------------------- /models/backbones/mobilenet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | 4 | 5 | class SELayer(nn.Module): 6 | def __init__(self, inplanes, squeeze_ratio=8, activation=nn.PReLU, size=None): 7 | super(SELayer, self).__init__() 8 | if size is not None: 9 | self.global_avgpool = nn.AvgPool2d(size) 10 | else: 11 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 12 | self.conv1 = nn.Conv2d(inplanes, int(inplanes / squeeze_ratio), kernel_size=1, stride=1) 13 | self.conv2 = nn.Conv2d(int(inplanes / squeeze_ratio), inplanes, kernel_size=1, stride=1) 14 | self.relu = nn.ReLU(inplace=True) 15 | self.sigmoid = nn.Sigmoid() 16 | 17 | def forward(self, x): 18 | out = self.global_avgpool(x) 19 | out = self.conv1(out) 20 | out = self.relu(out) 21 | out = self.conv2(out) 22 | out = self.sigmoid(out) 23 | return x * out 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, in_channels, out_channels, stride, expand_ratio, outp_size=None): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | assert stride in [1, 2] 31 | 32 | self.use_res_connect = self.stride == 1 and in_channels == out_channels 33 | 34 | self.inv_block = nn.Sequential( 35 | nn.Conv2d(in_channels, 
in_channels * expand_ratio, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(in_channels * expand_ratio), 37 | nn.PReLU(), 38 | 39 | nn.Conv2d(in_channels * expand_ratio, in_channels * expand_ratio, 3, stride, 1, 40 | groups=in_channels * expand_ratio, bias=False), 41 | nn.BatchNorm2d(in_channels * expand_ratio), 42 | nn.PReLU(), 43 | 44 | nn.Conv2d(in_channels * expand_ratio, out_channels, 1, 1, 0, bias=False), 45 | nn.BatchNorm2d(out_channels), 46 | SELayer(out_channels, 8, nn.PReLU, outp_size) 47 | ) 48 | 49 | def forward(self, x): 50 | if self.use_res_connect: 51 | return x + self.inv_block(x) 52 | 53 | return self.inv_block(x) 54 | 55 | 56 | def init_block(in_channels, out_channels, stride, activation=nn.PReLU): 57 | return nn.Sequential( 58 | nn.BatchNorm2d(3), 59 | nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False), 60 | nn.BatchNorm2d(out_channels), 61 | nn.ReLU(inplace=True) 62 | ) 63 | 64 | 65 | class MobileNet(nn.Module): 66 | def __init__(self, out_features=256, input_size=112, width_multiplier=1., feature=True): 67 | super(MobileNet, self).__init__() 68 | self.feature = feature 69 | 70 | # Set up of inverted residual blocks 71 | inverted_residual_setting = [ 72 | # t, c, n, s 73 | [2, 64, 5, 2], 74 | [4, 128, 1, 2], 75 | [2, 128, 6, 1], 76 | [4, 128, 1, 2], 77 | [2, 128, 2, 1] 78 | ] 79 | 80 | first_channel_num = 64 81 | last_channel_num = 512 82 | self.features = [init_block(3, first_channel_num, 2)] 83 | 84 | self.features.append(nn.Conv2d(first_channel_num, first_channel_num, 3, 1, 1, 85 | groups=first_channel_num, bias=False)) 86 | self.features.append(nn.BatchNorm2d(64)) 87 | self.features.append(nn.PReLU()) 88 | 89 | # Inverted Residual Blocks 90 | in_channel_num = first_channel_num 91 | size_h, size_w = input_size, input_size 92 | size_h, size_w = size_h // 2, size_w // 2 93 | for t, c, n, s in inverted_residual_setting: 94 | output_channel = int(c * width_multiplier) 95 | for i in range(n): 96 | if i == 0: 97 | size_h, size_w = size_h // s, size_w // s 98 | self.features.append(InvertedResidual(in_channel_num, output_channel, 99 | s, t, outp_size=(size_h, size_w))) 100 | else: 101 | self.features.append(InvertedResidual(in_channel_num, output_channel, 102 | 1, t, outp_size=(size_h, size_w))) 103 | in_channel_num = output_channel 104 | 105 | # 1x1 expand block 106 | self.features.append(nn.Sequential(nn.Conv2d(in_channel_num, last_channel_num, 1, 1, 0, bias=False), 107 | nn.BatchNorm2d(last_channel_num), 108 | nn.PReLU())) 109 | self.features = nn.Sequential(*self.features) 110 | 111 | # Depth-wise pooling 112 | k_size = (input_size // 16, input_size // 16) 113 | self.dw_pool = nn.Conv2d(last_channel_num, last_channel_num, k_size, 114 | groups=last_channel_num, bias=False) 115 | self.dw_bn = nn.BatchNorm2d(last_channel_num) 116 | self.conv1_extra = nn.Conv2d(last_channel_num, out_features, 1, stride=1, padding=0, bias=False) 117 | 118 | self.init_weights() 119 | 120 | def forward(self, x): 121 | x = self.features(x) 122 | x = self.dw_bn(self.dw_pool(x)) 123 | x = self.conv1_extra(x) 124 | x = x.view(x.size(0), -1) 125 | return x 126 | 127 | def init_weights(self): 128 | for m in self.modules(): 129 | if isinstance(m, nn.Conv2d): 130 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 131 | m.weight.data.normal_(0, math.sqrt(2. 
/ n))
132 |                 if m.bias is not None:
133 |                     m.bias.data.zero_()
134 |             elif isinstance(m, nn.BatchNorm2d):
135 |                 m.weight.data.fill_(1)
136 |                 m.bias.data.zero_()
137 |             elif isinstance(m, nn.Linear):
138 |                 n = m.weight.size(1)
139 |                 m.weight.data.normal_(0, 0.01)
140 |                 m.bias.data.zero_()
141 | 
--------------------------------------------------------------------------------
/models/encoders.py:
--------------------------------------------------------------------------------
1 | import re
2 | import torch
3 | import torch.nn as nn
4 | 
5 | from torch.utils import model_zoo
6 | 
7 | from models.backbones.mobilenet import MobileNet
8 | 
9 | from models.backbones.resnet import ResNet
10 | from models.backbones.resnet import BasicBlock
11 | from models.backbones.resnet import Bottleneck
12 | 
13 | from models.backbones.senet import SENet
14 | from models.backbones.senet import SEBottleneck
15 | from models.backbones.senet import SEResNetBottleneck
16 | from models.backbones.senet import SEResNeXtBottleneck
17 | 
18 | from models.backbones.densenet import DenseNet
19 | 
20 | from models.blocks import build_layers
21 | 
22 | 
23 | def mobilenet(device='cpu', *argv, **kwargs):
24 |     model = MobileNet(*argv, **kwargs)
25 |     return model.to(device)
26 | 
27 | 
28 | def resnet18(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
29 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
30 |     if pretrained:
31 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet18-5c106cde.pth'))
32 | 
33 |     model.fc = nn.Linear(model.fc.in_features, out_features)
34 |     return model.to(device)
35 | 
36 | 
37 | def resnet34(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
38 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
39 |     if pretrained:
40 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet34-333f7ec4.pth'))
41 | 
42 |     model.fc = nn.Linear(model.fc.in_features, out_features)
43 |     return model.to(device)
44 | 
45 | 
46 | def resnet50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
47 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
48 |     if pretrained:
49 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet50-19c8e357.pth'))
50 | 
51 |     model.fc = nn.Linear(model.fc.in_features, out_features)
52 |     return model.to(device)
53 | 
54 | 
55 | def resnet101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
56 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
57 |     if pretrained:
58 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'))
59 | 
60 |     model.fc = nn.Linear(model.fc.in_features, out_features)
61 |     return model.to(device)
62 | 
63 | 
64 | def resnet152(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
65 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
66 |     if pretrained:
67 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet152-b121ed2d.pth'))
68 | 
69 |     model.fc = nn.Linear(model.fc.in_features, out_features)
70 |     return model.to(device)
71 | 
72 | 
73 | def resnext50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
74 |     kwargs['groups'] = 32
75 |     kwargs['width_per_group'] = 4
76 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
77 |     if pretrained:
78 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth'))
79 | 
80 |     model.fc = nn.Linear(model.fc.in_features, out_features)  # the torchvision-style ResNet names its head ``fc``, not ``last_linear``
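    # out_features=1 yields a single spoof logit per image, read out with
    # torch.sigmoid(model(batch).view(-1)) in train.py and test.py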
81 |     return model.to(device)
82 | 
83 | 
84 | def resnext101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
85 |     kwargs['groups'] = 32
86 |     kwargs['width_per_group'] = 8
87 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
88 |     if pretrained:
89 |         model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth'))
90 | 
91 |     model.fc = nn.Linear(model.fc.in_features, out_features)
92 |     return model.to(device)
93 | 
94 | 
95 | def senet154(device='cpu', *argv, **kwargs):
96 |     model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
97 |                   **kwargs)
98 |     return model.to(device)
99 | 
100 | 
101 | def se_resnet50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
102 |     model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16,
103 |                   inplanes=64, input_3x3=False,
104 |                   downsample_kernel_size=1, downsample_padding=0,
105 |                   **kwargs)
106 |     if pretrained:
107 |         model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth'))
108 | 
109 |     model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
110 |     return model.to(device)
111 | 
112 | 
113 | def se_resnet101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
114 |     model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16,
115 |                   inplanes=64, input_3x3=False,
116 |                   downsample_kernel_size=1, downsample_padding=0,
117 |                   **kwargs)
118 |     return model.to(device)
119 | 
120 | 
121 | def se_resnet152(device='cpu', *argv, **kwargs):
122 |     model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16,
123 |                   inplanes=64, input_3x3=False,
124 |                   downsample_kernel_size=1, downsample_padding=0,
125 |                   **kwargs)
126 |     return model.to(device)
127 | 
128 | 
129 | def se_resnext50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
130 |     model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
131 |                   inplanes=64, input_3x3=False,
132 |                   downsample_kernel_size=1, downsample_padding=0,
133 |                   **kwargs)
134 |     if pretrained:
135 |         model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth'))
136 | 
137 |     model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
138 |     return model.to(device)
139 | 
140 | 
141 | def se_resnext101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
142 |     model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16,
143 |                   inplanes=64, input_3x3=False,
144 |                   downsample_kernel_size=1, downsample_padding=0,
145 |                   **kwargs)
146 |     if pretrained:
147 |         model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth'))
148 | 
149 |     model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
150 |     return model.to(device)
151 | 
152 | 
153 | def densenet121(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
154 |     model = DenseNet(32, (6, 12, 24, 16), 64, **kwargs)
155 |     if pretrained:
156 |         _load_densenet(model, 'https://download.pytorch.org/models/densenet121-a639ec97.pth')
157 | 
158 |     #model.features.add_module('final', build_layers(1024))
159 |     model.classifier = nn.Linear(model.classifier.in_features, out_features)
160 |     return model.to(device)
161 | 
162 | 
163 | def densenet201(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
164 |     model = DenseNet(32, (6, 12, 48, 32), 64, **kwargs)
165 |     if pretrained:
166 | 
_load_densenet(model, 'https://download.pytorch.org/models/densenet201-c1103571.pth') 167 | 168 | model.classifier = nn.Linear(model.classifier.in_features, out_features) 169 | return model.to(device) 170 | 171 | 172 | def _load_densenet(model, model_url): 173 | pattern = re.compile( 174 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 175 | 176 | state_dict = model_zoo.load_url(model_url) 177 | for key in list(state_dict.keys()): 178 | res = pattern.match(key) 179 | if res: 180 | new_key = res.group(1) + res.group(2) 181 | state_dict[new_key] = state_dict[key] 182 | del state_dict[key] 183 | model.load_state_dict(state_dict) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import datetime 4 | import argparse 5 | import yaml 6 | 7 | import cv2 8 | import numpy as np 9 | import pandas as pd 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from torch.utils.data import DataLoader 15 | 16 | from tqdm import tqdm 17 | 18 | import metrics.classification as metrics 19 | 20 | import models 21 | import losses 22 | 23 | from data.datasets import idrnd 24 | from data.transform import Transforms 25 | 26 | from utils.handlers import AverageMeter 27 | from utils.handlers import MetaData 28 | 29 | from utils.storage import save_weights 30 | from utils.storage import load_weights 31 | 32 | 33 | cv2.setNumThreads(0) 34 | 35 | 36 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 37 | 38 | 39 | def main(config): 40 | model = getattr(models, config['encoder'])(device=device, 41 | out_features=config['out_features'], 42 | pretrained=config['pretrained']) 43 | 44 | start_epoch = 0 45 | if config['snapshot']['use']: 46 | load_weights(model, config['prefix'], 'model', config['snapshot']['epoch']) 47 | start_epoch = config['snapshot']['epoch'] 48 | 49 | if torch.cuda.is_available() and config['parallel']: 50 | model = nn.DataParallel(model) 51 | 52 | criterion = getattr(losses, config['loss'])() 53 | optimizer = optim.Adam(model.parameters(), lr=config['learning_rate']) 54 | 55 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 56 | factor=0.5, 57 | patience=2, 58 | min_lr=1e-6) 59 | 60 | train_df, test_df = idrnd.load_dataset(config['train']['folder'], test_size=0.05) 61 | 62 | train_loader = DataLoader(idrnd.TrainAntispoofDataset( 63 | train_df, Transforms(input_size=config['input_size'], train=True)), 64 | batch_size=config['batch_size'], 65 | num_workers=config['num_workers'], 66 | shuffle=True) 67 | 68 | test_loader = DataLoader(idrnd.TrainAntispoofDataset( 69 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']), 70 | batch_size=config['batch_size'], 71 | num_workers=config['num_workers'], 72 | shuffle=False) 73 | 74 | thresholds = np.linspace(0.001, 0.6, num=config['thresholds']) 75 | best_threshold = 0.5 76 | best_epoch = 0 77 | best_score = np.inf 78 | best_loss = np.inf 79 | 80 | for epoch in range(start_epoch, config['num_epochs']): 81 | if epoch == 0: 82 | opt = optim.Adam(model.module.linear_params(), lr=config['learning_rate']) 83 | train(train_loader, model, criterion, opt, epoch, config) 84 | else: 85 | train(train_loader, model, criterion, optimizer, epoch, config) 86 | 87 | loss, accuracy, score = validation(test_loader, model, criterion, thresholds) 88 | 89 | current_time = 
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 90 | print(' Validation:' 91 | ' Time: {}' 92 | ' Epoch: {}' 93 | ' Loss: {:.4f}'.format(current_time, epoch + 1, loss)) 94 | 95 | best_index = np.argmin(score) 96 | print(' Threshold: {:.4f}' 97 | ' Accuracy: {:.5f}' 98 | ' Score: {:.5f}'.format(thresholds[best_index], accuracy[best_index], score[best_index])) 99 | 100 | if best_loss > loss: 101 | best_threshold = thresholds[best_index] 102 | best_score = score[best_index] 103 | best_loss = loss 104 | best_epoch = epoch + 1 105 | save_weights(model, config['prefix'], 'model', 'best', config['parallel']) 106 | 107 | if epoch != 0: 108 | lr_scheduler.step(loss) 109 | 110 | save_weights(model, config['prefix'], 'model', epoch + 1, config['parallel']) 111 | 112 | print(' Best threshold: {:.4f}' 113 | ' Best score: {:.5f}' 114 | ' Best loss: {:.4f}' 115 | ' Best epoch: {}'.format(best_threshold, best_score, best_loss, best_epoch)) 116 | 117 | 118 | def train(data_loader, model, criterion, optimizer, epoch, config): 119 | model.train() 120 | 121 | loss_handler = AverageMeter() 122 | accuracy_handler = AverageMeter() 123 | score_handler = AverageMeter() 124 | 125 | tq = tqdm(total=len(data_loader) * config['batch_size']) 126 | tq.set_description('Epoch {}, lr {:.2e}'.format(epoch + 1, 127 | get_learning_rate(optimizer))) 128 | 129 | for i, (image, target) in enumerate(data_loader): 130 | image = image.to(device) 131 | target = target.to(device) 132 | 133 | output = model(image).view(-1) 134 | 135 | loss = criterion(output, target) 136 | loss.backward() 137 | 138 | batch_size = image.size(0) 139 | 140 | if (i + 1) % config['step'] == 0: 141 | optimizer.step() 142 | optimizer.zero_grad() 143 | 144 | pred = torch.sigmoid(output) > 0.5 145 | target = target > 0.5 146 | 147 | accuracy = metrics.accuracy(pred, target) 148 | score = metrics.min_c(pred, target) 149 | 150 | loss_handler.update(loss) 151 | accuracy_handler.update(accuracy) 152 | score_handler.update(score) 153 | 154 | current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') 155 | current_lr = get_learning_rate(optimizer) 156 | 157 | tq.update(batch_size) 158 | tq.set_postfix(loss='{:.4f}'.format(loss_handler.avg), 159 | accuracy='{:.5f}'.format(accuracy_handler.avg), 160 | score='{:.5f}'.format(score_handler.avg)) 161 | tq.close() 162 | 163 | 164 | def validation(data_loader, model, criterion, thresholds): 165 | model.eval() 166 | 167 | loss_handler = AverageMeter() 168 | accuracy_handler = [AverageMeter() for _ in thresholds] 169 | score_handler = [AverageMeter() for _ in thresholds] 170 | 171 | with torch.no_grad(): 172 | for i, (image, target) in enumerate(data_loader): 173 | image = image.to(device) 174 | target = target.to(device) 175 | 176 | output = model(image).view(-1) 177 | 178 | loss = criterion(output, target) 179 | loss_handler.update(loss) 180 | 181 | target = target.byte() 182 | for i, threshold in enumerate(thresholds): 183 | pred = torch.sigmoid(output) > threshold 184 | 185 | accuracy = metrics.accuracy(pred, target) 186 | score = metrics.min_c(pred, target) 187 | 188 | accuracy_handler[i].update(accuracy) 189 | score_handler[i].update(score) 190 | 191 | return (loss_handler.avg, 192 | [i.avg for i in accuracy_handler], 193 | [i.avg for i in score_handler]) 194 | 195 | 196 | def get_learning_rate(optimizer): 197 | for param_group in optimizer.param_groups: 198 | return param_group['lr'] 199 | 200 | 201 | if __name__ == '__main__': 202 | parser = argparse.ArgumentParser(description='Train code') 203 | 
/models/backbones/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | 
4 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
5 |     """3x3 convolution with padding"""
6 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
7 |                      padding=dilation, groups=groups, bias=False, dilation=dilation)
8 | 
9 | 
10 | def conv1x1(in_planes, out_planes, stride=1):
11 |     """1x1 convolution"""
12 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
13 | 
14 | 
15 | class BasicBlock(nn.Module):
16 |     expansion = 1
17 | 
18 |     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
19 |                  base_width=64, dilation=1, norm_layer=None):
20 |         super(BasicBlock, self).__init__()
21 |         if norm_layer is None:
22 |             norm_layer = nn.BatchNorm2d
23 |         if groups != 1 or base_width != 64:
24 |             raise ValueError('BasicBlock only supports groups=1 and base_width=64')
25 |         if dilation > 1:
26 |             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
27 |         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
28 |         self.conv1 = conv3x3(inplanes, planes, stride)
29 |         self.bn1 = norm_layer(planes)
30 |         self.relu = nn.ReLU(inplace=True)
31 |         self.conv2 = conv3x3(planes, planes)
32 |         self.bn2 = norm_layer(planes)
33 |         self.downsample = downsample
34 |         self.stride = stride
35 | 
36 |     def forward(self, x):
37 |         identity = x
38 | 
39 |         out = self.conv1(x)
40 |         out = self.bn1(out)
41 |         out = self.relu(out)
42 | 
43 |         out = self.conv2(out)
44 |         out = self.bn2(out)
45 | 
46 |         if self.downsample is not None:
47 |             identity = self.downsample(x)
48 | 
49 |         out += identity
50 |         out = self.relu(out)
51 | 
52 |         return out
53 | 
54 | 
55 | class Bottleneck(nn.Module):
56 |     expansion = 4
57 | 
58 |     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
59 |                  base_width=64, dilation=1, norm_layer=None):
60 |         super(Bottleneck, self).__init__()
61 |         if norm_layer is None:
62 |             norm_layer = nn.BatchNorm2d
63 |         width = int(planes * (base_width / 64.)) * groups
64 |         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
65 |         self.conv1 = conv1x1(inplanes, width)
66 |         self.bn1 = norm_layer(width)
67 |         self.conv2 = conv3x3(width, width, stride, groups, dilation)
68 |         self.bn2 = norm_layer(width)
69 |         self.conv3 = conv1x1(width, planes * self.expansion)
70 |         self.bn3 = norm_layer(planes * self.expansion)
71 |         self.relu = nn.ReLU(inplace=True)
72 |         self.downsample = downsample
73 |         self.stride = stride
74 | 
75 |     def forward(self, x):
76 |         identity = x
77 | 
78 |         out = self.conv1(x)
79 |         out = self.bn1(out)
80 |         out = self.relu(out)
81 | 
82 |         out = self.conv2(out)
83 |         out = self.bn2(out)
84 |         out = self.relu(out)
85 | 
86 |         out = self.conv3(out)
87 |         out = self.bn3(out)
88 | 
89 |         if self.downsample is not None:
90 |             identity = self.downsample(x)
91 | 
92 |         out += identity
93 |         out = self.relu(out)
94 | 
95 |         return out
96 | 
97 | 
98 | class ResNet(nn.Module):
99 | 
100 |     def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
101 |                  groups=1, width_per_group=64, replace_stride_with_dilation=None,
102 |                  norm_layer=None):
103 |         super(ResNet, 
self).__init__()
104 |         if norm_layer is None:
105 |             norm_layer = nn.BatchNorm2d
106 |         self._norm_layer = norm_layer
107 | 
108 |         self.inplanes = 64
109 |         self.dilation = 1
110 |         if replace_stride_with_dilation is None:
111 |             # each element in the tuple indicates if we should replace
112 |             # the 2x2 stride with a dilated convolution instead
113 |             replace_stride_with_dilation = [False, False, False]
114 |         if len(replace_stride_with_dilation) != 3:
115 |             raise ValueError("replace_stride_with_dilation should be None "
116 |                              "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
117 |         self.groups = groups
118 |         self.base_width = width_per_group
119 |         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
120 |                                bias=False)
121 |         self.bn1 = norm_layer(self.inplanes)
122 |         self.relu = nn.ReLU(inplace=True)
123 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
124 |         self.layer1 = self._make_layer(block, 64, layers[0])
125 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
126 |                                        dilate=replace_stride_with_dilation[0])
127 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
128 |                                        dilate=replace_stride_with_dilation[1])
129 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
130 |                                        dilate=replace_stride_with_dilation[2])
131 |         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
132 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
133 | 
134 |         for m in self.modules():
135 |             if isinstance(m, nn.Conv2d):
136 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
137 |             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
138 |                 nn.init.constant_(m.weight, 1)
139 |                 nn.init.constant_(m.bias, 0)
140 | 
141 |         # Zero-initialize the last BN in each residual branch,
142 |         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
143 |         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
144 |         if zero_init_residual:
145 |             for m in self.modules():
146 |                 if isinstance(m, Bottleneck):
147 |                     nn.init.constant_(m.bn3.weight, 0)
148 |                 elif isinstance(m, BasicBlock):
149 |                     nn.init.constant_(m.bn2.weight, 0)
150 | 
151 |     def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
152 |         norm_layer = self._norm_layer
153 |         downsample = None
154 |         previous_dilation = self.dilation
155 |         if dilate:
156 |             self.dilation *= stride
157 |             stride = 1
158 |         if stride != 1 or self.inplanes != planes * block.expansion:
159 |             downsample = nn.Sequential(
160 |                 conv1x1(self.inplanes, planes * block.expansion, stride),
161 |                 norm_layer(planes * block.expansion),
162 |             )
163 | 
164 |         layers = []
165 |         layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
166 |                             self.base_width, previous_dilation, norm_layer))
167 |         self.inplanes = planes * block.expansion
168 |         for _ in range(1, blocks):
169 |             layers.append(block(self.inplanes, planes, groups=self.groups,
170 |                                 base_width=self.base_width, dilation=self.dilation,
171 |                                 norm_layer=norm_layer))
172 | 
173 |         return nn.Sequential(*layers)
174 | 
175 |     def forward(self, x):
176 |         x = self.conv1(x)
177 |         x = self.bn1(x)
178 |         x = self.relu(x)
179 |         x = self.maxpool(x)
180 | 
181 |         x = self.layer1(x)
182 |         x = self.layer2(x)
183 |         x = self.layer3(x)
184 |         x = self.layer4(x)
185 | 
186 |         x = self.avgpool(x)
187 |         x = x.reshape(x.size(0), -1)
188 |         x = self.fc(x)
189 | 
190 |         return x
191 | 
--------------------------------------------------------------------------------
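Note: the ResNet class above follows the torchvision layout, so the usual depth variants differ only in their block/layers arguments. A quick sketch under that assumption; the repo's actual factory functions live in models/encoders.py and the other backbone modules and are not shown here:

import torch
from models.backbones.resnet import ResNet, BasicBlock, Bottleneck

# standard torchvision layer layouts
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2])
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])
resnext50 = ResNet(Bottleneck, [3, 4, 6, 3], groups=32, width_per_group=4)  # ResNeXt-50 32x4d

x = torch.randn(1, 3, 224, 224)
print(resnet50(x).shape)  # torch.Size([1, 1000])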
/data/transform.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import torch
3 | 
4 | import numpy as np
5 | 
6 | 
7 | class Compose(object):
8 |     def __init__(self, transforms):
9 |         self.transforms = transforms
10 | 
11 |     def __call__(self, image):
12 |         for t in self.transforms:
13 |             image = t(image)
14 |         return image
15 | 
16 | 
17 | class OneOf(object):
18 |     def __init__(self, transforms):
19 |         self.transforms = transforms
20 | 
21 |     def __call__(self, image):
22 |         transform = np.random.choice(self.transforms)
23 |         image = transform(image)
24 |         return image
25 | 
26 | 
27 | class RandomApply(object):
28 |     def __init__(self, transforms, prob=0.5):
29 |         self.transforms = transforms
30 |         self.prob = prob
31 | 
32 |     def __call__(self, image):
33 |         for t in self.transforms:
34 |             if np.random.rand() < self.prob:
35 |                 image = t(image)
36 |         return image
37 | 
38 | 
39 | class CenterCrop(object):
40 |     def __init__(self, size=None):
41 |         self.size = size
42 | 
43 |     def __call__(self, image):
44 |         height, width = image.shape[:2]
45 | 
46 |         # crop the longer side to make the image square
47 |         if height > width:
48 |             center = height // 2
49 |             top = center - width // 2
50 |             bottom = center + width // 2
51 |             image = image[top:bottom, :]
52 |         else:
53 |             center = width // 2
54 |             left = center - height // 2
55 |             right = center + height // 2
56 |             image = image[:, left:right]
57 | 
58 |         if self.size and self.size < image.shape[0]:
59 |             center = image.shape[0] // 2  # recompute the center on the cropped image
60 |             top = center - self.size // 2
61 |             bottom = center + self.size // 2
62 |             left = center - self.size // 2
63 |             right = center + self.size // 2
64 |             image = image[top:bottom, left:right]
65 | 
66 |         return image
67 | 
68 | 
69 | class RandomCrop(object):
70 |     def __init__(self, ratio):
71 |         self.ratio = ratio
72 | 
73 |     def __call__(self, image):
74 |         width = int(image.shape[1] * self.ratio)
75 |         height = int(image.shape[0] * self.ratio)
76 | 
77 |         max_x = image.shape[1] - width
78 |         max_y = image.shape[0] - height
79 | 
80 |         x = np.random.randint(0, max_x) if max_x else 0
81 |         y = np.random.randint(0, max_y) if max_y else 0
82 | 
83 |         image = image[y:y + height, x:x + width]
84 |         return image
85 | 
86 | 
87 | class Contrast(object):
88 |     def __init__(self, lower=0.9, upper=1.1):
89 |         self.lower = lower
90 |         self.upper = upper
91 | 
92 |     def __call__(self, image):
93 |         alpha = np.random.uniform(self.lower, self.upper)
94 |         image = np.clip(image * alpha, 0, 1)
95 |         return image
96 | 
97 | 
98 | class Brightness(object):
99 |     def __init__(self, delta=0.125):
100 |         self.delta = delta
101 | 
102 |     def __call__(self, image):
103 |         delta = np.random.uniform(-self.delta, self.delta)
104 |         image = np.clip(image + delta, 0, 1)
105 |         return image
106 | 
107 | 
108 | class GaussianBlur(object):
109 |     def __init__(self, kernel=3):
110 |         self.kernel = (kernel, kernel)
111 | 
112 |     def __call__(self, image):
113 |         image = cv2.GaussianBlur(image, self.kernel, 0)
114 |         return image
115 | 
116 | 
117 | class Expand(object):
118 |     def __init__(self, size=1024, diff=0.3, noise=False):
119 |         self.size = size
120 |         self.noise = noise
121 |         self.diff = diff
122 | 
123 |     def __call__(self, image):
124 |         height, width = image.shape[:2]
125 |         max_ratio = self.size / max(height, width)
126 |         min_ratio = max_ratio * self.diff
127 | 
128 |         ratio = np.random.uniform(min_ratio, max_ratio)
129 |         left = np.random.uniform(0, self.size - width * ratio)
130 |         top = np.random.uniform(0, self.size - height * ratio)
131 | 
132 |         expand_image = np.zeros((self.size, self.size, 3), dtype=image.dtype)
133 |         if self.noise:
134 |             mean = np.full(3, 0.5)
135 |             std = np.full(3, 0.5)
136 |             expand_image = cv2.randn(expand_image, mean, std)
137 |             expand_image = np.clip(expand_image, 0, 1)
138 | 
139 |         image = cv2.resize(image, (int(width * ratio), int(height * ratio)))
140 | 
141 |         expand_image[int(top):int(top) + int(height * ratio),
142 |                      int(left):int(left) + int(width * ratio)] = image
143 |         image = expand_image
144 | 
145 |         return image
146 | 
147 | 
148 | class Pad(object):
149 |     def __init__(self, size):
150 |         self.size = size
151 | 
152 |     def __call__(self, image):
153 |         height, width = image.shape[:2]
154 | 
155 |         ratio = self.size / max(height, width)
156 | 
157 |         new_height = int(height * ratio)
158 |         new_width = int(width * ratio)
159 | 
160 |         # cv2.resize takes the new size in (width, height) order
161 |         image = cv2.resize(image, (new_width, new_height))
162 | 
163 |         delta_w = self.size - new_width
164 |         delta_h = self.size - new_height
165 | 
166 |         top, bottom = delta_h // 2, delta_h - (delta_h // 2)
167 |         left, right = delta_w // 2, delta_w - (delta_w // 2)
168 | 
169 |         color = [0, 0, 0]
170 |         image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT,
171 |                                    value=color)
172 |         return image
173 | 
174 | 
175 | class Rotate(object):
176 |     def __init__(self, angle=10, aligne=False):
177 |         self.angle = angle
178 |         self.aligne = aligne
179 | 
180 |     def __call__(self, image):
181 |         angle = np.random.uniform(-self.angle, self.angle)
182 | 
183 |         height, width = image.shape[:2]
184 |         cX, cY = width / 2, height / 2
185 | 
186 |         M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
187 | 
188 |         if self.aligne:
189 |             cos = np.abs(M[0, 0])
190 |             sin = np.abs(M[0, 1])
191 | 
192 |             # bounding size computed from the original width and height
193 |             new_width = int((height * sin) + (width * cos))
194 |             new_height = int((height * cos) + (width * sin))
195 | 
196 |             M[0, 2] += (new_width / 2) - cX
197 |             M[1, 2] += (new_height / 2) - cY
198 |             width, height = new_width, new_height
199 | 
200 |         image = cv2.warpAffine(image, M, (width, height), borderMode=cv2.BORDER_CONSTANT)
201 |         return image
202 | 
203 | 
204 | class Resize(object):
205 |     def __init__(self, size):
206 |         self.size = size
207 | 
208 |     def __call__(self, image):
209 |         image = cv2.resize(image, (self.size, self.size))
210 |         return image
211 | 
212 | 
213 | class HorizontalFlip(object):
214 |     def __call__(self, image):
215 |         image = cv2.flip(image, 1)
216 |         return image
217 | 
218 | 
219 | class ToTensor(object):
220 |     def __call__(self, image):
221 |         image = image.transpose((2, 0, 1))  # HWC -> CHW
222 |         image = torch.from_numpy(image)
223 |         return image.float()
224 | 
225 | 
226 | class Normalize(object):
227 |     def __init__(self, mean=None, std=None):
228 |         self.mean = np.array(mean or [0.485, 0.456, 0.406])
229 |         self.std = np.array(std or [0.229, 0.224, 0.225])
230 | 
231 |     def __call__(self, image):
232 |         image = (image - self.mean) / self.std
233 |         return image
234 | 
235 | 
236 | class Transforms(object):
237 |     def __init__(self, input_size, train=True):
238 |         self.train = train
239 | 
240 |         self.transforms_train = RandomApply([
241 |             RandomCrop(0.9),
242 |             Rotate(angle=10, aligne=False),
243 |             HorizontalFlip(),
244 |         ])
245 | 
246 |         self.transforms_test = RandomApply([
247 |             HorizontalFlip(),
248 |         ])
249 | 
250 |         self.normalize = Compose([
251 |             Resize(input_size),
252 |             Normalize(),
253 |             ToTensor(),
254 |         ])
255 | 
256 |     def __call__(self, image):
257 |         if self.train:
258 |             image = self.transforms_train(image)
259 |         else:
260 |             image = self.transforms_test(image)
261 | 
262 |         image = self.normalize(image)
263 |         return image
--------------------------------------------------------------------------------
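Note: Transforms is the entry point train.py uses: random geometric augmentations for training, an optional horizontal flip at test time, then resize/normalize/tensor conversion in both cases. The pipeline appears to expect an RGB image as a float array in [0, 1] (Contrast and Brightness clip to that range). A hypothetical usage sketch with a random stand-in for a loaded frame:

import numpy as np
from data.transform import Transforms

image = np.random.rand(480, 640, 3).astype(np.float32)  # stand-in for a real image

train_tf = Transforms(input_size=224, train=True)
test_tf = Transforms(input_size=224, train=False)

tensor = train_tf(image)
print(tensor.shape, tensor.dtype)  # torch.Size([3, 224, 224]) torch.float32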
/models/backbones/senet.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 | 
4 | from collections import OrderedDict
5 | 
6 | 
7 | class SEModule(nn.Module):
8 | 
9 |     def __init__(self, channels, reduction):
10 |         super(SEModule, self).__init__()
11 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
12 |         self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
13 |                              padding=0)
14 |         self.relu = nn.ReLU(inplace=True)
15 |         self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
16 |                              padding=0)
17 |         self.sigmoid = nn.Sigmoid()
18 | 
19 |     def forward(self, x):
20 |         module_input = x
21 |         x = self.avg_pool(x)
22 |         x = self.fc1(x)
23 |         x = self.relu(x)
24 |         x = self.fc2(x)
25 |         x = self.sigmoid(x)
26 |         return module_input * x
27 | 
28 | 
29 | class Bottleneck(nn.Module):
30 |     """
31 |     Base class for bottlenecks that implements `forward()` method.
32 | """ 33 | def forward(self, x): 34 | residual = x 35 | 36 | out = self.conv1(x) 37 | out = self.bn1(out) 38 | out = self.relu(out) 39 | 40 | out = self.conv2(out) 41 | out = self.bn2(out) 42 | out = self.relu(out) 43 | 44 | out = self.conv3(out) 45 | out = self.bn3(out) 46 | 47 | if self.downsample is not None: 48 | residual = self.downsample(x) 49 | 50 | out = self.se_module(out) + residual 51 | out = self.relu(out) 52 | 53 | return out 54 | 55 | 56 | class SEBottleneck(Bottleneck): 57 | """ 58 | Bottleneck for SENet154. 59 | """ 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 63 | downsample=None): 64 | super(SEBottleneck, self).__init__() 65 | self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) 66 | self.bn1 = nn.BatchNorm2d(planes * 2) 67 | self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, 68 | stride=stride, padding=1, groups=groups, 69 | bias=False) 70 | self.bn2 = nn.BatchNorm2d(planes * 4) 71 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 72 | bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * 4) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.se_module = SEModule(planes * 4, reduction=reduction) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | 80 | class SEResNetBottleneck(Bottleneck): 81 | """ 82 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 83 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 84 | (the latter is used in the torchvision implementation of ResNet). 85 | """ 86 | expansion = 4 87 | 88 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 89 | downsample=None): 90 | super(SEResNetBottleneck, self).__init__() 91 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 92 | stride=stride) 93 | self.bn1 = nn.BatchNorm2d(planes) 94 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, 95 | groups=groups, bias=False) 96 | self.bn2 = nn.BatchNorm2d(planes) 97 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 98 | self.bn3 = nn.BatchNorm2d(planes * 4) 99 | self.relu = nn.ReLU(inplace=True) 100 | self.se_module = SEModule(planes * 4, reduction=reduction) 101 | self.downsample = downsample 102 | self.stride = stride 103 | 104 | 105 | class SEResNeXtBottleneck(Bottleneck): 106 | """ 107 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 
108 | """ 109 | expansion = 4 110 | 111 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 112 | downsample=None, base_width=4): 113 | super(SEResNeXtBottleneck, self).__init__() 114 | width = math.floor(planes * (base_width / 64)) * groups 115 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 116 | stride=1) 117 | self.bn1 = nn.BatchNorm2d(width) 118 | self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, 119 | padding=1, groups=groups, bias=False) 120 | self.bn2 = nn.BatchNorm2d(width) 121 | self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) 122 | self.bn3 = nn.BatchNorm2d(planes * 4) 123 | self.relu = nn.ReLU(inplace=True) 124 | self.se_module = SEModule(planes * 4, reduction=reduction) 125 | self.downsample = downsample 126 | self.stride = stride 127 | 128 | 129 | class SENet(nn.Module): 130 | 131 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 132 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 133 | downsample_padding=1, num_classes=1000): 134 | """ 135 | Parameters 136 | ---------- 137 | block (nn.Module): Bottleneck class. 138 | - For SENet154: SEBottleneck 139 | - For SE-ResNet models: SEResNetBottleneck 140 | - For SE-ResNeXt models: SEResNeXtBottleneck 141 | layers (list of ints): Number of residual blocks for 4 layers of the 142 | network (layer1...layer4). 143 | groups (int): Number of groups for the 3x3 convolution in each 144 | bottleneck block. 145 | - For SENet154: 64 146 | - For SE-ResNet models: 1 147 | - For SE-ResNeXt models: 32 148 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules. 149 | - For all models: 16 150 | dropout_p (float or None): Drop probability for the Dropout layer. 151 | If `None` the Dropout layer is not used. 152 | - For SENet154: 0.2 153 | - For SE-ResNet models: None 154 | - For SE-ResNeXt models: None 155 | inplanes (int): Number of input channels for layer1. 156 | - For SENet154: 128 157 | - For SE-ResNet models: 64 158 | - For SE-ResNeXt models: 64 159 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of 160 | a single 7x7 convolution in layer0. 161 | - For SENet154: True 162 | - For SE-ResNet models: False 163 | - For SE-ResNeXt models: False 164 | downsample_kernel_size (int): Kernel size for downsampling convolutions 165 | in layer2, layer3 and layer4. 166 | - For SENet154: 3 167 | - For SE-ResNet models: 1 168 | - For SE-ResNeXt models: 1 169 | downsample_padding (int): Padding for downsampling convolutions in 170 | layer2, layer3 and layer4. 171 | - For SENet154: 1 172 | - For SE-ResNet models: 0 173 | - For SE-ResNeXt models: 0 174 | num_classes (int): Number of outputs in `last_linear` layer. 
175 | - For all models: 1000 176 | """ 177 | super(SENet, self).__init__() 178 | self.inplanes = inplanes 179 | if input_3x3: 180 | layer0_modules = [ 181 | ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, 182 | bias=False)), 183 | ('bn1', nn.BatchNorm2d(64)), 184 | ('relu1', nn.ReLU(inplace=True)), 185 | ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, 186 | bias=False)), 187 | ('bn2', nn.BatchNorm2d(64)), 188 | ('relu2', nn.ReLU(inplace=True)), 189 | ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, 190 | bias=False)), 191 | ('bn3', nn.BatchNorm2d(inplanes)), 192 | ('relu3', nn.ReLU(inplace=True)), 193 | ] 194 | else: 195 | layer0_modules = [ 196 | ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, 197 | padding=3, bias=False)), 198 | ('bn1', nn.BatchNorm2d(inplanes)), 199 | ('relu1', nn.ReLU(inplace=True)), 200 | ] 201 | # To preserve compatibility with Caffe weights `ceil_mode=True` 202 | # is used instead of `padding=1`. 203 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 204 | ceil_mode=True))) 205 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 206 | self.layer1 = self._make_layer( 207 | block, 208 | planes=64, 209 | blocks=layers[0], 210 | groups=groups, 211 | reduction=reduction, 212 | downsample_kernel_size=1, 213 | downsample_padding=0 214 | ) 215 | self.layer2 = self._make_layer( 216 | block, 217 | planes=128, 218 | blocks=layers[1], 219 | stride=2, 220 | groups=groups, 221 | reduction=reduction, 222 | downsample_kernel_size=downsample_kernel_size, 223 | downsample_padding=downsample_padding 224 | ) 225 | self.layer3 = self._make_layer( 226 | block, 227 | planes=256, 228 | blocks=layers[2], 229 | stride=2, 230 | groups=groups, 231 | reduction=reduction, 232 | downsample_kernel_size=downsample_kernel_size, 233 | downsample_padding=downsample_padding 234 | ) 235 | self.layer4 = self._make_layer( 236 | block, 237 | planes=512, 238 | blocks=layers[3], 239 | stride=2, 240 | groups=groups, 241 | reduction=reduction, 242 | downsample_kernel_size=downsample_kernel_size, 243 | downsample_padding=downsample_padding 244 | ) 245 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 246 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 247 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 248 | 249 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 250 | downsample_kernel_size=1, downsample_padding=0): 251 | downsample = None 252 | if stride != 1 or self.inplanes != planes * block.expansion: 253 | downsample = nn.Sequential( 254 | nn.Conv2d(self.inplanes, planes * block.expansion, 255 | kernel_size=downsample_kernel_size, stride=stride, 256 | padding=downsample_padding, bias=False), 257 | nn.BatchNorm2d(planes * block.expansion), 258 | ) 259 | 260 | layers = [] 261 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 262 | downsample)) 263 | self.inplanes = planes * block.expansion 264 | for i in range(1, blocks): 265 | layers.append(block(self.inplanes, planes, groups, reduction)) 266 | 267 | return nn.Sequential(*layers) 268 | 269 | def linear_params(self): 270 | return self.last_linear.parameters() 271 | 272 | def features(self, x): 273 | x = self.layer0(x) 274 | x = self.layer1(x) 275 | x = self.layer2(x) 276 | x = self.layer3(x) 277 | x = self.layer4(x) 278 | return x 279 | 280 | def logits(self, x): 281 | x = self.avg_pool(x) 282 | if self.dropout is not None: 283 | x = self.dropout(x) 284 | x = x.view(x.size(0), -1) 285 | x = self.last_linear(x) 286 | return x 287 | 
288 | def forward(self, x): 289 | x = self.features(x) 290 | x = self.logits(x) 291 | return x --------------------------------------------------------------------------------
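Note: the SENet constructor docstring above pins down the per-model hyper-parameters, so the SE variants differ only in their SENet(...) arguments. For example, SE-ResNeXt-50 (32x4d), presumably the backbone behind the se_resnext50_* configs, would be assembled as below; this is a sketch following the docstring's parameter table, not the repo's own factory function (which is not shown here):

import torch
from models.backbones.senet import SENet, SEResNeXtBottleneck

# SE-ResNeXt-50 (32x4d) per the parameter table in SENet's docstring
se_resnext50 = SENet(SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32,
                     reduction=16, dropout_p=None, inplanes=64, input_3x3=False,
                     downsample_kernel_size=1, downsample_padding=0,
                     num_classes=1000)

x = torch.randn(2, 3, 224, 224)
print(se_resnext50(x).shape)  # torch.Size([2, 1000])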