├── data
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   └── idrnd.py
│   └── transform.py
├── models
│   ├── __init__.py
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── densenet.py
│   │   ├── mobilenet.py
│   │   ├── resnet.py
│   │   └── senet.py
│   ├── blocks.py
│   └── encoders.py
├── utils
│   ├── __init__.py
│   ├── handlers.py
│   └── storage.py
├── metrics
│   ├── __init__.py
│   └── classification.py
├── optimizers
│   ├── __init__.py
│   ├── lr_scheduler.py
│   ├── sgdw.py
│   └── adamw.py
├── __init__.py
├── losses
│   ├── __init__.py
│   ├── bce_loss.py
│   └── focal_loss.py
├── meta.json
├── tmp
│   ├── meta.json
│   └── test.py
├── config
│   ├── se_resnext50_bce.yaml
│   ├── densenet121_bce.yaml
│   ├── densenet121_focal.yaml
│   ├── se_resnext101_focal.yaml
│   └── se_resnext50_focal.yaml
├── README.md
├── LICENSE
├── test.py
└── train.py
/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/metrics/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | import cv2
2 |
3 |
4 | cv2.setNumThreads(0)
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .bce_loss import *
2 | from .focal_loss import *
--------------------------------------------------------------------------------
/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "image": "ksanvatds/idrnd-antispoof",
3 | "entrypoint": "python3 test.py --path-images-csv $PATH_INPUT/meta.csv --path-test-dir $PATH_INPUT --path-submission-csv $PATH_OUTPUT/solution.csv"
4 | }
5 |
--------------------------------------------------------------------------------
/tmp/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "image": "ksanvatds/idrnd-antispoof",
3 | "entrypoint": "python3 test.py --config config/se_resnext50_focal.yaml --path-images-csv $PATH_INPUT/meta.csv --path-test-dir $PATH_INPUT --path-submission-csv $PATH_OUTPUT/solution.csv"
4 | }
5 |
--------------------------------------------------------------------------------
/config/se_resnext50_bce.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'se_resnext50_bce'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 |
6 | encoder: 'se_resnext50'
7 | input_size: 720
8 | out_features: 1
9 | pretrained: yes
10 |
11 | loss: 'bce'
12 | optimizer: 'Adam'
13 |
14 | learning_rate: 0.0001
15 | weight_decay: 0.0001
16 | momentum: 0.9
17 | batch_size: 8
18 | step: 4
19 | num_epochs: 20
20 | thresholds: 50
21 | tta: 1
22 |
23 | frames: [1, 5]
24 |
25 | train:
26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/'
27 |
28 | snapshot:
29 | use: no
30 | epoch: 20
--------------------------------------------------------------------------------
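For reference, `step` is consumed by train.py as a gradient-accumulation interval: `optimizer.step()` fires once every `step` batches, so the effective batch size under this config is `batch_size * step = 8 * 4 = 32`. A minimal, self-contained sketch of the same pattern (dummy model and data):

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()

for i in range(8):
    image, target = torch.randn(8, 4), torch.randint(0, 2, (8,)).float()
    criterion(model(image).view(-1), target).backward()  # gradients accumulate
    if (i + 1) % 4 == 0:  # config['step'] == 4
        optimizer.step()
        optimizer.zero_grad()
```
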
/config/densenet121_bce.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'densenet121_bce'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 |
6 | encoder: 'densenet121'
7 | input_size: 1080
8 | out_features: 1
9 | pretrained: yes
10 |
11 | loss: 'bce'
12 | optimizer: 'Adam'
13 |
14 | learning_rate: 0.0001
15 | weight_decay: 0.0001
16 | momentum: 0.9
17 | batch_size: 8
18 | step: 4
19 | num_epochs: 20
20 | thresholds: 50
21 | tta: 1
22 |
23 | frames: [2, 4]
24 |
25 | train:
26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/'
27 |
28 | snapshot:
29 | use: no
30 | epoch: 4
31 |
--------------------------------------------------------------------------------
/config/densenet121_focal.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'densenet121_focal'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 |
6 | encoder: 'densenet121'
7 | input_size: 800
8 | out_features: 1
9 | pretrained: yes
10 |
11 | loss: 'focal'
12 | optimizer: 'Adam'
13 |
14 | learning_rate: 0.0001
15 | weight_decay: 0.0001
16 | momentum: 0.9
17 | batch_size: 8
18 | step: 4
19 | num_epochs: 20
20 | thresholds: 50
21 | tta: 1
22 |
23 | frames: [2, 3, 4]
24 |
25 | train:
26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/'
27 |
28 | snapshot:
29 | use: no
30 | epoch: 4
--------------------------------------------------------------------------------
/config/se_resnext101_focal.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'se_resnext101_focal'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 |
6 | encoder: 'se_resnext101'
7 | input_size: 720
8 | out_features: 1
9 | pretrained: yes
10 |
11 | loss: 'focal'
12 | optimizer: 'Adam'
13 |
14 | learning_rate: 0.0001
15 | weight_decay: 0.0001
16 | momentum: 0.9
17 | batch_size: 8
18 | step: 4
19 | num_epochs: 20
20 | thresholds: 50
21 | tta: 1
22 |
23 | frames: [2, 3, 4]
24 |
25 | train:
26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/'
27 |
28 | snapshot:
29 | use: no
30 | epoch: 20
--------------------------------------------------------------------------------
/config/se_resnext50_focal.yaml:
--------------------------------------------------------------------------------
1 | prefix: 'se_resnext50_focal'
2 | parallel: yes
3 | save_freq: 1
4 | num_workers: 4
5 |
6 | encoder: 'se_resnext50'
7 | input_size: 800
8 | out_features: 1
9 | pretrained: yes
10 |
11 | loss: 'focal'
12 | optimizer: 'Adam'
13 |
14 | learning_rate: 0.0001
15 | weight_decay: 0.0001
16 | momentum: 0.9
17 | batch_size: 8
18 | step: 4
19 | num_epochs: 20
20 | thresholds: 50
21 | tta: 1
22 |
23 | frames: [1, 3, 5]
24 |
25 | train:
26 | folder: '/shared/datasets/faces/anti_spoofing/IDRnD/train/'
27 |
28 | snapshot:
29 | use: no
30 | epoch: 20
31 |
--------------------------------------------------------------------------------
/utils/handlers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class AverageMeter(object):
5 |     def __init__(self):
6 |         self.reset()
7 |
8 |     def reset(self):
9 |         self.val = 0
10 |         self.avg = 0
11 |         self.sum = 0
12 |         self.count = 0
13 |
14 |     def update(self, val, n=1):
15 |         self.val = val
16 |         self.sum += val * n
17 |         self.count += n
18 |         self.avg = self.sum / self.count
19 |
20 |
21 | class MetaData(object):
22 |     def __init__(self):
23 |         self.reset()
24 |
25 |     def reset(self):
26 |         self.loss = np.inf
27 |         self.score = 0
28 |
29 |     def update(self, loss, score):
30 |         self.loss = loss
31 |         self.score = score
--------------------------------------------------------------------------------
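`AverageMeter` tracks a running weighted mean, with `n` as the weight of each update (train.py uses the default `n=1`, one update per batch). A quick illustration with made-up values:

```python
from utils.handlers import AverageMeter

meter = AverageMeter()
meter.update(0.8, n=4)  # e.g. mean loss 0.8 over 4 samples
meter.update(0.6, n=2)  # mean loss 0.6 over 2 samples
print(meter.val, meter.avg)  # 0.6 and (0.8*4 + 0.6*2)/6 ≈ 0.7333
```
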
/losses/bce_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class BCELoss(nn.Module):
7 | def __init__(self, logits=True, reduce=True):
8 | super(BCELoss, self).__init__()
9 | self.logits = logits
10 | self.reduce = reduce
11 |
12 | def forward(self, inputs, targets):
13 | if self.logits:
14 | bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
15 | else:
16 | bce = F.binary_cross_entropy(inputs, targets, reduction='none')
17 |
18 | if self.reduce:
19 | return torch.mean(bce)
20 | else:
21 | return bce
22 |
23 |
24 | def bce(*argv, **kwargs):
25 | return BCELoss(*argv, **kwargs)
--------------------------------------------------------------------------------
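With the default `logits=True` and `reduce=True`, `BCELoss` matches the built-in mean-reduced loss; a small sanity check:

```python
import torch
import torch.nn.functional as F
from losses.bce_loss import BCELoss

torch.manual_seed(0)
logits = torch.randn(8)
targets = torch.randint(0, 2, (8,)).float()

assert torch.allclose(BCELoss()(logits, targets),
                      F.binary_cross_entropy_with_logits(logits, targets))
```
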
/metrics/classification.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def accuracy(output, target):
5 | """Computes the accuracy over the k top predictions for the specified values of k"""
6 | with torch.no_grad():
7 | res = torch.sum(output == target)
8 | return res.float() / target.size(0)
9 |
10 |
11 | def min_c(output, target):
12 | # FP/(FP+TN) + 19⋅FN/(FN+TP)
13 | with torch.no_grad():
14 |         eps = 1e-9
15 |
16 | TP = (output & target).sum().float()
17 | TN = (~output & ~target).sum().float()
18 | FP = (output & ~target).sum().float()
19 | FN = (~output & target).sum().float()
20 |
21 | res = FP / (FP + TN + eps) + 19 * FN / (FN + TP + eps)
22 | return res / target.size(0)
--------------------------------------------------------------------------------
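A worked example of both metrics on a toy batch (note that `min_c` also divides the weighted error sum by the batch size):

```python
import torch
from metrics.classification import accuracy, min_c

# One of each outcome: TP, TN, FP, FN.
output = torch.tensor([True, False, True, False])
target = torch.tensor([True, False, False, True])

print(accuracy(output, target))  # 2 correct out of 4 -> 0.5
# FP/(FP+TN) + 19*FN/(FN+TP) = 1/2 + 19/2 = 10.0, divided by batch size 4:
print(min_c(output, target))     # ≈ 2.5
```
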
/utils/storage.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 | def load_weights(model, prefix, model_type, epoch):
5 |     file = os.path.join('snapshots',
6 |                         '{}_{}_epoch_{}.pth'.format(prefix,
7 |                                                     model_type,
8 |                                                     epoch))
9 |     # map_location lets CUDA-trained snapshots load on CPU-only machines.
10 |     checkpoint = torch.load(file, map_location='cpu')
11 |     model.load_state_dict(checkpoint['state_dict'])
12 |
13 |
14 | def save_weights(model, prefix, model_type, epoch, parallel=True):
15 |     os.makedirs('snapshots', exist_ok=True)
16 |     file = os.path.join('snapshots',
17 |                         '{}_{}_epoch_{}.pth'.format(prefix,
18 |                                                     model_type,
19 |                                                     epoch))
20 |     if torch.cuda.is_available() and parallel:
21 |         # DataParallel wraps the model; unwrap so keys match at load time.
22 |         state_dict = model.module.state_dict()
23 |     else:
24 |         state_dict = model.state_dict()
25 |
26 |     torch.save({'state_dict': state_dict}, file)
--------------------------------------------------------------------------------
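A round-trip sketch (the prefix and epoch values are illustrative; files land in ./snapshots):

```python
import torch.nn as nn
from utils.storage import save_weights, load_weights

model = nn.Linear(4, 1)
save_weights(model, prefix='demo', model_type='model', epoch='best', parallel=False)

restored = nn.Linear(4, 1)
load_weights(restored, prefix='demo', model_type='model', epoch='best')
```
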
/losses/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class FocalLoss(nn.Module):
7 | def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
8 | super(FocalLoss, self).__init__()
9 | self.alpha = alpha
10 | self.gamma = gamma
11 | self.logits = logits
12 | self.reduce = reduce
13 |
14 | def forward(self, inputs, targets):
15 | if self.logits:
16 | bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
17 | else:
18 | bce = F.binary_cross_entropy(inputs, targets, reduction='none')
19 |
20 | p = torch.exp(-bce)
21 | f_loss = self.alpha * (1 - p)**self.gamma * bce
22 |
23 | if self.reduce:
24 | return torch.mean(f_loss)
25 | else:
26 | return f_loss
27 |
28 |
29 | def focal(*argv, **kwargs):
30 | return FocalLoss(*argv, **kwargs)
--------------------------------------------------------------------------------
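Two properties worth checking: with `gamma=0` the modulating factor `(1 - p)**gamma` is 1 and the focal loss reduces to plain BCE, while `gamma=2` down-weights confident, correct predictions far more than uncertain ones:

```python
import torch
from losses.bce_loss import BCELoss
from losses.focal_loss import FocalLoss

logits = torch.tensor([3.0, -3.0, 0.1])  # two confident predictions, one uncertain
targets = torch.tensor([1.0, 0.0, 1.0])

assert torch.allclose(FocalLoss(gamma=0)(logits, targets), BCELoss()(logits, targets))
print(FocalLoss(gamma=2, reduce=False)(logits, targets))  # easy examples shrink the most
```
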
/README.md:
--------------------------------------------------------------------------------
1 | # ID R&D Anti-spoofing Challenge 1st place Solution ([link](https://datasouls.com/c/idrnd-antispoof/leaderboard))
2 |
3 | Train a model: `python train.py --config <configuration file>`
4 |
5 | Predict: `python test.py --path-images-csv <annotation> --path-test-dir <path to images> --path-submission-csv <submission>`
6 |
7 | The final solution is an ensemble of two models: se_resnext50 and densenet121.
8 |
9 | The following configs are used to train these models: [se_resnext50_focal.yaml](https://github.com/romavlasov/idrnd-anti-spoofing-challenge/blob/master/config/se_resnext50_focal.yaml) and [densenet121_focal.yaml](https://github.com/romavlasov/idrnd-anti-spoofing-challenge/blob/master/config/densenet121_focal.yaml)
10 |
11 | Pretrained models: [se_resnext50_focal.pth](https://www.dropbox.com/s/o0mpw0ep7ntamzv/se_resnext50_focal_model_epoch_best.pth?dl=0) and [densenet121_focal.pth](https://www.dropbox.com/s/i5utd1nooulyh7z/densenet121_focal_model_epoch_best.pth?dl=0)
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 romavlasov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/models/blocks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class SELayer(nn.Module):
6 | def __init__(self, inplanes, squeeze_ratio=8, activation=nn.PReLU, size=None):
7 | super(SELayer, self).__init__()
8 | if size is not None:
9 | self.global_avgpool = nn.AvgPool2d(size)
10 | else:
11 | self.global_avgpool = nn.AdaptiveAvgPool2d(1)
12 | self.conv1 = nn.Conv2d(inplanes, int(inplanes / squeeze_ratio), kernel_size=1, stride=1)
13 | self.conv2 = nn.Conv2d(int(inplanes / squeeze_ratio), inplanes, kernel_size=1, stride=1)
14 | self.relu = nn.ReLU(inplace=True)
15 | self.sigmoid = nn.Sigmoid()
16 |
17 | def forward(self, x):
18 | out = self.global_avgpool(x)
19 | out = self.conv1(out)
20 | out = self.relu(out)
21 | out = self.conv2(out)
22 | out = self.sigmoid(out)
23 | return x * out
24 |
25 |
26 | class InvertedResidual(nn.Module):
27 | def __init__(self, in_channels, out_channels, stride, expand_ratio, outp_size=None):
28 | super(InvertedResidual, self).__init__()
29 | self.stride = stride
30 | assert stride in [1, 2]
31 |
32 | self.use_res_connect = self.stride == 1 and in_channels == out_channels
33 |
34 | self.inv_block = nn.Sequential(
35 | nn.Conv2d(in_channels, in_channels * expand_ratio, 1, 1, 0, bias=False),
36 | nn.BatchNorm2d(in_channels * expand_ratio),
37 | nn.ReLU(),
38 |
39 | nn.Conv2d(in_channels * expand_ratio, in_channels * expand_ratio, 3, stride, 1,
40 | groups=in_channels * expand_ratio, bias=False),
41 | nn.BatchNorm2d(in_channels * expand_ratio),
42 | nn.ReLU(),
43 |
44 | nn.Conv2d(in_channels * expand_ratio, out_channels, 1, 1, 0, bias=False),
45 | nn.BatchNorm2d(out_channels),
46 | SELayer(out_channels, 8, nn.ReLU, outp_size)
47 | )
48 |
49 | def forward(self, x):
50 | if self.use_res_connect:
51 | return x + self.inv_block(x)
52 |
53 | return self.inv_block(x)
54 |
55 |
56 | def build_layers(in_channel):
57 | setting = [
58 | # t, c, n, s
59 | [2, in_channel, 2, 2],
60 | [2, in_channel, 2, 2],
61 | ]
62 | layers = []
63 | for t, c, n, s in setting:
64 | out_channel = c
65 | for i in range(n):
66 | if i == 0:
67 | layers.append(InvertedResidual(in_channel, out_channel, s, t))
68 | else:
69 | layers.append(InvertedResidual(in_channel, out_channel, 1, t))
70 | in_channel = out_channel
71 |
72 | return nn.Sequential(*layers)
--------------------------------------------------------------------------------
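Shape sanity checks for the blocks (tensor sizes are arbitrary): SELayer re-weights channels and preserves shape, InvertedResidual with stride 1 and equal channel counts takes the residual path, and build_layers stacks two stride-2 stages:

```python
import torch
from models.blocks import SELayer, InvertedResidual, build_layers

x = torch.randn(2, 64, 32, 32)
assert SELayer(64)(x).shape == x.shape
assert InvertedResidual(64, 64, 1, 2)(x).shape == x.shape
assert build_layers(64)(x).shape == (2, 64, 8, 8)  # 32 -> 16 -> 8
```
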
/tmp/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import yaml
4 | import pandas as pd
5 | import torch
6 |
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader
9 |
10 | from models import encoders
11 |
12 | from data.datasets import idrnd
13 | from data.transform import Transforms
14 |
15 | from utils.storage import load_weights
16 |
17 |
18 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
19 |
20 |
21 | if __name__ == '__main__':
22 |
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument('--config', type=str, required=True)
25 | parser.add_argument('--path-images-csv', type=str, required=True)
26 | parser.add_argument('--path-test-dir', type=str, required=True)
27 | parser.add_argument('--path-submission-csv', type=str, required=True)
28 | args = parser.parse_args()
29 |
30 | # prepare image paths
31 | config = yaml.load(open(args.config), Loader=yaml.FullLoader)
32 | test_dataset_paths = pd.read_csv(args.path_images_csv)
33 | path_test_dir = args.path_test_dir
34 |
35 | paths = [
36 | {
37 | 'id': row.id,
38 | 'frame': row.frame,
39 | 'path': os.path.join(path_test_dir, row.path)
40 | } for _, row in test_dataset_paths.iterrows() if int(row.frame) in config['frames']]
41 | test_df = pd.DataFrame(paths)
42 |
43 | test_loader = DataLoader(idrnd.TestAntispoofDataset(
44 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']),
45 | batch_size=config['batch_size'],
46 | num_workers=config['num_workers'],
47 | shuffle=False)
48 |
49 | model = getattr(encoders, config['encoder'])(device=device,
50 | out_features=1,
51 | pretrained=False)
52 | load_weights(model, config['prefix'], 'model', 'best')
53 | model.eval()
54 |
55 | samples, frames, probabilities = [], [], []
56 |
57 | with torch.no_grad():
58 | for batch, video, frame in test_loader:
59 | batch = batch.to(device)
60 | probability = torch.sigmoid(model(batch).view(-1))
61 |
62 | samples.extend(video)
63 | frames.extend(frame.numpy())
64 | probabilities.extend(probability.cpu().numpy())
65 |
66 | # save
67 | predictions = pd.DataFrame.from_dict({
68 | 'id': samples,
69 | 'frame': frames,
70 | 'probability': probabilities})
71 |
72 | predictions = predictions.groupby('id').probability.mean().reset_index()
73 | predictions['prediction'] = predictions.probability
74 | predictions[['id', 'prediction']].to_csv(
75 | args.path_submission_csv, index=False)
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import yaml
4 | import pandas as pd
5 | import torch
6 |
7 | from sklearn.model_selection import train_test_split
8 | from torch.utils.data import DataLoader
9 |
10 | from models import encoders
11 |
12 | from data.datasets import idrnd
13 | from data.transform import Transforms
14 |
15 | from utils.storage import load_weights
16 |
17 |
18 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
19 |
20 |
21 | if __name__ == '__main__':
22 |
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument('--path-images-csv', type=str, required=True)
25 | parser.add_argument('--path-test-dir', type=str, required=True)
26 | parser.add_argument('--path-submission-csv', type=str, required=True)
27 | args = parser.parse_args()
28 |
29 | configs = ['config/densenet121_bce.yaml', 'config/se_resnext50_bce.yaml']
30 |
31 | test_dataset_paths = pd.read_csv(args.path_images_csv)
32 | path_test_dir = args.path_test_dir
33 |
34 | samples, frames, probabilities = [], [], []
35 |
36 | for c in configs:
37 | config = yaml.load(open(c), Loader=yaml.FullLoader)
38 | paths = [
39 | {
40 | 'id': row.id,
41 | 'frame': row.frame,
42 | 'path': os.path.join(path_test_dir, row.path)
43 | } for _, row in test_dataset_paths.iterrows() if int(row.frame) in config['frames']]
44 | test_df = pd.DataFrame(paths)
45 |
46 | test_loader = DataLoader(idrnd.TestAntispoofDataset(
47 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']),
48 | batch_size=config['batch_size'],
49 | num_workers=config['num_workers'],
50 | shuffle=False)
51 |
52 | model = getattr(encoders, config['encoder'])(device=device,
53 | out_features=1,
54 | pretrained=False)
55 | load_weights(model, config['prefix'], 'model', 'best')
56 | model.eval()
57 |
58 | with torch.no_grad():
59 | for batch, video, frame in test_loader:
60 | batch = batch.to(device)
61 | probability = torch.sigmoid(model(batch).view(-1))
62 |
63 | samples.extend(video)
64 | frames.extend(frame.numpy())
65 | probabilities.extend(probability.cpu().numpy())
66 |
67 | # save
68 | predictions = pd.DataFrame.from_dict({
69 | 'id': samples,
70 | 'frame': frames,
71 | 'probability': probabilities})
72 |
73 | predictions = predictions.groupby('id').probability.mean().reset_index()
74 |
75 | predictions['prediction'] = predictions.probability
76 | predictions[['id', 'prediction']].to_csv(
77 | args.path_submission_csv, index=False)
--------------------------------------------------------------------------------
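The ensemble is a plain average: every probability predicted for a video id, across both models and all selected frames, is pooled and averaged by the final groupby. A toy illustration with invented values:

```python
import pandas as pd

preds = pd.DataFrame({'id': ['v0', 'v0', 'v1'],
                      'frame': [1, 5, 1],
                      'probability': [0.2, 0.4, 0.9]})
print(preds.groupby('id').probability.mean().reset_index())
#    id  probability
# 0  v0          0.3
# 1  v1          0.9
```
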
/optimizers/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 |
6 | class CosineWithRestarts(torch.optim.lr_scheduler._LRScheduler): # pylint: disable=protected-access
7 | """
8 | Cosine annealing with restarts.
9 |     This is described in the paper https://arxiv.org/abs/1608.03983.
10 | Parameters
11 | ----------
12 | optimizer : ``torch.optim.Optimizer``
13 | t_max : ``int``
14 | The maximum number of iterations within the first cycle.
15 | eta_min : ``float``, optional (default=0)
16 | The minimum learning rate.
17 | last_epoch : ``int``, optional (default=-1)
18 | The index of the last epoch. This is used when restarting.
19 | factor : ``float``, optional (default=1)
20 | The factor by which the cycle length (``T_max``) increases after each restart.
21 | """
22 |
23 | def __init__(self,
24 | optimizer,
25 | t_max,
26 | eta_min=0.,
27 | last_epoch=-1,
28 | factor=1.):
29 | assert t_max > 0
30 | assert eta_min >= 0
31 | if t_max == 1 and factor == 1:
32 | print("Cosine annealing scheduler will have no effect on the learning "
33 | "rate since T_max = 1 and factor = 1.")
34 | self.t_max = t_max
35 | self.eta_min = eta_min
36 | self.factor = factor
37 | self._last_restart = 0
38 | self._cycle_counter = 0
39 | self._cycle_factor = 1.
40 | self._updated_cycle_len = t_max
41 | self._initialized = False
42 | super(CosineWithRestarts, self).__init__(optimizer, last_epoch)
43 |
44 | def get_lr(self):
45 | """Get updated learning rate."""
46 | # HACK: We need to check if this is the first time ``self.get_lr()`` was called,
47 | # since ``torch.optim.lr_scheduler._LRScheduler`` will call ``self.get_lr()``
48 | # when first initialized, but the learning rate should remain unchanged
49 | # for the first epoch.
50 | if not self._initialized:
51 | self._initialized = True
52 | return self.base_lrs
53 |
54 | step = self.last_epoch + 1
55 | self._cycle_counter = step - self._last_restart
56 |
57 | lrs = [
58 | self.eta_min + ((lr - self.eta_min) / 2) * (
59 | np.cos(
60 | np.pi *
61 | (self._cycle_counter % self._updated_cycle_len) /
62 | self._updated_cycle_len
63 | ) + 1
64 | )
65 | for lr in self.base_lrs
66 | ]
67 |
68 | if self._cycle_counter % self._updated_cycle_len == 0:
69 | # Adjust the cycle length.
70 | self._cycle_factor *= self.factor
71 | self._cycle_counter = 0
72 | self._updated_cycle_len = int(self._cycle_factor * self.t_max)
73 | self._last_restart = step
74 |
75 | return lrs
76 |
77 |
78 | if __name__ == '__main__':
79 | lin = nn.Linear(128, 256)
80 |
81 | optim = torch.optim.SGD(lin.parameters(), lr=0.002)
82 | scheduler = CosineWithRestarts(optim, t_max=5, eta_min=0.0001, factor=np.sqrt(1.6))
83 |
84 | for _ in range(100):
85 | cur_lr = scheduler.get_lr()
86 | print(cur_lr[0])
87 | scheduler.step()
88 |
--------------------------------------------------------------------------------
/data/datasets/idrnd.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import datetime
4 |
5 | import numpy as np
6 | import pandas as pd
7 |
8 | import cv2
9 | import torch
10 |
11 | import torch.utils.data as data
12 |
13 | from sklearn.model_selection import train_test_split
14 |
15 |
16 | class TrainAntispoofDataset(data.Dataset):
17 | def __init__(self, df, transform=None, tta=1):
18 | self.df = df
19 | self.tta = tta
20 | self.transform = transform
21 |
22 | def __getitem__(self, index):
23 | item = self.df.iloc[index % len(self.df)]
24 |
25 | image = self._load_image(item['path'])
26 | if self.transform is not None:
27 | image = self.transform(image)
28 |
29 | return image, torch.tensor(item['label']).float()
30 |
31 | def __len__(self):
32 | return len(self.df) * self.tta
33 |
34 | def _load_image(self, path):
35 | image = cv2.imread(path, cv2.IMREAD_COLOR)
36 | return image / 255.
37 |
38 |
39 | class TestAntispoofDataset(data.Dataset):
40 | def __init__(self, df, transform=None, tta=4):
41 | self.df = df
42 | self.tta = tta
43 | self.transform = transform
44 |
45 | def __getitem__(self, index):
46 | item = self.df.iloc[index % len(self.df)]
47 |
48 | image = self._load_image(item['path'])
49 | if self.transform is not None:
50 | image = self.transform(image)
51 |
52 | return image, item['id'], item['frame']
53 |
54 | def __len__(self):
55 | return len(self.df) * self.tta
56 |
57 | def _load_image(self, path):
58 | image = cv2.imread(path, cv2.IMREAD_COLOR)
59 | return image / 255.
60 |
61 |
62 | def load_dataset(path_data, test_size=0.1):
63 | path_images = []
64 |
65 | for label in ['2dmask', 'real', 'printed', 'replay']:
66 | videos = os.listdir(os.path.join(path_data, label))
67 | for video in videos:
68 | frames = os.listdir(os.path.join(path_data, label, video))
69 | for frame in frames:
70 | path_images.append({
71 | 'path': os.path.join(path_data, label, video, frame),
72 | 'label': int(label != 'real'),
73 | 'video': video})
74 |
75 | videos = list(set(x['video'] for x in path_images))
76 | videos_tr, videos_ts = train_test_split(videos, test_size=test_size, random_state=123)
77 |
78 | train_path_images = pd.DataFrame([x for x in path_images if x['video'] in videos_tr])
79 | test_path_images = pd.DataFrame([x for x in path_images if x['video'] in videos_ts])
80 |
81 | return train_path_images, test_path_images
82 |
83 |
84 | def load_test_dataset(path_data):
85 | path_images = []
86 |
87 | for label in ['live', 'spoof']:
88 | videos = os.listdir(os.path.join(path_data, label))
89 | for video in videos:
90 | frames = os.listdir(os.path.join(path_data, label, video))
91 | for frame in frames:
92 | if frame.endswith('_120.jpg'):
93 | path_images.append({
94 | 'path': os.path.join(path_data, label, video, frame),
95 | 'label': int(label != 'live'),
96 | 'video': video,
97 | 'frame': frame})
98 |
99 | return pd.DataFrame(path_images)
--------------------------------------------------------------------------------
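The `tta` argument works by length inflation: `__len__` reports `len(df) * tta` and `__getitem__` wraps the index with `index % len(df)`, so each row is visited `tta` times and a random transform yields that many augmented views. A sketch with a hypothetical one-row frame table:

```python
import pandas as pd
from data.datasets.idrnd import TestAntispoofDataset

df = pd.DataFrame([{'path': 'frame_0.png', 'id': 'video_0', 'frame': 0}])
dataset = TestAntispoofDataset(df, transform=None, tta=2)
print(len(dataset))  # 2 -> indices 0 and 1 both map to the single row
```
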
/optimizers/sgdw.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim.optimizer import Optimizer, required
3 |
4 |
5 | class SGDW(Optimizer):
6 | r"""Implements stochastic gradient descent (optionally with momentum).
7 |
8 | Nesterov momentum is based on the formula from
9 | `On the importance of initialization and momentum in deep learning`__.
10 |
11 | Args:
12 | params (iterable): iterable of parameters to optimize or dicts defining
13 | parameter groups
14 | lr (float): learning rate
15 | momentum (float, optional): momentum factor (default: 0)
16 |         weight_decay (float, optional): decoupled weight decay factor (default: 0)
17 | dampening (float, optional): dampening for momentum (default: 0)
18 | nesterov (bool, optional): enables Nesterov momentum (default: False)
19 |
20 | Example:
21 |         >>> optimizer = SGDW(model.parameters(), lr=0.1, momentum=0.9)
22 | >>> optimizer.zero_grad()
23 | >>> loss_fn(model(input), target).backward()
24 | >>> optimizer.step()
25 |
26 | __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
27 |
28 | .. note::
29 | The implementation of SGD with Momentum/Nesterov subtly differs from
30 | Sutskever et. al. and implementations in some other frameworks.
31 |
32 | Considering the specific case of Momentum, the update can be written as
33 |
34 | .. math::
35 | v = \rho * v + g \\
36 | p = p - lr * v
37 |
38 | where p, g, v and :math:`\rho` denote the parameters, gradient,
39 | velocity, and momentum respectively.
40 |
41 | This is in contrast to Sutskever et. al. and
42 | other frameworks which employ an update of the form
43 |
44 | .. math::
45 | v = \rho * v + lr * g \\
46 | p = p - v
47 |
48 | The Nesterov version is analogously modified.
49 | """
50 |
51 | def __init__(self, params, lr=required, momentum=0.9, dampening=0,
52 | weight_decay=0, nesterov=False):
53 | if lr is not required and lr < 0.0:
54 | raise ValueError("Invalid learning rate: {}".format(lr))
55 | if momentum < 0.0:
56 | raise ValueError("Invalid momentum value: {}".format(momentum))
57 | if weight_decay < 0.0:
58 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
59 |
60 | defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
61 | weight_decay=weight_decay, nesterov=nesterov)
62 | if nesterov and (momentum <= 0 or dampening != 0):
63 | raise ValueError("Nesterov momentum requires a momentum and zero dampening")
64 | super(SGDW, self).__init__(params, defaults)
65 |
66 | def __setstate__(self, state):
67 | super(SGDW, self).__setstate__(state)
68 | for group in self.param_groups:
69 | group.setdefault('nesterov', False)
70 |
71 | def step(self, closure=None):
72 | """Performs a single optimization step.
73 |
74 | Arguments:
75 | closure (callable, optional): A closure that reevaluates the model
76 | and returns the loss.
77 | """
78 | loss = None
79 | if closure is not None:
80 | loss = closure()
81 |
82 | for group in self.param_groups:
83 | weight_decay = group['weight_decay']
84 | momentum = group['momentum']
85 | dampening = group['dampening']
86 | nesterov = group['nesterov']
87 |
88 | for p in group['params']:
89 | if p.grad is None:
90 | continue
91 | d_p = p.grad.data
92 |
93 | if momentum != 0:
94 | param_state = self.state[p]
95 | if 'momentum_buffer' not in param_state:
96 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
97 | buf.mul_(momentum).add_(d_p)
98 | else:
99 | buf = param_state['momentum_buffer']
100 |                         buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
101 | if nesterov:
102 |                         d_p = d_p.add(buf, alpha=momentum)
103 | else:
104 | d_p = buf
105 |
106 |                 p.data.mul_(1 - group['lr'] * weight_decay)  # decoupled weight decay
107 |                 p.data.add_(d_p, alpha=-group['lr'])
108 |
109 | return loss
110 |
--------------------------------------------------------------------------------
/optimizers/adamw.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | from torch.optim.optimizer import Optimizer
4 |
5 | # https://github.com/egg-west/AdamW-pytorch
6 | # https://github.com/anandsaha/fastai.part1.v2/commit/159e1712e60f299e11c42caab35c726f367bcd61
7 | # https://forums.fast.ai/t/challenge-for-advanced-students-implement-adamw-and-sgdw/8004/8
8 |
9 |
10 | class AdamW(Optimizer):
11 | """Implements Adam algorithm.
12 | It has been proposed in `Adam: A Method for Stochastic Optimization`_.
13 | Arguments:
14 | params (iterable): iterable of parameters to optimize or dicts defining
15 | parameter groups
16 | lr (float, optional): learning rate (default: 1e-3)
17 | betas (Tuple[float, float], optional): coefficients used for computing
18 | running averages of gradient and its square (default: (0.9, 0.999))
19 | eps (float, optional): term added to the denominator to improve
20 | numerical stability (default: 1e-8)
21 |         weight_decay (float, optional): decoupled weight decay factor (default: 0)
22 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this
23 | algorithm from the paper `On the Convergence of Adam and Beyond`_
24 | .. _Adam\: A Method for Stochastic Optimization:
25 | https://arxiv.org/abs/1412.6980
26 | .. _On the Convergence of Adam and Beyond:
27 | https://openreview.net/forum?id=ryQu7f-RZ
28 | """
29 |
30 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
31 | weight_decay=0, amsgrad=False):
32 | if not 0.0 <= lr:
33 | raise ValueError("Invalid learning rate: {}".format(lr))
34 | if not 0.0 <= eps:
35 | raise ValueError("Invalid epsilon value: {}".format(eps))
36 | if not 0.0 <= betas[0] < 1.0:
37 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
38 | if not 0.0 <= betas[1] < 1.0:
39 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
40 | defaults = dict(lr=lr, betas=betas, eps=eps,
41 | weight_decay=weight_decay, amsgrad=amsgrad)
42 | super(AdamW, self).__init__(params, defaults)
43 |
44 | def __setstate__(self, state):
45 | super(AdamW, self).__setstate__(state)
46 | for group in self.param_groups:
47 | group.setdefault('amsgrad', False)
48 |
49 | def step(self, closure=None):
50 | """Performs a single optimization step.
51 | Arguments:
52 | closure (callable, optional): A closure that reevaluates the model
53 | and returns the loss.
54 | """
55 | loss = None
56 | if closure is not None:
57 | loss = closure()
58 |
59 | for group in self.param_groups:
60 | for p in group['params']:
61 | if p.grad is None:
62 | continue
63 | grad = p.grad.data
64 | if grad.is_sparse:
65 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
66 | amsgrad = group['amsgrad']
67 |
68 | state = self.state[p]
69 |
70 | # State initialization
71 | if len(state) == 0:
72 | state['step'] = 0
73 | # Exponential moving average of gradient values
74 | state['exp_avg'] = torch.zeros_like(p.data)
75 | # Exponential moving average of squared gradient values
76 | state['exp_avg_sq'] = torch.zeros_like(p.data)
77 | if amsgrad:
78 | # Maintains max of all exp. moving avg. of sq. grad. values
79 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
80 |
81 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
82 | if amsgrad:
83 | max_exp_avg_sq = state['max_exp_avg_sq']
84 | beta1, beta2 = group['betas']
85 |
86 | state['step'] += 1
87 |
88 | # if group['weight_decay'] != 0:
89 | # grad = grad.add(group['weight_decay'], p.data)
90 |
91 | # Decay the first and second moment running average coefficient
92 |                 exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
93 |                 exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
94 | if amsgrad:
95 | # Maintains the maximum of all 2nd moment running avg. till now
96 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
97 | # Use the max. for normalizing running avg. of gradient
98 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
99 | else:
100 | denom = exp_avg_sq.sqrt().add_(group['eps'])
101 |
102 | bias_correction1 = 1 - beta1 ** state['step']
103 | bias_correction2 = 1 - beta2 ** state['step']
104 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
105 |
106 |                 p.data.mul_(1 - group['lr'] * group['weight_decay'])  # decoupled weight decay
107 |                 p.data.addcdiv_(exp_avg, denom, value=-step_size)
108 |
109 | return loss
110 |
--------------------------------------------------------------------------------
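Usage sketch for AdamW (shapes and hyper-parameters here are illustrative); the decay is applied to the weights directly rather than folded into the gradient:

```python
import torch
from optimizers.adamw import AdamW

model = torch.nn.Linear(10, 1)
opt = AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

x, y = torch.randn(8, 10), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(model(x), y)
opt.zero_grad()
loss.backward()
opt.step()
```
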
/models/backbones/densenet.py:
--------------------------------------------------------------------------------
1 | import re
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from collections import OrderedDict
6 |
7 |
8 | class _DenseLayer(nn.Sequential):
9 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
10 | super(_DenseLayer, self).__init__()
11 | self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
12 | self.add_module('relu1', nn.ReLU(inplace=True)),
13 | self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
14 | growth_rate, kernel_size=1, stride=1,
15 | bias=False)),
16 | self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
17 | self.add_module('relu2', nn.ReLU(inplace=True)),
18 | self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
19 | kernel_size=3, stride=1, padding=1,
20 | bias=False)),
21 | self.drop_rate = drop_rate
22 |
23 | def forward(self, x):
24 | new_features = super(_DenseLayer, self).forward(x)
25 | if self.drop_rate > 0:
26 | new_features = F.dropout(new_features, p=self.drop_rate,
27 | training=self.training)
28 | return torch.cat([x, new_features], 1)
29 |
30 |
31 | class _DenseBlock(nn.Sequential):
32 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
33 | super(_DenseBlock, self).__init__()
34 | for i in range(num_layers):
35 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate,
36 | bn_size, drop_rate)
37 | self.add_module('denselayer%d' % (i + 1), layer)
38 |
39 |
40 | class _Transition(nn.Sequential):
41 | def __init__(self, num_input_features, num_output_features):
42 | super(_Transition, self).__init__()
43 | self.add_module('norm', nn.BatchNorm2d(num_input_features))
44 | self.add_module('relu', nn.ReLU(inplace=True))
45 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
46 | kernel_size=1, stride=1, bias=False))
47 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
48 |
49 |
50 | class DenseNet(nn.Module):
51 | r"""Densenet-BC model class, based on
52 | `"Densely Connected Convolutional Networks" `_
53 | Args:
54 | growth_rate (int) - how many filters to add each layer (`k` in paper)
55 | block_config (list of 4 ints) - how many layers in each pooling block
56 | num_init_features (int) - the number of filters to learn in the first convolution layer
57 | bn_size (int) - multiplicative factor for number of bottle neck layers
58 | (i.e. bn_size * k features in the bottleneck layer)
59 | drop_rate (float) - dropout rate after each dense layer
60 | num_classes (int) - number of classification classes
61 | """
62 |
63 | def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
64 | num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):
65 |
66 | super(DenseNet, self).__init__()
67 |
68 | # First convolution
69 | self.features = nn.Sequential(OrderedDict([
70 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
71 | padding=3, bias=False)),
72 | ('norm0', nn.BatchNorm2d(num_init_features)),
73 | ('relu0', nn.ReLU(inplace=True)),
74 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
75 | ]))
76 |
77 | # Each denseblock
78 | num_features = num_init_features
79 | for i, num_layers in enumerate(block_config):
80 | block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
81 | bn_size=bn_size, growth_rate=growth_rate,
82 | drop_rate=drop_rate)
83 | self.features.add_module('denseblock%d' % (i + 1), block)
84 | num_features = num_features + num_layers * growth_rate
85 | if i != len(block_config) - 1:
86 | trans = _Transition(num_input_features=num_features,
87 | num_output_features=num_features // 2)
88 | self.features.add_module('transition%d' % (i + 1), trans)
89 | num_features = num_features // 2
90 |
91 | # Final batch norm
92 | self.features.add_module('norm5', nn.BatchNorm2d(num_features))
93 |
94 | # Linear layer
95 | self.classifier = nn.Linear(num_features, num_classes)
96 |
97 | # Official init from torch repo.
98 | for m in self.modules():
99 | if isinstance(m, nn.Conv2d):
100 | nn.init.kaiming_normal_(m.weight)
101 | elif isinstance(m, nn.BatchNorm2d):
102 | nn.init.constant_(m.weight, 1)
103 | nn.init.constant_(m.bias, 0)
104 | elif isinstance(m, nn.Linear):
105 | nn.init.constant_(m.bias, 0)
106 |
107 | def linear_params(self):
108 | return self.classifier.parameters()
109 |
110 | def forward(self, x):
111 | features = self.features(x)
112 | out = F.relu(features, inplace=True)
113 | out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
114 | out = self.classifier(out)
115 | return out
116 |
--------------------------------------------------------------------------------
/models/backbones/mobilenet.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 |
4 |
5 | class SELayer(nn.Module):
6 | def __init__(self, inplanes, squeeze_ratio=8, activation=nn.PReLU, size=None):
7 | super(SELayer, self).__init__()
8 | if size is not None:
9 | self.global_avgpool = nn.AvgPool2d(size)
10 | else:
11 | self.global_avgpool = nn.AdaptiveAvgPool2d(1)
12 | self.conv1 = nn.Conv2d(inplanes, int(inplanes / squeeze_ratio), kernel_size=1, stride=1)
13 | self.conv2 = nn.Conv2d(int(inplanes / squeeze_ratio), inplanes, kernel_size=1, stride=1)
14 | self.relu = nn.ReLU(inplace=True)
15 | self.sigmoid = nn.Sigmoid()
16 |
17 | def forward(self, x):
18 | out = self.global_avgpool(x)
19 | out = self.conv1(out)
20 | out = self.relu(out)
21 | out = self.conv2(out)
22 | out = self.sigmoid(out)
23 | return x * out
24 |
25 |
26 | class InvertedResidual(nn.Module):
27 | def __init__(self, in_channels, out_channels, stride, expand_ratio, outp_size=None):
28 | super(InvertedResidual, self).__init__()
29 | self.stride = stride
30 | assert stride in [1, 2]
31 |
32 | self.use_res_connect = self.stride == 1 and in_channels == out_channels
33 |
34 | self.inv_block = nn.Sequential(
35 | nn.Conv2d(in_channels, in_channels * expand_ratio, 1, 1, 0, bias=False),
36 | nn.BatchNorm2d(in_channels * expand_ratio),
37 | nn.PReLU(),
38 |
39 | nn.Conv2d(in_channels * expand_ratio, in_channels * expand_ratio, 3, stride, 1,
40 | groups=in_channels * expand_ratio, bias=False),
41 | nn.BatchNorm2d(in_channels * expand_ratio),
42 | nn.PReLU(),
43 |
44 | nn.Conv2d(in_channels * expand_ratio, out_channels, 1, 1, 0, bias=False),
45 | nn.BatchNorm2d(out_channels),
46 | SELayer(out_channels, 8, nn.PReLU, outp_size)
47 | )
48 |
49 | def forward(self, x):
50 | if self.use_res_connect:
51 | return x + self.inv_block(x)
52 |
53 | return self.inv_block(x)
54 |
55 |
56 | def init_block(in_channels, out_channels, stride, activation=nn.PReLU):
57 | return nn.Sequential(
58 | nn.BatchNorm2d(3),
59 | nn.Conv2d(in_channels, out_channels, 3, stride, 1, bias=False),
60 | nn.BatchNorm2d(out_channels),
61 | nn.ReLU(inplace=True)
62 | )
63 |
64 |
65 | class MobileNet(nn.Module):
66 | def __init__(self, out_features=256, input_size=112, width_multiplier=1., feature=True):
67 | super(MobileNet, self).__init__()
68 | self.feature = feature
69 |
70 | # Set up of inverted residual blocks
71 | inverted_residual_setting = [
72 | # t, c, n, s
73 | [2, 64, 5, 2],
74 | [4, 128, 1, 2],
75 | [2, 128, 6, 1],
76 | [4, 128, 1, 2],
77 | [2, 128, 2, 1]
78 | ]
79 |
80 | first_channel_num = 64
81 | last_channel_num = 512
82 | self.features = [init_block(3, first_channel_num, 2)]
83 |
84 | self.features.append(nn.Conv2d(first_channel_num, first_channel_num, 3, 1, 1,
85 | groups=first_channel_num, bias=False))
86 | self.features.append(nn.BatchNorm2d(64))
87 | self.features.append(nn.PReLU())
88 |
89 | # Inverted Residual Blocks
90 | in_channel_num = first_channel_num
91 | size_h, size_w = input_size, input_size
92 | size_h, size_w = size_h // 2, size_w // 2
93 | for t, c, n, s in inverted_residual_setting:
94 | output_channel = int(c * width_multiplier)
95 | for i in range(n):
96 | if i == 0:
97 | size_h, size_w = size_h // s, size_w // s
98 | self.features.append(InvertedResidual(in_channel_num, output_channel,
99 | s, t, outp_size=(size_h, size_w)))
100 | else:
101 | self.features.append(InvertedResidual(in_channel_num, output_channel,
102 | 1, t, outp_size=(size_h, size_w)))
103 | in_channel_num = output_channel
104 |
105 | # 1x1 expand block
106 | self.features.append(nn.Sequential(nn.Conv2d(in_channel_num, last_channel_num, 1, 1, 0, bias=False),
107 | nn.BatchNorm2d(last_channel_num),
108 | nn.PReLU()))
109 | self.features = nn.Sequential(*self.features)
110 |
111 | # Depth-wise pooling
112 | k_size = (input_size // 16, input_size // 16)
113 | self.dw_pool = nn.Conv2d(last_channel_num, last_channel_num, k_size,
114 | groups=last_channel_num, bias=False)
115 | self.dw_bn = nn.BatchNorm2d(last_channel_num)
116 | self.conv1_extra = nn.Conv2d(last_channel_num, out_features, 1, stride=1, padding=0, bias=False)
117 |
118 | self.init_weights()
119 |
120 | def forward(self, x):
121 | x = self.features(x)
122 | x = self.dw_bn(self.dw_pool(x))
123 | x = self.conv1_extra(x)
124 | x = x.view(x.size(0), -1)
125 | return x
126 |
127 | def init_weights(self):
128 | for m in self.modules():
129 | if isinstance(m, nn.Conv2d):
130 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
131 | m.weight.data.normal_(0, math.sqrt(2. / n))
132 | if m.bias is not None:
133 | m.bias.data.zero_()
134 | elif isinstance(m, nn.BatchNorm2d):
135 | m.weight.data.fill_(1)
136 | m.bias.data.zero_()
137 | elif isinstance(m, nn.Linear):
138 | n = m.weight.size(1)
139 | m.weight.data.normal_(0, 0.01)
140 | m.bias.data.zero_()
141 |
--------------------------------------------------------------------------------
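A forward-pass sketch: `input_size` must match the actual input resolution, since the depth-wise pooling kernel is `input_size // 16` and is meant to collapse the final feature map to 1x1 (eval mode avoids batch-norm statistics on a single sample):

```python
import torch
from models.backbones.mobilenet import MobileNet

net = MobileNet(out_features=256, input_size=112).eval()
with torch.no_grad():
    embedding = net(torch.randn(1, 3, 112, 112))
print(embedding.shape)  # torch.Size([1, 256])
```
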
/models/encoders.py:
--------------------------------------------------------------------------------
1 | import re
2 | import torch
3 | import torch.nn as nn
4 |
5 | from torch.utils import model_zoo
6 |
7 | from models.backbones.mobilenet import MobileNet
8 |
9 | from models.backbones.resnet import ResNet
10 | from models.backbones.resnet import BasicBlock
11 | from models.backbones.resnet import Bottleneck
12 |
13 | from models.backbones.senet import SENet
14 | from models.backbones.senet import SEBottleneck
15 | from models.backbones.senet import SEResNetBottleneck
16 | from models.backbones.senet import SEResNeXtBottleneck
17 |
18 | from models.backbones.densenet import DenseNet
19 |
20 | from models.blocks import build_layers
21 |
22 |
23 | def mobilenet(device='cpu', *argv, **kwargs):
24 | model = MobileNet(*argv, **kwargs)
25 | return model.to(device)
26 |
27 |
28 | def resnet18(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
29 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
30 | if pretrained:
31 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet18-5c106cde.pth'))
32 |
33 | model.fc = nn.Linear(model.fc.in_features, out_features)
34 | return model.to(device)
35 |
36 |
37 | def resnet34(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
38 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
39 | if pretrained:
40 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet34-333f7ec4.pth'))
41 |
42 | model.fc = nn.Linear(model.fc.in_features, out_features)
43 | return model.to(device)
44 |
45 |
46 | def resnet50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
47 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
48 | if pretrained:
49 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet50-19c8e357.pth'))
50 |
51 | model.fc = nn.Linear(model.fc.in_features, out_features)
52 | return model.to(device)
53 |
54 |
55 | def resnet101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
56 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
57 | if pretrained:
58 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'))
59 |
60 | model.fc = nn.Linear(model.fc.in_features, out_features)
61 | return model.to(device)
62 |
63 |
64 | def resnet152(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
65 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
66 | if pretrained:
67 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnet152-b121ed2d.pth'))
68 |
69 | model.fc = nn.Linear(model.fc.in_features, out_features)
70 | return model.to(device)
71 |
72 |
73 | def resnext50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
74 | kwargs['groups'] = 32
75 | kwargs['width_per_group'] = 4
76 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
77 | if pretrained:
78 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth'))
79 |
80 |     model.fc = nn.Linear(model.fc.in_features, out_features)
81 | return model.to(device)
82 |
83 |
84 | def resnext101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
85 | kwargs['groups'] = 32
86 | kwargs['width_per_group'] = 8
87 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
88 | if pretrained:
89 | model.load_state_dict(model_zoo.load_url('https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth'))
90 |
91 |     model.fc = nn.Linear(model.fc.in_features, out_features)
92 | return model.to(device)
93 |
94 |
95 | def senet154(device='cpu', *argv, **kwargs):
96 | model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16,
97 | **kwargs)
98 | return model.to(device)
99 |
100 |
101 | def se_resnet50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
102 | model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16,
103 | inplanes=64, input_3x3=False,
104 | downsample_kernel_size=1, downsample_padding=0,
105 | **kwargs)
106 | if pretrained:
107 | model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth'))
108 |
109 | model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
110 | return model.to(device)
111 |
112 |
113 | def se_resnet101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
114 |     model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16,
115 |                   inplanes=64, input_3x3=False, downsample_kernel_size=1,
116 |                   downsample_padding=0, **kwargs)
117 |     model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
118 |     return model.to(device)
119 |
120 |
121 | def se_resnet152(device='cpu', *argv, **kwargs):
122 | model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16,
123 | inplanes=64, input_3x3=False,
124 | downsample_kernel_size=1, downsample_padding=0,
125 | **kwargs)
126 | return model.to(device)
127 |
128 |
129 | def se_resnext50(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
130 | model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
131 | inplanes=64, input_3x3=False,
132 | downsample_kernel_size=1, downsample_padding=0,
133 | **kwargs)
134 | if pretrained:
135 | model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth'))
136 |
137 | model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
138 | return model.to(device)
139 |
140 |
141 | def se_resnext101(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
142 | model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16,
143 | inplanes=64, input_3x3=False,
144 | downsample_kernel_size=1, downsample_padding=0,
145 | **kwargs)
146 | if pretrained:
147 | model.load_state_dict(model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth'))
148 |
149 | model.last_linear = nn.Linear(model.last_linear.in_features, out_features)
150 | return model.to(device)
151 |
152 |
153 | def densenet121(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
154 | model = DenseNet(32, (6, 12, 24, 16), 64, **kwargs)
155 | if pretrained:
156 | _load_densenet(model, 'https://download.pytorch.org/models/densenet121-a639ec97.pth')
157 |
158 | #model.features.add_module('final', build_layers(1024))
159 | model.classifier = nn.Linear(model.classifier.in_features, out_features)
160 | return model.to(device)
161 |
162 |
163 | def densenet201(device='cpu', out_features=1, pretrained=False, *argv, **kwargs):
164 | model = DenseNet(32, (6, 12, 48, 32), 64, **kwargs)
165 | if pretrained:
166 | _load_densenet(model, 'https://download.pytorch.org/models/densenet201-c1103571.pth')
167 |
168 | model.classifier = nn.Linear(model.classifier.in_features, out_features)
169 | return model.to(device)
170 |
171 |
172 | def _load_densenet(model, model_url):
173 | pattern = re.compile(
174 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
175 |
176 | state_dict = model_zoo.load_url(model_url)
177 | for key in list(state_dict.keys()):
178 | res = pattern.match(key)
179 | if res:
180 | new_key = res.group(1) + res.group(2)
181 | state_dict[new_key] = state_dict[key]
182 | del state_dict[key]
183 | model.load_state_dict(state_dict)
--------------------------------------------------------------------------------
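Encoder factories are looked up by the `encoder` string from a config, as test.py does. A minimal construction sketch (`pretrained=False` skips the ImageNet download; `out_features=1` yields a single spoof logit):

```python
import torch
from models import encoders

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = encoders.densenet121(device=device, out_features=1, pretrained=False).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224).to(device))
print(logits.shape)  # torch.Size([1, 1])
```
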
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import datetime
4 | import argparse
5 | import yaml
6 |
7 | import cv2
8 | import numpy as np
9 | import pandas as pd
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.optim as optim
14 | from torch.utils.data import DataLoader
15 |
16 | from tqdm import tqdm
17 |
18 | import metrics.classification as metrics
19 |
20 | from models import encoders
21 | import losses
22 |
23 | from data.datasets import idrnd
24 | from data.transform import Transforms
25 |
26 | from utils.handlers import AverageMeter
27 | from utils.handlers import MetaData
28 |
29 | from utils.storage import save_weights
30 | from utils.storage import load_weights
31 |
32 |
33 | cv2.setNumThreads(0)
34 |
35 |
36 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
37 |
38 |
39 | def main(config):
40 |     model = getattr(encoders, config['encoder'])(device=device,
41 | out_features=config['out_features'],
42 | pretrained=config['pretrained'])
43 |
44 | start_epoch = 0
45 | if config['snapshot']['use']:
46 | load_weights(model, config['prefix'], 'model', config['snapshot']['epoch'])
47 | start_epoch = config['snapshot']['epoch']
48 |
49 | if torch.cuda.is_available() and config['parallel']:
50 | model = nn.DataParallel(model)
51 |
52 | criterion = getattr(losses, config['loss'])()
53 | optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
54 |
55 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
56 | factor=0.5,
57 | patience=2,
58 | min_lr=1e-6)
59 |
60 | train_df, test_df = idrnd.load_dataset(config['train']['folder'], test_size=0.05)
61 |
62 | train_loader = DataLoader(idrnd.TrainAntispoofDataset(
63 | train_df, Transforms(input_size=config['input_size'], train=True)),
64 | batch_size=config['batch_size'],
65 | num_workers=config['num_workers'],
66 | shuffle=True)
67 |
68 | test_loader = DataLoader(idrnd.TrainAntispoofDataset(
69 | test_df, Transforms(input_size=config['input_size'], train=False), config['tta']),
70 | batch_size=config['batch_size'],
71 | num_workers=config['num_workers'],
72 | shuffle=False)
73 |
74 | thresholds = np.linspace(0.001, 0.6, num=config['thresholds'])
75 | best_threshold = 0.5
76 | best_epoch = 0
77 | best_score = np.inf
78 | best_loss = np.inf
79 |
80 | for epoch in range(start_epoch, config['num_epochs']):
81 | if epoch == 0:
82 |             opt = optim.Adam((model.module if hasattr(model, 'module') else model).linear_params(), lr=config['learning_rate'])
83 | train(train_loader, model, criterion, opt, epoch, config)
84 | else:
85 | train(train_loader, model, criterion, optimizer, epoch, config)
86 |
87 | loss, accuracy, score = validation(test_loader, model, criterion, thresholds)
88 |
89 | current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
90 | print(' Validation:'
91 | ' Time: {}'
92 | ' Epoch: {}'
93 | ' Loss: {:.4f}'.format(current_time, epoch + 1, loss))
94 |
95 | best_index = np.argmin(score)
96 | print(' Threshold: {:.4f}'
97 | ' Accuracy: {:.5f}'
98 | ' Score: {:.5f}'.format(thresholds[best_index], accuracy[best_index], score[best_index]))
99 |
100 | if best_loss > loss:
101 | best_threshold = thresholds[best_index]
102 | best_score = score[best_index]
103 | best_loss = loss
104 | best_epoch = epoch + 1
105 | save_weights(model, config['prefix'], 'model', 'best', config['parallel'])
106 |
107 | if epoch != 0:
108 | lr_scheduler.step(loss)
109 |
110 | save_weights(model, config['prefix'], 'model', epoch + 1, config['parallel'])
111 |
112 | print(' Best threshold: {:.4f}'
113 | ' Best score: {:.5f}'
114 | ' Best loss: {:.4f}'
115 | ' Best epoch: {}'.format(best_threshold, best_score, best_loss, best_epoch))
116 |
117 |
118 | def train(data_loader, model, criterion, optimizer, epoch, config):
119 | model.train()
120 |
121 | loss_handler = AverageMeter()
122 | accuracy_handler = AverageMeter()
123 | score_handler = AverageMeter()
124 |
125 | tq = tqdm(total=len(data_loader) * config['batch_size'])
126 | tq.set_description('Epoch {}, lr {:.2e}'.format(epoch + 1,
127 | get_learning_rate(optimizer)))
128 |
129 | for i, (image, target) in enumerate(data_loader):
130 | image = image.to(device)
131 | target = target.to(device)
132 |
133 | output = model(image).view(-1)
134 |
135 | loss = criterion(output, target)
136 | loss.backward()
137 |
138 | batch_size = image.size(0)
139 |
140 |         if (i + 1) % config['step'] == 0:  # gradient accumulation over config['step'] batches
141 |             optimizer.step()
142 |             optimizer.zero_grad()
143 |
144 | pred = torch.sigmoid(output) > 0.5
145 | target = target > 0.5
146 |
147 | accuracy = metrics.accuracy(pred, target)
148 | score = metrics.min_c(pred, target)
149 |
150 |         loss_handler.update(loss.item())  # .item(): store a float, not a graph-attached tensor
151 | accuracy_handler.update(accuracy)
152 | score_handler.update(score)
153 |
157 | tq.update(batch_size)
158 | tq.set_postfix(loss='{:.4f}'.format(loss_handler.avg),
159 | accuracy='{:.5f}'.format(accuracy_handler.avg),
160 | score='{:.5f}'.format(score_handler.avg))
161 | tq.close()
162 |
163 |
164 | def validation(data_loader, model, criterion, thresholds):
165 | model.eval()
166 |
167 | loss_handler = AverageMeter()
168 | accuracy_handler = [AverageMeter() for _ in thresholds]
169 | score_handler = [AverageMeter() for _ in thresholds]
170 |
171 | with torch.no_grad():
172 |         for image, target in data_loader:  # no index needed; avoids shadowing the inner loop's i
173 | image = image.to(device)
174 | target = target.to(device)
175 |
176 | output = model(image).view(-1)
177 |
178 | loss = criterion(output, target)
179 |             loss_handler.update(loss.item())
180 |
181 | target = target.byte()
182 | for i, threshold in enumerate(thresholds):
183 | pred = torch.sigmoid(output) > threshold
184 |
185 | accuracy = metrics.accuracy(pred, target)
186 | score = metrics.min_c(pred, target)
187 |
188 | accuracy_handler[i].update(accuracy)
189 | score_handler[i].update(score)
190 |
191 | return (loss_handler.avg,
192 | [i.avg for i in accuracy_handler],
193 | [i.avg for i in score_handler])
194 |
195 |
196 | def get_learning_rate(optimizer):
197 | for param_group in optimizer.param_groups:
198 |         return param_group['lr']  # all groups share one lr here; the first suffices
199 |
200 |
201 | if __name__ == '__main__':
202 | parser = argparse.ArgumentParser(description='Train code')
203 | parser.add_argument('--config', required=True, help='configuration file')
204 | args = parser.parse_args()
205 |
206 |     with open(args.config) as f:
207 |         config = yaml.load(f, Loader=yaml.FullLoader)
208 |     main(config)
--------------------------------------------------------------------------------
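train.py reads everything it needs from a single YAML file. As a quick reference, here is a minimal sketch of that configuration, written as the Python dict that yaml.load produces. The key set is taken from the accesses in main() and train() above; the values are illustrative assumptions, not the repository's tuned settings (see config/*.yaml for those).

# Sketch of the config train.py expects; all values below are assumed examples.
config = {
    'prefix': 'se_resnext50_focal',        # tag used by save_weights/load_weights
    'encoder': 'se_resnext50',             # attribute looked up on the models package
    'out_features': 1,
    'pretrained': True,
    'parallel': True,                      # wrap in nn.DataParallel when CUDA is available
    'loss': 'FocalLoss',                   # attribute looked up on the losses package (assumed name)
    'learning_rate': 1e-4,
    'input_size': 224,
    'batch_size': 32,
    'num_workers': 4,
    'step': 1,                             # gradient-accumulation interval in train()
    'tta': 1,
    'thresholds': 100,                     # number of points in the threshold sweep
    'num_epochs': 30,
    'snapshot': {'use': False, 'epoch': 0},
    'train': {'folder': 'data/train'},
}
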
/models/backbones/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
5 | """3x3 convolution with padding"""
6 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
7 | padding=dilation, groups=groups, bias=False, dilation=dilation)
8 |
9 |
10 | def conv1x1(in_planes, out_planes, stride=1):
11 | """1x1 convolution"""
12 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
13 |
14 |
15 | class BasicBlock(nn.Module):
16 | expansion = 1
17 |
18 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
19 | base_width=64, dilation=1, norm_layer=None):
20 | super(BasicBlock, self).__init__()
21 | if norm_layer is None:
22 | norm_layer = nn.BatchNorm2d
23 | if groups != 1 or base_width != 64:
24 | raise ValueError('BasicBlock only supports groups=1 and base_width=64')
25 | if dilation > 1:
26 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
27 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1
28 | self.conv1 = conv3x3(inplanes, planes, stride)
29 | self.bn1 = norm_layer(planes)
30 | self.relu = nn.ReLU(inplace=True)
31 | self.conv2 = conv3x3(planes, planes)
32 | self.bn2 = norm_layer(planes)
33 | self.downsample = downsample
34 | self.stride = stride
35 |
36 | def forward(self, x):
37 | identity = x
38 |
39 | out = self.conv1(x)
40 | out = self.bn1(out)
41 | out = self.relu(out)
42 |
43 | out = self.conv2(out)
44 | out = self.bn2(out)
45 |
46 | if self.downsample is not None:
47 | identity = self.downsample(x)
48 |
49 | out += identity
50 | out = self.relu(out)
51 |
52 | return out
53 |
54 |
55 | class Bottleneck(nn.Module):
56 | expansion = 4
57 |
58 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
59 | base_width=64, dilation=1, norm_layer=None):
60 | super(Bottleneck, self).__init__()
61 | if norm_layer is None:
62 | norm_layer = nn.BatchNorm2d
63 | width = int(planes * (base_width / 64.)) * groups
64 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1
65 | self.conv1 = conv1x1(inplanes, width)
66 | self.bn1 = norm_layer(width)
67 | self.conv2 = conv3x3(width, width, stride, groups, dilation)
68 | self.bn2 = norm_layer(width)
69 | self.conv3 = conv1x1(width, planes * self.expansion)
70 | self.bn3 = norm_layer(planes * self.expansion)
71 | self.relu = nn.ReLU(inplace=True)
72 | self.downsample = downsample
73 | self.stride = stride
74 |
75 | def forward(self, x):
76 | identity = x
77 |
78 | out = self.conv1(x)
79 | out = self.bn1(out)
80 | out = self.relu(out)
81 |
82 | out = self.conv2(out)
83 | out = self.bn2(out)
84 | out = self.relu(out)
85 |
86 | out = self.conv3(out)
87 | out = self.bn3(out)
88 |
89 | if self.downsample is not None:
90 | identity = self.downsample(x)
91 |
92 | out += identity
93 | out = self.relu(out)
94 |
95 | return out
96 |
97 |
98 | class ResNet(nn.Module):
99 |
100 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
101 | groups=1, width_per_group=64, replace_stride_with_dilation=None,
102 | norm_layer=None):
103 | super(ResNet, self).__init__()
104 | if norm_layer is None:
105 | norm_layer = nn.BatchNorm2d
106 | self._norm_layer = norm_layer
107 |
108 | self.inplanes = 64
109 | self.dilation = 1
110 | if replace_stride_with_dilation is None:
111 | # each element in the tuple indicates if we should replace
112 | # the 2x2 stride with a dilated convolution instead
113 | replace_stride_with_dilation = [False, False, False]
114 | if len(replace_stride_with_dilation) != 3:
115 | raise ValueError("replace_stride_with_dilation should be None "
116 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
117 | self.groups = groups
118 | self.base_width = width_per_group
119 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
120 | bias=False)
121 | self.bn1 = norm_layer(self.inplanes)
122 | self.relu = nn.ReLU(inplace=True)
123 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
124 | self.layer1 = self._make_layer(block, 64, layers[0])
125 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
126 | dilate=replace_stride_with_dilation[0])
127 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
128 | dilate=replace_stride_with_dilation[1])
129 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
130 | dilate=replace_stride_with_dilation[2])
131 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
132 | self.fc = nn.Linear(512 * block.expansion, num_classes)
133 |
134 | for m in self.modules():
135 | if isinstance(m, nn.Conv2d):
136 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
137 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
138 | nn.init.constant_(m.weight, 1)
139 | nn.init.constant_(m.bias, 0)
140 |
141 | # Zero-initialize the last BN in each residual branch,
142 | # so that the residual branch starts with zeros, and each residual block behaves like an identity.
143 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
144 | if zero_init_residual:
145 | for m in self.modules():
146 | if isinstance(m, Bottleneck):
147 | nn.init.constant_(m.bn3.weight, 0)
148 | elif isinstance(m, BasicBlock):
149 | nn.init.constant_(m.bn2.weight, 0)
150 |
151 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
152 | norm_layer = self._norm_layer
153 | downsample = None
154 | previous_dilation = self.dilation
155 | if dilate:
156 | self.dilation *= stride
157 | stride = 1
158 | if stride != 1 or self.inplanes != planes * block.expansion:
159 | downsample = nn.Sequential(
160 | conv1x1(self.inplanes, planes * block.expansion, stride),
161 | norm_layer(planes * block.expansion),
162 | )
163 |
164 | layers = []
165 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
166 | self.base_width, previous_dilation, norm_layer))
167 | self.inplanes = planes * block.expansion
168 | for _ in range(1, blocks):
169 | layers.append(block(self.inplanes, planes, groups=self.groups,
170 | base_width=self.base_width, dilation=self.dilation,
171 | norm_layer=norm_layer))
172 |
173 | return nn.Sequential(*layers)
174 |
175 | def forward(self, x):
176 | x = self.conv1(x)
177 | x = self.bn1(x)
178 | x = self.relu(x)
179 | x = self.maxpool(x)
180 |
181 | x = self.layer1(x)
182 | x = self.layer2(x)
183 | x = self.layer3(x)
184 | x = self.layer4(x)
185 |
186 | x = self.avgpool(x)
187 | x = x.reshape(x.size(0), -1)
188 | x = self.fc(x)
189 |
190 | return x
191 |
--------------------------------------------------------------------------------
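The ResNet above is parameterized by a block type and per-stage depths instead of hard-coding a variant. A small sketch of how the standard layouts map onto it ([2, 2, 2, 2] and [3, 4, 6, 3] are the usual ResNet-18 and ResNet-50 depths; the shape check is only for illustration):

import torch

# ResNet-18: BasicBlock (expansion=1), two blocks per stage.
resnet18 = ResNet(BasicBlock, [2, 2, 2, 2])

# ResNet-50: Bottleneck (expansion=4), so the final feature width is 512 * 4 = 2048.
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])

x = torch.randn(1, 3, 224, 224)
assert resnet50(x).shape == (1, 1000)  # num_classes defaults to 1000
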
/data/transform.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import torch
3 |
4 | import numpy as np
5 |
6 |
7 | class Compose(object):
8 | def __init__(self, transforms):
9 | self.transforms = transforms
10 |
11 | def __call__(self, image):
12 | for t in self.transforms:
13 |             image = t(image)
14 | return image
15 |
16 |
17 | class OneOf(object):
18 | def __init__(self, transforms):
19 | self.transforms = transforms
20 |
21 | def __call__(self, image):
22 | transform = np.random.choice(self.transforms)
23 | image = transform(image)
24 | return image
25 |
26 |
27 | class RandomApply(object):
28 | def __init__(self, transforms, prob=0.5):
29 | self.transforms = transforms
30 | self.prob = prob
31 |
32 | def __call__(self, image):
33 | for t in self.transforms:
34 | if np.random.rand() < self.prob:
35 | image = t(image)
36 | return image
37 |
38 |
51 | class CenterCrop(object):
52 | def __init__(self, size=None):
53 | self.size = size
54 |
55 | def __call__(self, image):
56 | height, width = image.shape[:2]
57 |
58 | if height > width:
59 | center = height // 2
60 | top = center - width // 2
61 | bottom = center + width // 2
62 | image = image[top:bottom, :]
63 | else:
64 | center = width // 2
65 | left = center - height // 2
66 | right = center + height // 2
67 | image = image[:, left:right]
68 |
69 | if self.size and self.size < image.shape[0]:
70 |             center = image.shape[0] // 2  # recompute on the already-cropped (square) image
71 | top = center - self.size // 2
72 | bottom = center + self.size // 2
73 | left = center - self.size // 2
74 | right = center + self.size // 2
75 | image = image[top:bottom, left:right]
76 |
77 | return image
78 |
79 |
80 | class RandomCrop(object):
81 | def __init__(self, ratio):
82 | self.ratio = ratio
83 |
84 | def __call__(self, image):
85 | width = int(image.shape[1] * self.ratio)
86 | height = int(image.shape[0] * self.ratio)
87 |
88 | min_x = image.shape[1] - width
89 | min_y = image.shape[0] - height
90 |
91 | x = np.random.randint(0, min_x) if min_x else 0
92 | y = np.random.randint(0, min_y) if min_y else 0
93 |
94 | image = image[y:y + height, x:x + width]
95 | return image
96 |
97 |
98 | class Contrast(object):
99 | def __init__(self, lower=0.9, upper=1.1):
100 | self.lower = lower
101 | self.upper = upper
102 |
103 | def __call__(self, image):
104 | alpha = np.random.uniform(self.lower, self.upper)
105 | image *= alpha
106 | image = np.clip(image, 0, 1)
107 | return image
108 |
109 |
110 | class Brightness(object):
111 | def __init__(self, delta=0.125):
112 | self.delta = delta
113 |
114 | def __call__(self, image):
115 | delta = np.random.uniform(-self.delta, self.delta)
116 | image += delta
117 | image = np.clip(image, 0, 1)
118 | return image
119 |
120 |
121 | class GaussianBlur(object):
122 | def __init__(self, kernel=3):
123 | self.kernel = (kernel, kernel)
124 |
125 | def __call__(self, image):
126 | image = cv2.blur(image, self.kernel)
127 | return image
128 |
129 |
130 | class Expand(object):
131 | def __init__(self, size=1024, diff=0.3, noise=False):
132 | self.size = size
133 | self.noise = noise
134 | self.diff = diff
135 |
136 | def __call__(self, image):
137 | height, width = image.shape[:2]
138 | max_ratio = self.size / max(height, width)
139 | min_ratio = max_ratio * self.diff
140 |
141 | ratio = np.random.uniform(min_ratio, max_ratio)
142 | left = np.random.uniform(0, self.size - width*ratio)
143 | top = np.random.uniform(0, self.size - height*ratio)
144 |
145 | expand_image = np.zeros((self.size, self.size, 3), dtype=image.dtype)
146 | if self.noise:
147 | mean = np.full(3, 0.5)
148 | std = np.full(3, 0.5)
149 | expand_image = cv2.randn(expand_image, mean, std)
150 | expand_image = np.clip(expand_image, 0, 1)
151 |
152 | image = cv2.resize(image, (int(width*ratio), int(height*ratio)))
153 |
154 | expand_image[int(top):int(top) + int(height*ratio),
155 | int(left):int(left) + int(width*ratio)] = image
156 | image = expand_image
157 |
158 | return image
159 |
160 |
161 | class Pad(object):
162 | def __init__(self, size):
163 | self.size = size
164 |
165 | def __call__(self, image):
166 | height, width = image.shape[:2]
167 |
168 | ratio = self.size / max(height, width)
169 |
170 | new_height = int(height * ratio)
171 | new_width = int(width * ratio)
172 |
173 | # new_size should be in (width, height) format
174 |
175 | image = cv2.resize(image, (new_width, new_height))
176 |
177 | delta_w = self.size - new_width
178 | delta_h = self.size - new_height
179 |
180 | top, bottom = delta_h // 2, delta_h - (delta_h // 2)
181 | left, right = delta_w // 2, delta_w - (delta_w // 2)
182 |
183 | color = [0, 0, 0]
184 | image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT,
185 | value=color)
186 | return image
187 |
188 |
189 | class Rotate(object):
190 | def __init__(self, angle=10, aligne=False):
191 | self.angle = angle
192 | self.aligne = aligne
193 |
194 | def __call__(self, image):
195 | angle = np.random.uniform(-self.angle, self.angle)
196 |
197 | height, width = image.shape[:2]
198 | cX, cY = width / 2, height / 2
199 |
200 | M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
201 |
202 |         if self.aligne:
203 |             cos = np.abs(M[0, 0])
204 |             sin = np.abs(M[0, 1])
205 |
206 |             # simultaneous assignment so height is computed from the original width
207 |             width, height = int(height*sin + width*cos), int(height*cos + width*sin)
208 |
209 |             M[0, 2] += (width / 2) - cX
210 |             M[1, 2] += (height / 2) - cY
211 |
212 | image = cv2.warpAffine(image, M, (width, height), borderMode=cv2.BORDER_CONSTANT)
213 | return image
214 |
215 |
216 | class Resize(object):
217 | def __init__(self, size):
218 | self.size = size
219 |
220 | def __call__(self, image):
221 |         # note: resizes to a square, so the aspect ratio is not preserved
226 | image = cv2.resize(image, (self.size, self.size))
227 | return image
228 |
229 |
230 | class HorizontalFlip(object):
231 | def __call__(self, image):
232 | image = cv2.flip(image, 1)
233 | return image
234 |
235 |
236 | class ToTensor(object):
237 |     def __call__(self, image):
238 |         image = image.transpose((2, 0, 1))  # HWC -> CHW
239 | image = torch.from_numpy(image)
240 | return image.float()
241 |
242 |
243 | class Normalize(object):
244 | def __init__(self, mean=None, std=None):
245 | self.mean = np.array(mean or [0.485, 0.456, 0.406])
246 | self.std = np.array(std or [0.229, 0.224, 0.225])
247 |
248 | def __call__(self, image):
249 | image = (image - self.mean) / self.std
250 | return image
251 |
252 |
253 | class Transforms(object):
254 | def __init__(self, input_size, train=True):
255 | self.train = train
256 |
257 | self.transforms_train = RandomApply([
258 | RandomCrop(0.9),
259 | Rotate(angle=10, aligne=False),
260 | HorizontalFlip(),
261 | ])
262 |
263 | self.transforms_test = RandomApply([
264 | HorizontalFlip(),
265 | ])
266 |
267 | self.normalize = Compose([
268 | Resize(input_size),
269 | Normalize(),
270 | ToTensor(),
271 | ])
272 |
273 | def __call__(self, image):
274 | if self.train:
275 | image = self.transforms_train(image)
276 | else:
277 | image = self.transforms_test(image)
278 |
279 | image = self.normalize(image)
280 | return image
281 |
--------------------------------------------------------------------------------
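Normalize subtracts the ImageNet mean/std from an image already scaled to [0, 1] (Contrast and Brightness clip to that range as well), and ToTensor only transposes HWC to CHW, so the pipeline expects a float RGB array in [0, 1]. A minimal usage sketch under that assumption; the actual loading convention lives in data/datasets/idrnd.py, which is not shown here, and 'example.png' is a hypothetical path:

import cv2
import numpy as np

transform = Transforms(input_size=224, train=True)

image = cv2.imread('example.png')               # hypothetical input path
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
image = image.astype(np.float32) / 255.0        # scale to [0, 1]

tensor = transform(image)                       # CHW float tensor, normalized
assert tensor.shape == (3, 224, 224)
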
/models/backbones/senet.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 |
4 | from collections import OrderedDict
5 |
6 |
7 | class SEModule(nn.Module):
8 |
9 | def __init__(self, channels, reduction):
10 | super(SEModule, self).__init__()
11 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
12 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
13 | padding=0)
14 | self.relu = nn.ReLU(inplace=True)
15 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
16 | padding=0)
17 | self.sigmoid = nn.Sigmoid()
18 |
19 | def forward(self, x):
20 | module_input = x
21 | x = self.avg_pool(x)
22 | x = self.fc1(x)
23 | x = self.relu(x)
24 | x = self.fc2(x)
25 | x = self.sigmoid(x)
26 | return module_input * x
27 |
28 |
29 | class Bottleneck(nn.Module):
30 | """
31 | Base class for bottlenecks that implements `forward()` method.
32 | """
33 | def forward(self, x):
34 | residual = x
35 |
36 | out = self.conv1(x)
37 | out = self.bn1(out)
38 | out = self.relu(out)
39 |
40 | out = self.conv2(out)
41 | out = self.bn2(out)
42 | out = self.relu(out)
43 |
44 | out = self.conv3(out)
45 | out = self.bn3(out)
46 |
47 | if self.downsample is not None:
48 | residual = self.downsample(x)
49 |
50 | out = self.se_module(out) + residual
51 | out = self.relu(out)
52 |
53 | return out
54 |
55 |
56 | class SEBottleneck(Bottleneck):
57 | """
58 | Bottleneck for SENet154.
59 | """
60 | expansion = 4
61 |
62 | def __init__(self, inplanes, planes, groups, reduction, stride=1,
63 | downsample=None):
64 | super(SEBottleneck, self).__init__()
65 | self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
66 | self.bn1 = nn.BatchNorm2d(planes * 2)
67 | self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,
68 | stride=stride, padding=1, groups=groups,
69 | bias=False)
70 | self.bn2 = nn.BatchNorm2d(planes * 4)
71 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,
72 | bias=False)
73 | self.bn3 = nn.BatchNorm2d(planes * 4)
74 | self.relu = nn.ReLU(inplace=True)
75 | self.se_module = SEModule(planes * 4, reduction=reduction)
76 | self.downsample = downsample
77 | self.stride = stride
78 |
79 |
80 | class SEResNetBottleneck(Bottleneck):
81 | """
82 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
83 | implementation and uses `stride=stride` in `conv1` and not in `conv2`
84 | (the latter is used in the torchvision implementation of ResNet).
85 | """
86 | expansion = 4
87 |
88 | def __init__(self, inplanes, planes, groups, reduction, stride=1,
89 | downsample=None):
90 | super(SEResNetBottleneck, self).__init__()
91 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
92 | stride=stride)
93 | self.bn1 = nn.BatchNorm2d(planes)
94 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
95 | groups=groups, bias=False)
96 | self.bn2 = nn.BatchNorm2d(planes)
97 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
98 | self.bn3 = nn.BatchNorm2d(planes * 4)
99 | self.relu = nn.ReLU(inplace=True)
100 | self.se_module = SEModule(planes * 4, reduction=reduction)
101 | self.downsample = downsample
102 | self.stride = stride
103 |
104 |
105 | class SEResNeXtBottleneck(Bottleneck):
106 | """
107 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
108 | """
109 | expansion = 4
110 |
111 | def __init__(self, inplanes, planes, groups, reduction, stride=1,
112 | downsample=None, base_width=4):
113 | super(SEResNeXtBottleneck, self).__init__()
114 | width = math.floor(planes * (base_width / 64)) * groups
115 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
116 | stride=1)
117 | self.bn1 = nn.BatchNorm2d(width)
118 | self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
119 | padding=1, groups=groups, bias=False)
120 | self.bn2 = nn.BatchNorm2d(width)
121 | self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
122 | self.bn3 = nn.BatchNorm2d(planes * 4)
123 | self.relu = nn.ReLU(inplace=True)
124 | self.se_module = SEModule(planes * 4, reduction=reduction)
125 | self.downsample = downsample
126 | self.stride = stride
127 |
128 |
129 | class SENet(nn.Module):
130 |
131 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
132 | inplanes=128, input_3x3=True, downsample_kernel_size=3,
133 | downsample_padding=1, num_classes=1000):
134 | """
135 | Parameters
136 | ----------
137 | block (nn.Module): Bottleneck class.
138 | - For SENet154: SEBottleneck
139 | - For SE-ResNet models: SEResNetBottleneck
140 | - For SE-ResNeXt models: SEResNeXtBottleneck
141 | layers (list of ints): Number of residual blocks for 4 layers of the
142 | network (layer1...layer4).
143 | groups (int): Number of groups for the 3x3 convolution in each
144 | bottleneck block.
145 | - For SENet154: 64
146 | - For SE-ResNet models: 1
147 | - For SE-ResNeXt models: 32
148 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
149 | - For all models: 16
150 | dropout_p (float or None): Drop probability for the Dropout layer.
151 | If `None` the Dropout layer is not used.
152 | - For SENet154: 0.2
153 | - For SE-ResNet models: None
154 | - For SE-ResNeXt models: None
155 | inplanes (int): Number of input channels for layer1.
156 | - For SENet154: 128
157 | - For SE-ResNet models: 64
158 | - For SE-ResNeXt models: 64
159 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
160 | a single 7x7 convolution in layer0.
161 | - For SENet154: True
162 | - For SE-ResNet models: False
163 | - For SE-ResNeXt models: False
164 | downsample_kernel_size (int): Kernel size for downsampling convolutions
165 | in layer2, layer3 and layer4.
166 | - For SENet154: 3
167 | - For SE-ResNet models: 1
168 | - For SE-ResNeXt models: 1
169 | downsample_padding (int): Padding for downsampling convolutions in
170 | layer2, layer3 and layer4.
171 | - For SENet154: 1
172 | - For SE-ResNet models: 0
173 | - For SE-ResNeXt models: 0
174 | num_classes (int): Number of outputs in `last_linear` layer.
175 | - For all models: 1000
176 | """
177 | super(SENet, self).__init__()
178 | self.inplanes = inplanes
179 | if input_3x3:
180 | layer0_modules = [
181 | ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
182 | bias=False)),
183 | ('bn1', nn.BatchNorm2d(64)),
184 | ('relu1', nn.ReLU(inplace=True)),
185 | ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
186 | bias=False)),
187 | ('bn2', nn.BatchNorm2d(64)),
188 | ('relu2', nn.ReLU(inplace=True)),
189 | ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
190 | bias=False)),
191 | ('bn3', nn.BatchNorm2d(inplanes)),
192 | ('relu3', nn.ReLU(inplace=True)),
193 | ]
194 | else:
195 | layer0_modules = [
196 | ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
197 | padding=3, bias=False)),
198 | ('bn1', nn.BatchNorm2d(inplanes)),
199 | ('relu1', nn.ReLU(inplace=True)),
200 | ]
201 | # To preserve compatibility with Caffe weights `ceil_mode=True`
202 | # is used instead of `padding=1`.
203 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
204 | ceil_mode=True)))
205 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
206 | self.layer1 = self._make_layer(
207 | block,
208 | planes=64,
209 | blocks=layers[0],
210 | groups=groups,
211 | reduction=reduction,
212 | downsample_kernel_size=1,
213 | downsample_padding=0
214 | )
215 | self.layer2 = self._make_layer(
216 | block,
217 | planes=128,
218 | blocks=layers[1],
219 | stride=2,
220 | groups=groups,
221 | reduction=reduction,
222 | downsample_kernel_size=downsample_kernel_size,
223 | downsample_padding=downsample_padding
224 | )
225 | self.layer3 = self._make_layer(
226 | block,
227 | planes=256,
228 | blocks=layers[2],
229 | stride=2,
230 | groups=groups,
231 | reduction=reduction,
232 | downsample_kernel_size=downsample_kernel_size,
233 | downsample_padding=downsample_padding
234 | )
235 | self.layer4 = self._make_layer(
236 | block,
237 | planes=512,
238 | blocks=layers[3],
239 | stride=2,
240 | groups=groups,
241 | reduction=reduction,
242 | downsample_kernel_size=downsample_kernel_size,
243 | downsample_padding=downsample_padding
244 | )
245 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
246 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
247 | self.last_linear = nn.Linear(512 * block.expansion, num_classes)
248 |
249 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
250 | downsample_kernel_size=1, downsample_padding=0):
251 | downsample = None
252 | if stride != 1 or self.inplanes != planes * block.expansion:
253 | downsample = nn.Sequential(
254 | nn.Conv2d(self.inplanes, planes * block.expansion,
255 | kernel_size=downsample_kernel_size, stride=stride,
256 | padding=downsample_padding, bias=False),
257 | nn.BatchNorm2d(planes * block.expansion),
258 | )
259 |
260 | layers = []
261 | layers.append(block(self.inplanes, planes, groups, reduction, stride,
262 | downsample))
263 | self.inplanes = planes * block.expansion
264 | for i in range(1, blocks):
265 | layers.append(block(self.inplanes, planes, groups, reduction))
266 |
267 | return nn.Sequential(*layers)
268 |
269 | def linear_params(self):
270 |         return self.last_linear.parameters()  # head-only parameters, used by train.py's warm-up epoch
271 |
272 | def features(self, x):
273 | x = self.layer0(x)
274 | x = self.layer1(x)
275 | x = self.layer2(x)
276 | x = self.layer3(x)
277 | x = self.layer4(x)
278 | return x
279 |
280 | def logits(self, x):
281 | x = self.avg_pool(x)
282 | if self.dropout is not None:
283 | x = self.dropout(x)
284 | x = x.view(x.size(0), -1)
285 | x = self.last_linear(x)
286 | return x
287 |
288 | def forward(self, x):
289 | x = self.features(x)
290 | x = self.logits(x)
291 | return x
--------------------------------------------------------------------------------
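The docstring above spells out how each variant is parameterized; putting those numbers together gives a sketch of the SE-ResNeXt50 configuration that the repository's se_resnext50 encoder would be built on (block choice and per-stage settings follow the docstring; [3, 4, 6, 3] is the standard 50-layer depth):

# SE-ResNeXt per the docstring: groups=32, reduction=16, no dropout,
# 64 input planes, a single 7x7 stem, and 1x1 downsampling convolutions.
se_resnext50 = SENet(SEResNeXtBottleneck, [3, 4, 6, 3],
                     groups=32, reduction=16, dropout_p=None,
                     inplanes=64, input_3x3=False,
                     downsample_kernel_size=1, downsample_padding=0,
                     num_classes=1000)

# features()/logits() split the forward pass, which makes it easy to swap the
# head; train.py's warm-up epoch relies on linear_params() for the same reason.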