├── exp ├── Debug.sh ├── Debug_Test.sh ├── logs │ └── debug.log ├── logs_test │ ├── debug_test.log │ ├── testfr_ms1m_dul.log │ └── testfr_webface_dul.log ├── logtensorboard │ └── exp_webface_dul │ │ └── events.out.tfevents.1631298816.BJEGS01 ├── TestFR_ms1m_DUL.sh ├── TestFR_webface_DUL.sh ├── Exp_webface_DUL.sh └── Exp_ms1m_DUL.sh ├── head ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── metrics.cpython-38.pyc └── metrics.py ├── loss ├── __init__.py ├── __pycache__ │ ├── focal.cpython-38.pyc │ └── __init__.cpython-38.pyc └── focal.py ├── util ├── __init__.py ├── __pycache__ │ ├── utils.cpython-38.pyc │ ├── __init__.cpython-38.pyc │ └── verification.cpython-38.pyc ├── verification.py └── utils.py ├── align ├── __init__.py ├── onet.npy ├── pnet.npy ├── rnet.npy ├── visualization_utils.py ├── face_resize.py ├── face_align.py ├── first_stage.py ├── detector.py ├── get_nets.py ├── box_utils.py ├── matlab_cp2tform.py └── align_trans.py ├── backbone ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── model_irse.cpython-38.pyc └── model_irse.py ├── .gitignore ├── __pycache__ └── config.cpython-38.pyc ├── requirements.txt ├── shutdown.py ├── test_fr_dul.py ├── README.md ├── config.py └── train_dul.py /exp/Debug.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/Debug_Test.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/logs/debug.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/logs_test/debug_test.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pth 2 | .DS_Store 3 | **/.DS_Store 4 | .DS_Store? 
5 | -------------------------------------------------------------------------------- /align/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/onet.npy -------------------------------------------------------------------------------- /align/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/pnet.npy -------------------------------------------------------------------------------- /align/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/rnet.npy -------------------------------------------------------------------------------- /__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /loss/__pycache__/focal.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/loss/__pycache__/focal.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /head/__pycache__/metrics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/head/__pycache__/metrics.cpython-38.pyc -------------------------------------------------------------------------------- /loss/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/loss/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/backbone/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/verification.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/verification.cpython-38.pyc -------------------------------------------------------------------------------- 
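The three `align/*.npy` files referenced above hold the pretrained MTCNN weights that `align/get_nets.py` loads into PNet, RNet and ONet. As a quick sanity check of a downloaded copy, the sketch below is only an illustration (not part of the repo); it assumes each file stores a pickled dict mapping parameter names to weight arrays, which is exactly how `get_nets.py` consumes them:

```python
import numpy as np

# each .npy file stores a pickled dict {parameter_name: weight_array},
# keyed by the names returned by net.named_parameters() in get_nets.py
for fname in ["align/pnet.npy", "align/rnet.npy", "align/onet.npy"]:
    weights = np.load(fname, allow_pickle=True)[()]  # [()] unwraps the 0-d object array
    print(fname, "->", len(weights), "parameter tensors")
    for name, arr in weights.items():
        print("   {}: {}".format(name, arr.shape))
```

`PNet.__init__`, `RNet.__init__` and `ONet.__init__` perform the same `np.load(..., allow_pickle=True)[()]` call and copy each entry into the matching parameter's `p.data`.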
/backbone/__pycache__/model_irse.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/backbone/__pycache__/model_irse.cpython-38.pyc -------------------------------------------------------------------------------- /exp/logtensorboard/exp_webface_dul/events.out.tfevents.1631298816.BJEGS01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/exp/logtensorboard/exp_webface_dul/events.out.tfevents.1631298816.BJEGS01 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | matplotlib==3.4.2 3 | torchvision==0.8.0 4 | bcolz==1.2.1 5 | tqdm==4.62.0 6 | opencv_python==4.5.3.56 7 | torch==1.7.0 8 | scipy==1.6.2 9 | Pillow==8.3.2 10 | scikit_learn==0.24.2 11 | tensorboardX==2.4 12 | -------------------------------------------------------------------------------- /exp/TestFR_ms1m_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=3 2 | 3 | logs_test_file='./logs_test/testfr_ms1m_dul.log' 4 | 5 | model_for_test='' 6 | 7 | python ../test_fr_dul.py \ 8 | --model_for_test $model_for_test \ 9 | >> $logs_test_file 2>&1 & 10 | -------------------------------------------------------------------------------- /exp/TestFR_webface_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=3 2 | 3 | logs_test_file='./logs_test/testfr_webface_dul.log' 4 | 5 | model_for_test='' 6 | 7 | python ../test_fr_dul.py \ 8 | --model_for_test $model_for_test \ 9 | >> $logs_test_file 2>&1 & 10 | -------------------------------------------------------------------------------- /shutdown.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='shut down process by kill command') 7 | 8 | parser.add_argument('--key', type=str, default='') 9 | 10 | args = parser.parse_args() 11 | 12 | os.system('ps -ef | grep ' + args.key + ' | grep -v grep | cut -c 9-16 | xargs kill -9') -------------------------------------------------------------------------------- /loss/focal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Support: ['FocalLoss'] 6 | 7 | 8 | class FocalLoss(nn.Module): 9 | def __init__(self, gamma = 2, eps = 1e-7): 10 | super(FocalLoss, self).__init__() 11 | self.gamma = gamma 12 | self.eps = eps 13 | self.ce = nn.CrossEntropyLoss() 14 | 15 | def forward(self, input, target): 16 | logp = self.ce(input, target) 17 | p = torch.exp(-logp) 18 | loss = (1 - p) ** self.gamma * logp 19 | return loss.mean() 20 | -------------------------------------------------------------------------------- /exp/Exp_webface_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=2,3 2 | 3 | model_save_folder='./checkpoints/exp_webface_dul/' 4 | log_tensorboard='./logtensorboard/exp_webface_dul/' 5 | logs_file='./logs/exp_webface_dul.log' 6 | 7 | # notice: default kl_scale is 0.01 in DUL (base on original paper) 8 | python ../train_dul.py \ 9 | --model_save_folder $model_save_folder \ 10 | 
--log_tensorboard $log_tensorboard \ 11 | --logs $logs_file \ 12 | --gpu_id 0 1 \ 13 | --stages 10 18 \ 14 | --kl_scale 0.01 \ 15 | >> $logs_file 2>&1 & 16 | -------------------------------------------------------------------------------- /exp/Exp_ms1m_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=4,5,6,7 2 | 3 | model_save_folder='./checkpoints/exp_ms1m_dul/' 4 | log_tensorboard='./logtensorboard/exp_ms1m_dul/' 5 | logs_file='./logs/exp_ms1m_dul.log' 6 | trainset_folder='/home/admin/workspace/fuling/data/face_recog/ms1m/imgs/' 7 | 8 | # notice: default kl_scale is 0.01 in DUL (base on original paper) 9 | python ../train_dul.py \ 10 | --model_save_folder $model_save_folder \ 11 | --log_tensorboard $log_tensorboard \ 12 | --logs $logs_file \ 13 | --gpu_id 0 1 2 3 \ 14 | --stages 10 18 \ 15 | --kl_scale 0.01 \ 16 | --batch_size 1024 \ 17 | --trainset_folder $trainset_folder \ 18 | >> $logs_file 2>&1 & 19 | -------------------------------------------------------------------------------- /exp/logs_test/testfr_ms1m_dul.log: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Model for testing Face Recognition performance is: 3 | './exp/checkpoints/exp_ms1m_dul/Backbone_IR_SE_64_DUL_Epoch_22_Batch_125092_Time_2021-09-12-15-57_checkpoint.pth' 4 | ============================================================ 5 | Face Recognition Performance on different dataset is as shown below: 6 | ============================================================ 7 | LFW : 0.9974999999999999 8 | CFP_FF : 0.9968571428571428 9 | CFP_FP : 0.9841428571428571 10 | AGEDB_30 : 0.9801666666666667 11 | CALFW : 0.9594999999999999 12 | CPLFW : 0.9296666666666666 13 | VGG2_FP : 0.954 14 | ============================================================ 15 | Testing finished! 16 | ============================================================ 17 | -------------------------------------------------------------------------------- /exp/logs_test/testfr_webface_dul.log: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Model for testing Face Recognition performance is: 3 | './exp/checkpoints/exp_webface_dul/Backbone_IR_SE_64_DUL_Epoch_22_Batch_19558_Time_2021-09-11-01-39_checkpoint.pth' 4 | ============================================================ 5 | Face Recognition Performance on different dataset is as shown below: 6 | ============================================================ 7 | LFW : 0.9941666666666666 8 | CFP_FF : 0.9922857142857142 9 | CFP_FP : 0.9652857142857142 10 | AGEDB_30 : 0.9393333333333335 11 | CALFW : 0.9348333333333333 12 | CPLFW : 0.8959999999999999 13 | VGG2_FP : 0.9376000000000001 14 | ============================================================ 15 | Testing finished! 16 | ============================================================ 17 | -------------------------------------------------------------------------------- /align/visualization_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | 4 | def show_results(img, bounding_boxes, facial_landmarks = []): 5 | """Draw bounding boxes and facial landmarks. 6 | Arguments: 7 | img: an instance of PIL.Image. 8 | bounding_boxes: a float numpy array of shape [n, 5]. 9 | facial_landmarks: a float numpy array of shape [n, 10]. 
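(columns 0-4 hold the x coordinates and columns 5-9 the y coordinates of the five landmark points, which is why the drawing loop below pairs p[i] with p[i + 5])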
10 | Returns: 11 | an instance of PIL.Image. 12 | """ 13 | img_copy = img.copy() 14 | draw = ImageDraw.Draw(img_copy) 15 | 16 | for b in bounding_boxes: 17 | draw.rectangle([ 18 | (b[0], b[1]), (b[2], b[3]) 19 | ], outline = 'white') 20 | 21 | inx = 0 22 | for p in facial_landmarks: 23 | for i in range(5): 24 | draw.ellipse([ 25 | (p[i] - 1.0, p[i + 5] - 1.0), 26 | (p[i] + 1.0, p[i + 5] + 1.0) 27 | ], outline = 'blue') 28 | 29 | return img_copy -------------------------------------------------------------------------------- /test_fr_dul.py: -------------------------------------------------------------------------------- 1 | # test face recognition performance of dul model 2 | import torch 3 | import os 4 | 5 | from config import dul_args_func, Backbone_Dict, Test_FR_Data_Dict 6 | from util.utils import get_data_pair, perform_face_recog 7 | 8 | 9 | class DUL_FR_Tester(): 10 | def __init__(self, dul_args) -> None: 11 | self.dul_args = dul_args 12 | self.dul_args.multi_gpu = False 13 | 14 | def face_recog(self): 15 | BACKBONE = Backbone_Dict[self.dul_args.backbone_name] 16 | if os.path.isfile(self.dul_args.model_for_test): 17 | print('=' * 60, flush=True) 18 | print("Model for testing Face Recognition performance is:\n '{}' ".format(self.dul_args.model_for_test), flush=True) 19 | BACKBONE.load_state_dict(torch.load(self.dul_args.model_for_test)) 20 | BACKBONE = BACKBONE.cuda().eval() 21 | else: 22 | print('=' * 60, flush=True) 23 | print('No model found for testing!', flush=True) 24 | print('=' * 60, flush=True) 25 | return 26 | print('=' * 60, flush=True) 27 | print('Face Recognition Performance on different dataset is as shown below:', flush=True) 28 | print('=' * 60, flush=True) 29 | for value in Test_FR_Data_Dict.values(): 30 | testdata, testdata_issame = get_data_pair(self.dul_args.testset_fr_folder, value) 31 | accuracy, best_threshold, roc_curve = perform_face_recog(self.dul_args.multi_gpu, self.dul_args.embedding_size, 32 | self.dul_args.batch_size, BACKBONE, testdata, testdata_issame) 33 | print(value.upper(), ': ', accuracy, flush=True) 34 | print('=' * 60, flush=True) 35 | print('Testing finished!', flush=True) 36 | print('=' * 60, flush=True) 37 | 38 | 39 | if __name__ == '__main__': 40 | dul_fr_test = DUL_FR_Tester(dul_args_func()) 41 | dul_fr_test.face_recog() 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Implementation of DUL (PyTorch version) 2 | 3 | #### Introduction 4 | 5 | --- 6 | 7 | This repo is an ***unofficial*** PyTorch implementation of DUL ([Data Uncertainty Learning in Face Recognition, CVPR2020](https://arxiv.org/abs/2003.11339)). 8 | 9 | NOTE: 10 | 11 | 1. *SE-Resnet64 is used as the default backbone in this repo*; you can define others in `./backbone/model_irse.py` 12 | 2. *Training (process)* & *Testing (results)* logs are saved in `./exp/logs/` & `./exp/logs_test/` 13 | 3.
*Implementation details are not exactly the same as in the original paper*; see `./config.py` for the settings used here 14 | 15 | 16 | 17 | #### Getting Started 18 | 19 | --- 20 | 21 | - Star this repo, plz 22 | 23 | 😊 24 | 25 | - Clone this repo 26 | 27 | ``` 28 | git clone https://github.com/MouxiaoHuang/DUL.git 29 | ``` 30 | 31 | - Prepare env 32 | 33 | ```bash 34 | conda create --name <env_name> python=3.8 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | - Prepare trainset and testset 39 | - Trainset: [Casia WebFace or MS-Celeb-1M](https://github.com/ZhaoJ9014/face.evoLVe) 40 | - Testset: [LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW, VGG2_FP](https://github.com/ZhaoJ9014/face.evoLVe) 41 | - Training 42 | 43 | ```bash 44 | sh ./exp/Exp_webface_DUL.sh 45 | # or 46 | sh ./exp/Exp_ms1m_DUL.sh 47 | ``` 48 | 49 | - Testing 50 | 51 | ```bash 52 | sh ./exp/TestFR_webface_DUL.sh 53 | # or 54 | sh ./exp/TestFR_ms1m_DUL.sh 55 | ``` 56 | 57 | 58 | 59 | #### Results Report 60 | 61 | --- 62 | 63 | - Trainset: Casia Webface 64 | 65 | | | LFW | CFP_FF | CFP_FP | AgeDB | CALFW | CPLFW | VGG2_FP | 66 | | :------------: | :---: | :----: | :----: | :---: | :---: | :---: | :-----: | 67 | | Original paper | - | - | - | - | - | - | - | 68 | | This repo | 99.42 | 99.23 | 96.53 | 93.93 | 93.48 | 89.60 | 93.76 | 69 | 70 | - Trainset: MS-Celeb-1M 71 | 72 | | | LFW | CFP_FF | CFP_FP | AgeDB | CALFW | CPLFW | VGG2_FP | 73 | | :-----------------------: | :---: | :----: | :----: | :---: | :---: | :---: | :-----: | 74 | | Original paper (ResNet64) | 99.78 | - | 98.67 | - | - | - | - | 75 | | This repo | 99.75 | 99.69 | 98.41 | 98.02 | 95.95 | 92.97 | 95.40 | 76 | | | | | | | | | | 77 | 78 | 79 | 80 | #### Thanks & Refs 81 | 82 | --- 83 | 84 | - [ZhaoJ9014/face.evoLVe](https://github.com/ZhaoJ9014/face.evoLVe) 85 | - [Ontheway361/dul-pytorch](https://github.com/Ontheway361/dul-pytorch) 86 | 87 | -------------------------------------------------------------------------------- /align/face_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from tqdm import tqdm 4 | 5 | 6 | def mkdir(path): 7 | if not os.path.exists(path): 8 | os.mkdir(path) 9 | 10 | 11 | def process_image(img): 12 | 13 | size = img.shape 14 | h, w = size[0], size[1] 15 | scale = max(w, h) / float(min_side) 16 | new_w, new_h = int(w / scale), int(h / scale) 17 | resize_img = cv2.resize(img, (new_w, new_h)) 18 | if new_w % 2 != 0 and new_h % 2 == 0: 19 | top, bottom, left, right = (min_side - new_h) // 2, (min_side - new_h) // 2, (min_side - new_w) // 2 + 1, ( 20 | min_side - new_w) // 2 21 | elif new_h % 2 != 0 and new_w % 2 == 0: 22 | top, bottom, left, right = (min_side - new_h) // 2 + 1, (min_side - new_h) // 2, (min_side - new_w) // 2, ( 23 | min_side - new_w) // 2 24 | elif new_h % 2 == 0 and new_w % 2 == 0: 25 | top, bottom, left, right = (min_side - new_h) // 2, (min_side - new_h) // 2, (min_side - new_w) // 2, ( 26 | min_side - new_w) // 2 27 | else: 28 | top, bottom, left, right = (min_side - new_h) // 2 + 1, (min_side - new_h) // 2, (min_side - new_w) // 2 + 1, ( 29 | min_side - new_w) // 2 30 | pad_img = cv2.copyMakeBorder(resize_img, top, bottom, left, right, cv2.BORDER_CONSTANT, 31 | value=[0, 0, 0]) 32 | 33 | return pad_img 34 | 35 | 36 | def main(source_root): 37 | 38 | dest_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M_Resized" 39 | mkdir(dest_root) 40 | cwd = os.getcwd() # delete '.DS_Store' existed in the source_root 41 | os.chdir(source_root) 42 | os.system("find . 
-name '*.DS_Store' -type f -delete") 43 | os.chdir(cwd) 44 | 45 | if not os.path.isdir(dest_root): 46 | os.mkdir(dest_root) 47 | 48 | for subfolder in tqdm(os.listdir(source_root)): 49 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 50 | os.mkdir(os.path.join(dest_root, subfolder)) 51 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 52 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 53 | img = cv2.imread(os.path.join(source_root, subfolder, image_name)) 54 | if type(img) == type(None): 55 | print("damaged image %s, del it" % (img)) 56 | os.remove(img) 57 | continue 58 | size = img.shape 59 | h, w = size[0], size[1] 60 | if max(w, h) > 512: 61 | img_pad = process_image(img) 62 | else: 63 | img_pad = img 64 | cv2.imwrite(os.path.join(dest_root, subfolder, image_name.split('.')[0] + '.jpg'), img_pad) 65 | 66 | 67 | if __name__ == "__main__": 68 | min_side = 512 69 | main(source_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M/database/base") -------------------------------------------------------------------------------- /align/face_align.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from detector import detect_faces 3 | from align_trans import get_reference_facial_points, warp_and_crop_face 4 | import numpy as np 5 | import os 6 | from tqdm import tqdm 7 | import argparse 8 | 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser(description = "face alignment") 12 | parser.add_argument("-source_root", "--source_root", help = "specify your source dir", default = "./data/test", type = str) 13 | parser.add_argument("-dest_root", "--dest_root", help = "specify your destination dir", default = "./data/test_Aligned", type = str) 14 | parser.add_argument("-crop_size", "--crop_size", help = "specify size of aligned faces, align and crop with padding", default = 112, type = int) 15 | args = parser.parse_args() 16 | 17 | source_root = args.source_root # specify your source dir 18 | dest_root = args.dest_root # specify your destination dir 19 | crop_size = args.crop_size # specify size of aligned faces, align and crop with padding 20 | scale = crop_size / 112. 21 | reference = get_reference_facial_points(default_square = True) * scale 22 | 23 | cwd = os.getcwd() # delete '.DS_Store' existed in the source_root 24 | os.chdir(source_root) 25 | os.system("find . 
-name '*.DS_Store' -type f -delete") 26 | os.chdir(cwd) 27 | 28 | if not os.path.isdir(dest_root): 29 | os.mkdir(dest_root) 30 | 31 | for subfolder in tqdm(os.listdir(source_root)): 32 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 33 | os.mkdir(os.path.join(dest_root, subfolder)) 34 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 35 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 36 | img = Image.open(os.path.join(source_root, subfolder, image_name)) 37 | try: # Handle exception 38 | _, landmarks = detect_faces(img) 39 | except Exception: 40 | print("{} is discarded due to exception!".format(os.path.join(source_root, subfolder, image_name))) 41 | continue 42 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 43 | print("{} is discarded due to non-detected landmarks!".format(os.path.join(source_root, subfolder, image_name))) 44 | continue 45 | facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 46 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 47 | img_warped = Image.fromarray(warped_face) 48 | if image_name.split('.')[-1].lower() not in ['jpg', 'jpeg']: #not from jpg 49 | image_name = '.'.join(image_name.split('.')[:-1]) + '.jpg' 50 | img_warped.save(os.path.join(dest_root, subfolder, image_name)) 51 | -------------------------------------------------------------------------------- /align/first_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import math 4 | from PIL import Image 5 | import numpy as np 6 | from box_utils import nms, _preprocess 7 | 8 | 9 | def run_first_stage(image, net, scale, threshold): 10 | """Run P-Net, generate bounding boxes, and do NMS. 11 | 12 | Arguments: 13 | image: an instance of PIL.Image. 14 | net: an instance of pytorch's nn.Module, P-Net. 15 | scale: a float number, 16 | scale width and height of the image by this number. 17 | threshold: a float number, 18 | threshold on the probability of a face when generating 19 | bounding boxes from predictions of the net. 20 | 21 | Returns: 22 | a float numpy array of shape [n_boxes, 9], 23 | bounding boxes with scores and offsets (4 + 1 + 4). 24 | """ 25 | 26 | # scale the image and convert it to a float array 27 | width, height = image.size 28 | sw, sh = math.ceil(width*scale), math.ceil(height*scale) 29 | img = image.resize((sw, sh), Image.BILINEAR) 30 | img = np.asarray(img, 'float32') 31 | 32 | img = Variable(torch.FloatTensor(_preprocess(img)), volatile = True) 33 | output = net(img) 34 | probs = output[1].data.numpy()[0, 1, :, :] 35 | offsets = output[0].data.numpy() 36 | # probs: probability of a face at each sliding window 37 | # offsets: transformations to true bounding boxes 38 | 39 | boxes = _generate_bboxes(probs, offsets, scale, threshold) 40 | if len(boxes) == 0: 41 | return None 42 | 43 | keep = nms(boxes[:, 0:5], overlap_threshold = 0.5) 44 | return boxes[keep] 45 | 46 | 47 | def _generate_bboxes(probs, offsets, scale, threshold): 48 | """Generate bounding boxes at places 49 | where there is probably a face. 50 | 51 | Arguments: 52 | probs: a float numpy array of shape [n, m]. 53 | offsets: a float numpy array of shape [1, 4, n, m]. 54 | scale: a float number, 55 | width and height of the image were scaled by this number. 56 | threshold: a float number. 
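Only positions of the probability map whose score exceeds this threshold yield candidate boxes; detector.py passes thresholds[0] = 0.6 for this stage.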
57 | 58 | Returns: 59 | a float numpy array of shape [n_boxes, 9] 60 | """ 61 | 62 | # applying P-Net is equivalent, in some sense, to 63 | # moving 12x12 window with stride 2 64 | stride = 2 65 | cell_size = 12 66 | 67 | # indices of boxes where there is probably a face 68 | inds = np.where(probs > threshold) 69 | 70 | if inds[0].size == 0: 71 | return np.array([]) 72 | 73 | # transformations of bounding boxes 74 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] 75 | # they are defined as: 76 | # w = x2 - x1 + 1 77 | # h = y2 - y1 + 1 78 | # x1_true = x1 + tx1*w 79 | # x2_true = x2 + tx2*w 80 | # y1_true = y1 + ty1*h 81 | # y2_true = y2 + ty2*h 82 | 83 | offsets = np.array([tx1, ty1, tx2, ty2]) 84 | score = probs[inds[0], inds[1]] 85 | 86 | # P-Net is applied to scaled images 87 | # so we need to rescale bounding boxes back 88 | bounding_boxes = np.vstack([ 89 | np.round((stride*inds[1] + 1.0)/scale), 90 | np.round((stride*inds[0] + 1.0)/scale), 91 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 92 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 93 | score, offsets 94 | ]) 95 | # why one is added? 96 | 97 | return bounding_boxes.T -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from backbone.model_irse import * 3 | 4 | 5 | def dul_args_func(): 6 | parser = argparse.ArgumentParser(description='DUL: Data Uncertainty Learning in Face Recognition') 7 | 8 | # ----- random seed for reproducing 9 | parser.add_argument('--random_seed', type=int, default=6666) 10 | 11 | # ----- directory (train & test) 12 | parser.add_argument('--trainset_folder', type=str, default='/home/huangmouxiao.hmx/data/face_rec/casia_maxpy_clean_align/') 13 | parser.add_argument('--model_save_folder', type=str, default='./checkpoints/') 14 | parser.add_argument('--log_tensorboard', type=str, default='./logtensorboard/') 15 | parser.add_argument('--logs', type=str, default='./logs/') 16 | parser.add_argument('--testset_fr_folder', type=str, default='/home/huangmouxiao.hmx/data/face_rec/usual_test/') 17 | parser.add_argument('--testset_ood_folder', type=str, default='') 18 | parser.add_argument('--model_for_test', type=str, default='') 19 | 20 | # ----- training env 21 | parser.add_argument('--multi_gpu', type=bool, default=True) 22 | parser.add_argument('--gpu_id', type=str, nargs='+') 23 | 24 | # ----- resume pretrain details 25 | parser.add_argument('--resume_backbone', type=str, default='') 26 | parser.add_argument('--resume_head', type=str, default='') 27 | parser.add_argument('--resume_epoch', type=int, default=0) 28 | 29 | # ----- model & training details 30 | parser.add_argument('--backbone_name', type=str, default='IR_SE_64_DUL') 31 | parser.add_argument('--head_name', type=str, default='ArcFace') 32 | parser.add_argument('--loss_name', type=str, default='Softmax') 33 | parser.add_argument('--optimizer', type=str, default='SGD') 34 | parser.add_argument('--arcface_scale', type=int, default=64) 35 | parser.add_argument('--input_size', type=list, default=[112, 112]) # support: [112, 112] and [224, 224] 36 | parser.add_argument('--center_crop', type=bool, default=True) 37 | parser.add_argument('--rgb_mean', type=list, default=[0.5, 0.5, 0.5]) 38 | parser.add_argument('--rgb_std', type=list, default=[0.5, 0.5, 0.5]) 39 | parser.add_argument('--embedding_size', type=int, default=512) 40 | parser.add_argument('--drop_last', type=bool, 
default=True) 41 | parser.add_argument('--weight_decay', type=float, default=5e-4) 42 | parser.add_argument('--momentum', type=float, default=0.9) 43 | parser.add_argument('--pin_memory', type=bool, default=True) 44 | parser.add_argument('--num_workers', type=int, default=8) 45 | 46 | # ----- hyperparameters 47 | parser.add_argument('--batch_size', type=int, default=512) 48 | parser.add_argument('--num_epoch', type=int, default=22) 49 | parser.add_argument('--warm_up_epoch', type=int, default=1) 50 | parser.add_argument('--image_noise', type=float, default=0) 51 | parser.add_argument('--lr', type=float, default=0.1) 52 | parser.add_argument('--stages', type=str, nargs='+') 53 | parser.add_argument('--kl_scale', type=float, default=0.01) 54 | 55 | args = parser.parse_args() 56 | 57 | return args 58 | 59 | dul_args = dul_args_func() 60 | 61 | Backbone_Dict = { 62 | 'IR_50': IR_50(dul_args.input_size), 63 | 'IR_101': IR_101(dul_args.input_size), 64 | 'IR_152': IR_152(dul_args.input_size), 65 | 'IR_SE_50': IR_SE_50(dul_args.input_size), 66 | 'IR_SE_64_DUL': IR_SE_64_DUL(dul_args.input_size), 67 | 'IR_SE_101': IR_SE_101(dul_args.input_size), 68 | 'IR_SE_152': IR_SE_152(dul_args.input_size) 69 | } 70 | 71 | Test_FR_Data_Dict = { 72 | 'lfw': 'lfw', 73 | 'cfp_ff': 'cfp_ff', 74 | 'cfp_fp': 'cfp_fp', 75 | 'agedb': 'agedb_30', 76 | 'calfw': 'calfw', 77 | 'cplfw': 'cplfw', 78 | 'vgg2_fp': 'vgg2_fp' 79 | } -------------------------------------------------------------------------------- /align/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | from get_nets import PNet, RNet, ONet 5 | from box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 6 | from first_stage import run_first_stage 7 | 8 | 9 | def detect_faces(image, min_face_size = 20.0, 10 | thresholds=[0.6, 0.7, 0.8], 11 | nms_thresholds=[0.7, 0.7, 0.7]): 12 | """ 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | min_face_size: a float number. 16 | thresholds: a list of length 3. 17 | nms_thresholds: a list of length 3. 18 | 19 | Returns: 20 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 21 | bounding boxes and facial landmarks. 
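Example (mirrors the call in align/face_align.py):
    from PIL import Image
    img = Image.open('some_face.jpg')  # illustrative path
    bounding_boxes, landmarks = detect_faces(img)
    # landmarks[k] holds [x1..x5, y1..y5] for the five points of face k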
22 | """ 23 | 24 | # LOAD MODELS 25 | pnet = PNet() 26 | rnet = RNet() 27 | onet = ONet() 28 | onet.eval() 29 | 30 | # BUILD AN IMAGE PYRAMID 31 | width, height = image.size 32 | min_length = min(height, width) 33 | 34 | min_detection_size = 12 35 | factor = 0.707 # sqrt(0.5) 36 | 37 | # scales for scaling the image 38 | scales = [] 39 | 40 | # scales the image so that 41 | # minimum size that we can detect equals to 42 | # minimum face size that we want to detect 43 | m = min_detection_size/min_face_size 44 | min_length *= m 45 | 46 | factor_count = 0 47 | while min_length > min_detection_size: 48 | scales.append(m*factor**factor_count) 49 | min_length *= factor 50 | factor_count += 1 51 | 52 | # STAGE 1 53 | 54 | # it will be returned 55 | bounding_boxes = [] 56 | 57 | # run P-Net on different scales 58 | for s in scales: 59 | boxes = run_first_stage(image, pnet, scale = s, threshold = thresholds[0]) 60 | bounding_boxes.append(boxes) 61 | 62 | # collect boxes (and offsets, and scores) from different scales 63 | bounding_boxes = [i for i in bounding_boxes if i is not None] 64 | bounding_boxes = np.vstack(bounding_boxes) 65 | 66 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 67 | bounding_boxes = bounding_boxes[keep] 68 | 69 | # use offsets predicted by pnet to transform bounding boxes 70 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 71 | # shape [n_boxes, 5] 72 | 73 | bounding_boxes = convert_to_square(bounding_boxes) 74 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 75 | 76 | # STAGE 2 77 | 78 | img_boxes = get_image_boxes(bounding_boxes, image, size = 24) 79 | img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) 80 | output = rnet(img_boxes) 81 | offsets = output[0].data.numpy() # shape [n_boxes, 4] 82 | probs = output[1].data.numpy() # shape [n_boxes, 2] 83 | 84 | keep = np.where(probs[:, 1] > thresholds[1])[0] 85 | bounding_boxes = bounding_boxes[keep] 86 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 87 | offsets = offsets[keep] 88 | 89 | keep = nms(bounding_boxes, nms_thresholds[1]) 90 | bounding_boxes = bounding_boxes[keep] 91 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 92 | bounding_boxes = convert_to_square(bounding_boxes) 93 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 94 | 95 | # STAGE 3 96 | 97 | img_boxes = get_image_boxes(bounding_boxes, image, size = 48) 98 | if len(img_boxes) == 0: 99 | return [], [] 100 | img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) 101 | output = onet(img_boxes) 102 | landmarks = output[0].data.numpy() # shape [n_boxes, 10] 103 | offsets = output[1].data.numpy() # shape [n_boxes, 4] 104 | probs = output[2].data.numpy() # shape [n_boxes, 2] 105 | 106 | keep = np.where(probs[:, 1] > thresholds[2])[0] 107 | bounding_boxes = bounding_boxes[keep] 108 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 109 | offsets = offsets[keep] 110 | landmarks = landmarks[keep] 111 | 112 | # compute landmark points 113 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 114 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 115 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 116 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 117 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 118 | 119 | bounding_boxes = calibrate_box(bounding_boxes, offsets) 120 | keep = nms(bounding_boxes, nms_thresholds[2], mode = 'min') 121 | 
bounding_boxes = bounding_boxes[keep] 122 | landmarks = landmarks[keep] 123 | 124 | return bounding_boxes, landmarks 125 | -------------------------------------------------------------------------------- /align/get_nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | import numpy as np 6 | 7 | 8 | class Flatten(nn.Module): 9 | 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """ 15 | Arguments: 16 | x: a float tensor with shape [batch_size, c, h, w]. 17 | Returns: 18 | a float tensor with shape [batch_size, c*h*w]. 19 | """ 20 | 21 | # without this pretrained model isn't working 22 | x = x.transpose(3, 2).contiguous() 23 | 24 | return x.view(x.size(0), -1) 25 | 26 | 27 | class PNet(nn.Module): 28 | 29 | def __init__(self): 30 | 31 | super(PNet, self).__init__() 32 | 33 | # suppose we have input with size HxW, then 34 | # after first layer: H - 2, 35 | # after pool: ceil((H - 2)/2), 36 | # after second conv: ceil((H - 2)/2) - 2, 37 | # after last conv: ceil((H - 2)/2) - 4, 38 | # and the same for W 39 | 40 | self.features = nn.Sequential(OrderedDict([ 41 | ('conv1', nn.Conv2d(3, 10, 3, 1)), 42 | ('prelu1', nn.PReLU(10)), 43 | ('pool1', nn.MaxPool2d(2, 2, ceil_mode = True)), 44 | 45 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 46 | ('prelu2', nn.PReLU(16)), 47 | 48 | ('conv3', nn.Conv2d(16, 32, 3, 1)), 49 | ('prelu3', nn.PReLU(32)) 50 | ])) 51 | 52 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 53 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 54 | 55 | weights = np.load("./pnet.npy", allow_pickle=True)[()] 56 | for n, p in self.named_parameters(): 57 | p.data = torch.FloatTensor(weights[n]) 58 | 59 | def forward(self, x): 60 | """ 61 | Arguments: 62 | x: a float tensor with shape [batch_size, 3, h, w]. 63 | Returns: 64 | b: a float tensor with shape [batch_size, 4, h', w']. 65 | a: a float tensor with shape [batch_size, 2, h', w']. 66 | """ 67 | x = self.features(x) 68 | a = self.conv4_1(x) 69 | b = self.conv4_2(x) 70 | a = F.softmax(a) 71 | return b, a 72 | 73 | 74 | class RNet(nn.Module): 75 | 76 | def __init__(self): 77 | 78 | super(RNet, self).__init__() 79 | 80 | self.features = nn.Sequential(OrderedDict([ 81 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 82 | ('prelu1', nn.PReLU(28)), 83 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 84 | 85 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 86 | ('prelu2', nn.PReLU(48)), 87 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 88 | 89 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 90 | ('prelu3', nn.PReLU(64)), 91 | 92 | ('flatten', Flatten()), 93 | ('conv4', nn.Linear(576, 128)), 94 | ('prelu4', nn.PReLU(128)) 95 | ])) 96 | 97 | self.conv5_1 = nn.Linear(128, 2) 98 | self.conv5_2 = nn.Linear(128, 4) 99 | 100 | weights = np.load("./rnet.npy", allow_pickle=True)[()] 101 | for n, p in self.named_parameters(): 102 | p.data = torch.FloatTensor(weights[n]) 103 | 104 | def forward(self, x): 105 | """ 106 | Arguments: 107 | x: a float tensor with shape [batch_size, 3, h, w]. 108 | Returns: 109 | b: a float tensor with shape [batch_size, 4]. 110 | a: a float tensor with shape [batch_size, 2]. 
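(b holds the bounding-box regression offsets and a the face/background probabilities after softmax; detector.py reads them as output[0] and output[1] respectively)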
111 | """ 112 | x = self.features(x) 113 | a = self.conv5_1(x) 114 | b = self.conv5_2(x) 115 | a = F.softmax(a) 116 | return b, a 117 | 118 | 119 | class ONet(nn.Module): 120 | 121 | def __init__(self): 122 | 123 | super(ONet, self).__init__() 124 | 125 | self.features = nn.Sequential(OrderedDict([ 126 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 127 | ('prelu1', nn.PReLU(32)), 128 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 129 | 130 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 131 | ('prelu2', nn.PReLU(64)), 132 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 133 | 134 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 135 | ('prelu3', nn.PReLU(64)), 136 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode = True)), 137 | 138 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 139 | ('prelu4', nn.PReLU(128)), 140 | 141 | ('flatten', Flatten()), 142 | ('conv5', nn.Linear(1152, 256)), 143 | ('drop5', nn.Dropout(0.25)), 144 | ('prelu5', nn.PReLU(256)), 145 | ])) 146 | 147 | self.conv6_1 = nn.Linear(256, 2) 148 | self.conv6_2 = nn.Linear(256, 4) 149 | self.conv6_3 = nn.Linear(256, 10) 150 | 151 | weights = np.load("./onet.npy", allow_pickle=True)[()] 152 | for n, p in self.named_parameters(): 153 | p.data = torch.FloatTensor(weights[n]) 154 | 155 | def forward(self, x): 156 | """ 157 | Arguments: 158 | x: a float tensor with shape [batch_size, 3, h, w]. 159 | Returns: 160 | c: a float tensor with shape [batch_size, 10]. 161 | b: a float tensor with shape [batch_size, 4]. 162 | a: a float tensor with shape [batch_size, 2]. 163 | """ 164 | x = self.features(x) 165 | a = self.conv6_1(x) 166 | b = self.conv6_2(x) 167 | c = self.conv6_3(x) 168 | a = F.softmax(a) 169 | return c, b, a -------------------------------------------------------------------------------- /align/box_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | def nms(boxes, overlap_threshold = 0.5, mode = 'union'): 6 | """Non-maximum suppression. 7 | 8 | Arguments: 9 | boxes: a float numpy array of shape [n, 5], 10 | where each row is (xmin, ymin, xmax, ymax, score). 11 | overlap_threshold: a float number. 12 | mode: 'union' or 'min'. 
13 | 14 | Returns: 15 | list with indices of the selected boxes 16 | """ 17 | 18 | # if there are no boxes, return the empty list 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | # list of picked indices 23 | pick = [] 24 | 25 | # grab the coordinates of the bounding boxes 26 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] 27 | 28 | area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 29 | ids = np.argsort(score) # in increasing order 30 | 31 | while len(ids) > 0: 32 | 33 | # grab index of the largest value 34 | last = len(ids) - 1 35 | i = ids[last] 36 | pick.append(i) 37 | 38 | # compute intersections 39 | # of the box with the largest score 40 | # with the rest of boxes 41 | 42 | # left top corner of intersection boxes 43 | ix1 = np.maximum(x1[i], x1[ids[:last]]) 44 | iy1 = np.maximum(y1[i], y1[ids[:last]]) 45 | 46 | # right bottom corner of intersection boxes 47 | ix2 = np.minimum(x2[i], x2[ids[:last]]) 48 | iy2 = np.minimum(y2[i], y2[ids[:last]]) 49 | 50 | # width and height of intersection boxes 51 | w = np.maximum(0.0, ix2 - ix1 + 1.0) 52 | h = np.maximum(0.0, iy2 - iy1 + 1.0) 53 | 54 | # intersections' areas 55 | inter = w * h 56 | if mode == 'min': 57 | overlap = inter/np.minimum(area[i], area[ids[:last]]) 58 | elif mode == 'union': 59 | # intersection over union (IoU) 60 | overlap = inter/(area[i] + area[ids[:last]] - inter) 61 | 62 | # delete all boxes where overlap is too big 63 | ids = np.delete( 64 | ids, 65 | np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) 66 | ) 67 | 68 | return pick 69 | 70 | 71 | def convert_to_square(bboxes): 72 | """Convert bounding boxes to a square form. 73 | 74 | Arguments: 75 | bboxes: a float numpy array of shape [n, 5]. 76 | 77 | Returns: 78 | a float numpy array of shape [n, 5], 79 | squared bounding boxes. 80 | """ 81 | 82 | square_bboxes = np.zeros_like(bboxes) 83 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 84 | h = y2 - y1 + 1.0 85 | w = x2 - x1 + 1.0 86 | max_side = np.maximum(h, w) 87 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 88 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 89 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 90 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 91 | return square_bboxes 92 | 93 | 94 | def calibrate_box(bboxes, offsets): 95 | """Transform bounding boxes to be more like true bounding boxes. 96 | 'offsets' is one of the outputs of the nets. 97 | 98 | Arguments: 99 | bboxes: a float numpy array of shape [n, 5]. 100 | offsets: a float numpy array of shape [n, 4]. 101 | 102 | Returns: 103 | a float numpy array of shape [n, 5]. 104 | """ 105 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 106 | w = x2 - x1 + 1.0 107 | h = y2 - y1 + 1.0 108 | w = np.expand_dims(w, 1) 109 | h = np.expand_dims(h, 1) 110 | 111 | # this is what happening here: 112 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 113 | # x1_true = x1 + tx1*w 114 | # y1_true = y1 + ty1*h 115 | # x2_true = x2 + tx2*w 116 | # y2_true = y2 + ty2*h 117 | # below is just more compact form of this 118 | 119 | # are offsets always such that 120 | # x1 < x2 and y1 < y2 ? 121 | 122 | translation = np.hstack([w, h, w, h])*offsets 123 | bboxes[:, 0:4] = bboxes[:, 0:4] + translation 124 | return bboxes 125 | 126 | 127 | def get_image_boxes(bounding_boxes, img, size = 24): 128 | """Cut out boxes from the image. 129 | 130 | Arguments: 131 | bounding_boxes: a float numpy array of shape [n, 5]. 132 | img: an instance of PIL.Image. 133 | size: an integer, size of cutouts. 
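(detector.py uses size = 24 for the R-Net stage and size = 48 for the O-Net stage)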
134 | 135 | Returns: 136 | a float numpy array of shape [n, 3, size, size]. 137 | """ 138 | 139 | num_boxes = len(bounding_boxes) 140 | width, height = img.size 141 | 142 | [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) 143 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 144 | 145 | for i in range(num_boxes): 146 | img_box = np.zeros((h[i], w[i], 3), 'uint8') 147 | 148 | img_array = np.asarray(img, 'uint8') 149 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ 150 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 151 | 152 | # resize 153 | img_box = Image.fromarray(img_box) 154 | img_box = img_box.resize((size, size), Image.BILINEAR) 155 | img_box = np.asarray(img_box, 'float32') 156 | 157 | img_boxes[i, :, :, :] = _preprocess(img_box) 158 | 159 | return img_boxes 160 | 161 | 162 | def correct_bboxes(bboxes, width, height): 163 | """Crop boxes that are too big and get coordinates 164 | with respect to cutouts. 165 | 166 | Arguments: 167 | bboxes: a float numpy array of shape [n, 5], 168 | where each row is (xmin, ymin, xmax, ymax, score). 169 | width: a float number. 170 | height: a float number. 171 | 172 | Returns: 173 | dy, dx, edy, edx: a int numpy arrays of shape [n], 174 | coordinates of the boxes with respect to the cutouts. 175 | y, x, ey, ex: a int numpy arrays of shape [n], 176 | corrected ymin, xmin, ymax, xmax. 177 | h, w: a int numpy arrays of shape [n], 178 | just heights and widths of boxes. 179 | 180 | in the following order: 181 | [dy, edy, dx, edx, y, ey, x, ex, w, h]. 182 | """ 183 | 184 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 185 | w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 186 | num_boxes = bboxes.shape[0] 187 | 188 | # 'e' stands for end 189 | # (x, y) -> (ex, ey) 190 | x, y, ex, ey = x1, y1, x2, y2 191 | 192 | # we need to cut out a box from the image. 193 | # (x, y, ex, ey) are corrected coordinates of the box 194 | # in the image. 195 | # (dx, dy, edx, edy) are coordinates of the box in the cutout 196 | # from the image. 197 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 198 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 199 | 200 | # if box's bottom right corner is too far right 201 | ind = np.where(ex > width - 1.0)[0] 202 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 203 | ex[ind] = width - 1.0 204 | 205 | # if box's bottom right corner is too low 206 | ind = np.where(ey > height - 1.0)[0] 207 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 208 | ey[ind] = height - 1.0 209 | 210 | # if box's top left corner is too far left 211 | ind = np.where(x < 0.0)[0] 212 | dx[ind] = 0.0 - x[ind] 213 | x[ind] = 0.0 214 | 215 | # if box's top left corner is too high 216 | ind = np.where(y < 0.0)[0] 217 | dy[ind] = 0.0 - y[ind] 218 | y[ind] = 0.0 219 | 220 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 221 | return_list = [i.astype('int32') for i in return_list] 222 | 223 | return return_list 224 | 225 | 226 | def _preprocess(img): 227 | """Preprocessing step before feeding the network. 228 | 229 | Arguments: 230 | img: a float numpy array of shape [h, w, c]. 231 | 232 | Returns: 233 | a float numpy array of shape [1, c, h, w]. 
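Pixel values are shifted and scaled to roughly [-1, 1]: (img - 127.5) * 0.0078125, i.e. (img - 127.5) / 128.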
234 | """ 235 | img = img.transpose((2, 0, 1)) 236 | img = np.expand_dims(img, 0) 237 | img = (img - 127.5) * 0.0078125 238 | return img 239 | -------------------------------------------------------------------------------- /util/verification.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | from sklearn.model_selection import KFold 28 | from sklearn.decomposition import PCA 29 | import sklearn 30 | from scipy import interpolate 31 | from scipy.spatial.distance import pdist 32 | 33 | 34 | # Support: ['calculate_roc', 'calculate_accuracy', 'calculate_val', 'calculate_val_far', 'evaluate'] 35 | 36 | 37 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds = 10, pca = 0): 38 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 39 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 40 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 41 | nrof_thresholds = len(thresholds) 42 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 43 | 44 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 45 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 46 | accuracy = np.zeros((nrof_folds)) 47 | best_thresholds = np.zeros((nrof_folds)) 48 | indices = np.arange(nrof_pairs) 49 | # print('pca', pca) 50 | 51 | if pca == 0: 52 | diff = np.subtract(embeddings1, embeddings2) 53 | dist = np.sum(np.square(diff), 1) 54 | # dist = pdist(np.vstack([embeddings1, embeddings2]), 'cosine') 55 | 56 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 57 | # print('train_set', train_set) 58 | # print('test_set', test_set) 59 | if pca > 0: 60 | print("doing pca on", fold_idx) 61 | embed1_train = embeddings1[train_set] 62 | embed2_train = embeddings2[train_set] 63 | _embed_train = np.concatenate((embed1_train, embed2_train), axis = 0) 64 | # print(_embed_train.shape) 65 | pca_model = PCA(n_components = pca) 66 | pca_model.fit(_embed_train) 67 | embed1 = pca_model.transform(embeddings1) 68 | embed2 = pca_model.transform(embeddings2) 69 | embed1 = sklearn.preprocessing.normalize(embed1) 70 | embed2 = sklearn.preprocessing.normalize(embed2) 71 | # print(embed1.shape, embed2.shape) 72 | diff = np.subtract(embed1, embed2) 73 | dist 
= np.sum(np.square(diff), 1) 74 | 75 | # Find the best threshold for the fold 76 | acc_train = np.zeros((nrof_thresholds)) 77 | for threshold_idx, threshold in enumerate(thresholds): 78 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 79 | best_threshold_index = np.argmax(acc_train) 80 | # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index]) 81 | best_thresholds[fold_idx] = thresholds[best_threshold_index] 82 | for threshold_idx, threshold in enumerate(thresholds): 83 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 84 | dist[test_set], 85 | actual_issame[ 86 | test_set]) 87 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 88 | 89 | tpr = np.mean(tprs, 0) 90 | fpr = np.mean(fprs, 0) 91 | return tpr, fpr, accuracy, best_thresholds 92 | 93 | 94 | def calculate_accuracy(threshold, dist, actual_issame): 95 | predict_issame = np.less(dist, threshold) 96 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 97 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 98 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 99 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 100 | 101 | tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) 102 | fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) 103 | acc = float(tp + tn) / dist.size 104 | return tpr, fpr, acc 105 | 106 | 107 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds = 10): 108 | ''' 109 | Copy from [insightface](https://github.com/deepinsight/insightface) 110 | :param thresholds: 111 | :param embeddings1: 112 | :param embeddings2: 113 | :param actual_issame: 114 | :param far_target: 115 | :param nrof_folds: 116 | :return: 117 | ''' 118 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 119 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 120 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 121 | nrof_thresholds = len(thresholds) 122 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 123 | 124 | val = np.zeros(nrof_folds) 125 | far = np.zeros(nrof_folds) 126 | 127 | diff = np.subtract(embeddings1, embeddings2) 128 | dist = np.sum(np.square(diff), 1) 129 | indices = np.arange(nrof_pairs) 130 | 131 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 132 | 133 | # Find the threshold that gives FAR = far_target 134 | far_train = np.zeros(nrof_thresholds) 135 | for threshold_idx, threshold in enumerate(thresholds): 136 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 137 | if np.max(far_train) >= far_target: 138 | f = interpolate.interp1d(far_train, thresholds, kind = 'slinear') 139 | threshold = f(far_target) 140 | else: 141 | threshold = 0.0 142 | 143 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 144 | 145 | val_mean = np.mean(val) 146 | far_mean = np.mean(far) 147 | val_std = np.std(val) 148 | return val_mean, val_std, far_mean 149 | 150 | 151 | def calculate_val_far(threshold, dist, actual_issame): 152 | predict_issame = np.less(dist, threshold) 153 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 154 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 155 | n_same 
= np.sum(actual_issame) 156 | n_diff = np.sum(np.logical_not(actual_issame)) 157 | val = float(true_accept) / float(n_same) 158 | far = float(false_accept) / float(n_diff) 159 | return val, far 160 | 161 | 162 | def evaluate(embeddings, actual_issame, nrof_folds = 10, pca = 0): 163 | # Calculate evaluation metrics 164 | thresholds = np.arange(0, 4, 0.01) 165 | embeddings1 = embeddings[0::2] 166 | embeddings2 = embeddings[1::2] 167 | tpr, fpr, accuracy, best_thresholds = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), nrof_folds = nrof_folds, pca = pca) 168 | # thresholds = np.arange(0, 4, 0.001) 169 | # val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, 170 | # np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) 171 | # return tpr, fpr, accuracy, best_thresholds, val, val_std, far 172 | return tpr, fpr, accuracy, best_thresholds 173 | -------------------------------------------------------------------------------- /align/matlab_cp2tform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv, norm, lstsq 3 | from numpy.linalg import matrix_rank as rank 4 | 5 | 6 | class MatlabCp2tormException(Exception): 7 | def __str__(self): 8 | return "In File {}:{}".format( 9 | __file__, super.__str__(self)) 10 | 11 | def tformfwd(trans, uv): 12 | """ 13 | Function: 14 | ---------- 15 | apply affine transform 'trans' to uv 16 | 17 | Parameters: 18 | ---------- 19 | @trans: 3x3 np.array 20 | transform matrix 21 | @uv: Kx2 np.array 22 | each row is a pair of coordinates (x, y) 23 | 24 | Returns: 25 | ---------- 26 | @xy: Kx2 np.array 27 | each row is a pair of transformed coordinates (x, y) 28 | """ 29 | uv = np.hstack(( 30 | uv, np.ones((uv.shape[0], 1)) 31 | )) 32 | xy = np.dot(uv, trans) 33 | xy = xy[:, 0:-1] 34 | return xy 35 | 36 | 37 | def tforminv(trans, uv): 38 | """ 39 | Function: 40 | ---------- 41 | apply the inverse of affine transform 'trans' to uv 42 | 43 | Parameters: 44 | ---------- 45 | @trans: 3x3 np.array 46 | transform matrix 47 | @uv: Kx2 np.array 48 | each row is a pair of coordinates (x, y) 49 | 50 | Returns: 51 | ---------- 52 | @xy: Kx2 np.array 53 | each row is a pair of inverse-transformed coordinates (x, y) 54 | """ 55 | Tinv = inv(trans) 56 | xy = tformfwd(Tinv, uv) 57 | return xy 58 | 59 | 60 | def findNonreflectiveSimilarity(uv, xy, options=None): 61 | 62 | options = {'K': 2} 63 | 64 | K = options['K'] 65 | M = xy.shape[0] 66 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 67 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 68 | # print('--->x, y:\n', x, y 69 | 70 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 71 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 72 | X = np.vstack((tmp1, tmp2)) 73 | # print('--->X.shape: ', X.shape 74 | # print('X:\n', X 75 | 76 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 77 | v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 78 | U = np.vstack((u, v)) 79 | # print('--->U.shape: ', U.shape 80 | # print('U:\n', U 81 | 82 | # We know that X * r = U 83 | if rank(X) >= 2 * K: 84 | r, _, _, _ = lstsq(X, U) 85 | r = np.squeeze(r) 86 | else: 87 | raise Exception("cp2tform: two Unique Points Req") 88 | 89 | # print('--->r:\n', r 90 | 91 | sc = r[0] 92 | ss = r[1] 93 | tx = r[2] 94 | ty = r[3] 95 | 96 | Tinv = np.array([ 97 | [sc, -ss, 0], 98 | [ss, sc, 0], 99 | [tx, ty, 1] 100 | ]) 101 | 102 | # 
print('--->Tinv:\n', Tinv 103 | 104 | T = inv(Tinv) 105 | # print('--->T:\n', T 106 | 107 | T[:, 2] = np.array([0, 0, 1]) 108 | 109 | return T, Tinv 110 | 111 | 112 | def findSimilarity(uv, xy, options=None): 113 | 114 | options = {'K': 2} 115 | 116 | # uv = np.array(uv) 117 | # xy = np.array(xy) 118 | 119 | # Solve for trans1 120 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 121 | 122 | # Solve for trans2 123 | 124 | # manually reflect the xy data across the Y-axis 125 | xyR = xy 126 | xyR[:, 0] = -1 * xyR[:, 0] 127 | 128 | trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) 129 | 130 | # manually reflect the tform to undo the reflection done on xyR 131 | TreflectY = np.array([ 132 | [-1, 0, 0], 133 | [0, 1, 0], 134 | [0, 0, 1] 135 | ]) 136 | 137 | trans2 = np.dot(trans2r, TreflectY) 138 | 139 | # Figure out if trans1 or trans2 is better 140 | xy1 = tformfwd(trans1, uv) 141 | norm1 = norm(xy1 - xy) 142 | 143 | xy2 = tformfwd(trans2, uv) 144 | norm2 = norm(xy2 - xy) 145 | 146 | if norm1 <= norm2: 147 | return trans1, trans1_inv 148 | else: 149 | trans2_inv = inv(trans2) 150 | return trans2, trans2_inv 151 | 152 | 153 | def get_similarity_transform(src_pts, dst_pts, reflective = True): 154 | """ 155 | Function: 156 | ---------- 157 | Find Similarity Transform Matrix 'trans': 158 | u = src_pts[:, 0] 159 | v = src_pts[:, 1] 160 | x = dst_pts[:, 0] 161 | y = dst_pts[:, 1] 162 | [x, y, 1] = [u, v, 1] * trans 163 | 164 | Parameters: 165 | ---------- 166 | @src_pts: Kx2 np.array 167 | source points, each row is a pair of coordinates (x, y) 168 | @dst_pts: Kx2 np.array 169 | destination points, each row is a pair of transformed 170 | coordinates (x, y) 171 | @reflective: True or False 172 | if True: 173 | use reflective similarity transform 174 | else: 175 | use non-reflective similarity transform 176 | 177 | Returns: 178 | ---------- 179 | @trans: 3x3 np.array 180 | transform matrix from uv to xy 181 | trans_inv: 3x3 np.array 182 | inverse of trans, transform matrix from xy to uv 183 | """ 184 | 185 | if reflective: 186 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 187 | else: 188 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 189 | 190 | return trans, trans_inv 191 | 192 | 193 | def cvt_tform_mat_for_cv2(trans): 194 | """ 195 | Function: 196 | ---------- 197 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 198 | directly used by cv2.warpAffine(): 199 | u = src_pts[:, 0] 200 | v = src_pts[:, 1] 201 | x = dst_pts[:, 0] 202 | y = dst_pts[:, 1] 203 | [x, y].T = cv_trans * [u, v, 1].T 204 | 205 | Parameters: 206 | ---------- 207 | @trans: 3x3 np.array 208 | transform matrix from uv to xy 209 | 210 | Returns: 211 | ---------- 212 | @cv2_trans: 2x3 np.array 213 | transform matrix from src_pts to dst_pts, could be directly used 214 | for cv2.warpAffine() 215 | """ 216 | cv2_trans = trans[:, 0:2].T 217 | 218 | return cv2_trans 219 | 220 | 221 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective = True): 222 | """ 223 | Function: 224 | ---------- 225 | Find Similarity Transform Matrix 'cv2_trans' which could be 226 | directly used by cv2.warpAffine(): 227 | u = src_pts[:, 0] 228 | v = src_pts[:, 1] 229 | x = dst_pts[:, 0] 230 | y = dst_pts[:, 1] 231 | [x, y].T = cv_trans * [u, v, 1].T 232 | 233 | Parameters: 234 | ---------- 235 | @src_pts: Kx2 np.array 236 | source points, each row is a pair of coordinates (x, y) 237 | @dst_pts: Kx2 np.array 238 | destination points, each row is a pair of transformed 239 | 
coordinates (x, y) 240 | reflective: True or False 241 | if True: 242 | use reflective similarity transform 243 | else: 244 | use non-reflective similarity transform 245 | 246 | Returns: 247 | ---------- 248 | @cv2_trans: 2x3 np.array 249 | transform matrix from src_pts to dst_pts, could be directly used 250 | for cv2.warpAffine() 251 | """ 252 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 253 | cv2_trans = cvt_tform_mat_for_cv2(trans) 254 | 255 | return cv2_trans 256 | 257 | 258 | if __name__ == '__main__': 259 | """ 260 | u = [0, 6, -2] 261 | v = [0, 3, 5] 262 | x = [-1, 0, 4] 263 | y = [-1, -10, 4] 264 | 265 | # In Matlab, run: 266 | # 267 | # uv = [u'; v']; 268 | # xy = [x'; y']; 269 | # tform_sim=cp2tform(uv,xy,'similarity'); 270 | # 271 | # trans = tform_sim.tdata.T 272 | # ans = 273 | # -0.0764 -1.6190 0 274 | # 1.6190 -0.0764 0 275 | # -3.2156 0.0290 1.0000 276 | # trans_inv = tform_sim.tdata.Tinv 277 | # ans = 278 | # 279 | # -0.0291 0.6163 0 280 | # -0.6163 -0.0291 0 281 | # -0.0756 1.9826 1.0000 282 | # xy_m=tformfwd(tform_sim, u,v) 283 | # 284 | # xy_m = 285 | # 286 | # -3.2156 0.0290 287 | # 1.1833 -9.9143 288 | # 5.0323 2.8853 289 | # uv_m=tforminv(tform_sim, x,y) 290 | # 291 | # uv_m = 292 | # 293 | # 0.5698 1.3953 294 | # 6.0872 2.2733 295 | # -2.6570 4.3314 296 | """ 297 | u = [0, 6, -2] 298 | v = [0, 3, 5] 299 | x = [-1, 0, 4] 300 | y = [-1, -10, 4] 301 | 302 | uv = np.array((u, v)).T 303 | xy = np.array((x, y)).T 304 | 305 | print("\n--->uv:") 306 | print(uv) 307 | print("\n--->xy:") 308 | print(xy) 309 | 310 | trans, trans_inv = get_similarity_transform(uv, xy) 311 | 312 | print("\n--->trans matrix:") 313 | print(trans) 314 | 315 | print("\n--->trans_inv matrix:") 316 | print(trans_inv) 317 | 318 | print("\n---> apply transform to uv") 319 | print("\nxy_m = uv_augmented * trans") 320 | uv_aug = np.hstack(( 321 | uv, np.ones((uv.shape[0], 1)) 322 | )) 323 | xy_m = np.dot(uv_aug, trans) 324 | print(xy_m) 325 | 326 | print("\nxy_m = tformfwd(trans, uv)") 327 | xy_m = tformfwd(trans, uv) 328 | print(xy_m) 329 | 330 | print("\n---> apply inverse transform to xy") 331 | print("\nuv_m = xy_augmented * trans_inv") 332 | xy_aug = np.hstack(( 333 | xy, np.ones((xy.shape[0], 1)) 334 | )) 335 | uv_m = np.dot(xy_aug, trans_inv) 336 | print(uv_m) 337 | 338 | print("\nuv_m = tformfwd(trans_inv, xy)") 339 | uv_m = tformfwd(trans_inv, xy) 340 | print(uv_m) 341 | 342 | uv_m = tforminv(trans, xy) 343 | print("\nuv_m = tforminv(trans, xy)") 344 | print(uv_m) -------------------------------------------------------------------------------- /backbone/model_irse.py: -------------------------------------------------------------------------------- 1 | from math import lgamma 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \ 5 | AdaptiveAvgPool2d, Sequential, Module 6 | from collections import namedtuple 7 | 8 | from torch.nn.modules.flatten import Flatten 9 | 10 | 11 | # Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152', \ 12 | # 'IR_SE_64_DUL(for DUL)'] 13 | 14 | 15 | class DUL_Backbone(nn.Module): 16 | def __init__(self, resnet): 17 | super(DUL_Backbone, self).__init__() 18 | 19 | self.features = nn.Sequential( 20 | resnet.input_layer, 21 | resnet.body, 22 | Sequential(BatchNorm2d(512), 23 | Dropout(), 24 | Flatten(), 25 | ) 26 | ) 27 | self.mu_dul_backbone = nn.Sequential( 28 | Linear(512 * 7 * 7, 512), 29 | BatchNorm1d(512), 30 | ) 31 
| self.logvar_dul_backbone = nn.Sequential( 32 | Linear(512 * 7 * 7, 512), 33 | BatchNorm1d(512), 34 | ) 35 | 36 | def forward(self, img): 37 | x = self.features(img) 38 | mu_dul = self.mu_dul_backbone(x) 39 | logvar_dul = self.logvar_dul_backbone(x) 40 | std_dul = (logvar_dul * 0.5).exp() 41 | # std_dul should be restricted between (0, 1) from the original paper definition. However, it doesn't say how to implement. 42 | # You could simply clamp it or use zoom, sigmoid, softplus, etc. 43 | std_dul = torch.clamp(std_dul, min=1e-8, max=1.0) 44 | return mu_dul, std_dul 45 | 46 | 47 | 48 | class Flatten(Module): 49 | def forward(self, input): 50 | return input.view(input.size(0), -1) 51 | 52 | 53 | def l2_norm(input, axis=1): 54 | norm = torch.norm(input, 2, axis, True) 55 | output = torch.div(input, norm) 56 | 57 | return output 58 | 59 | 60 | class SEModule(Module): 61 | def __init__(self, channels, reduction): 62 | super(SEModule, self).__init__() 63 | self.avg_pool = AdaptiveAvgPool2d(1) 64 | self.fc1 = Conv2d( 65 | channels, channels // reduction, kernel_size=1, padding=0, bias=False) 66 | 67 | nn.init.xavier_uniform_(self.fc1.weight.data) 68 | 69 | self.relu = ReLU(inplace=True) 70 | self.fc2 = Conv2d( 71 | channels // reduction, channels, kernel_size=1, padding=0, bias=False) 72 | 73 | self.sigmoid = Sigmoid() 74 | 75 | def forward(self, x): 76 | module_input = x 77 | x = self.avg_pool(x) 78 | x = self.fc1(x) 79 | x = self.relu(x) 80 | x = self.fc2(x) 81 | x = self.sigmoid(x) 82 | 83 | return module_input * x 84 | 85 | 86 | class bottleneck_IR(Module): 87 | def __init__(self, in_channel, depth, stride): 88 | super(bottleneck_IR, self).__init__() 89 | if in_channel == depth: 90 | self.shortcut_layer = MaxPool2d(1, stride) 91 | else: 92 | self.shortcut_layer = Sequential( 93 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth)) 94 | self.res_layer = Sequential( 95 | BatchNorm2d(in_channel), 96 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), 97 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)) 98 | 99 | def forward(self, x): 100 | shortcut = self.shortcut_layer(x) 101 | res = self.res_layer(x) 102 | 103 | return res + shortcut 104 | 105 | 106 | class bottleneck_IR_SE(Module): 107 | def __init__(self, in_channel, depth, stride): 108 | super(bottleneck_IR_SE, self).__init__() 109 | if in_channel == depth: 110 | self.shortcut_layer = MaxPool2d(1, stride) 111 | else: 112 | self.shortcut_layer = Sequential( 113 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 114 | BatchNorm2d(depth)) 115 | self.res_layer = Sequential( 116 | BatchNorm2d(in_channel), 117 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), 118 | PReLU(depth), 119 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), 120 | BatchNorm2d(depth), 121 | SEModule(depth, 16) 122 | ) 123 | 124 | def forward(self, x): 125 | shortcut = self.shortcut_layer(x) 126 | res = self.res_layer(x) 127 | 128 | return res + shortcut 129 | 130 | 131 | class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 132 | '''A named tuple describing a ResNet block.''' 133 | 134 | 135 | def get_block(in_channel, depth, num_units, stride=2): 136 | 137 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] 138 | 139 | 140 | def get_blocks(num_layers): 141 | if num_layers == 50: 142 | blocks = [ 143 | get_block(in_channel=64, depth=64, num_units=3), 144 | get_block(in_channel=64, depth=128, num_units=4), 
145 | get_block(in_channel=128, depth=256, num_units=14), 146 | get_block(in_channel=256, depth=512, num_units=3) 147 | ] 148 | elif num_layers == 64: 149 | blocks = [ 150 | get_block(in_channel=64, depth=64, num_units=3), 151 | get_block(in_channel=64, depth=128, num_units=8), 152 | get_block(in_channel=128, depth=256, num_units=16), 153 | get_block(in_channel=256, depth=512, num_units=3) 154 | ] 155 | elif num_layers == 100: 156 | blocks = [ 157 | get_block(in_channel=64, depth=64, num_units=3), 158 | get_block(in_channel=64, depth=128, num_units=13), 159 | get_block(in_channel=128, depth=256, num_units=30), 160 | get_block(in_channel=256, depth=512, num_units=3) 161 | ] 162 | elif num_layers == 152: 163 | blocks = [ 164 | get_block(in_channel=64, depth=64, num_units=3), 165 | get_block(in_channel=64, depth=128, num_units=8), 166 | get_block(in_channel=128, depth=256, num_units=36), 167 | get_block(in_channel=256, depth=512, num_units=3) 168 | ] 169 | 170 | return blocks 171 | 172 | 173 | class Backbone(Module): 174 | def __init__(self, input_size, num_layers, mode='ir'): 175 | super(Backbone, self).__init__() 176 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 177 | assert num_layers in [50, 64, 100, 152], "num_layers should be 50, 64, 100 or 152" 178 | assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se" 179 | blocks = get_blocks(num_layers) 180 | if mode == 'ir': 181 | unit_module = bottleneck_IR 182 | elif mode == 'ir_se': 183 | unit_module = bottleneck_IR_SE 184 | self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), 185 | BatchNorm2d(64), 186 | PReLU(64)) 187 | if input_size[0] == 112: 188 | self.output_layer = Sequential(BatchNorm2d(512), 189 | Dropout(), 190 | Flatten(), 191 | Linear(512 * 7 * 7, 512), 192 | BatchNorm1d(512)) 193 | else: 194 | self.output_layer = Sequential(BatchNorm2d(512), 195 | Dropout(), 196 | Flatten(), 197 | Linear(512 * 14 * 14, 512), 198 | BatchNorm1d(512)) 199 | 200 | modules = [] 201 | for block in blocks: 202 | for bottleneck in block: 203 | modules.append( 204 | unit_module(bottleneck.in_channel, 205 | bottleneck.depth, 206 | bottleneck.stride)) 207 | self.body = Sequential(*modules) 208 | 209 | self._initialize_weights() 210 | 211 | def forward(self, x): 212 | x = self.input_layer(x) 213 | x = self.body(x) 214 | x = self.output_layer(x) 215 | 216 | return x 217 | 218 | def _initialize_weights(self): 219 | for m in self.modules(): 220 | if isinstance(m, nn.Conv2d): 221 | nn.init.xavier_uniform_(m.weight.data) 222 | if m.bias is not None: 223 | m.bias.data.zero_() 224 | elif isinstance(m, nn.BatchNorm2d): 225 | m.weight.data.fill_(1) 226 | m.bias.data.zero_() 227 | elif isinstance(m, nn.BatchNorm1d): 228 | m.weight.data.fill_(1) 229 | m.bias.data.zero_() 230 | elif isinstance(m, nn.Linear): 231 | nn.init.xavier_uniform_(m.weight.data) 232 | if m.bias is not None: 233 | m.bias.data.zero_() 234 | 235 | 236 | def IR_50(input_size): 237 | """Constructs a ir-50 model. 238 | """ 239 | model = Backbone(input_size, 50, 'ir') 240 | 241 | return model 242 | 243 | 244 | def IR_101(input_size): 245 | """Constructs a ir-101 model. 246 | """ 247 | model = Backbone(input_size, 100, 'ir') 248 | 249 | return model 250 | 251 | 252 | def IR_152(input_size): 253 | """Constructs a ir-152 model. 254 | """ 255 | model = Backbone(input_size, 152, 'ir') 256 | 257 | return model 258 | 259 | 260 | def IR_SE_50(input_size): 261 | """Constructs a ir_se-50 model. 
262 | """ 263 | model = Backbone(input_size, 50, 'ir_se') 264 | 265 | return model 266 | 267 | 268 | def IR_SE_64_DUL(input_size): 269 | """Construct an ir_se_64_dul model for DUL. --> namely, base on resnet_se_64 270 | """ 271 | model = Backbone(input_size, 64, mode='ir_se') 272 | model_dul = DUL_Backbone(model) 273 | 274 | return model_dul 275 | 276 | 277 | def IR_SE_101(input_size): 278 | """Constructs a ir_se-101 model. 279 | """ 280 | model = Backbone(input_size, 100, 'ir_se') 281 | 282 | return model 283 | 284 | 285 | def IR_SE_152(input_size): 286 | """Constructs a ir_se-152 model. 287 | """ 288 | model = Backbone(input_size, 152, 'ir_se') 289 | 290 | return model 291 | -------------------------------------------------------------------------------- /align/align_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from matlab_cp2tform import get_similarity_transform_for_cv2 4 | 5 | 6 | # reference facial points, a list of coordinates (x,y) 7 | REFERENCE_FACIAL_POINTS = [ # default reference facial points for crop_size = (112, 112); should adjust REFERENCE_FACIAL_POINTS accordingly for other crop_size 8 | [30.29459953, 51.69630051], 9 | [65.53179932, 51.50139999], 10 | [48.02519989, 71.73660278], 11 | [33.54930115, 92.3655014], 12 | [62.72990036, 92.20410156] 13 | ] 14 | 15 | DEFAULT_CROP_SIZE = (96, 112) 16 | 17 | 18 | class FaceWarpException(Exception): 19 | def __str__(self): 20 | return 'In File {}:{}'.format( 21 | __file__, super.__str__(self)) 22 | 23 | 24 | def get_reference_facial_points(output_size = None, 25 | inner_padding_factor = 0.0, 26 | outer_padding=(0, 0), 27 | default_square = False): 28 | """ 29 | Function: 30 | ---------- 31 | get reference 5 key points according to crop settings: 32 | 0. Set default crop_size: 33 | if default_square: 34 | crop_size = (112, 112) 35 | else: 36 | crop_size = (96, 112) 37 | 1. Pad the crop_size by inner_padding_factor in each side; 38 | 2. Resize crop_size into (output_size - outer_padding*2), 39 | pad into output_size with outer_padding; 40 | 3. Output reference_5point; 41 | Parameters: 42 | ---------- 43 | @output_size: (w, h) or None 44 | size of aligned face image 45 | @inner_padding_factor: (w_factor, h_factor) 46 | padding factor for inner (w, h) 47 | @outer_padding: (w_pad, h_pad) 48 | each row is a pair of coordinates (x, y) 49 | @default_square: True or False 50 | if True: 51 | default crop_size = (112, 112) 52 | else: 53 | default crop_size = (96, 112); 54 | !!! 
make sure, if output_size is not None: 55 | (output_size - outer_padding) 56 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 57 | Returns: 58 | ---------- 59 | @reference_5point: 5x2 np.array 60 | each row is a pair of transformed coordinates (x, y) 61 | """ 62 | #print('\n===> get_reference_facial_points():') 63 | 64 | #print('---> Params:') 65 | #print(' output_size: ', output_size) 66 | #print(' inner_padding_factor: ', inner_padding_factor) 67 | #print(' outer_padding:', outer_padding) 68 | #print(' default_square: ', default_square) 69 | 70 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 71 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 72 | 73 | # 0) make the inner region a square 74 | if default_square: 75 | size_diff = max(tmp_crop_size) - tmp_crop_size 76 | tmp_5pts += size_diff / 2 77 | tmp_crop_size += size_diff 78 | 79 | #print('---> default:') 80 | #print(' crop_size = ', tmp_crop_size) 81 | #print(' reference_5pts = ', tmp_5pts) 82 | 83 | if (output_size and 84 | output_size[0] == tmp_crop_size[0] and 85 | output_size[1] == tmp_crop_size[1]): 86 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 87 | return tmp_5pts 88 | 89 | if (inner_padding_factor == 0 and 90 | outer_padding == (0, 0)): 91 | if output_size is None: 92 | #print('No paddings to do: return default reference points') 93 | return tmp_5pts 94 | else: 95 | raise FaceWarpException( 96 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 97 | 98 | # check output size 99 | if not (0 <= inner_padding_factor <= 1.0): 100 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 101 | 102 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 103 | and output_size is None): 104 | output_size = tmp_crop_size * \ 105 | (1 + inner_padding_factor * 2).astype(np.int32) 106 | output_size += np.array(outer_padding) 107 | #print(' deduced from paddings, output_size = ', output_size) 108 | 109 | if not (outer_padding[0] < output_size[0] 110 | and outer_padding[1] < output_size[1]): 111 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 112 | 'and outer_padding[1] < output_size[1])') 113 | 114 | # 1) pad the inner region according inner_padding_factor 115 | #print('---> STEP1: pad the inner region according inner_padding_factor') 116 | if inner_padding_factor > 0: 117 | size_diff = tmp_crop_size * inner_padding_factor * 2 118 | tmp_5pts += size_diff / 2 119 | tmp_crop_size += np.round(size_diff).astype(np.int32) 120 | 121 | #print(' crop_size = ', tmp_crop_size) 122 | #print(' reference_5pts = ', tmp_5pts) 123 | 124 | # 2) resize the padded inner region 125 | #print('---> STEP2: resize the padded inner region') 126 | size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 127 | #print(' crop_size = ', tmp_crop_size) 128 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 129 | 130 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 131 | raise FaceWarpException('Must have (output_size - outer_padding)' 132 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 133 | 134 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 135 | #print(' resize scale_factor = ', scale_factor) 136 | tmp_5pts = tmp_5pts * scale_factor 137 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 138 | # tmp_5pts = tmp_5pts + size_diff / 2 139 | tmp_crop_size = size_bf_outer_pad 140 | #print(' crop_size = ', 
tmp_crop_size) 141 | #print(' reference_5pts = ', tmp_5pts) 142 | 143 | # 3) add outer_padding to make output_size 144 | reference_5point = tmp_5pts + np.array(outer_padding) 145 | tmp_crop_size = output_size 146 | #print('---> STEP3: add outer_padding to make output_size') 147 | #print(' crop_size = ', tmp_crop_size) 148 | #print(' reference_5pts = ', tmp_5pts) 149 | 150 | #print('===> end get_reference_facial_points\n') 151 | 152 | return reference_5point 153 | 154 | 155 | def get_affine_transform_matrix(src_pts, dst_pts): 156 | """ 157 | Function: 158 | ---------- 159 | get affine transform matrix 'tfm' from src_pts to dst_pts 160 | Parameters: 161 | ---------- 162 | @src_pts: Kx2 np.array 163 | source points matrix, each row is a pair of coordinates (x, y) 164 | @dst_pts: Kx2 np.array 165 | destination points matrix, each row is a pair of coordinates (x, y) 166 | Returns: 167 | ---------- 168 | @tfm: 2x3 np.array 169 | transform matrix from src_pts to dst_pts 170 | """ 171 | 172 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 173 | n_pts = src_pts.shape[0] 174 | ones = np.ones((n_pts, 1), src_pts.dtype) 175 | src_pts_ = np.hstack([src_pts, ones]) 176 | dst_pts_ = np.hstack([dst_pts, ones]) 177 | 178 | # #print(('src_pts_:\n' + str(src_pts_)) 179 | # #print(('dst_pts_:\n' + str(dst_pts_)) 180 | 181 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 182 | 183 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 184 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 185 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 186 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 187 | 188 | if rank == 3: 189 | tfm = np.float32([ 190 | [A[0, 0], A[1, 0], A[2, 0]], 191 | [A[0, 1], A[1, 1], A[2, 1]] 192 | ]) 193 | elif rank == 2: 194 | tfm = np.float32([ 195 | [A[0, 0], A[1, 0], 0], 196 | [A[0, 1], A[1, 1], 0] 197 | ]) 198 | 199 | return tfm 200 | 201 | 202 | def warp_and_crop_face(src_img, 203 | facial_pts, 204 | reference_pts = None, 205 | crop_size=(96, 112), 206 | align_type = 'smilarity'): 207 | """ 208 | Function: 209 | ---------- 210 | apply affine transform 'trans' to uv 211 | Parameters: 212 | ---------- 213 | @src_img: 3x3 np.array 214 | input image 215 | @facial_pts: could be 216 | 1)a list of K coordinates (x,y) 217 | or 218 | 2) Kx2 or 2xK np.array 219 | each row or col is a pair of coordinates (x, y) 220 | @reference_pts: could be 221 | 1) a list of K coordinates (x,y) 222 | or 223 | 2) Kx2 or 2xK np.array 224 | each row or col is a pair of coordinates (x, y) 225 | or 226 | 3) None 227 | if None, use default reference facial points 228 | @crop_size: (w, h) 229 | output face image size 230 | @align_type: transform type, could be one of 231 | 1) 'similarity': use similarity transform 232 | 2) 'cv2_affine': use the first 3 points to do affine transform, 233 | by calling cv2.getAffineTransform() 234 | 3) 'affine': use all points to do affine transform 235 | Returns: 236 | ---------- 237 | @face_img: output face image with size (w, h) = @crop_size 238 | """ 239 | 240 | if reference_pts is None: 241 | if crop_size[0] == 96 and crop_size[1] == 112: 242 | reference_pts = REFERENCE_FACIAL_POINTS 243 | else: 244 | default_square = False 245 | inner_padding_factor = 0 246 | outer_padding = (0, 0) 247 | output_size = crop_size 248 | 249 | reference_pts = get_reference_facial_points(output_size, 250 | inner_padding_factor, 251 | outer_padding, 252 | default_square) 253 | 254 | ref_pts = np.float32(reference_pts) 255 | ref_pts_shp = ref_pts.shape 256 | if max(ref_pts_shp) < 3 or 
min(ref_pts_shp) != 2: 257 | raise FaceWarpException( 258 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 259 | 260 | if ref_pts_shp[0] == 2: 261 | ref_pts = ref_pts.T 262 | 263 | src_pts = np.float32(facial_pts) 264 | src_pts_shp = src_pts.shape 265 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 266 | raise FaceWarpException( 267 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 268 | 269 | if src_pts_shp[0] == 2: 270 | src_pts = src_pts.T 271 | 272 | # #print('--->src_pts:\n', src_pts 273 | # #print('--->ref_pts\n', ref_pts 274 | 275 | if src_pts.shape != ref_pts.shape: 276 | raise FaceWarpException( 277 | 'facial_pts and reference_pts must have the same shape') 278 | 279 | if align_type is 'cv2_affine': 280 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 281 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 282 | elif align_type is 'affine': 283 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 284 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 285 | else: 286 | tfm = get_similarity_transform_for_cv2(src_pts, ref_pts) 287 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 288 | 289 | # #print('--->Transform matrix: ' 290 | # #print(('type(tfm):' + str(type(tfm))) 291 | # #print(('tfm.dtype:' + str(tfm.dtype)) 292 | # #print( tfm 293 | 294 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 295 | 296 | return face_img -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch._C import device 3 | import torchvision.transforms as transforms 4 | import torch.nn.functional as F 5 | 6 | from .verification import evaluate 7 | 8 | from datetime import datetime 9 | import matplotlib.pyplot as plt 10 | plt.switch_backend('agg') 11 | import numpy as np 12 | from PIL import Image 13 | import bcolz 14 | import io 15 | import os 16 | import random 17 | import cv2 18 | 19 | 20 | # Support: ['get_time', 'l2_norm', 'make_weights_for_balanced_classes', 'get_val_pair', 'get_val_data', \ 21 | # 'separate_irse_bn_paras', 'separate_resnet_bn_paras', 'warm_up_lr', 'schedule_lr', 'de_preprocess', \ 22 | # 'hflip_batch', 'ccrop_batch', 'gen_plot', 'perform_val', 'buffer_val', 'AverageMeter', 'accuracy', \ 23 | # 'add_gaussian_noise', 'get_data_pair', 'perform_face_recog'] 24 | 25 | 26 | def get_time(): 27 | return (str(datetime.now())[:-10]).replace(' ', '-').replace(':', '-') 28 | 29 | 30 | def l2_norm(input, axis = 1): 31 | norm = torch.norm(input, 2, axis, True) 32 | output = torch.div(input, norm) 33 | 34 | return output 35 | 36 | 37 | def make_weights_for_balanced_classes(images, nclasses): 38 | ''' 39 | Make a vector of weights for each image in the dataset, based 40 | on class frequency. The returned vector of weights can be used 41 | to create a WeightedRandomSampler for a DataLoader to have 42 | class balancing when sampling for a training batch. 43 | images - torchvisionDataset.imgs 44 | nclasses - len(torchvisionDataset.classes) 45 | https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3 46 | ''' 47 | count = [0] * nclasses 48 | for item in images: 49 | count[item[1]] += 1 # item is (img-data, label-id) 50 | weight_per_class = [0.] 
* nclasses 51 | N = float(sum(count)) # total number of images 52 | for i in range(nclasses): 53 | weight_per_class[i] = N / float(count[i]) 54 | weight = [0] * len(images) 55 | for idx, val in enumerate(images): 56 | weight[idx] = weight_per_class[val[1]] 57 | 58 | return weight 59 | 60 | 61 | def get_val_pair(path, name): 62 | carray = bcolz.carray(rootdir = os.path.join(path, name), mode = 'r') 63 | issame = np.load('{}/{}_list.npy'.format(path, name)) 64 | 65 | return carray, issame 66 | 67 | 68 | def get_val_data(data_path): 69 | lfw, lfw_issame = get_val_pair(data_path, 'lfw') 70 | cfp_ff, cfp_ff_issame = get_val_pair(data_path, 'cfp_ff') 71 | cfp_fp, cfp_fp_issame = get_val_pair(data_path, 'cfp_fp') 72 | agedb_30, agedb_30_issame = get_val_pair(data_path, 'agedb_30') 73 | calfw, calfw_issame = get_val_pair(data_path, 'calfw') 74 | cplfw, cplfw_issame = get_val_pair(data_path, 'cplfw') 75 | vgg2_fp, vgg2_fp_issame = get_val_pair(data_path, 'vgg2_fp') 76 | 77 | return lfw, cfp_ff, cfp_fp, agedb_30, calfw, cplfw, vgg2_fp, lfw_issame, cfp_ff_issame, cfp_fp_issame, agedb_30_issame, calfw_issame, cplfw_issame, vgg2_fp_issame 78 | 79 | 80 | def separate_irse_bn_paras(modules): 81 | if not isinstance(modules, list): 82 | modules = [*modules.modules()] 83 | paras_only_bn = [] 84 | paras_wo_bn = [] 85 | for layer in modules: 86 | if 'model' in str(layer.__class__): 87 | continue 88 | if 'container' in str(layer.__class__): 89 | continue 90 | else: 91 | if 'batchnorm' in str(layer.__class__): 92 | paras_only_bn.extend([*layer.parameters()]) 93 | else: 94 | paras_wo_bn.extend([*layer.parameters()]) 95 | 96 | return paras_only_bn, paras_wo_bn 97 | 98 | 99 | def separate_resnet_bn_paras(modules): 100 | all_parameters = modules.parameters() 101 | paras_only_bn = [] 102 | 103 | for pname, p in modules.named_parameters(): 104 | if pname.find('bn') >= 0: 105 | paras_only_bn.append(p) 106 | 107 | paras_only_bn_id = list(map(id, paras_only_bn)) 108 | paras_wo_bn = list(filter(lambda p: id(p) not in paras_only_bn_id, all_parameters)) 109 | 110 | return paras_only_bn, paras_wo_bn 111 | 112 | 113 | def warm_up_lr(batch, num_batch_warm_up, init_lr, optimizer): 114 | for params in optimizer.param_groups: 115 | params['lr'] = batch * init_lr / num_batch_warm_up 116 | 117 | # print(optimizer) 118 | 119 | 120 | def schedule_lr(optimizer): 121 | for params in optimizer.param_groups: 122 | params['lr'] /= 10. 
123 | 124 | print(optimizer) 125 | 126 | 127 | def de_preprocess(tensor): 128 | 129 | return tensor * 0.5 + 0.5 130 | 131 | 132 | hflip = transforms.Compose([ 133 | de_preprocess, 134 | transforms.ToPILImage(), 135 | transforms.functional.hflip, 136 | transforms.ToTensor(), 137 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 138 | ]) 139 | 140 | 141 | def hflip_batch(imgs_tensor): 142 | hfliped_imgs = torch.empty_like(imgs_tensor) 143 | for i, img_ten in enumerate(imgs_tensor): 144 | hfliped_imgs[i] = hflip(img_ten) 145 | 146 | return hfliped_imgs 147 | 148 | 149 | ccrop = transforms.Compose([ 150 | de_preprocess, 151 | transforms.ToPILImage(), 152 | transforms.Resize([128, 128]), # smaller side resized 153 | transforms.CenterCrop([112, 112]), 154 | transforms.ToTensor(), 155 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 156 | ]) 157 | 158 | 159 | def ccrop_batch(imgs_tensor): 160 | ccropped_imgs = torch.empty_like(imgs_tensor) 161 | for i, img_ten in enumerate(imgs_tensor): 162 | ccropped_imgs[i] = ccrop(img_ten) 163 | 164 | return ccropped_imgs 165 | 166 | 167 | def gen_plot(fpr, tpr): 168 | """Create a pyplot plot and save to buffer.""" 169 | plt.figure() 170 | plt.xlabel("FPR", fontsize = 14) 171 | plt.ylabel("TPR", fontsize = 14) 172 | plt.title("ROC Curve", fontsize = 14) 173 | plot = plt.plot(fpr, tpr, linewidth = 2) 174 | buf = io.BytesIO() 175 | plt.savefig(buf, format = 'jpeg') 176 | buf.seek(0) 177 | plt.close() 178 | 179 | return buf 180 | 181 | 182 | def perform_val(multi_gpu, device, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True): 183 | if multi_gpu: 184 | backbone = backbone.module # unpackage model from DataParallel 185 | backbone = backbone.to(device) 186 | else: 187 | backbone = backbone.to(device) 188 | backbone.eval() # switch to evaluation mode 189 | 190 | idx = 0 191 | embeddings = np.zeros([len(carray), embedding_size]) 192 | with torch.no_grad(): 193 | while idx + batch_size <= len(carray): 194 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 195 | if tta: 196 | ccropped = ccrop_batch(batch) 197 | fliped = hflip_batch(ccropped) 198 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 199 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 200 | else: 201 | ccropped = ccrop_batch(batch) 202 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.to(device))).cpu() 203 | idx += batch_size 204 | if idx < len(carray): 205 | batch = torch.tensor(carray[idx:]) 206 | if tta: 207 | ccropped = ccrop_batch(batch) 208 | fliped = hflip_batch(ccropped) 209 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 210 | embeddings[idx:] = l2_norm(emb_batch) 211 | else: 212 | ccropped = ccrop_batch(batch) 213 | embeddings[idx:] = l2_norm(backbone(ccropped.to(device))).cpu() 214 | 215 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 216 | buf = gen_plot(fpr, tpr) 217 | roc_curve = Image.open(buf) 218 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 219 | 220 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 221 | 222 | 223 | def buffer_val(writer, db_name, acc, best_threshold, roc_curve_tensor, epoch): 224 | writer.add_scalar('{}_Accuracy'.format(db_name), acc, epoch) 225 | writer.add_scalar('{}_Best_Threshold'.format(db_name), best_threshold, epoch) 226 | writer.add_image('{}_ROC_Curve'.format(db_name), roc_curve_tensor, epoch) 227 | 228 | 229 | class AverageMeter(object): 230 | 
"""Computes and stores the average and current value""" 231 | def __init__(self): 232 | self.reset() 233 | 234 | def reset(self): 235 | self.val = 0 236 | self.avg = 0 237 | self.sum = 0 238 | self.count = 0 239 | 240 | def update(self, val, n = 1): 241 | self.val = val 242 | self.sum += val * n 243 | self.count += n 244 | self.avg = self.sum / self.count 245 | 246 | 247 | def accuracy(output, target, topk=(1,)): 248 | """Computes the precision@k for the specified values of k""" 249 | maxk = max(topk) 250 | batch_size = target.size(0) 251 | 252 | _, pred = output.topk(maxk, 1, True, True) 253 | pred = pred.t() 254 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 255 | 256 | res = [] 257 | for k in topk: 258 | correct_k = correct[:k].reshape(-1).float().sum(0) 259 | res.append(correct_k.mul_(100.0 / batch_size)) 260 | 261 | return res 262 | 263 | # ----- self definition 264 | 265 | class add_gaussian_noise(object): 266 | def __init__(self, mean=0.0, var=30, p=0.0): 267 | self.mean = mean 268 | self.var = var 269 | self.p = p 270 | def __call__(self, img): 271 | if random.uniform(0, 1) < self.p: 272 | std = self.var**0.5 273 | image_array = np.array(img) 274 | noisy_img = image_array + np.random.normal(self.mean, std, image_array.shape) 275 | noisy_img_clipped = np.clip(noisy_img, 0, 255).astype(np.uint8) 276 | return Image.fromarray(noisy_img_clipped) 277 | else: 278 | return img 279 | 280 | 281 | def get_data_pair(path, name): 282 | carray = bcolz.carray(rootdir = os.path.join(path, name), mode = 'r') 283 | issame = np.load('{}/{}_list.npy'.format(path, name)) 284 | 285 | return carray, issame 286 | 287 | 288 | def perform_face_recog(multi_gpu, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True, outfolder=''): 289 | if outfolder: 290 | os.makedirs(outfolder, exist_ok=True) 291 | if multi_gpu: 292 | backbone = backbone.module # unpackage model from DataParallel 293 | backbone = backbone.cuda() 294 | else: 295 | backbone = backbone.cuda() 296 | backbone.eval() # switch to evaluation mode 297 | idx = 0 298 | embeddings = np.zeros([len(carray), embedding_size]) 299 | with torch.no_grad(): 300 | while idx + batch_size <= len(carray): 301 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 302 | if tta: 303 | ccropped = ccrop_batch(batch) 304 | fliped = hflip_batch(ccropped) 305 | # emb_batch = backbone(ccropped.cuda()).cpu() + backbone(fliped.cuda()).cpu() 306 | # backbone return image_features and logvar, use image_features only here; besides, convert to cpu-tensor, then to numpy 307 | emb_batch = backbone(ccropped.cuda())[0].cpu() + backbone(fliped.cuda())[0].cpu() 308 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 309 | else: 310 | ccropped = ccrop_batch(batch) 311 | # embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.cuda())).cpu() 312 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.cuda())[0].cpu()) 313 | idx += batch_size 314 | # print('idx: %d'%idx) 315 | if idx < len(carray): 316 | batch = torch.tensor(carray[idx:]) 317 | if tta: 318 | ccropped = ccrop_batch(batch) 319 | fliped = hflip_batch(ccropped) 320 | # emb_batch = backbone(ccropped.cuda()).cpu() + backbone(fliped.cuda()).cpu() 321 | emb_batch = backbone(ccropped.cuda())[0].cpu() + backbone(fliped.cuda())[0].cpu() 322 | embeddings[idx:] = l2_norm(emb_batch) 323 | else: 324 | ccropped = ccrop_batch(batch) 325 | # embeddings[idx:] = l2_norm(backbone(ccropped.cuda())).cpu() 326 | embeddings[idx:] = l2_norm(backbone(ccropped.cuda())[0].cpu()) 
327 | 328 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 329 | buf = gen_plot(fpr, tpr) 330 | roc_curve = Image.open(buf) 331 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 332 | 333 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 334 | -------------------------------------------------------------------------------- /train_dul.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from torch.optim.optimizer import Optimizer 6 | import torchvision.transforms as transforms 7 | import torchvision.datasets as datasets 8 | 9 | from config import Backbone_Dict, dul_args_func 10 | from head.metrics import ArcFace, CosFace, SphereFace, Am_softmax, Softmax 11 | from loss.focal import FocalLoss 12 | from util.utils import make_weights_for_balanced_classes, separate_irse_bn_paras, \ 13 | warm_up_lr, schedule_lr, get_time, AverageMeter, accuracy, add_gaussian_noise 14 | 15 | from tensorboardX import SummaryWriter, writer 16 | import os 17 | import time 18 | import numpy as np 19 | from PIL import Image 20 | import random 21 | 22 | 23 | class DUL_Trainer(): 24 | def __init__(self, dul_args): 25 | self.dul_args = dul_args 26 | self.dul_args.gpu_id = [int(item) for item in self.dul_args.gpu_id] 27 | self.dul_args.stages = [int(item) for item in self.dul_args.stages] 28 | 29 | def _report_configurations(self): 30 | print('=' * 60) 31 | print('Experiment time: ', get_time()) 32 | print('=' * 60) 33 | print('Overall Configurations:') 34 | print('=' * 60) 35 | for k in self.dul_args.__dict__: 36 | print(" '{}' : '{}' ".format(k, str(self.dul_args.__dict__[k]))) 37 | os.makedirs(self.dul_args.model_save_folder, exist_ok=True) 38 | os.makedirs(self.dul_args.log_tensorboard, exist_ok=True) 39 | writer = SummaryWriter(self.dul_args.log_tensorboard) 40 | return writer 41 | 42 | 43 | def _data_loader(self): 44 | if self.dul_args.center_crop: 45 | train_transform = transforms.Compose([ 46 | transforms.Resize([int(128 * self.dul_args.input_size[0] / 112), int(128 * self.dul_args.input_size[0] / 112)]), 47 | transforms.RandomCrop([self.dul_args.input_size[0], self.dul_args.input_size[1]]), 48 | transforms.RandomHorizontalFlip(), 49 | add_gaussian_noise(p=self.dul_args.image_noise), 50 | transforms.ToTensor(), 51 | transforms.Normalize(mean = self.dul_args.rgb_mean, 52 | std = self.dul_args.rgb_std), 53 | #transforms.RandomErasing(scale=(0.02,0.25)) 54 | ]) 55 | else: 56 | train_transform = transforms.Compose([ # refer to https://pytorch.org/docs/stable/torchvision/transforms.html for more build-in online data augmentation 57 | transforms.Resize([112, 112]), # smaller side resized 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | transforms.Normalize(mean = self.dul_args.rgb_mean, 61 | std = self.dul_args.rgb_std), 62 | transforms.RandomErasing(scale=(0.02,0.25)) 63 | ]) 64 | 65 | dataset_train = datasets.ImageFolder(self.dul_args.trainset_folder, train_transform) 66 | 67 | # ----- create a weighted random sampler to process imbalanced data 68 | weights = make_weights_for_balanced_classes(dataset_train.imgs, len(dataset_train.classes)) 69 | weights = torch.DoubleTensor(weights) 70 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights)) 71 | 72 | train_loader = torch.utils.data.DataLoader( 73 | dataset_train, sampler=sampler, batch_size=self.dul_args.batch_size, 74 | 
pin_memory=self.dul_args.pin_memory, num_workers=self.dul_args.num_workers, 75 | drop_last=self.dul_args.drop_last, 76 | ) 77 | 78 | num_class = len(train_loader.dataset.classes) 79 | print('=' * 60) 80 | print("Number of Training Classes: '{}' ".format(num_class)) 81 | 82 | return train_loader, num_class 83 | 84 | 85 | def _model_loader(self, num_class): 86 | # ----- backbone generate 87 | BACKBONE = Backbone_Dict[self.dul_args.backbone_name] 88 | print("=" * 60) 89 | print("Backbone Generated: '{}' ".format(self.dul_args.backbone_name)) 90 | 91 | # ----- head generate 92 | Head_Dict = { 93 | 'ArcFace': ArcFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id, s=self.dul_args.arcface_scale), 94 | 'CosFace': CosFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 95 | 'SphereFace': SphereFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 96 | 'Am_softmax': Am_softmax(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 97 | 'Softmax': Softmax(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id) 98 | } 99 | HEAD = Head_Dict[self.dul_args.head_name] 100 | print("=" * 60) 101 | print("Head Generated: '{}' ".format(self.dul_args.head_name)) 102 | 103 | # ----- loss generate 104 | Loss_Dict = { 105 | 'Focal': FocalLoss(), 106 | 'Softmax': nn.CrossEntropyLoss() 107 | } 108 | LOSS = Loss_Dict[self.dul_args.loss_name] 109 | print("=" * 60) 110 | print("Loss Generated: '{}' ".format(self.dul_args.loss_name)) 111 | # ----- separate batch_norm parameters from others; do not do weight decay for batch_norm parameters to improve the generalizability 112 | backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(BACKBONE) 113 | _, head_paras_wo_bn = separate_irse_bn_paras(HEAD) 114 | 115 | # ----- optimizer generate 116 | Optimizer_Dict = { 117 | 'SGD': optim.SGD([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': self.dul_args.weight_decay}, 118 | {'params': backbone_paras_only_bn}], lr=self.dul_args.lr, momentum=self.dul_args.momentum), 119 | 'Adam': optim.Adam([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': self.dul_args.weight_decay}, 120 | {'params': backbone_paras_only_bn}], lr=self.dul_args.lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=0) 121 | } 122 | OPTIMIZER = Optimizer_Dict[self.dul_args.optimizer] 123 | print("=" * 60) 124 | print("Optimizer Generated: '{}' ".format(self.dul_args.optimizer)) 125 | print(OPTIMIZER) 126 | 127 | # ----- optional resume 128 | if self.dul_args.resume_backbone or self.dul_args.resume_head: 129 | print("=" * 60) 130 | if os.path.isfile(self.dul_args.resume_backbone): 131 | print("Loading Backbone Checkpoint '{}'".format(self.dul_args.resume_backbone)) 132 | BACKBONE.load_state_dict(torch.load(self.dul_args.resume_backbone)) 133 | if os.path.isfile(self.dul_args.resume_head): 134 | print("Loading Head Checkpoint '{}'".format(self.dul_args.resume_head)) 135 | try: 136 | HEAD.load_state_dict(torch.load(self.dul_args.resume_head)) 137 | except Exception as e: 138 | print(e) 139 | else: 140 | print("No Checkpoint Found at '{}' and '{}'. 
Please Have a Check or Continue to Train from Scratch".\ 141 | format(self.dul_args.resume_backbone, self.dul_args.resume_head)) 142 | 143 | # ----- multi-gpu or single-gpu 144 | if self.dul_args.multi_gpu: 145 | BACKBONE = nn.DataParallel(BACKBONE, device_ids=self.dul_args.gpu_id).cuda() 146 | HEAD = HEAD.cuda() 147 | LOSS = LOSS.cuda() 148 | else: 149 | BACKBONE = BACKBONE.cuda() 150 | HEAD = HEAD.cuda() 151 | LOSS = LOSS.cuda() 152 | 153 | return BACKBONE, HEAD, LOSS, OPTIMIZER 154 | 155 | 156 | 157 | def _dul_runner(self): 158 | writer = self._report_configurations() 159 | 160 | train_loader, num_class = self._data_loader() 161 | 162 | BACKBONE, HEAD, LOSS, OPTIMIZER = self._model_loader(num_class=num_class) 163 | 164 | DISP_FREQ = len(train_loader) // 100 # frequency to display training loss & acc 165 | 166 | NUM_EPOCH_WARM_UP = self.dul_args.warm_up_epoch 167 | NUM_BATCH_WARM_UP = int(len(train_loader) * NUM_EPOCH_WARM_UP) 168 | batch = 0 # batch index 169 | 170 | print('=' * 60) 171 | print("Display Freqency: '{}' ".format(DISP_FREQ)) 172 | print("Number of Epoch for Warm Up: '{}' ".format(NUM_EPOCH_WARM_UP)) 173 | print("Number of Batch for Warm Up: '{}' ".format(NUM_BATCH_WARM_UP)) 174 | print('Start Training: ') 175 | 176 | for epoch in range(self.dul_args.num_epoch): 177 | if epoch == self.dul_args.stages[0]: 178 | schedule_lr(OPTIMIZER) 179 | elif epoch == self.dul_args.stages[1]: 180 | schedule_lr(OPTIMIZER) 181 | if epoch < self.dul_args.resume_epoch: 182 | continue 183 | 184 | BACKBONE.train() # set to training mode 185 | HEAD.train() 186 | BACKBONE.training = True 187 | 188 | losses = AverageMeter() 189 | top1 = AverageMeter() 190 | top5 = AverageMeter() 191 | losses_KL = AverageMeter() 192 | 193 | for inputs, labels in train_loader: 194 | if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (batch + 1 <= NUM_BATCH_WARM_UP): # adjust LR for each training batch during warm up 195 | warm_up_lr(batch + 1, NUM_BATCH_WARM_UP, self.dul_args.lr, OPTIMIZER) 196 | 197 | inputs = inputs.cuda() 198 | labels = labels.cuda().long() 199 | loss = 0 200 | 201 | mu_dul, std_dul = BACKBONE(inputs) # namely, mean and std 202 | 203 | epsilon = torch.randn_like(std_dul) 204 | features = mu_dul + epsilon * std_dul 205 | variance_dul = std_dul**2 206 | 207 | # Not sure which one shoul be used, see this issue: https://github.com/MouxiaoHuang/DUL/issues/5 208 | # loss_kl = ((variance_dul + mu_dul**2 - torch.log(variance_dul) - 1) * 0.5).sum(dim=-1).mean() 209 | loss_kl = ((variance_dul + mu_dul ** 2 - torch.log(variance_dul + 1e-8) - 1) * 0.5).mean() 210 | losses_KL.update(loss_kl.item(), inputs.size(0)) 211 | loss += self.dul_args.kl_scale * loss_kl 212 | 213 | outputs = HEAD(features, labels) 214 | 215 | loss_head = LOSS(outputs, labels) 216 | 217 | loss += loss_head 218 | 219 | # measure accuracy and record loss 220 | prec1, prec5 = accuracy(outputs.data, labels, topk = (1, 5)) 221 | losses.update(loss_head.data.item(), inputs.size(0)) 222 | top1.update(prec1.data.item(), inputs.size(0)) 223 | top5.update(prec5.data.item(), inputs.size(0)) 224 | 225 | # compute gradient and do SGD step 226 | OPTIMIZER.zero_grad() 227 | loss.backward() 228 | OPTIMIZER.step() 229 | 230 | # dispaly training loss & acc every DISP_FREQ 231 | if ((batch + 1) % DISP_FREQ == 0) and batch != 0: 232 | print("=" * 60, flush=True) 233 | print('Epoch {}/{} Batch {}/{}\t' 234 | 'Time {}\t' 235 | 'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 236 | 'Training Loss_KL {loss_KL.val:.4f} ({loss_KL.avg:.4f})\t' 237 | 'Training Prec@1 
{top1.val:.3f} ({top1.avg:.3f})\t' 238 | 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 239 | epoch + 1, self.dul_args.num_epoch, batch + 1, len(train_loader) * self.dul_args.num_epoch, time.asctime(time.localtime(time.time())), loss = losses, loss_KL=losses_KL, top1 = top1, top5 = top5), flush=True) 240 | 241 | batch += 1 # batch index 242 | # training statistics per epoch (buffer for visualization) 243 | epoch_loss = losses.avg 244 | epoch_acc = top1.avg 245 | writer.add_scalar("Training_Loss", epoch_loss, epoch + 1) 246 | writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1) 247 | print("=" * 60, flush=True) 248 | print('Epoch: {}/{}\t' 249 | 'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 250 | 'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 251 | 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 252 | epoch + 1, self.dul_args.num_epoch, loss = losses, top1 = top1, top5 = top5), flush=True) 253 | 254 | # ----- save model 255 | if epoch==4 or epoch==7 or epoch==12 or epoch>17: 256 | print("=" * 60, flush=True) 257 | print('Saving NO.EPOCH {} trained model'.format(epoch+1), flush=True) 258 | if self.dul_args.multi_gpu: 259 | torch.save(BACKBONE.module.state_dict(), os.path.join(self.dul_args.model_save_folder, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.backbone_name, epoch + 1, batch, get_time()))) 260 | torch.save(HEAD.state_dict(), os.path.join(self.dul_args.model_save_folder, "Head_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.head_name, epoch + 1, batch, get_time()))) 261 | else: 262 | torch.save(BACKBONE.state_dict(), os.path.join(self.dul_args.model_save_folder, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.backbone_name, epoch + 1, batch, get_time()))) 263 | torch.save(HEAD.state_dict(), os.path.join(self.dul_args.model_save_folder, "Head_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.head_name, epoch + 1, batch, get_time()))) 264 | print('=' * 60, flush=True) 265 | print('Training process finished!', flush=True) 266 | print('=' * 60, flush=True) 267 | 268 | 269 | if __name__ == '__main__': 270 | dul_train = DUL_Trainer(dul_args_func()) 271 | dul_train._dul_runner() 272 | -------------------------------------------------------------------------------- /head/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn import Parameter 7 | import math 8 | 9 | 10 | # Support: ['Softmax', 'ArcFace', 'CosFace', 'SphereFace', 'Am_softmax'] 11 | 12 | class CircleLoss(nn.Module): 13 | def __init__(self, in_features, out_features, device_id, s = 256.0, m = 0.35): 14 | super(CircleLoss, self).__init__() 15 | self.in_features = in_features 16 | self.out_features = out_features 17 | self.device_id = device_id 18 | 19 | self.s = s 20 | self.O_p = 1 + m 21 | self.O_n = - m 22 | self.delta_p = 1 - m 23 | self.delta_n = m 24 | 25 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 26 | nn.init.xavier_uniform_(self.weight) 27 | 28 | def forward(self, input, label): 29 | # --------------------------- cos(theta) & phi(theta) --------------------------- 30 | if self.device_id == None: 31 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 32 | else: 33 | x = input 34 | sub_weights = torch.chunk(self.weight, len(self.device_id), 
dim=0) 35 | temp_x = x.cuda(self.device_id[0]) 36 | weight = sub_weights[0].cuda(self.device_id[0]) 37 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 38 | for i in range(1, len(self.device_id)): 39 | temp_x = x.cuda(self.device_id[i]) 40 | weight = sub_weights[i].cuda(self.device_id[i]) 41 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 42 | 43 | scores = cosine 44 | # --------------------------- convert label to one-hot --------------------------- 45 | one_hot = torch.zeros(cosine.size()) 46 | 47 | alpha_p = (self.O_p - scores.detach()).clamp(min=0.) 48 | alpha_n = (scores.detach() - self.O_n).clamp(min=0.) 49 | 50 | one_hot = torch.zeros(scores.size()) 51 | if self.device_id != None: 52 | one_hot = one_hot.cuda(self.device_id[0]) 53 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 54 | 55 | output = (one_hot * (alpha_p * (scores - self.delta_p)) + (1.0 - one_hot) * (alpha_n * (scores - self.delta_n))) 56 | output *= self.s 57 | return output 58 | 59 | 60 | class Softmax(nn.Module): 61 | r"""Implement of Softmax (normal classification head): 62 | Args: 63 | in_features: size of each input sample 64 | out_features: size of each output sample 65 | device_id: the ID of GPU where the model will be trained by model parallel. 66 | if device_id=None, it will be trained on CPU without model parallel. 67 | """ 68 | def __init__(self, in_features, out_features, device_id): 69 | super(Softmax, self).__init__() 70 | self.in_features = in_features 71 | self.out_features = out_features 72 | self.device_id = device_id 73 | 74 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 75 | self.bias = Parameter(torch.FloatTensor(out_features)) 76 | nn.init.xavier_uniform_(self.weight) 77 | nn.init.zeros_(self.bias) 78 | 79 | def forward(self, x, label): 80 | if self.device_id == None: 81 | out = F.linear(x, self.weight, self.bias) 82 | else: 83 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 84 | sub_biases = torch.chunk(self.bias, len(self.device_id), dim=0) 85 | temp_x = x.cuda(self.device_id[0]) 86 | weight = sub_weights[0].cuda(self.device_id[0]) 87 | bias = sub_biases[0].cuda(self.device_id[0]) 88 | out = F.linear(temp_x, weight, bias) 89 | for i in range(1, len(self.device_id)): 90 | temp_x = x.cuda(self.device_id[i]) 91 | weight = sub_weights[i].cuda(self.device_id[i]) 92 | bias = sub_biases[i].cuda(self.device_id[i]) 93 | out = torch.cat((out, F.linear(temp_x, weight, bias).cuda(self.device_id[0])), dim=1) 94 | return out 95 | 96 | def _initialize_weights(self): 97 | for m in self.modules(): 98 | if isinstance(m, nn.Conv2d): 99 | nn.init.xavier_uniform_(m.weight.data) 100 | if m.bias is not None: 101 | m.bias.data.zeros_() 102 | elif isinstance(m, nn.BatchNorm2d): 103 | m.weight.data.fill_(1) 104 | m.bias.data.zeros_() 105 | elif isinstance(m, nn.BatchNorm1d): 106 | m.weight.data.fill_(1) 107 | m.bias.data.zeros_() 108 | elif isinstance(m, nn.Linear): 109 | nn.init.xavier_uniform_(m.weight.data) 110 | if m.bias is not None: 111 | m.bias.data.zeros_() 112 | 113 | 114 | class ArcFace(nn.Module): 115 | r"""Implement of ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf): 116 | Args: 117 | in_features: size of each input sample 118 | out_features: size of each output sample 119 | device_id: the ID of GPU where the model will be trained by model parallel. 120 | if device_id=None, it will be trained on CPU without model parallel. 
121 | s: norm of input feature 122 | m: margin 123 | cos(theta+m) 124 | """ 125 | def __init__(self, in_features, out_features, device_id, s = 64.0, m = 0.50, easy_margin = False): 126 | super(ArcFace, self).__init__() 127 | self.in_features = in_features 128 | self.out_features = out_features 129 | self.device_id = device_id 130 | 131 | self.s = s 132 | self.m = m 133 | 134 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 135 | nn.init.xavier_uniform_(self.weight) 136 | 137 | self.easy_margin = easy_margin 138 | self.cos_m = math.cos(m) 139 | self.sin_m = math.sin(m) 140 | self.th = math.cos(math.pi - m) 141 | self.mm = math.sin(math.pi - m) * m 142 | 143 | def forward(self, input, label): 144 | # --------------------------- cos(theta) & phi(theta) --------------------------- 145 | if self.device_id == None: 146 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 147 | else: 148 | x = input 149 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 150 | temp_x = x.cuda(self.device_id[0]) 151 | weight = sub_weights[0].cuda(self.device_id[0]) 152 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 153 | for i in range(1, len(self.device_id)): 154 | temp_x = x.cuda(self.device_id[i]) 155 | weight = sub_weights[i].cuda(self.device_id[i]) 156 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 157 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 158 | phi = cosine * self.cos_m - sine * self.sin_m 159 | if self.easy_margin: 160 | phi = torch.where(cosine > 0, phi, cosine) 161 | else: 162 | phi = torch.where(cosine > self.th, phi, cosine - self.mm) 163 | # --------------------------- convert label to one-hot --------------------------- 164 | one_hot = torch.zeros(cosine.size()) 165 | if self.device_id != None: 166 | one_hot = one_hot.cuda(self.device_id[0]) 167 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 168 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 169 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4 170 | output *= self.s 171 | 172 | return output 173 | 174 | 175 | class CosFace(nn.Module): 176 | r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf): 177 | Args: 178 | in_features: size of each input sample 179 | out_features: size of each output sample 180 | device_id: the ID of GPU where the model will be trained by model parallel. 181 | if device_id=None, it will be trained on CPU without model parallel. 
182 | s: norm of input feature 183 | m: margin 184 | cos(theta)-m 185 | """ 186 | def __init__(self, in_features, out_features, device_id, s = 64.0, m = 0.35): 187 | super(CosFace, self).__init__() 188 | self.in_features = in_features 189 | self.out_features = out_features 190 | self.device_id = device_id 191 | self.s = s 192 | self.m = m 193 | 194 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 195 | nn.init.xavier_uniform_(self.weight) 196 | 197 | def forward(self, input, label): 198 | # --------------------------- cos(theta) & phi(theta) --------------------------- 199 | if self.device_id == None: 200 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 201 | else: 202 | x = input 203 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 204 | temp_x = x.cuda(self.device_id[0]) 205 | weight = sub_weights[0].cuda(self.device_id[0]) 206 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 207 | for i in range(1, len(self.device_id)): 208 | temp_x = x.cuda(self.device_id[i]) 209 | weight = sub_weights[i].cuda(self.device_id[i]) 210 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 211 | phi = cosine - self.m 212 | # --------------------------- convert label to one-hot --------------------------- 213 | one_hot = torch.zeros(cosine.size()) 214 | if self.device_id != None: 215 | one_hot = one_hot.cuda(self.device_id[0]) 216 | # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot 217 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 218 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 219 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4 220 | output *= self.s 221 | 222 | return output 223 | 224 | def __repr__(self): 225 | return self.__class__.__name__ + '(' \ 226 | + 'in_features = ' + str(self.in_features) \ 227 | + ', out_features = ' + str(self.out_features) \ 228 | + ', s = ' + str(self.s) \ 229 | + ', m = ' + str(self.m) + ')' 230 | 231 | class SphereFace(nn.Module): 232 | r"""Implement of SphereFace (https://arxiv.org/pdf/1704.08063.pdf): 233 | Args: 234 | in_features: size of each input sample 235 | out_features: size of each output sample 236 | device_id: the ID of GPU where the model will be trained by model parallel. 237 | if device_id=None, it will be trained on CPU without model parallel. 

class SphereFace(nn.Module):
    r"""Implementation of SphereFace (https://arxiv.org/pdf/1704.08063.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the list of GPU ids used for model-parallel training;
                   if device_id is None, the head runs on the CPU without model parallelism.
        m: margin
        cos(m * theta)
    """
    def __init__(self, in_features, out_features, device_id, m = 4):
        super(SphereFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.m = m
        self.base = 1000.0
        self.gamma = 0.12
        self.power = 1
        self.LambdaMin = 5.0
        self.iter = 0
        self.device_id = device_id

        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        # multiple-angle formulas for cos(m * theta) (Chebyshev polynomials)
        self.mlambda = [
            lambda x: x ** 0,
            lambda x: x ** 1,
            lambda x: 2 * x ** 2 - 1,
            lambda x: 4 * x ** 3 - 3 * x,
            lambda x: 8 * x ** 4 - 8 * x ** 2 + 1,
            lambda x: 16 * x ** 5 - 20 * x ** 3 + 5 * x
        ]

    def forward(self, input, label):
        # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
        self.iter += 1
        self.lamb = max(self.LambdaMin, self.base * (1 + self.gamma * self.iter) ** (-1 * self.power))

        # --------------------------- cos(theta) & phi(theta) ---------------------------
        if self.device_id is None:
            cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cos_theta = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cos_theta = torch.cat((cos_theta, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1)

        cos_theta = cos_theta.clamp(-1, 1)
        cos_m_theta = self.mlambda[self.m](cos_theta)
        theta = cos_theta.data.acos()
        k = (self.m * theta / 3.14159265).floor()
        phi_theta = ((-1.0) ** k) * cos_m_theta - 2 * k
        NormOfFeature = torch.norm(input, 2, 1)

        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cos_theta.size())
        if self.device_id is not None:
            one_hot = one_hot.cuda(self.device_id[0])
        one_hot.scatter_(1, label.view(-1, 1), 1)

        # --------------------------- calculate output ---------------------------
        output = (one_hot * (phi_theta - cos_theta) / (1 + self.lamb)) + cos_theta
        output *= NormOfFeature.view(-1, 1)

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', m = ' + str(self.m) + ')'


def l2_norm(input, axis = 1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class Am_softmax(nn.Module):
    r"""Implementation of Am_softmax (https://arxiv.org/pdf/1801.05599.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the list of GPU ids used for model-parallel training;
                   if device_id is None, the head runs on the CPU without model parallelism.
        m: margin
        s: scale of outputs
    """
    def __init__(self, in_features, out_features, device_id, m = 0.35, s = 30.0):
        super(Am_softmax, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.m = m
        self.s = s
        self.device_id = device_id

        self.kernel = Parameter(torch.Tensor(in_features, out_features))
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)  # initialize kernel

    def forward(self, embeddings, label):
        if self.device_id is None:
            kernel_norm = l2_norm(self.kernel, axis = 0)
            cos_theta = torch.mm(embeddings, kernel_norm)
        else:
            x = embeddings
            sub_kernels = torch.chunk(self.kernel, len(self.device_id), dim=1)
            temp_x = x.cuda(self.device_id[0])
            kernel_norm = l2_norm(sub_kernels[0], axis = 0).cuda(self.device_id[0])
            cos_theta = torch.mm(temp_x, kernel_norm)
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                kernel_norm = l2_norm(sub_kernels[i], axis = 0).cuda(self.device_id[i])
                cos_theta = torch.cat((cos_theta, torch.mm(temp_x, kernel_norm).cuda(self.device_id[0])), dim=1)

        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        phi = cos_theta - self.m
        label = label.view(-1, 1)           # size = (B, 1)
        index = cos_theta.data * 0.0        # size = (B, Classnum)
        index.scatter_(1, label.data.view(-1, 1), 1)
        index = index.bool()                # boolean mask of the ground-truth positions (.byte() indexing is deprecated)
        output = cos_theta * 1.0
        output[index] = phi[index]          # apply the margin only at the ground-truth class
        output *= self.s                    # scale up so that softmax works, first introduced in NormFace

        return output

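# SphereFace and Am_softmax used side by side (illustrative only; the 512-d embeddings and
# 10 identities are assumptions). Both heads take the same constructor arguments and return
# (batch, num_classes) logits, so they are drop-in alternatives from the caller's point of view.
def _sphereface_and_am_softmax_usage_sketch():
    feats = torch.randn(4, 512)
    labels = torch.randint(high=10, size=(4,))
    sphere_head = SphereFace(in_features=512, out_features=10, device_id=None)  # multiplicative angular margin
    am_head = Am_softmax(in_features=512, out_features=10, device_id=None)      # additive cosine margin
    sphere_logits = sphere_head(feats, labels)
    am_logits = am_head(feats, labels)
    assert sphere_logits.shape == am_logits.shape == (4, 10)
    return sphere_logits, am_logits
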

if __name__ == "__main__":
    feat = F.normalize(torch.rand(256, 64, requires_grad=True))
    lbl = torch.randint(high=10, size=(256,))

    inp_sp, inp_sn = convert_label_to_similarity(feat, lbl)

    criterion = CircleLoss(m=0.25, gamma=256)
    circle_loss = criterion(inp_sp, inp_sn)

    print(circle_loss)

--------------------------------------------------------------------------------