├── exp ├── Debug.sh ├── Debug_Test.sh ├── logs │ └── debug.log ├── logs_test │ ├── debug_test.log │ ├── testfr_ms1m_dul.log │ └── testfr_webface_dul.log ├── logtensorboard │ └── exp_webface_dul │ │ └── events.out.tfevents.1631298816.BJEGS01 ├── TestFR_ms1m_DUL.sh ├── TestFR_webface_DUL.sh ├── Exp_webface_DUL.sh └── Exp_ms1m_DUL.sh ├── head ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── metrics.cpython-38.pyc └── metrics.py ├── loss ├── __init__.py ├── __pycache__ │ ├── focal.cpython-38.pyc │ └── __init__.cpython-38.pyc └── focal.py ├── util ├── __init__.py ├── __pycache__ │ ├── utils.cpython-38.pyc │ ├── __init__.cpython-38.pyc │ └── verification.cpython-38.pyc ├── verification.py └── utils.py ├── align ├── __init__.py ├── onet.npy ├── pnet.npy ├── rnet.npy ├── visualization_utils.py ├── face_resize.py ├── face_align.py ├── first_stage.py ├── detector.py ├── get_nets.py ├── box_utils.py ├── matlab_cp2tform.py └── align_trans.py ├── backbone ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-38.pyc │ └── model_irse.cpython-38.pyc └── model_irse.py ├── .gitignore ├── __pycache__ └── config.cpython-38.pyc ├── requirements.txt ├── shutdown.py ├── test_fr_dul.py ├── README.md ├── config.py └── train_dul.py /exp/Debug.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/Debug_Test.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/logs/debug.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exp/logs_test/debug_test.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pth 2 | .DS_Store 3 | **/.DS_Store 4 | .DS_Store? 
5 | -------------------------------------------------------------------------------- /align/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/onet.npy -------------------------------------------------------------------------------- /align/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/pnet.npy -------------------------------------------------------------------------------- /align/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/align/rnet.npy -------------------------------------------------------------------------------- /__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /loss/__pycache__/focal.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/loss/__pycache__/focal.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /head/__pycache__/metrics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/head/__pycache__/metrics.cpython-38.pyc -------------------------------------------------------------------------------- /loss/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/loss/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/backbone/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /util/__pycache__/verification.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/util/__pycache__/verification.cpython-38.pyc -------------------------------------------------------------------------------- 
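The three `align/*.npy` files referenced above hold the pretrained MTCNN weights that `align/get_nets.py` loads into PNet, RNet and ONet. As a quick sanity check of a downloaded copy, the sketch below is only an illustration (not part of the repo); it assumes each file stores a pickled dict mapping parameter names to weight arrays, which is exactly how `get_nets.py` consumes them:

```python
import numpy as np

# each .npy file stores a pickled dict {parameter_name: weight_array},
# keyed by the names returned by net.named_parameters() in get_nets.py
for fname in ["align/pnet.npy", "align/rnet.npy", "align/onet.npy"]:
    weights = np.load(fname, allow_pickle=True)[()]  # [()] unwraps the 0-d object array
    print(fname, "->", len(weights), "parameter tensors")
    for name, arr in weights.items():
        print("   {}: {}".format(name, arr.shape))
```

`PNet.__init__`, `RNet.__init__` and `ONet.__init__` perform the same `np.load(..., allow_pickle=True)[()]` call and copy each entry into the matching parameter's `p.data`.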
/backbone/__pycache__/model_irse.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/backbone/__pycache__/model_irse.cpython-38.pyc -------------------------------------------------------------------------------- /exp/logtensorboard/exp_webface_dul/events.out.tfevents.1631298816.BJEGS01: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MouxiaoHuang/DUL/HEAD/exp/logtensorboard/exp_webface_dul/events.out.tfevents.1631298816.BJEGS01 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | matplotlib==3.4.2 3 | torchvision==0.8.0 4 | bcolz==1.2.1 5 | tqdm==4.62.0 6 | opencv_python==4.5.3.56 7 | torch==1.7.0 8 | scipy==1.6.2 9 | Pillow==8.3.2 10 | scikit_learn==0.24.2 11 | tensorboardX==2.4 12 | -------------------------------------------------------------------------------- /exp/TestFR_ms1m_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=3 2 | 3 | logs_test_file='./logs_test/testfr_ms1m_dul.log' 4 | 5 | model_for_test='' 6 | 7 | python ../test_fr_dul.py \ 8 | --model_for_test $model_for_test \ 9 | >> $logs_test_file 2>&1 & 10 | -------------------------------------------------------------------------------- /exp/TestFR_webface_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=3 2 | 3 | logs_test_file='./logs_test/testfr_webface_dul.log' 4 | 5 | model_for_test='' 6 | 7 | python ../test_fr_dul.py \ 8 | --model_for_test $model_for_test \ 9 | >> $logs_test_file 2>&1 & 10 | -------------------------------------------------------------------------------- /shutdown.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='shut down process by kill command') 7 | 8 | parser.add_argument('--key', type=str, default='') 9 | 10 | args = parser.parse_args() 11 | 12 | os.system('ps -ef | grep ' + args.key + ' | grep -v grep | cut -c 9-16 | xargs kill -9') -------------------------------------------------------------------------------- /loss/focal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Support: ['FocalLoss'] 6 | 7 | 8 | class FocalLoss(nn.Module): 9 | def __init__(self, gamma = 2, eps = 1e-7): 10 | super(FocalLoss, self).__init__() 11 | self.gamma = gamma 12 | self.eps = eps 13 | self.ce = nn.CrossEntropyLoss() 14 | 15 | def forward(self, input, target): 16 | logp = self.ce(input, target) 17 | p = torch.exp(-logp) 18 | loss = (1 - p) ** self.gamma * logp 19 | return loss.mean() 20 | -------------------------------------------------------------------------------- /exp/Exp_webface_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=2,3 2 | 3 | model_save_folder='./checkpoints/exp_webface_dul/' 4 | log_tensorboard='./logtensorboard/exp_webface_dul/' 5 | logs_file='./logs/exp_webface_dul.log' 6 | 7 | # notice: default kl_scale is 0.01 in DUL (base on original paper) 8 | python ../train_dul.py \ 9 | --model_save_folder $model_save_folder \ 10 | 
--log_tensorboard $log_tensorboard \ 11 | --logs $logs_file \ 12 | --gpu_id 0 1 \ 13 | --stages 10 18 \ 14 | --kl_scale 0.01 \ 15 | >> $logs_file 2>&1 & 16 | -------------------------------------------------------------------------------- /exp/Exp_ms1m_DUL.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=4,5,6,7 2 | 3 | model_save_folder='./checkpoints/exp_ms1m_dul/' 4 | log_tensorboard='./logtensorboard/exp_ms1m_dul/' 5 | logs_file='./logs/exp_ms1m_dul.log' 6 | trainset_folder='/home/admin/workspace/fuling/data/face_recog/ms1m/imgs/' 7 | 8 | # notice: default kl_scale is 0.01 in DUL (base on original paper) 9 | python ../train_dul.py \ 10 | --model_save_folder $model_save_folder \ 11 | --log_tensorboard $log_tensorboard \ 12 | --logs $logs_file \ 13 | --gpu_id 0 1 2 3 \ 14 | --stages 10 18 \ 15 | --kl_scale 0.01 \ 16 | --batch_size 1024 \ 17 | --trainset_folder $trainset_folder \ 18 | >> $logs_file 2>&1 & 19 | -------------------------------------------------------------------------------- /exp/logs_test/testfr_ms1m_dul.log: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Model for testing Face Recognition performance is: 3 | './exp/checkpoints/exp_ms1m_dul/Backbone_IR_SE_64_DUL_Epoch_22_Batch_125092_Time_2021-09-12-15-57_checkpoint.pth' 4 | ============================================================ 5 | Face Recognition Performance on different dataset is as shown below: 6 | ============================================================ 7 | LFW : 0.9974999999999999 8 | CFP_FF : 0.9968571428571428 9 | CFP_FP : 0.9841428571428571 10 | AGEDB_30 : 0.9801666666666667 11 | CALFW : 0.9594999999999999 12 | CPLFW : 0.9296666666666666 13 | VGG2_FP : 0.954 14 | ============================================================ 15 | Testing finished! 16 | ============================================================ 17 | -------------------------------------------------------------------------------- /exp/logs_test/testfr_webface_dul.log: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Model for testing Face Recognition performance is: 3 | './exp/checkpoints/exp_webface_dul/Backbone_IR_SE_64_DUL_Epoch_22_Batch_19558_Time_2021-09-11-01-39_checkpoint.pth' 4 | ============================================================ 5 | Face Recognition Performance on different dataset is as shown below: 6 | ============================================================ 7 | LFW : 0.9941666666666666 8 | CFP_FF : 0.9922857142857142 9 | CFP_FP : 0.9652857142857142 10 | AGEDB_30 : 0.9393333333333335 11 | CALFW : 0.9348333333333333 12 | CPLFW : 0.8959999999999999 13 | VGG2_FP : 0.9376000000000001 14 | ============================================================ 15 | Testing finished! 16 | ============================================================ 17 | -------------------------------------------------------------------------------- /align/visualization_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | 4 | def show_results(img, bounding_boxes, facial_landmarks = []): 5 | """Draw bounding boxes and facial landmarks. 6 | Arguments: 7 | img: an instance of PIL.Image. 8 | bounding_boxes: a float numpy array of shape [n, 5]. 9 | facial_landmarks: a float numpy array of shape [n, 10]. 
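(columns 0-4 hold the x coordinates and columns 5-9 the y coordinates of the five landmark points, which is why the drawing loop below pairs p[i] with p[i + 5])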
10 | Returns: 11 | an instance of PIL.Image. 12 | """ 13 | img_copy = img.copy() 14 | draw = ImageDraw.Draw(img_copy) 15 | 16 | for b in bounding_boxes: 17 | draw.rectangle([ 18 | (b[0], b[1]), (b[2], b[3]) 19 | ], outline = 'white') 20 | 21 | inx = 0 22 | for p in facial_landmarks: 23 | for i in range(5): 24 | draw.ellipse([ 25 | (p[i] - 1.0, p[i + 5] - 1.0), 26 | (p[i] + 1.0, p[i + 5] + 1.0) 27 | ], outline = 'blue') 28 | 29 | return img_copy -------------------------------------------------------------------------------- /test_fr_dul.py: -------------------------------------------------------------------------------- 1 | # test face recognition performance of dul model 2 | import torch 3 | import os 4 | 5 | from config import dul_args_func, Backbone_Dict, Test_FR_Data_Dict 6 | from util.utils import get_data_pair, perform_face_recog 7 | 8 | 9 | class DUL_FR_Tester(): 10 | def __init__(self, dul_args) -> None: 11 | self.dul_args = dul_args 12 | self.dul_args.multi_gpu = False 13 | 14 | def face_recog(self): 15 | BACKBONE = Backbone_Dict[self.dul_args.backbone_name] 16 | if os.path.isfile(self.dul_args.model_for_test): 17 | print('=' * 60, flush=True) 18 | print("Model for testing Face Recognition performance is:\n '{}' ".format(self.dul_args.model_for_test), flush=True) 19 | BACKBONE.load_state_dict(torch.load(self.dul_args.model_for_test)) 20 | BACKBONE = BACKBONE.cuda().eval() 21 | else: 22 | print('=' * 60, flush=True) 23 | print('No model found for testing!', flush=True) 24 | print('=' * 60, flush=True) 25 | return 26 | print('=' * 60, flush=True) 27 | print('Face Recognition Performance on different dataset is as shown below:', flush=True) 28 | print('=' * 60, flush=True) 29 | for value in Test_FR_Data_Dict.values(): 30 | testdata, testdata_issame = get_data_pair(self.dul_args.testset_fr_folder, value) 31 | accuracy, best_threshold, roc_curve = perform_face_recog(self.dul_args.multi_gpu, self.dul_args.embedding_size, 32 | self.dul_args.batch_size, BACKBONE, testdata, testdata_issame) 33 | print(value.upper(), ': ', accuracy, flush=True) 34 | print('=' * 60, flush=True) 35 | print('Testing finished!', flush=True) 36 | print('=' * 60, flush=True) 37 | 38 | 39 | if __name__ == '__main__': 40 | dul_fr_test = DUL_FR_Tester(dul_args_func()) 41 | dul_fr_test.face_recog() 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Implementation of DUL (PyTorch version) 2 | 3 | #### Introduction 4 | 5 | --- 6 | 7 | This repo is an ***unofficial*** PyTorch implementation of DUL ([Data Uncertainty Learning in Face Recognition, CVPR2020](https://arxiv.org/abs/2003.11339)). 8 | 9 | NOTE: 10 | 11 | 1. *SE-Resnet64 is used as the default backbone in this repo*; you can define others in `./backbone/model_irse.py` 12 | 2. *Training (process)* & *Testing (results)* logs are saved in `./exp/logs/` & `./exp/logs_test/` 13 | 3.
*Implementation details are not exactly the same as in the original paper*; see `./config.py` for the settings used here 14 | 15 | 16 | 17 | #### Getting Started 18 | 19 | --- 20 | 21 | - Star this repo, plz 22 | 23 | 😊 24 | 25 | - Clone this repo 26 | 27 | ``` 28 | git clone https://github.com/MouxiaoHuang/DUL.git 29 | ``` 30 | 31 | - Prepare env 32 | 33 | ```bash 34 | conda create --name <env_name> python=3.8 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | - Prepare trainset and testset 39 | - Trainset: [Casia WebFace or MS-Celeb-1M](https://github.com/ZhaoJ9014/face.evoLVe) 40 | - Testset: [LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW, VGG2_FP](https://github.com/ZhaoJ9014/face.evoLVe) 41 | - Training 42 | 43 | ```bash 44 | sh ./exp/Exp_webface_DUL.sh 45 | # or 46 | sh ./exp/Exp_ms1m_DUL.sh 47 | ``` 48 | 49 | - Testing 50 | 51 | ```bash 52 | sh ./exp/TestFR_webface_DUL.sh 53 | # or 54 | sh ./exp/TestFR_ms1m_DUL.sh 55 | ``` 56 | 57 | 58 | 59 | #### Results Report 60 | 61 | --- 62 | 63 | - Trainset: Casia Webface 64 | 65 | | | LFW | CFP_FF | CFP_FP | AgeDB | CALFW | CPLFW | VGG2_FP | 66 | | :------------: | :---: | :----: | :----: | :---: | :---: | :---: | :-----: | 67 | | Original paper | - | - | - | - | - | - | - | 68 | | This repo | 99.42 | 99.23 | 96.53 | 93.93 | 93.48 | 89.60 | 93.76 | 69 | 70 | - Trainset: MS-Celeb-1M 71 | 72 | | | LFW | CFP_FF | CFP_FP | AgeDB | CALFW | CPLFW | VGG2_FP | 73 | | :-----------------------: | :---: | :----: | :----: | :---: | :---: | :---: | :-----: | 74 | | Original paper (ResNet64) | 99.78 | - | 98.67 | - | - | - | - | 75 | | This repo | 99.75 | 99.69 | 98.41 | 98.02 | 95.95 | 92.97 | 95.40 | 76 | | | | | | | | | | 77 | 78 | 79 | 80 | #### Thanks & Refs 81 | 82 | --- 83 | 84 | - [ZhaoJ9014/face.evoLVe](https://github.com/ZhaoJ9014/face.evoLVe) 85 | - [Ontheway361/dul-pytorch](https://github.com/Ontheway361/dul-pytorch) 86 | 87 | -------------------------------------------------------------------------------- /align/face_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from tqdm import tqdm 4 | 5 | 6 | def mkdir(path): 7 | if not os.path.exists(path): 8 | os.mkdir(path) 9 | 10 | 11 | def process_image(img): 12 | 13 | size = img.shape 14 | h, w = size[0], size[1] 15 | scale = max(w, h) / float(min_side) 16 | new_w, new_h = int(w / scale), int(h / scale) 17 | resize_img = cv2.resize(img, (new_w, new_h)) 18 | if new_w % 2 != 0 and new_h % 2 == 0: 19 | top, bottom, left, right = (min_side - new_h) // 2, (min_side - new_h) // 2, (min_side - new_w) // 2 + 1, ( 20 | min_side - new_w) // 2 21 | elif new_h % 2 != 0 and new_w % 2 == 0: 22 | top, bottom, left, right = (min_side - new_h) // 2 + 1, (min_side - new_h) // 2, (min_side - new_w) // 2, ( 23 | min_side - new_w) // 2 24 | elif new_h % 2 == 0 and new_w % 2 == 0: 25 | top, bottom, left, right = (min_side - new_h) // 2, (min_side - new_h) // 2, (min_side - new_w) // 2, ( 26 | min_side - new_w) // 2 27 | else: 28 | top, bottom, left, right = (min_side - new_h) // 2 + 1, (min_side - new_h) // 2, (min_side - new_w) // 2 + 1, ( 29 | min_side - new_w) // 2 30 | pad_img = cv2.copyMakeBorder(resize_img, top, bottom, left, right, cv2.BORDER_CONSTANT, 31 | value=[0, 0, 0]) 32 | 33 | return pad_img 34 | 35 | 36 | def main(source_root): 37 | 38 | dest_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M_Resized" 39 | mkdir(dest_root) 40 | cwd = os.getcwd() # delete '.DS_Store' existed in the source_root 41 | os.chdir(source_root) 42 | os.system("find . 
-name '*.DS_Store' -type f -delete") 43 | os.chdir(cwd) 44 | 45 | if not os.path.isdir(dest_root): 46 | os.mkdir(dest_root) 47 | 48 | for subfolder in tqdm(os.listdir(source_root)): 49 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 50 | os.mkdir(os.path.join(dest_root, subfolder)) 51 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 52 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 53 | img = cv2.imread(os.path.join(source_root, subfolder, image_name)) 54 | if type(img) == type(None): 55 | print("damaged image %s, del it" % (img)) 56 | os.remove(img) 57 | continue 58 | size = img.shape 59 | h, w = size[0], size[1] 60 | if max(w, h) > 512: 61 | img_pad = process_image(img) 62 | else: 63 | img_pad = img 64 | cv2.imwrite(os.path.join(dest_root, subfolder, image_name.split('.')[0] + '.jpg'), img_pad) 65 | 66 | 67 | if __name__ == "__main__": 68 | min_side = 512 69 | main(source_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M/database/base") -------------------------------------------------------------------------------- /align/face_align.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from detector import detect_faces 3 | from align_trans import get_reference_facial_points, warp_and_crop_face 4 | import numpy as np 5 | import os 6 | from tqdm import tqdm 7 | import argparse 8 | 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser(description = "face alignment") 12 | parser.add_argument("-source_root", "--source_root", help = "specify your source dir", default = "./data/test", type = str) 13 | parser.add_argument("-dest_root", "--dest_root", help = "specify your destination dir", default = "./data/test_Aligned", type = str) 14 | parser.add_argument("-crop_size", "--crop_size", help = "specify size of aligned faces, align and crop with padding", default = 112, type = int) 15 | args = parser.parse_args() 16 | 17 | source_root = args.source_root # specify your source dir 18 | dest_root = args.dest_root # specify your destination dir 19 | crop_size = args.crop_size # specify size of aligned faces, align and crop with padding 20 | scale = crop_size / 112. 21 | reference = get_reference_facial_points(default_square = True) * scale 22 | 23 | cwd = os.getcwd() # delete '.DS_Store' existed in the source_root 24 | os.chdir(source_root) 25 | os.system("find . 
-name '*.DS_Store' -type f -delete") 26 | os.chdir(cwd) 27 | 28 | if not os.path.isdir(dest_root): 29 | os.mkdir(dest_root) 30 | 31 | for subfolder in tqdm(os.listdir(source_root)): 32 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 33 | os.mkdir(os.path.join(dest_root, subfolder)) 34 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 35 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 36 | img = Image.open(os.path.join(source_root, subfolder, image_name)) 37 | try: # Handle exception 38 | _, landmarks = detect_faces(img) 39 | except Exception: 40 | print("{} is discarded due to exception!".format(os.path.join(source_root, subfolder, image_name))) 41 | continue 42 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 43 | print("{} is discarded due to non-detected landmarks!".format(os.path.join(source_root, subfolder, image_name))) 44 | continue 45 | facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 46 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 47 | img_warped = Image.fromarray(warped_face) 48 | if image_name.split('.')[-1].lower() not in ['jpg', 'jpeg']: #not from jpg 49 | image_name = '.'.join(image_name.split('.')[:-1]) + '.jpg' 50 | img_warped.save(os.path.join(dest_root, subfolder, image_name)) 51 | -------------------------------------------------------------------------------- /align/first_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import math 4 | from PIL import Image 5 | import numpy as np 6 | from box_utils import nms, _preprocess 7 | 8 | 9 | def run_first_stage(image, net, scale, threshold): 10 | """Run P-Net, generate bounding boxes, and do NMS. 11 | 12 | Arguments: 13 | image: an instance of PIL.Image. 14 | net: an instance of pytorch's nn.Module, P-Net. 15 | scale: a float number, 16 | scale width and height of the image by this number. 17 | threshold: a float number, 18 | threshold on the probability of a face when generating 19 | bounding boxes from predictions of the net. 20 | 21 | Returns: 22 | a float numpy array of shape [n_boxes, 9], 23 | bounding boxes with scores and offsets (4 + 1 + 4). 24 | """ 25 | 26 | # scale the image and convert it to a float array 27 | width, height = image.size 28 | sw, sh = math.ceil(width*scale), math.ceil(height*scale) 29 | img = image.resize((sw, sh), Image.BILINEAR) 30 | img = np.asarray(img, 'float32') 31 | 32 | img = Variable(torch.FloatTensor(_preprocess(img)), volatile = True) 33 | output = net(img) 34 | probs = output[1].data.numpy()[0, 1, :, :] 35 | offsets = output[0].data.numpy() 36 | # probs: probability of a face at each sliding window 37 | # offsets: transformations to true bounding boxes 38 | 39 | boxes = _generate_bboxes(probs, offsets, scale, threshold) 40 | if len(boxes) == 0: 41 | return None 42 | 43 | keep = nms(boxes[:, 0:5], overlap_threshold = 0.5) 44 | return boxes[keep] 45 | 46 | 47 | def _generate_bboxes(probs, offsets, scale, threshold): 48 | """Generate bounding boxes at places 49 | where there is probably a face. 50 | 51 | Arguments: 52 | probs: a float numpy array of shape [n, m]. 53 | offsets: a float numpy array of shape [1, 4, n, m]. 54 | scale: a float number, 55 | width and height of the image were scaled by this number. 56 | threshold: a float number. 
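Only positions of the probability map whose score exceeds this threshold yield candidate boxes; detector.py passes thresholds[0] = 0.6 for this stage.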
57 | 58 | Returns: 59 | a float numpy array of shape [n_boxes, 9] 60 | """ 61 | 62 | # applying P-Net is equivalent, in some sense, to 63 | # moving 12x12 window with stride 2 64 | stride = 2 65 | cell_size = 12 66 | 67 | # indices of boxes where there is probably a face 68 | inds = np.where(probs > threshold) 69 | 70 | if inds[0].size == 0: 71 | return np.array([]) 72 | 73 | # transformations of bounding boxes 74 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] 75 | # they are defined as: 76 | # w = x2 - x1 + 1 77 | # h = y2 - y1 + 1 78 | # x1_true = x1 + tx1*w 79 | # x2_true = x2 + tx2*w 80 | # y1_true = y1 + ty1*h 81 | # y2_true = y2 + ty2*h 82 | 83 | offsets = np.array([tx1, ty1, tx2, ty2]) 84 | score = probs[inds[0], inds[1]] 85 | 86 | # P-Net is applied to scaled images 87 | # so we need to rescale bounding boxes back 88 | bounding_boxes = np.vstack([ 89 | np.round((stride*inds[1] + 1.0)/scale), 90 | np.round((stride*inds[0] + 1.0)/scale), 91 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 92 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 93 | score, offsets 94 | ]) 95 | # why one is added? 96 | 97 | return bounding_boxes.T -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from backbone.model_irse import * 3 | 4 | 5 | def dul_args_func(): 6 | parser = argparse.ArgumentParser(description='DUL: Data Uncertainty Learning in Face Recognition') 7 | 8 | # ----- random seed for reproducing 9 | parser.add_argument('--random_seed', type=int, default=6666) 10 | 11 | # ----- directory (train & test) 12 | parser.add_argument('--trainset_folder', type=str, default='/home/huangmouxiao.hmx/data/face_rec/casia_maxpy_clean_align/') 13 | parser.add_argument('--model_save_folder', type=str, default='./checkpoints/') 14 | parser.add_argument('--log_tensorboard', type=str, default='./logtensorboard/') 15 | parser.add_argument('--logs', type=str, default='./logs/') 16 | parser.add_argument('--testset_fr_folder', type=str, default='/home/huangmouxiao.hmx/data/face_rec/usual_test/') 17 | parser.add_argument('--testset_ood_folder', type=str, default='') 18 | parser.add_argument('--model_for_test', type=str, default='') 19 | 20 | # ----- training env 21 | parser.add_argument('--multi_gpu', type=bool, default=True) 22 | parser.add_argument('--gpu_id', type=str, nargs='+') 23 | 24 | # ----- resume pretrain details 25 | parser.add_argument('--resume_backbone', type=str, default='') 26 | parser.add_argument('--resume_head', type=str, default='') 27 | parser.add_argument('--resume_epoch', type=int, default=0) 28 | 29 | # ----- model & training details 30 | parser.add_argument('--backbone_name', type=str, default='IR_SE_64_DUL') 31 | parser.add_argument('--head_name', type=str, default='ArcFace') 32 | parser.add_argument('--loss_name', type=str, default='Softmax') 33 | parser.add_argument('--optimizer', type=str, default='SGD') 34 | parser.add_argument('--arcface_scale', type=int, default=64) 35 | parser.add_argument('--input_size', type=list, default=[112, 112]) # support: [112, 112] and [224, 224] 36 | parser.add_argument('--center_crop', type=bool, default=True) 37 | parser.add_argument('--rgb_mean', type=list, default=[0.5, 0.5, 0.5]) 38 | parser.add_argument('--rgb_std', type=list, default=[0.5, 0.5, 0.5]) 39 | parser.add_argument('--embedding_size', type=int, default=512) 40 | parser.add_argument('--drop_last', type=bool, 
default=True) 41 | parser.add_argument('--weight_decay', type=float, default=5e-4) 42 | parser.add_argument('--momentum', type=float, default=0.9) 43 | parser.add_argument('--pin_memory', type=bool, default=True) 44 | parser.add_argument('--num_workers', type=int, default=8) 45 | 46 | # ----- hyperparameters 47 | parser.add_argument('--batch_size', type=int, default=512) 48 | parser.add_argument('--num_epoch', type=int, default=22) 49 | parser.add_argument('--warm_up_epoch', type=int, default=1) 50 | parser.add_argument('--image_noise', type=float, default=0) 51 | parser.add_argument('--lr', type=float, default=0.1) 52 | parser.add_argument('--stages', type=str, nargs='+') 53 | parser.add_argument('--kl_scale', type=float, default=0.01) 54 | 55 | args = parser.parse_args() 56 | 57 | return args 58 | 59 | dul_args = dul_args_func() 60 | 61 | Backbone_Dict = { 62 | 'IR_50': IR_50(dul_args.input_size), 63 | 'IR_101': IR_101(dul_args.input_size), 64 | 'IR_152': IR_152(dul_args.input_size), 65 | 'IR_SE_50': IR_SE_50(dul_args.input_size), 66 | 'IR_SE_64_DUL': IR_SE_64_DUL(dul_args.input_size), 67 | 'IR_SE_101': IR_SE_101(dul_args.input_size), 68 | 'IR_SE_152': IR_SE_152(dul_args.input_size) 69 | } 70 | 71 | Test_FR_Data_Dict = { 72 | 'lfw': 'lfw', 73 | 'cfp_ff': 'cfp_ff', 74 | 'cfp_fp': 'cfp_fp', 75 | 'agedb': 'agedb_30', 76 | 'calfw': 'calfw', 77 | 'cplfw': 'cplfw', 78 | 'vgg2_fp': 'vgg2_fp' 79 | } -------------------------------------------------------------------------------- /align/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | from get_nets import PNet, RNet, ONet 5 | from box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 6 | from first_stage import run_first_stage 7 | 8 | 9 | def detect_faces(image, min_face_size = 20.0, 10 | thresholds=[0.6, 0.7, 0.8], 11 | nms_thresholds=[0.7, 0.7, 0.7]): 12 | """ 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | min_face_size: a float number. 16 | thresholds: a list of length 3. 17 | nms_thresholds: a list of length 3. 18 | 19 | Returns: 20 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 21 | bounding boxes and facial landmarks. 
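Example (mirrors the call in align/face_align.py):
    from PIL import Image
    img = Image.open('some_face.jpg')  # illustrative path
    bounding_boxes, landmarks = detect_faces(img)
    # landmarks[k] holds [x1..x5, y1..y5] for the five points of face k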
22 | """ 23 | 24 | # LOAD MODELS 25 | pnet = PNet() 26 | rnet = RNet() 27 | onet = ONet() 28 | onet.eval() 29 | 30 | # BUILD AN IMAGE PYRAMID 31 | width, height = image.size 32 | min_length = min(height, width) 33 | 34 | min_detection_size = 12 35 | factor = 0.707 # sqrt(0.5) 36 | 37 | # scales for scaling the image 38 | scales = [] 39 | 40 | # scales the image so that 41 | # minimum size that we can detect equals to 42 | # minimum face size that we want to detect 43 | m = min_detection_size/min_face_size 44 | min_length *= m 45 | 46 | factor_count = 0 47 | while min_length > min_detection_size: 48 | scales.append(m*factor**factor_count) 49 | min_length *= factor 50 | factor_count += 1 51 | 52 | # STAGE 1 53 | 54 | # it will be returned 55 | bounding_boxes = [] 56 | 57 | # run P-Net on different scales 58 | for s in scales: 59 | boxes = run_first_stage(image, pnet, scale = s, threshold = thresholds[0]) 60 | bounding_boxes.append(boxes) 61 | 62 | # collect boxes (and offsets, and scores) from different scales 63 | bounding_boxes = [i for i in bounding_boxes if i is not None] 64 | bounding_boxes = np.vstack(bounding_boxes) 65 | 66 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 67 | bounding_boxes = bounding_boxes[keep] 68 | 69 | # use offsets predicted by pnet to transform bounding boxes 70 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 71 | # shape [n_boxes, 5] 72 | 73 | bounding_boxes = convert_to_square(bounding_boxes) 74 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 75 | 76 | # STAGE 2 77 | 78 | img_boxes = get_image_boxes(bounding_boxes, image, size = 24) 79 | img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) 80 | output = rnet(img_boxes) 81 | offsets = output[0].data.numpy() # shape [n_boxes, 4] 82 | probs = output[1].data.numpy() # shape [n_boxes, 2] 83 | 84 | keep = np.where(probs[:, 1] > thresholds[1])[0] 85 | bounding_boxes = bounding_boxes[keep] 86 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 87 | offsets = offsets[keep] 88 | 89 | keep = nms(bounding_boxes, nms_thresholds[1]) 90 | bounding_boxes = bounding_boxes[keep] 91 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 92 | bounding_boxes = convert_to_square(bounding_boxes) 93 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 94 | 95 | # STAGE 3 96 | 97 | img_boxes = get_image_boxes(bounding_boxes, image, size = 48) 98 | if len(img_boxes) == 0: 99 | return [], [] 100 | img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) 101 | output = onet(img_boxes) 102 | landmarks = output[0].data.numpy() # shape [n_boxes, 10] 103 | offsets = output[1].data.numpy() # shape [n_boxes, 4] 104 | probs = output[2].data.numpy() # shape [n_boxes, 2] 105 | 106 | keep = np.where(probs[:, 1] > thresholds[2])[0] 107 | bounding_boxes = bounding_boxes[keep] 108 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 109 | offsets = offsets[keep] 110 | landmarks = landmarks[keep] 111 | 112 | # compute landmark points 113 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 114 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 115 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 116 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 117 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 118 | 119 | bounding_boxes = calibrate_box(bounding_boxes, offsets) 120 | keep = nms(bounding_boxes, nms_thresholds[2], mode = 'min') 121 | 
bounding_boxes = bounding_boxes[keep] 122 | landmarks = landmarks[keep] 123 | 124 | return bounding_boxes, landmarks 125 | -------------------------------------------------------------------------------- /align/get_nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | import numpy as np 6 | 7 | 8 | class Flatten(nn.Module): 9 | 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """ 15 | Arguments: 16 | x: a float tensor with shape [batch_size, c, h, w]. 17 | Returns: 18 | a float tensor with shape [batch_size, c*h*w]. 19 | """ 20 | 21 | # without this pretrained model isn't working 22 | x = x.transpose(3, 2).contiguous() 23 | 24 | return x.view(x.size(0), -1) 25 | 26 | 27 | class PNet(nn.Module): 28 | 29 | def __init__(self): 30 | 31 | super(PNet, self).__init__() 32 | 33 | # suppose we have input with size HxW, then 34 | # after first layer: H - 2, 35 | # after pool: ceil((H - 2)/2), 36 | # after second conv: ceil((H - 2)/2) - 2, 37 | # after last conv: ceil((H - 2)/2) - 4, 38 | # and the same for W 39 | 40 | self.features = nn.Sequential(OrderedDict([ 41 | ('conv1', nn.Conv2d(3, 10, 3, 1)), 42 | ('prelu1', nn.PReLU(10)), 43 | ('pool1', nn.MaxPool2d(2, 2, ceil_mode = True)), 44 | 45 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 46 | ('prelu2', nn.PReLU(16)), 47 | 48 | ('conv3', nn.Conv2d(16, 32, 3, 1)), 49 | ('prelu3', nn.PReLU(32)) 50 | ])) 51 | 52 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 53 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 54 | 55 | weights = np.load("./pnet.npy", allow_pickle=True)[()] 56 | for n, p in self.named_parameters(): 57 | p.data = torch.FloatTensor(weights[n]) 58 | 59 | def forward(self, x): 60 | """ 61 | Arguments: 62 | x: a float tensor with shape [batch_size, 3, h, w]. 63 | Returns: 64 | b: a float tensor with shape [batch_size, 4, h', w']. 65 | a: a float tensor with shape [batch_size, 2, h', w']. 66 | """ 67 | x = self.features(x) 68 | a = self.conv4_1(x) 69 | b = self.conv4_2(x) 70 | a = F.softmax(a) 71 | return b, a 72 | 73 | 74 | class RNet(nn.Module): 75 | 76 | def __init__(self): 77 | 78 | super(RNet, self).__init__() 79 | 80 | self.features = nn.Sequential(OrderedDict([ 81 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 82 | ('prelu1', nn.PReLU(28)), 83 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 84 | 85 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 86 | ('prelu2', nn.PReLU(48)), 87 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 88 | 89 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 90 | ('prelu3', nn.PReLU(64)), 91 | 92 | ('flatten', Flatten()), 93 | ('conv4', nn.Linear(576, 128)), 94 | ('prelu4', nn.PReLU(128)) 95 | ])) 96 | 97 | self.conv5_1 = nn.Linear(128, 2) 98 | self.conv5_2 = nn.Linear(128, 4) 99 | 100 | weights = np.load("./rnet.npy", allow_pickle=True)[()] 101 | for n, p in self.named_parameters(): 102 | p.data = torch.FloatTensor(weights[n]) 103 | 104 | def forward(self, x): 105 | """ 106 | Arguments: 107 | x: a float tensor with shape [batch_size, 3, h, w]. 108 | Returns: 109 | b: a float tensor with shape [batch_size, 4]. 110 | a: a float tensor with shape [batch_size, 2]. 
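(b holds the bounding-box regression offsets and a the face/background probabilities after softmax; detector.py reads them as output[0] and output[1] respectively)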
111 | """ 112 | x = self.features(x) 113 | a = self.conv5_1(x) 114 | b = self.conv5_2(x) 115 | a = F.softmax(a) 116 | return b, a 117 | 118 | 119 | class ONet(nn.Module): 120 | 121 | def __init__(self): 122 | 123 | super(ONet, self).__init__() 124 | 125 | self.features = nn.Sequential(OrderedDict([ 126 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 127 | ('prelu1', nn.PReLU(32)), 128 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 129 | 130 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 131 | ('prelu2', nn.PReLU(64)), 132 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 133 | 134 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 135 | ('prelu3', nn.PReLU(64)), 136 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode = True)), 137 | 138 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 139 | ('prelu4', nn.PReLU(128)), 140 | 141 | ('flatten', Flatten()), 142 | ('conv5', nn.Linear(1152, 256)), 143 | ('drop5', nn.Dropout(0.25)), 144 | ('prelu5', nn.PReLU(256)), 145 | ])) 146 | 147 | self.conv6_1 = nn.Linear(256, 2) 148 | self.conv6_2 = nn.Linear(256, 4) 149 | self.conv6_3 = nn.Linear(256, 10) 150 | 151 | weights = np.load("./onet.npy", allow_pickle=True)[()] 152 | for n, p in self.named_parameters(): 153 | p.data = torch.FloatTensor(weights[n]) 154 | 155 | def forward(self, x): 156 | """ 157 | Arguments: 158 | x: a float tensor with shape [batch_size, 3, h, w]. 159 | Returns: 160 | c: a float tensor with shape [batch_size, 10]. 161 | b: a float tensor with shape [batch_size, 4]. 162 | a: a float tensor with shape [batch_size, 2]. 163 | """ 164 | x = self.features(x) 165 | a = self.conv6_1(x) 166 | b = self.conv6_2(x) 167 | c = self.conv6_3(x) 168 | a = F.softmax(a) 169 | return c, b, a -------------------------------------------------------------------------------- /align/box_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | def nms(boxes, overlap_threshold = 0.5, mode = 'union'): 6 | """Non-maximum suppression. 7 | 8 | Arguments: 9 | boxes: a float numpy array of shape [n, 5], 10 | where each row is (xmin, ymin, xmax, ymax, score). 11 | overlap_threshold: a float number. 12 | mode: 'union' or 'min'. 
13 | 14 | Returns: 15 | list with indices of the selected boxes 16 | """ 17 | 18 | # if there are no boxes, return the empty list 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | # list of picked indices 23 | pick = [] 24 | 25 | # grab the coordinates of the bounding boxes 26 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] 27 | 28 | area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 29 | ids = np.argsort(score) # in increasing order 30 | 31 | while len(ids) > 0: 32 | 33 | # grab index of the largest value 34 | last = len(ids) - 1 35 | i = ids[last] 36 | pick.append(i) 37 | 38 | # compute intersections 39 | # of the box with the largest score 40 | # with the rest of boxes 41 | 42 | # left top corner of intersection boxes 43 | ix1 = np.maximum(x1[i], x1[ids[:last]]) 44 | iy1 = np.maximum(y1[i], y1[ids[:last]]) 45 | 46 | # right bottom corner of intersection boxes 47 | ix2 = np.minimum(x2[i], x2[ids[:last]]) 48 | iy2 = np.minimum(y2[i], y2[ids[:last]]) 49 | 50 | # width and height of intersection boxes 51 | w = np.maximum(0.0, ix2 - ix1 + 1.0) 52 | h = np.maximum(0.0, iy2 - iy1 + 1.0) 53 | 54 | # intersections' areas 55 | inter = w * h 56 | if mode == 'min': 57 | overlap = inter/np.minimum(area[i], area[ids[:last]]) 58 | elif mode == 'union': 59 | # intersection over union (IoU) 60 | overlap = inter/(area[i] + area[ids[:last]] - inter) 61 | 62 | # delete all boxes where overlap is too big 63 | ids = np.delete( 64 | ids, 65 | np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) 66 | ) 67 | 68 | return pick 69 | 70 | 71 | def convert_to_square(bboxes): 72 | """Convert bounding boxes to a square form. 73 | 74 | Arguments: 75 | bboxes: a float numpy array of shape [n, 5]. 76 | 77 | Returns: 78 | a float numpy array of shape [n, 5], 79 | squared bounding boxes. 80 | """ 81 | 82 | square_bboxes = np.zeros_like(bboxes) 83 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 84 | h = y2 - y1 + 1.0 85 | w = x2 - x1 + 1.0 86 | max_side = np.maximum(h, w) 87 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 88 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 89 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 90 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 91 | return square_bboxes 92 | 93 | 94 | def calibrate_box(bboxes, offsets): 95 | """Transform bounding boxes to be more like true bounding boxes. 96 | 'offsets' is one of the outputs of the nets. 97 | 98 | Arguments: 99 | bboxes: a float numpy array of shape [n, 5]. 100 | offsets: a float numpy array of shape [n, 4]. 101 | 102 | Returns: 103 | a float numpy array of shape [n, 5]. 104 | """ 105 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 106 | w = x2 - x1 + 1.0 107 | h = y2 - y1 + 1.0 108 | w = np.expand_dims(w, 1) 109 | h = np.expand_dims(h, 1) 110 | 111 | # this is what happening here: 112 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 113 | # x1_true = x1 + tx1*w 114 | # y1_true = y1 + ty1*h 115 | # x2_true = x2 + tx2*w 116 | # y2_true = y2 + ty2*h 117 | # below is just more compact form of this 118 | 119 | # are offsets always such that 120 | # x1 < x2 and y1 < y2 ? 121 | 122 | translation = np.hstack([w, h, w, h])*offsets 123 | bboxes[:, 0:4] = bboxes[:, 0:4] + translation 124 | return bboxes 125 | 126 | 127 | def get_image_boxes(bounding_boxes, img, size = 24): 128 | """Cut out boxes from the image. 129 | 130 | Arguments: 131 | bounding_boxes: a float numpy array of shape [n, 5]. 132 | img: an instance of PIL.Image. 133 | size: an integer, size of cutouts. 
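(detector.py uses size = 24 for the R-Net stage and size = 48 for the O-Net stage)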
134 | 135 | Returns: 136 | a float numpy array of shape [n, 3, size, size]. 137 | """ 138 | 139 | num_boxes = len(bounding_boxes) 140 | width, height = img.size 141 | 142 | [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) 143 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 144 | 145 | for i in range(num_boxes): 146 | img_box = np.zeros((h[i], w[i], 3), 'uint8') 147 | 148 | img_array = np.asarray(img, 'uint8') 149 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ 150 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 151 | 152 | # resize 153 | img_box = Image.fromarray(img_box) 154 | img_box = img_box.resize((size, size), Image.BILINEAR) 155 | img_box = np.asarray(img_box, 'float32') 156 | 157 | img_boxes[i, :, :, :] = _preprocess(img_box) 158 | 159 | return img_boxes 160 | 161 | 162 | def correct_bboxes(bboxes, width, height): 163 | """Crop boxes that are too big and get coordinates 164 | with respect to cutouts. 165 | 166 | Arguments: 167 | bboxes: a float numpy array of shape [n, 5], 168 | where each row is (xmin, ymin, xmax, ymax, score). 169 | width: a float number. 170 | height: a float number. 171 | 172 | Returns: 173 | dy, dx, edy, edx: a int numpy arrays of shape [n], 174 | coordinates of the boxes with respect to the cutouts. 175 | y, x, ey, ex: a int numpy arrays of shape [n], 176 | corrected ymin, xmin, ymax, xmax. 177 | h, w: a int numpy arrays of shape [n], 178 | just heights and widths of boxes. 179 | 180 | in the following order: 181 | [dy, edy, dx, edx, y, ey, x, ex, w, h]. 182 | """ 183 | 184 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 185 | w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 186 | num_boxes = bboxes.shape[0] 187 | 188 | # 'e' stands for end 189 | # (x, y) -> (ex, ey) 190 | x, y, ex, ey = x1, y1, x2, y2 191 | 192 | # we need to cut out a box from the image. 193 | # (x, y, ex, ey) are corrected coordinates of the box 194 | # in the image. 195 | # (dx, dy, edx, edy) are coordinates of the box in the cutout 196 | # from the image. 197 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 198 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 199 | 200 | # if box's bottom right corner is too far right 201 | ind = np.where(ex > width - 1.0)[0] 202 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 203 | ex[ind] = width - 1.0 204 | 205 | # if box's bottom right corner is too low 206 | ind = np.where(ey > height - 1.0)[0] 207 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 208 | ey[ind] = height - 1.0 209 | 210 | # if box's top left corner is too far left 211 | ind = np.where(x < 0.0)[0] 212 | dx[ind] = 0.0 - x[ind] 213 | x[ind] = 0.0 214 | 215 | # if box's top left corner is too high 216 | ind = np.where(y < 0.0)[0] 217 | dy[ind] = 0.0 - y[ind] 218 | y[ind] = 0.0 219 | 220 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 221 | return_list = [i.astype('int32') for i in return_list] 222 | 223 | return return_list 224 | 225 | 226 | def _preprocess(img): 227 | """Preprocessing step before feeding the network. 228 | 229 | Arguments: 230 | img: a float numpy array of shape [h, w, c]. 231 | 232 | Returns: 233 | a float numpy array of shape [1, c, h, w]. 
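Pixel values are shifted and scaled to roughly [-1, 1]: (img - 127.5) * 0.0078125, i.e. (img - 127.5) / 128.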
234 | """ 235 | img = img.transpose((2, 0, 1)) 236 | img = np.expand_dims(img, 0) 237 | img = (img - 127.5) * 0.0078125 238 | return img 239 | -------------------------------------------------------------------------------- /util/verification.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | from sklearn.model_selection import KFold 28 | from sklearn.decomposition import PCA 29 | import sklearn 30 | from scipy import interpolate 31 | from scipy.spatial.distance import pdist 32 | 33 | 34 | # Support: ['calculate_roc', 'calculate_accuracy', 'calculate_val', 'calculate_val_far', 'evaluate'] 35 | 36 | 37 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds = 10, pca = 0): 38 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 39 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 40 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 41 | nrof_thresholds = len(thresholds) 42 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 43 | 44 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 45 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 46 | accuracy = np.zeros((nrof_folds)) 47 | best_thresholds = np.zeros((nrof_folds)) 48 | indices = np.arange(nrof_pairs) 49 | # print('pca', pca) 50 | 51 | if pca == 0: 52 | diff = np.subtract(embeddings1, embeddings2) 53 | dist = np.sum(np.square(diff), 1) 54 | # dist = pdist(np.vstack([embeddings1, embeddings2]), 'cosine') 55 | 56 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 57 | # print('train_set', train_set) 58 | # print('test_set', test_set) 59 | if pca > 0: 60 | print("doing pca on", fold_idx) 61 | embed1_train = embeddings1[train_set] 62 | embed2_train = embeddings2[train_set] 63 | _embed_train = np.concatenate((embed1_train, embed2_train), axis = 0) 64 | # print(_embed_train.shape) 65 | pca_model = PCA(n_components = pca) 66 | pca_model.fit(_embed_train) 67 | embed1 = pca_model.transform(embeddings1) 68 | embed2 = pca_model.transform(embeddings2) 69 | embed1 = sklearn.preprocessing.normalize(embed1) 70 | embed2 = sklearn.preprocessing.normalize(embed2) 71 | # print(embed1.shape, embed2.shape) 72 | diff = np.subtract(embed1, embed2) 73 | dist 
= np.sum(np.square(diff), 1) 74 | 75 | # Find the best threshold for the fold 76 | acc_train = np.zeros((nrof_thresholds)) 77 | for threshold_idx, threshold in enumerate(thresholds): 78 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 79 | best_threshold_index = np.argmax(acc_train) 80 | # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index]) 81 | best_thresholds[fold_idx] = thresholds[best_threshold_index] 82 | for threshold_idx, threshold in enumerate(thresholds): 83 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 84 | dist[test_set], 85 | actual_issame[ 86 | test_set]) 87 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 88 | 89 | tpr = np.mean(tprs, 0) 90 | fpr = np.mean(fprs, 0) 91 | return tpr, fpr, accuracy, best_thresholds 92 | 93 | 94 | def calculate_accuracy(threshold, dist, actual_issame): 95 | predict_issame = np.less(dist, threshold) 96 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 97 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 98 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 99 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 100 | 101 | tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) 102 | fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) 103 | acc = float(tp + tn) / dist.size 104 | return tpr, fpr, acc 105 | 106 | 107 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds = 10): 108 | ''' 109 | Copy from [insightface](https://github.com/deepinsight/insightface) 110 | :param thresholds: 111 | :param embeddings1: 112 | :param embeddings2: 113 | :param actual_issame: 114 | :param far_target: 115 | :param nrof_folds: 116 | :return: 117 | ''' 118 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 119 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 120 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 121 | nrof_thresholds = len(thresholds) 122 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 123 | 124 | val = np.zeros(nrof_folds) 125 | far = np.zeros(nrof_folds) 126 | 127 | diff = np.subtract(embeddings1, embeddings2) 128 | dist = np.sum(np.square(diff), 1) 129 | indices = np.arange(nrof_pairs) 130 | 131 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 132 | 133 | # Find the threshold that gives FAR = far_target 134 | far_train = np.zeros(nrof_thresholds) 135 | for threshold_idx, threshold in enumerate(thresholds): 136 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 137 | if np.max(far_train) >= far_target: 138 | f = interpolate.interp1d(far_train, thresholds, kind = 'slinear') 139 | threshold = f(far_target) 140 | else: 141 | threshold = 0.0 142 | 143 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 144 | 145 | val_mean = np.mean(val) 146 | far_mean = np.mean(far) 147 | val_std = np.std(val) 148 | return val_mean, val_std, far_mean 149 | 150 | 151 | def calculate_val_far(threshold, dist, actual_issame): 152 | predict_issame = np.less(dist, threshold) 153 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 154 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 155 | n_same 
= np.sum(actual_issame) 156 | n_diff = np.sum(np.logical_not(actual_issame)) 157 | val = float(true_accept) / float(n_same) 158 | far = float(false_accept) / float(n_diff) 159 | return val, far 160 | 161 | 162 | def evaluate(embeddings, actual_issame, nrof_folds = 10, pca = 0): 163 | # Calculate evaluation metrics 164 | thresholds = np.arange(0, 4, 0.01) 165 | embeddings1 = embeddings[0::2] 166 | embeddings2 = embeddings[1::2] 167 | tpr, fpr, accuracy, best_thresholds = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), nrof_folds = nrof_folds, pca = pca) 168 | # thresholds = np.arange(0, 4, 0.001) 169 | # val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, 170 | # np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) 171 | # return tpr, fpr, accuracy, best_thresholds, val, val_std, far 172 | return tpr, fpr, accuracy, best_thresholds 173 | -------------------------------------------------------------------------------- /align/matlab_cp2tform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv, norm, lstsq 3 | from numpy.linalg import matrix_rank as rank 4 | 5 | 6 | class MatlabCp2tormException(Exception): 7 | def __str__(self): 8 | return "In File {}:{}".format( 9 | __file__, super.__str__(self)) 10 | 11 | def tformfwd(trans, uv): 12 | """ 13 | Function: 14 | ---------- 15 | apply affine transform 'trans' to uv 16 | 17 | Parameters: 18 | ---------- 19 | @trans: 3x3 np.array 20 | transform matrix 21 | @uv: Kx2 np.array 22 | each row is a pair of coordinates (x, y) 23 | 24 | Returns: 25 | ---------- 26 | @xy: Kx2 np.array 27 | each row is a pair of transformed coordinates (x, y) 28 | """ 29 | uv = np.hstack(( 30 | uv, np.ones((uv.shape[0], 1)) 31 | )) 32 | xy = np.dot(uv, trans) 33 | xy = xy[:, 0:-1] 34 | return xy 35 | 36 | 37 | def tforminv(trans, uv): 38 | """ 39 | Function: 40 | ---------- 41 | apply the inverse of affine transform 'trans' to uv 42 | 43 | Parameters: 44 | ---------- 45 | @trans: 3x3 np.array 46 | transform matrix 47 | @uv: Kx2 np.array 48 | each row is a pair of coordinates (x, y) 49 | 50 | Returns: 51 | ---------- 52 | @xy: Kx2 np.array 53 | each row is a pair of inverse-transformed coordinates (x, y) 54 | """ 55 | Tinv = inv(trans) 56 | xy = tformfwd(Tinv, uv) 57 | return xy 58 | 59 | 60 | def findNonreflectiveSimilarity(uv, xy, options=None): 61 | 62 | options = {'K': 2} 63 | 64 | K = options['K'] 65 | M = xy.shape[0] 66 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 67 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 68 | # print('--->x, y:\n', x, y 69 | 70 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 71 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 72 | X = np.vstack((tmp1, tmp2)) 73 | # print('--->X.shape: ', X.shape 74 | # print('X:\n', X 75 | 76 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 77 | v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 78 | U = np.vstack((u, v)) 79 | # print('--->U.shape: ', U.shape 80 | # print('U:\n', U 81 | 82 | # We know that X * r = U 83 | if rank(X) >= 2 * K: 84 | r, _, _, _ = lstsq(X, U) 85 | r = np.squeeze(r) 86 | else: 87 | raise Exception("cp2tform: two Unique Points Req") 88 | 89 | # print('--->r:\n', r 90 | 91 | sc = r[0] 92 | ss = r[1] 93 | tx = r[2] 94 | ty = r[3] 95 | 96 | Tinv = np.array([ 97 | [sc, -ss, 0], 98 | [ss, sc, 0], 99 | [tx, ty, 1] 100 | ]) 101 | 102 | # 
print('--->Tinv:\n', Tinv 103 | 104 | T = inv(Tinv) 105 | # print('--->T:\n', T 106 | 107 | T[:, 2] = np.array([0, 0, 1]) 108 | 109 | return T, Tinv 110 | 111 | 112 | def findSimilarity(uv, xy, options=None): 113 | 114 | options = {'K': 2} 115 | 116 | # uv = np.array(uv) 117 | # xy = np.array(xy) 118 | 119 | # Solve for trans1 120 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 121 | 122 | # Solve for trans2 123 | 124 | # manually reflect the xy data across the Y-axis 125 | xyR = xy 126 | xyR[:, 0] = -1 * xyR[:, 0] 127 | 128 | trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) 129 | 130 | # manually reflect the tform to undo the reflection done on xyR 131 | TreflectY = np.array([ 132 | [-1, 0, 0], 133 | [0, 1, 0], 134 | [0, 0, 1] 135 | ]) 136 | 137 | trans2 = np.dot(trans2r, TreflectY) 138 | 139 | # Figure out if trans1 or trans2 is better 140 | xy1 = tformfwd(trans1, uv) 141 | norm1 = norm(xy1 - xy) 142 | 143 | xy2 = tformfwd(trans2, uv) 144 | norm2 = norm(xy2 - xy) 145 | 146 | if norm1 <= norm2: 147 | return trans1, trans1_inv 148 | else: 149 | trans2_inv = inv(trans2) 150 | return trans2, trans2_inv 151 | 152 | 153 | def get_similarity_transform(src_pts, dst_pts, reflective = True): 154 | """ 155 | Function: 156 | ---------- 157 | Find Similarity Transform Matrix 'trans': 158 | u = src_pts[:, 0] 159 | v = src_pts[:, 1] 160 | x = dst_pts[:, 0] 161 | y = dst_pts[:, 1] 162 | [x, y, 1] = [u, v, 1] * trans 163 | 164 | Parameters: 165 | ---------- 166 | @src_pts: Kx2 np.array 167 | source points, each row is a pair of coordinates (x, y) 168 | @dst_pts: Kx2 np.array 169 | destination points, each row is a pair of transformed 170 | coordinates (x, y) 171 | @reflective: True or False 172 | if True: 173 | use reflective similarity transform 174 | else: 175 | use non-reflective similarity transform 176 | 177 | Returns: 178 | ---------- 179 | @trans: 3x3 np.array 180 | transform matrix from uv to xy 181 | trans_inv: 3x3 np.array 182 | inverse of trans, transform matrix from xy to uv 183 | """ 184 | 185 | if reflective: 186 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 187 | else: 188 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 189 | 190 | return trans, trans_inv 191 | 192 | 193 | def cvt_tform_mat_for_cv2(trans): 194 | """ 195 | Function: 196 | ---------- 197 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 198 | directly used by cv2.warpAffine(): 199 | u = src_pts[:, 0] 200 | v = src_pts[:, 1] 201 | x = dst_pts[:, 0] 202 | y = dst_pts[:, 1] 203 | [x, y].T = cv_trans * [u, v, 1].T 204 | 205 | Parameters: 206 | ---------- 207 | @trans: 3x3 np.array 208 | transform matrix from uv to xy 209 | 210 | Returns: 211 | ---------- 212 | @cv2_trans: 2x3 np.array 213 | transform matrix from src_pts to dst_pts, could be directly used 214 | for cv2.warpAffine() 215 | """ 216 | cv2_trans = trans[:, 0:2].T 217 | 218 | return cv2_trans 219 | 220 | 221 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective = True): 222 | """ 223 | Function: 224 | ---------- 225 | Find Similarity Transform Matrix 'cv2_trans' which could be 226 | directly used by cv2.warpAffine(): 227 | u = src_pts[:, 0] 228 | v = src_pts[:, 1] 229 | x = dst_pts[:, 0] 230 | y = dst_pts[:, 1] 231 | [x, y].T = cv_trans * [u, v, 1].T 232 | 233 | Parameters: 234 | ---------- 235 | @src_pts: Kx2 np.array 236 | source points, each row is a pair of coordinates (x, y) 237 | @dst_pts: Kx2 np.array 238 | destination points, each row is a pair of transformed 239 | 
coordinates (x, y) 240 | reflective: True or False 241 | if True: 242 | use reflective similarity transform 243 | else: 244 | use non-reflective similarity transform 245 | 246 | Returns: 247 | ---------- 248 | @cv2_trans: 2x3 np.array 249 | transform matrix from src_pts to dst_pts, could be directly used 250 | for cv2.warpAffine() 251 | """ 252 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 253 | cv2_trans = cvt_tform_mat_for_cv2(trans) 254 | 255 | return cv2_trans 256 | 257 | 258 | if __name__ == '__main__': 259 | """ 260 | u = [0, 6, -2] 261 | v = [0, 3, 5] 262 | x = [-1, 0, 4] 263 | y = [-1, -10, 4] 264 | 265 | # In Matlab, run: 266 | # 267 | # uv = [u'; v']; 268 | # xy = [x'; y']; 269 | # tform_sim=cp2tform(uv,xy,'similarity'); 270 | # 271 | # trans = tform_sim.tdata.T 272 | # ans = 273 | # -0.0764 -1.6190 0 274 | # 1.6190 -0.0764 0 275 | # -3.2156 0.0290 1.0000 276 | # trans_inv = tform_sim.tdata.Tinv 277 | # ans = 278 | # 279 | # -0.0291 0.6163 0 280 | # -0.6163 -0.0291 0 281 | # -0.0756 1.9826 1.0000 282 | # xy_m=tformfwd(tform_sim, u,v) 283 | # 284 | # xy_m = 285 | # 286 | # -3.2156 0.0290 287 | # 1.1833 -9.9143 288 | # 5.0323 2.8853 289 | # uv_m=tforminv(tform_sim, x,y) 290 | # 291 | # uv_m = 292 | # 293 | # 0.5698 1.3953 294 | # 6.0872 2.2733 295 | # -2.6570 4.3314 296 | """ 297 | u = [0, 6, -2] 298 | v = [0, 3, 5] 299 | x = [-1, 0, 4] 300 | y = [-1, -10, 4] 301 | 302 | uv = np.array((u, v)).T 303 | xy = np.array((x, y)).T 304 | 305 | print("\n--->uv:") 306 | print(uv) 307 | print("\n--->xy:") 308 | print(xy) 309 | 310 | trans, trans_inv = get_similarity_transform(uv, xy) 311 | 312 | print("\n--->trans matrix:") 313 | print(trans) 314 | 315 | print("\n--->trans_inv matrix:") 316 | print(trans_inv) 317 | 318 | print("\n---> apply transform to uv") 319 | print("\nxy_m = uv_augmented * trans") 320 | uv_aug = np.hstack(( 321 | uv, np.ones((uv.shape[0], 1)) 322 | )) 323 | xy_m = np.dot(uv_aug, trans) 324 | print(xy_m) 325 | 326 | print("\nxy_m = tformfwd(trans, uv)") 327 | xy_m = tformfwd(trans, uv) 328 | print(xy_m) 329 | 330 | print("\n---> apply inverse transform to xy") 331 | print("\nuv_m = xy_augmented * trans_inv") 332 | xy_aug = np.hstack(( 333 | xy, np.ones((xy.shape[0], 1)) 334 | )) 335 | uv_m = np.dot(xy_aug, trans_inv) 336 | print(uv_m) 337 | 338 | print("\nuv_m = tformfwd(trans_inv, xy)") 339 | uv_m = tformfwd(trans_inv, xy) 340 | print(uv_m) 341 | 342 | uv_m = tforminv(trans, xy) 343 | print("\nuv_m = tforminv(trans, xy)") 344 | print(uv_m) -------------------------------------------------------------------------------- /backbone/model_irse.py: -------------------------------------------------------------------------------- 1 | from math import lgamma 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \ 5 | AdaptiveAvgPool2d, Sequential, Module 6 | from collections import namedtuple 7 | 8 | from torch.nn.modules.flatten import Flatten 9 | 10 | 11 | # Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152', \ 12 | # 'IR_SE_64_DUL(for DUL)'] 13 | 14 | 15 | class DUL_Backbone(nn.Module): 16 | def __init__(self, resnet): 17 | super(DUL_Backbone, self).__init__() 18 | 19 | self.features = nn.Sequential( 20 | resnet.input_layer, 21 | resnet.body, 22 | Sequential(BatchNorm2d(512), 23 | Dropout(), 24 | Flatten(), 25 | ) 26 | ) 27 | self.mu_dul_backbone = nn.Sequential( 28 | Linear(512 * 7 * 7, 512), 29 | BatchNorm1d(512), 30 | ) 31 
| self.logvar_dul_backbone = nn.Sequential( 32 | Linear(512 * 7 * 7, 512), 33 | BatchNorm1d(512), 34 | ) 35 | 36 | def forward(self, img): 37 | x = self.features(img) 38 | mu_dul = self.mu_dul_backbone(x) 39 | logvar_dul = self.logvar_dul_backbone(x) 40 | std_dul = (logvar_dul * 0.5).exp() 41 | # std_dul should be restricted between (0, 1) from the original paper definition. However, it doesn't say how to implement. 42 | # You could simply clamp it or use zoom, sigmoid, softplus, etc. 43 | std_dul = torch.clamp(std_dul, min=1e-8, max=1.0) 44 | return mu_dul, std_dul 45 | 46 | 47 | 48 | class Flatten(Module): 49 | def forward(self, input): 50 | return input.view(input.size(0), -1) 51 | 52 | 53 | def l2_norm(input, axis=1): 54 | norm = torch.norm(input, 2, axis, True) 55 | output = torch.div(input, norm) 56 | 57 | return output 58 | 59 | 60 | class SEModule(Module): 61 | def __init__(self, channels, reduction): 62 | super(SEModule, self).__init__() 63 | self.avg_pool = AdaptiveAvgPool2d(1) 64 | self.fc1 = Conv2d( 65 | channels, channels // reduction, kernel_size=1, padding=0, bias=False) 66 | 67 | nn.init.xavier_uniform_(self.fc1.weight.data) 68 | 69 | self.relu = ReLU(inplace=True) 70 | self.fc2 = Conv2d( 71 | channels // reduction, channels, kernel_size=1, padding=0, bias=False) 72 | 73 | self.sigmoid = Sigmoid() 74 | 75 | def forward(self, x): 76 | module_input = x 77 | x = self.avg_pool(x) 78 | x = self.fc1(x) 79 | x = self.relu(x) 80 | x = self.fc2(x) 81 | x = self.sigmoid(x) 82 | 83 | return module_input * x 84 | 85 | 86 | class bottleneck_IR(Module): 87 | def __init__(self, in_channel, depth, stride): 88 | super(bottleneck_IR, self).__init__() 89 | if in_channel == depth: 90 | self.shortcut_layer = MaxPool2d(1, stride) 91 | else: 92 | self.shortcut_layer = Sequential( 93 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth)) 94 | self.res_layer = Sequential( 95 | BatchNorm2d(in_channel), 96 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), 97 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)) 98 | 99 | def forward(self, x): 100 | shortcut = self.shortcut_layer(x) 101 | res = self.res_layer(x) 102 | 103 | return res + shortcut 104 | 105 | 106 | class bottleneck_IR_SE(Module): 107 | def __init__(self, in_channel, depth, stride): 108 | super(bottleneck_IR_SE, self).__init__() 109 | if in_channel == depth: 110 | self.shortcut_layer = MaxPool2d(1, stride) 111 | else: 112 | self.shortcut_layer = Sequential( 113 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 114 | BatchNorm2d(depth)) 115 | self.res_layer = Sequential( 116 | BatchNorm2d(in_channel), 117 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), 118 | PReLU(depth), 119 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), 120 | BatchNorm2d(depth), 121 | SEModule(depth, 16) 122 | ) 123 | 124 | def forward(self, x): 125 | shortcut = self.shortcut_layer(x) 126 | res = self.res_layer(x) 127 | 128 | return res + shortcut 129 | 130 | 131 | class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 132 | '''A named tuple describing a ResNet block.''' 133 | 134 | 135 | def get_block(in_channel, depth, num_units, stride=2): 136 | 137 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] 138 | 139 | 140 | def get_blocks(num_layers): 141 | if num_layers == 50: 142 | blocks = [ 143 | get_block(in_channel=64, depth=64, num_units=3), 144 | get_block(in_channel=64, depth=128, num_units=4), 
145 | get_block(in_channel=128, depth=256, num_units=14), 146 | get_block(in_channel=256, depth=512, num_units=3) 147 | ] 148 | elif num_layers == 64: 149 | blocks = [ 150 | get_block(in_channel=64, depth=64, num_units=3), 151 | get_block(in_channel=64, depth=128, num_units=8), 152 | get_block(in_channel=128, depth=256, num_units=16), 153 | get_block(in_channel=256, depth=512, num_units=3) 154 | ] 155 | elif num_layers == 100: 156 | blocks = [ 157 | get_block(in_channel=64, depth=64, num_units=3), 158 | get_block(in_channel=64, depth=128, num_units=13), 159 | get_block(in_channel=128, depth=256, num_units=30), 160 | get_block(in_channel=256, depth=512, num_units=3) 161 | ] 162 | elif num_layers == 152: 163 | blocks = [ 164 | get_block(in_channel=64, depth=64, num_units=3), 165 | get_block(in_channel=64, depth=128, num_units=8), 166 | get_block(in_channel=128, depth=256, num_units=36), 167 | get_block(in_channel=256, depth=512, num_units=3) 168 | ] 169 | 170 | return blocks 171 | 172 | 173 | class Backbone(Module): 174 | def __init__(self, input_size, num_layers, mode='ir'): 175 | super(Backbone, self).__init__() 176 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 177 | assert num_layers in [50, 64, 100, 152], "num_layers should be 50, 64, 100 or 152" 178 | assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se" 179 | blocks = get_blocks(num_layers) 180 | if mode == 'ir': 181 | unit_module = bottleneck_IR 182 | elif mode == 'ir_se': 183 | unit_module = bottleneck_IR_SE 184 | self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), 185 | BatchNorm2d(64), 186 | PReLU(64)) 187 | if input_size[0] == 112: 188 | self.output_layer = Sequential(BatchNorm2d(512), 189 | Dropout(), 190 | Flatten(), 191 | Linear(512 * 7 * 7, 512), 192 | BatchNorm1d(512)) 193 | else: 194 | self.output_layer = Sequential(BatchNorm2d(512), 195 | Dropout(), 196 | Flatten(), 197 | Linear(512 * 14 * 14, 512), 198 | BatchNorm1d(512)) 199 | 200 | modules = [] 201 | for block in blocks: 202 | for bottleneck in block: 203 | modules.append( 204 | unit_module(bottleneck.in_channel, 205 | bottleneck.depth, 206 | bottleneck.stride)) 207 | self.body = Sequential(*modules) 208 | 209 | self._initialize_weights() 210 | 211 | def forward(self, x): 212 | x = self.input_layer(x) 213 | x = self.body(x) 214 | x = self.output_layer(x) 215 | 216 | return x 217 | 218 | def _initialize_weights(self): 219 | for m in self.modules(): 220 | if isinstance(m, nn.Conv2d): 221 | nn.init.xavier_uniform_(m.weight.data) 222 | if m.bias is not None: 223 | m.bias.data.zero_() 224 | elif isinstance(m, nn.BatchNorm2d): 225 | m.weight.data.fill_(1) 226 | m.bias.data.zero_() 227 | elif isinstance(m, nn.BatchNorm1d): 228 | m.weight.data.fill_(1) 229 | m.bias.data.zero_() 230 | elif isinstance(m, nn.Linear): 231 | nn.init.xavier_uniform_(m.weight.data) 232 | if m.bias is not None: 233 | m.bias.data.zero_() 234 | 235 | 236 | def IR_50(input_size): 237 | """Constructs a ir-50 model. 238 | """ 239 | model = Backbone(input_size, 50, 'ir') 240 | 241 | return model 242 | 243 | 244 | def IR_101(input_size): 245 | """Constructs a ir-101 model. 246 | """ 247 | model = Backbone(input_size, 100, 'ir') 248 | 249 | return model 250 | 251 | 252 | def IR_152(input_size): 253 | """Constructs a ir-152 model. 254 | """ 255 | model = Backbone(input_size, 152, 'ir') 256 | 257 | return model 258 | 259 | 260 | def IR_SE_50(input_size): 261 | """Constructs a ir_se-50 model. 
262 | """ 263 | model = Backbone(input_size, 50, 'ir_se') 264 | 265 | return model 266 | 267 | 268 | def IR_SE_64_DUL(input_size): 269 | """Construct an ir_se_64_dul model for DUL. --> namely, base on resnet_se_64 270 | """ 271 | model = Backbone(input_size, 64, mode='ir_se') 272 | model_dul = DUL_Backbone(model) 273 | 274 | return model_dul 275 | 276 | 277 | def IR_SE_101(input_size): 278 | """Constructs a ir_se-101 model. 279 | """ 280 | model = Backbone(input_size, 100, 'ir_se') 281 | 282 | return model 283 | 284 | 285 | def IR_SE_152(input_size): 286 | """Constructs a ir_se-152 model. 287 | """ 288 | model = Backbone(input_size, 152, 'ir_se') 289 | 290 | return model 291 | -------------------------------------------------------------------------------- /align/align_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from matlab_cp2tform import get_similarity_transform_for_cv2 4 | 5 | 6 | # reference facial points, a list of coordinates (x,y) 7 | REFERENCE_FACIAL_POINTS = [ # default reference facial points for crop_size = (112, 112); should adjust REFERENCE_FACIAL_POINTS accordingly for other crop_size 8 | [30.29459953, 51.69630051], 9 | [65.53179932, 51.50139999], 10 | [48.02519989, 71.73660278], 11 | [33.54930115, 92.3655014], 12 | [62.72990036, 92.20410156] 13 | ] 14 | 15 | DEFAULT_CROP_SIZE = (96, 112) 16 | 17 | 18 | class FaceWarpException(Exception): 19 | def __str__(self): 20 | return 'In File {}:{}'.format( 21 | __file__, super.__str__(self)) 22 | 23 | 24 | def get_reference_facial_points(output_size = None, 25 | inner_padding_factor = 0.0, 26 | outer_padding=(0, 0), 27 | default_square = False): 28 | """ 29 | Function: 30 | ---------- 31 | get reference 5 key points according to crop settings: 32 | 0. Set default crop_size: 33 | if default_square: 34 | crop_size = (112, 112) 35 | else: 36 | crop_size = (96, 112) 37 | 1. Pad the crop_size by inner_padding_factor in each side; 38 | 2. Resize crop_size into (output_size - outer_padding*2), 39 | pad into output_size with outer_padding; 40 | 3. Output reference_5point; 41 | Parameters: 42 | ---------- 43 | @output_size: (w, h) or None 44 | size of aligned face image 45 | @inner_padding_factor: (w_factor, h_factor) 46 | padding factor for inner (w, h) 47 | @outer_padding: (w_pad, h_pad) 48 | each row is a pair of coordinates (x, y) 49 | @default_square: True or False 50 | if True: 51 | default crop_size = (112, 112) 52 | else: 53 | default crop_size = (96, 112); 54 | !!! 
make sure, if output_size is not None: 55 | (output_size - outer_padding) 56 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 57 | Returns: 58 | ---------- 59 | @reference_5point: 5x2 np.array 60 | each row is a pair of transformed coordinates (x, y) 61 | """ 62 | #print('\n===> get_reference_facial_points():') 63 | 64 | #print('---> Params:') 65 | #print(' output_size: ', output_size) 66 | #print(' inner_padding_factor: ', inner_padding_factor) 67 | #print(' outer_padding:', outer_padding) 68 | #print(' default_square: ', default_square) 69 | 70 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 71 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 72 | 73 | # 0) make the inner region a square 74 | if default_square: 75 | size_diff = max(tmp_crop_size) - tmp_crop_size 76 | tmp_5pts += size_diff / 2 77 | tmp_crop_size += size_diff 78 | 79 | #print('---> default:') 80 | #print(' crop_size = ', tmp_crop_size) 81 | #print(' reference_5pts = ', tmp_5pts) 82 | 83 | if (output_size and 84 | output_size[0] == tmp_crop_size[0] and 85 | output_size[1] == tmp_crop_size[1]): 86 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 87 | return tmp_5pts 88 | 89 | if (inner_padding_factor == 0 and 90 | outer_padding == (0, 0)): 91 | if output_size is None: 92 | #print('No paddings to do: return default reference points') 93 | return tmp_5pts 94 | else: 95 | raise FaceWarpException( 96 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 97 | 98 | # check output size 99 | if not (0 <= inner_padding_factor <= 1.0): 100 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 101 | 102 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 103 | and output_size is None): 104 | output_size = tmp_crop_size * \ 105 | (1 + inner_padding_factor * 2).astype(np.int32) 106 | output_size += np.array(outer_padding) 107 | #print(' deduced from paddings, output_size = ', output_size) 108 | 109 | if not (outer_padding[0] < output_size[0] 110 | and outer_padding[1] < output_size[1]): 111 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 112 | 'and outer_padding[1] < output_size[1])') 113 | 114 | # 1) pad the inner region according inner_padding_factor 115 | #print('---> STEP1: pad the inner region according inner_padding_factor') 116 | if inner_padding_factor > 0: 117 | size_diff = tmp_crop_size * inner_padding_factor * 2 118 | tmp_5pts += size_diff / 2 119 | tmp_crop_size += np.round(size_diff).astype(np.int32) 120 | 121 | #print(' crop_size = ', tmp_crop_size) 122 | #print(' reference_5pts = ', tmp_5pts) 123 | 124 | # 2) resize the padded inner region 125 | #print('---> STEP2: resize the padded inner region') 126 | size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2 127 | #print(' crop_size = ', tmp_crop_size) 128 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 129 | 130 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 131 | raise FaceWarpException('Must have (output_size - outer_padding)' 132 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 133 | 134 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 135 | #print(' resize scale_factor = ', scale_factor) 136 | tmp_5pts = tmp_5pts * scale_factor 137 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 138 | # tmp_5pts = tmp_5pts + size_diff / 2 139 | tmp_crop_size = size_bf_outer_pad 140 | #print(' crop_size = ', 
tmp_crop_size) 141 | #print(' reference_5pts = ', tmp_5pts) 142 | 143 | # 3) add outer_padding to make output_size 144 | reference_5point = tmp_5pts + np.array(outer_padding) 145 | tmp_crop_size = output_size 146 | #print('---> STEP3: add outer_padding to make output_size') 147 | #print(' crop_size = ', tmp_crop_size) 148 | #print(' reference_5pts = ', tmp_5pts) 149 | 150 | #print('===> end get_reference_facial_points\n') 151 | 152 | return reference_5point 153 | 154 | 155 | def get_affine_transform_matrix(src_pts, dst_pts): 156 | """ 157 | Function: 158 | ---------- 159 | get affine transform matrix 'tfm' from src_pts to dst_pts 160 | Parameters: 161 | ---------- 162 | @src_pts: Kx2 np.array 163 | source points matrix, each row is a pair of coordinates (x, y) 164 | @dst_pts: Kx2 np.array 165 | destination points matrix, each row is a pair of coordinates (x, y) 166 | Returns: 167 | ---------- 168 | @tfm: 2x3 np.array 169 | transform matrix from src_pts to dst_pts 170 | """ 171 | 172 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 173 | n_pts = src_pts.shape[0] 174 | ones = np.ones((n_pts, 1), src_pts.dtype) 175 | src_pts_ = np.hstack([src_pts, ones]) 176 | dst_pts_ = np.hstack([dst_pts, ones]) 177 | 178 | # #print(('src_pts_:\n' + str(src_pts_)) 179 | # #print(('dst_pts_:\n' + str(dst_pts_)) 180 | 181 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 182 | 183 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 184 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 185 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 186 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 187 | 188 | if rank == 3: 189 | tfm = np.float32([ 190 | [A[0, 0], A[1, 0], A[2, 0]], 191 | [A[0, 1], A[1, 1], A[2, 1]] 192 | ]) 193 | elif rank == 2: 194 | tfm = np.float32([ 195 | [A[0, 0], A[1, 0], 0], 196 | [A[0, 1], A[1, 1], 0] 197 | ]) 198 | 199 | return tfm 200 | 201 | 202 | def warp_and_crop_face(src_img, 203 | facial_pts, 204 | reference_pts = None, 205 | crop_size=(96, 112), 206 | align_type = 'smilarity'): 207 | """ 208 | Function: 209 | ---------- 210 | apply affine transform 'trans' to uv 211 | Parameters: 212 | ---------- 213 | @src_img: 3x3 np.array 214 | input image 215 | @facial_pts: could be 216 | 1)a list of K coordinates (x,y) 217 | or 218 | 2) Kx2 or 2xK np.array 219 | each row or col is a pair of coordinates (x, y) 220 | @reference_pts: could be 221 | 1) a list of K coordinates (x,y) 222 | or 223 | 2) Kx2 or 2xK np.array 224 | each row or col is a pair of coordinates (x, y) 225 | or 226 | 3) None 227 | if None, use default reference facial points 228 | @crop_size: (w, h) 229 | output face image size 230 | @align_type: transform type, could be one of 231 | 1) 'similarity': use similarity transform 232 | 2) 'cv2_affine': use the first 3 points to do affine transform, 233 | by calling cv2.getAffineTransform() 234 | 3) 'affine': use all points to do affine transform 235 | Returns: 236 | ---------- 237 | @face_img: output face image with size (w, h) = @crop_size 238 | """ 239 | 240 | if reference_pts is None: 241 | if crop_size[0] == 96 and crop_size[1] == 112: 242 | reference_pts = REFERENCE_FACIAL_POINTS 243 | else: 244 | default_square = False 245 | inner_padding_factor = 0 246 | outer_padding = (0, 0) 247 | output_size = crop_size 248 | 249 | reference_pts = get_reference_facial_points(output_size, 250 | inner_padding_factor, 251 | outer_padding, 252 | default_square) 253 | 254 | ref_pts = np.float32(reference_pts) 255 | ref_pts_shp = ref_pts.shape 256 | if max(ref_pts_shp) < 3 or 
min(ref_pts_shp) != 2: 257 | raise FaceWarpException( 258 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 259 | 260 | if ref_pts_shp[0] == 2: 261 | ref_pts = ref_pts.T 262 | 263 | src_pts = np.float32(facial_pts) 264 | src_pts_shp = src_pts.shape 265 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 266 | raise FaceWarpException( 267 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 268 | 269 | if src_pts_shp[0] == 2: 270 | src_pts = src_pts.T 271 | 272 | # #print('--->src_pts:\n', src_pts 273 | # #print('--->ref_pts\n', ref_pts 274 | 275 | if src_pts.shape != ref_pts.shape: 276 | raise FaceWarpException( 277 | 'facial_pts and reference_pts must have the same shape') 278 | 279 | if align_type is 'cv2_affine': 280 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 281 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 282 | elif align_type is 'affine': 283 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 284 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 285 | else: 286 | tfm = get_similarity_transform_for_cv2(src_pts, ref_pts) 287 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 288 | 289 | # #print('--->Transform matrix: ' 290 | # #print(('type(tfm):' + str(type(tfm))) 291 | # #print(('tfm.dtype:' + str(tfm.dtype)) 292 | # #print( tfm 293 | 294 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 295 | 296 | return face_img -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch._C import device 3 | import torchvision.transforms as transforms 4 | import torch.nn.functional as F 5 | 6 | from .verification import evaluate 7 | 8 | from datetime import datetime 9 | import matplotlib.pyplot as plt 10 | plt.switch_backend('agg') 11 | import numpy as np 12 | from PIL import Image 13 | import bcolz 14 | import io 15 | import os 16 | import random 17 | import cv2 18 | 19 | 20 | # Support: ['get_time', 'l2_norm', 'make_weights_for_balanced_classes', 'get_val_pair', 'get_val_data', \ 21 | # 'separate_irse_bn_paras', 'separate_resnet_bn_paras', 'warm_up_lr', 'schedule_lr', 'de_preprocess', \ 22 | # 'hflip_batch', 'ccrop_batch', 'gen_plot', 'perform_val', 'buffer_val', 'AverageMeter', 'accuracy', \ 23 | # 'add_gaussian_noise', 'get_data_pair', 'perform_face_recog'] 24 | 25 | 26 | def get_time(): 27 | return (str(datetime.now())[:-10]).replace(' ', '-').replace(':', '-') 28 | 29 | 30 | def l2_norm(input, axis = 1): 31 | norm = torch.norm(input, 2, axis, True) 32 | output = torch.div(input, norm) 33 | 34 | return output 35 | 36 | 37 | def make_weights_for_balanced_classes(images, nclasses): 38 | ''' 39 | Make a vector of weights for each image in the dataset, based 40 | on class frequency. The returned vector of weights can be used 41 | to create a WeightedRandomSampler for a DataLoader to have 42 | class balancing when sampling for a training batch. 43 | images - torchvisionDataset.imgs 44 | nclasses - len(torchvisionDataset.classes) 45 | https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/3 46 | ''' 47 | count = [0] * nclasses 48 | for item in images: 49 | count[item[1]] += 1 # item is (img-data, label-id) 50 | weight_per_class = [0.] 
* nclasses 51 | N = float(sum(count)) # total number of images 52 | for i in range(nclasses): 53 | weight_per_class[i] = N / float(count[i]) 54 | weight = [0] * len(images) 55 | for idx, val in enumerate(images): 56 | weight[idx] = weight_per_class[val[1]] 57 | 58 | return weight 59 | 60 | 61 | def get_val_pair(path, name): 62 | carray = bcolz.carray(rootdir = os.path.join(path, name), mode = 'r') 63 | issame = np.load('{}/{}_list.npy'.format(path, name)) 64 | 65 | return carray, issame 66 | 67 | 68 | def get_val_data(data_path): 69 | lfw, lfw_issame = get_val_pair(data_path, 'lfw') 70 | cfp_ff, cfp_ff_issame = get_val_pair(data_path, 'cfp_ff') 71 | cfp_fp, cfp_fp_issame = get_val_pair(data_path, 'cfp_fp') 72 | agedb_30, agedb_30_issame = get_val_pair(data_path, 'agedb_30') 73 | calfw, calfw_issame = get_val_pair(data_path, 'calfw') 74 | cplfw, cplfw_issame = get_val_pair(data_path, 'cplfw') 75 | vgg2_fp, vgg2_fp_issame = get_val_pair(data_path, 'vgg2_fp') 76 | 77 | return lfw, cfp_ff, cfp_fp, agedb_30, calfw, cplfw, vgg2_fp, lfw_issame, cfp_ff_issame, cfp_fp_issame, agedb_30_issame, calfw_issame, cplfw_issame, vgg2_fp_issame 78 | 79 | 80 | def separate_irse_bn_paras(modules): 81 | if not isinstance(modules, list): 82 | modules = [*modules.modules()] 83 | paras_only_bn = [] 84 | paras_wo_bn = [] 85 | for layer in modules: 86 | if 'model' in str(layer.__class__): 87 | continue 88 | if 'container' in str(layer.__class__): 89 | continue 90 | else: 91 | if 'batchnorm' in str(layer.__class__): 92 | paras_only_bn.extend([*layer.parameters()]) 93 | else: 94 | paras_wo_bn.extend([*layer.parameters()]) 95 | 96 | return paras_only_bn, paras_wo_bn 97 | 98 | 99 | def separate_resnet_bn_paras(modules): 100 | all_parameters = modules.parameters() 101 | paras_only_bn = [] 102 | 103 | for pname, p in modules.named_parameters(): 104 | if pname.find('bn') >= 0: 105 | paras_only_bn.append(p) 106 | 107 | paras_only_bn_id = list(map(id, paras_only_bn)) 108 | paras_wo_bn = list(filter(lambda p: id(p) not in paras_only_bn_id, all_parameters)) 109 | 110 | return paras_only_bn, paras_wo_bn 111 | 112 | 113 | def warm_up_lr(batch, num_batch_warm_up, init_lr, optimizer): 114 | for params in optimizer.param_groups: 115 | params['lr'] = batch * init_lr / num_batch_warm_up 116 | 117 | # print(optimizer) 118 | 119 | 120 | def schedule_lr(optimizer): 121 | for params in optimizer.param_groups: 122 | params['lr'] /= 10. 
123 | 124 | print(optimizer) 125 | 126 | 127 | def de_preprocess(tensor): 128 | 129 | return tensor * 0.5 + 0.5 130 | 131 | 132 | hflip = transforms.Compose([ 133 | de_preprocess, 134 | transforms.ToPILImage(), 135 | transforms.functional.hflip, 136 | transforms.ToTensor(), 137 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 138 | ]) 139 | 140 | 141 | def hflip_batch(imgs_tensor): 142 | hfliped_imgs = torch.empty_like(imgs_tensor) 143 | for i, img_ten in enumerate(imgs_tensor): 144 | hfliped_imgs[i] = hflip(img_ten) 145 | 146 | return hfliped_imgs 147 | 148 | 149 | ccrop = transforms.Compose([ 150 | de_preprocess, 151 | transforms.ToPILImage(), 152 | transforms.Resize([128, 128]), # smaller side resized 153 | transforms.CenterCrop([112, 112]), 154 | transforms.ToTensor(), 155 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 156 | ]) 157 | 158 | 159 | def ccrop_batch(imgs_tensor): 160 | ccropped_imgs = torch.empty_like(imgs_tensor) 161 | for i, img_ten in enumerate(imgs_tensor): 162 | ccropped_imgs[i] = ccrop(img_ten) 163 | 164 | return ccropped_imgs 165 | 166 | 167 | def gen_plot(fpr, tpr): 168 | """Create a pyplot plot and save to buffer.""" 169 | plt.figure() 170 | plt.xlabel("FPR", fontsize = 14) 171 | plt.ylabel("TPR", fontsize = 14) 172 | plt.title("ROC Curve", fontsize = 14) 173 | plot = plt.plot(fpr, tpr, linewidth = 2) 174 | buf = io.BytesIO() 175 | plt.savefig(buf, format = 'jpeg') 176 | buf.seek(0) 177 | plt.close() 178 | 179 | return buf 180 | 181 | 182 | def perform_val(multi_gpu, device, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True): 183 | if multi_gpu: 184 | backbone = backbone.module # unpackage model from DataParallel 185 | backbone = backbone.to(device) 186 | else: 187 | backbone = backbone.to(device) 188 | backbone.eval() # switch to evaluation mode 189 | 190 | idx = 0 191 | embeddings = np.zeros([len(carray), embedding_size]) 192 | with torch.no_grad(): 193 | while idx + batch_size <= len(carray): 194 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 195 | if tta: 196 | ccropped = ccrop_batch(batch) 197 | fliped = hflip_batch(ccropped) 198 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 199 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 200 | else: 201 | ccropped = ccrop_batch(batch) 202 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.to(device))).cpu() 203 | idx += batch_size 204 | if idx < len(carray): 205 | batch = torch.tensor(carray[idx:]) 206 | if tta: 207 | ccropped = ccrop_batch(batch) 208 | fliped = hflip_batch(ccropped) 209 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 210 | embeddings[idx:] = l2_norm(emb_batch) 211 | else: 212 | ccropped = ccrop_batch(batch) 213 | embeddings[idx:] = l2_norm(backbone(ccropped.to(device))).cpu() 214 | 215 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 216 | buf = gen_plot(fpr, tpr) 217 | roc_curve = Image.open(buf) 218 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 219 | 220 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 221 | 222 | 223 | def buffer_val(writer, db_name, acc, best_threshold, roc_curve_tensor, epoch): 224 | writer.add_scalar('{}_Accuracy'.format(db_name), acc, epoch) 225 | writer.add_scalar('{}_Best_Threshold'.format(db_name), best_threshold, epoch) 226 | writer.add_image('{}_ROC_Curve'.format(db_name), roc_curve_tensor, epoch) 227 | 228 | 229 | class AverageMeter(object): 230 | 
"""Computes and stores the average and current value""" 231 | def __init__(self): 232 | self.reset() 233 | 234 | def reset(self): 235 | self.val = 0 236 | self.avg = 0 237 | self.sum = 0 238 | self.count = 0 239 | 240 | def update(self, val, n = 1): 241 | self.val = val 242 | self.sum += val * n 243 | self.count += n 244 | self.avg = self.sum / self.count 245 | 246 | 247 | def accuracy(output, target, topk=(1,)): 248 | """Computes the precision@k for the specified values of k""" 249 | maxk = max(topk) 250 | batch_size = target.size(0) 251 | 252 | _, pred = output.topk(maxk, 1, True, True) 253 | pred = pred.t() 254 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 255 | 256 | res = [] 257 | for k in topk: 258 | correct_k = correct[:k].reshape(-1).float().sum(0) 259 | res.append(correct_k.mul_(100.0 / batch_size)) 260 | 261 | return res 262 | 263 | # ----- self definition 264 | 265 | class add_gaussian_noise(object): 266 | def __init__(self, mean=0.0, var=30, p=0.0): 267 | self.mean = mean 268 | self.var = var 269 | self.p = p 270 | def __call__(self, img): 271 | if random.uniform(0, 1) < self.p: 272 | std = self.var**0.5 273 | image_array = np.array(img) 274 | noisy_img = image_array + np.random.normal(self.mean, std, image_array.shape) 275 | noisy_img_clipped = np.clip(noisy_img, 0, 255).astype(np.uint8) 276 | return Image.fromarray(noisy_img_clipped) 277 | else: 278 | return img 279 | 280 | 281 | def get_data_pair(path, name): 282 | carray = bcolz.carray(rootdir = os.path.join(path, name), mode = 'r') 283 | issame = np.load('{}/{}_list.npy'.format(path, name)) 284 | 285 | return carray, issame 286 | 287 | 288 | def perform_face_recog(multi_gpu, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True, outfolder=''): 289 | if outfolder: 290 | os.makedirs(outfolder, exist_ok=True) 291 | if multi_gpu: 292 | backbone = backbone.module # unpackage model from DataParallel 293 | backbone = backbone.cuda() 294 | else: 295 | backbone = backbone.cuda() 296 | backbone.eval() # switch to evaluation mode 297 | idx = 0 298 | embeddings = np.zeros([len(carray), embedding_size]) 299 | with torch.no_grad(): 300 | while idx + batch_size <= len(carray): 301 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 302 | if tta: 303 | ccropped = ccrop_batch(batch) 304 | fliped = hflip_batch(ccropped) 305 | # emb_batch = backbone(ccropped.cuda()).cpu() + backbone(fliped.cuda()).cpu() 306 | # backbone return image_features and logvar, use image_features only here; besides, convert to cpu-tensor, then to numpy 307 | emb_batch = backbone(ccropped.cuda())[0].cpu() + backbone(fliped.cuda())[0].cpu() 308 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 309 | else: 310 | ccropped = ccrop_batch(batch) 311 | # embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.cuda())).cpu() 312 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.cuda())[0].cpu()) 313 | idx += batch_size 314 | # print('idx: %d'%idx) 315 | if idx < len(carray): 316 | batch = torch.tensor(carray[idx:]) 317 | if tta: 318 | ccropped = ccrop_batch(batch) 319 | fliped = hflip_batch(ccropped) 320 | # emb_batch = backbone(ccropped.cuda()).cpu() + backbone(fliped.cuda()).cpu() 321 | emb_batch = backbone(ccropped.cuda())[0].cpu() + backbone(fliped.cuda())[0].cpu() 322 | embeddings[idx:] = l2_norm(emb_batch) 323 | else: 324 | ccropped = ccrop_batch(batch) 325 | # embeddings[idx:] = l2_norm(backbone(ccropped.cuda())).cpu() 326 | embeddings[idx:] = l2_norm(backbone(ccropped.cuda())[0].cpu()) 
327 | 328 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 329 | buf = gen_plot(fpr, tpr) 330 | roc_curve = Image.open(buf) 331 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 332 | 333 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 334 | -------------------------------------------------------------------------------- /train_dul.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from torch.optim.optimizer import Optimizer 6 | import torchvision.transforms as transforms 7 | import torchvision.datasets as datasets 8 | 9 | from config import Backbone_Dict, dul_args_func 10 | from head.metrics import ArcFace, CosFace, SphereFace, Am_softmax, Softmax 11 | from loss.focal import FocalLoss 12 | from util.utils import make_weights_for_balanced_classes, separate_irse_bn_paras, \ 13 | warm_up_lr, schedule_lr, get_time, AverageMeter, accuracy, add_gaussian_noise 14 | 15 | from tensorboardX import SummaryWriter, writer 16 | import os 17 | import time 18 | import numpy as np 19 | from PIL import Image 20 | import random 21 | 22 | 23 | class DUL_Trainer(): 24 | def __init__(self, dul_args): 25 | self.dul_args = dul_args 26 | self.dul_args.gpu_id = [int(item) for item in self.dul_args.gpu_id] 27 | self.dul_args.stages = [int(item) for item in self.dul_args.stages] 28 | 29 | def _report_configurations(self): 30 | print('=' * 60) 31 | print('Experiment time: ', get_time()) 32 | print('=' * 60) 33 | print('Overall Configurations:') 34 | print('=' * 60) 35 | for k in self.dul_args.__dict__: 36 | print(" '{}' : '{}' ".format(k, str(self.dul_args.__dict__[k]))) 37 | os.makedirs(self.dul_args.model_save_folder, exist_ok=True) 38 | os.makedirs(self.dul_args.log_tensorboard, exist_ok=True) 39 | writer = SummaryWriter(self.dul_args.log_tensorboard) 40 | return writer 41 | 42 | 43 | def _data_loader(self): 44 | if self.dul_args.center_crop: 45 | train_transform = transforms.Compose([ 46 | transforms.Resize([int(128 * self.dul_args.input_size[0] / 112), int(128 * self.dul_args.input_size[0] / 112)]), 47 | transforms.RandomCrop([self.dul_args.input_size[0], self.dul_args.input_size[1]]), 48 | transforms.RandomHorizontalFlip(), 49 | add_gaussian_noise(p=self.dul_args.image_noise), 50 | transforms.ToTensor(), 51 | transforms.Normalize(mean = self.dul_args.rgb_mean, 52 | std = self.dul_args.rgb_std), 53 | #transforms.RandomErasing(scale=(0.02,0.25)) 54 | ]) 55 | else: 56 | train_transform = transforms.Compose([ # refer to https://pytorch.org/docs/stable/torchvision/transforms.html for more build-in online data augmentation 57 | transforms.Resize([112, 112]), # smaller side resized 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | transforms.Normalize(mean = self.dul_args.rgb_mean, 61 | std = self.dul_args.rgb_std), 62 | transforms.RandomErasing(scale=(0.02,0.25)) 63 | ]) 64 | 65 | dataset_train = datasets.ImageFolder(self.dul_args.trainset_folder, train_transform) 66 | 67 | # ----- create a weighted random sampler to process imbalanced data 68 | weights = make_weights_for_balanced_classes(dataset_train.imgs, len(dataset_train.classes)) 69 | weights = torch.DoubleTensor(weights) 70 | sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights)) 71 | 72 | train_loader = torch.utils.data.DataLoader( 73 | dataset_train, sampler=sampler, batch_size=self.dul_args.batch_size, 74 | 
pin_memory=self.dul_args.pin_memory, num_workers=self.dul_args.num_workers, 75 | drop_last=self.dul_args.drop_last, 76 | ) 77 | 78 | num_class = len(train_loader.dataset.classes) 79 | print('=' * 60) 80 | print("Number of Training Classes: '{}' ".format(num_class)) 81 | 82 | return train_loader, num_class 83 | 84 | 85 | def _model_loader(self, num_class): 86 | # ----- backbone generate 87 | BACKBONE = Backbone_Dict[self.dul_args.backbone_name] 88 | print("=" * 60) 89 | print("Backbone Generated: '{}' ".format(self.dul_args.backbone_name)) 90 | 91 | # ----- head generate 92 | Head_Dict = { 93 | 'ArcFace': ArcFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id, s=self.dul_args.arcface_scale), 94 | 'CosFace': CosFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 95 | 'SphereFace': SphereFace(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 96 | 'Am_softmax': Am_softmax(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id), 97 | 'Softmax': Softmax(in_features = self.dul_args.embedding_size, out_features = num_class, device_id = self.dul_args.gpu_id) 98 | } 99 | HEAD = Head_Dict[self.dul_args.head_name] 100 | print("=" * 60) 101 | print("Head Generated: '{}' ".format(self.dul_args.head_name)) 102 | 103 | # ----- loss generate 104 | Loss_Dict = { 105 | 'Focal': FocalLoss(), 106 | 'Softmax': nn.CrossEntropyLoss() 107 | } 108 | LOSS = Loss_Dict[self.dul_args.loss_name] 109 | print("=" * 60) 110 | print("Loss Generated: '{}' ".format(self.dul_args.loss_name)) 111 | # ----- separate batch_norm parameters from others; do not do weight decay for batch_norm parameters to improve the generalizability 112 | backbone_paras_only_bn, backbone_paras_wo_bn = separate_irse_bn_paras(BACKBONE) 113 | _, head_paras_wo_bn = separate_irse_bn_paras(HEAD) 114 | 115 | # ----- optimizer generate 116 | Optimizer_Dict = { 117 | 'SGD': optim.SGD([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': self.dul_args.weight_decay}, 118 | {'params': backbone_paras_only_bn}], lr=self.dul_args.lr, momentum=self.dul_args.momentum), 119 | 'Adam': optim.Adam([{'params': backbone_paras_wo_bn + head_paras_wo_bn, 'weight_decay': self.dul_args.weight_decay}, 120 | {'params': backbone_paras_only_bn}], lr=self.dul_args.lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=0) 121 | } 122 | OPTIMIZER = Optimizer_Dict[self.dul_args.optimizer] 123 | print("=" * 60) 124 | print("Optimizer Generated: '{}' ".format(self.dul_args.optimizer)) 125 | print(OPTIMIZER) 126 | 127 | # ----- optional resume 128 | if self.dul_args.resume_backbone or self.dul_args.resume_head: 129 | print("=" * 60) 130 | if os.path.isfile(self.dul_args.resume_backbone): 131 | print("Loading Backbone Checkpoint '{}'".format(self.dul_args.resume_backbone)) 132 | BACKBONE.load_state_dict(torch.load(self.dul_args.resume_backbone)) 133 | if os.path.isfile(self.dul_args.resume_head): 134 | print("Loading Head Checkpoint '{}'".format(self.dul_args.resume_head)) 135 | try: 136 | HEAD.load_state_dict(torch.load(self.dul_args.resume_head)) 137 | except Exception as e: 138 | print(e) 139 | else: 140 | print("No Checkpoint Found at '{}' and '{}'. 
Please Have a Check or Continue to Train from Scratch".\ 141 | format(self.dul_args.resume_backbone, self.dul_args.resume_head)) 142 | 143 | # ----- multi-gpu or single-gpu 144 | if self.dul_args.multi_gpu: 145 | BACKBONE = nn.DataParallel(BACKBONE, device_ids=self.dul_args.gpu_id).cuda() 146 | HEAD = HEAD.cuda() 147 | LOSS = LOSS.cuda() 148 | else: 149 | BACKBONE = BACKBONE.cuda() 150 | HEAD = HEAD.cuda() 151 | LOSS = LOSS.cuda() 152 | 153 | return BACKBONE, HEAD, LOSS, OPTIMIZER 154 | 155 | 156 | 157 | def _dul_runner(self): 158 | writer = self._report_configurations() 159 | 160 | train_loader, num_class = self._data_loader() 161 | 162 | BACKBONE, HEAD, LOSS, OPTIMIZER = self._model_loader(num_class=num_class) 163 | 164 | DISP_FREQ = len(train_loader) // 100 # frequency to display training loss & acc 165 | 166 | NUM_EPOCH_WARM_UP = self.dul_args.warm_up_epoch 167 | NUM_BATCH_WARM_UP = int(len(train_loader) * NUM_EPOCH_WARM_UP) 168 | batch = 0 # batch index 169 | 170 | print('=' * 60) 171 | print("Display Freqency: '{}' ".format(DISP_FREQ)) 172 | print("Number of Epoch for Warm Up: '{}' ".format(NUM_EPOCH_WARM_UP)) 173 | print("Number of Batch for Warm Up: '{}' ".format(NUM_BATCH_WARM_UP)) 174 | print('Start Training: ') 175 | 176 | for epoch in range(self.dul_args.num_epoch): 177 | if epoch == self.dul_args.stages[0]: 178 | schedule_lr(OPTIMIZER) 179 | elif epoch == self.dul_args.stages[1]: 180 | schedule_lr(OPTIMIZER) 181 | if epoch < self.dul_args.resume_epoch: 182 | continue 183 | 184 | BACKBONE.train() # set to training mode 185 | HEAD.train() 186 | BACKBONE.training = True 187 | 188 | losses = AverageMeter() 189 | top1 = AverageMeter() 190 | top5 = AverageMeter() 191 | losses_KL = AverageMeter() 192 | 193 | for inputs, labels in train_loader: 194 | if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (batch + 1 <= NUM_BATCH_WARM_UP): # adjust LR for each training batch during warm up 195 | warm_up_lr(batch + 1, NUM_BATCH_WARM_UP, self.dul_args.lr, OPTIMIZER) 196 | 197 | inputs = inputs.cuda() 198 | labels = labels.cuda().long() 199 | loss = 0 200 | 201 | mu_dul, std_dul = BACKBONE(inputs) # namely, mean and std 202 | 203 | epsilon = torch.randn_like(std_dul) 204 | features = mu_dul + epsilon * std_dul 205 | variance_dul = std_dul**2 206 | 207 | # Not sure which one shoul be used, see this issue: https://github.com/MouxiaoHuang/DUL/issues/5 208 | # loss_kl = ((variance_dul + mu_dul**2 - torch.log(variance_dul) - 1) * 0.5).sum(dim=-1).mean() 209 | loss_kl = ((variance_dul + mu_dul ** 2 - torch.log(variance_dul + 1e-8) - 1) * 0.5).mean() 210 | losses_KL.update(loss_kl.item(), inputs.size(0)) 211 | loss += self.dul_args.kl_scale * loss_kl 212 | 213 | outputs = HEAD(features, labels) 214 | 215 | loss_head = LOSS(outputs, labels) 216 | 217 | loss += loss_head 218 | 219 | # measure accuracy and record loss 220 | prec1, prec5 = accuracy(outputs.data, labels, topk = (1, 5)) 221 | losses.update(loss_head.data.item(), inputs.size(0)) 222 | top1.update(prec1.data.item(), inputs.size(0)) 223 | top5.update(prec5.data.item(), inputs.size(0)) 224 | 225 | # compute gradient and do SGD step 226 | OPTIMIZER.zero_grad() 227 | loss.backward() 228 | OPTIMIZER.step() 229 | 230 | # dispaly training loss & acc every DISP_FREQ 231 | if ((batch + 1) % DISP_FREQ == 0) and batch != 0: 232 | print("=" * 60, flush=True) 233 | print('Epoch {}/{} Batch {}/{}\t' 234 | 'Time {}\t' 235 | 'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 236 | 'Training Loss_KL {loss_KL.val:.4f} ({loss_KL.avg:.4f})\t' 237 | 'Training Prec@1 
{top1.val:.3f} ({top1.avg:.3f})\t' 238 | 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 239 | epoch + 1, self.dul_args.num_epoch, batch + 1, len(train_loader) * self.dul_args.num_epoch, time.asctime(time.localtime(time.time())), loss = losses, loss_KL=losses_KL, top1 = top1, top5 = top5), flush=True) 240 | 241 | batch += 1 # batch index 242 | # training statistics per epoch (buffer for visualization) 243 | epoch_loss = losses.avg 244 | epoch_acc = top1.avg 245 | writer.add_scalar("Training_Loss", epoch_loss, epoch + 1) 246 | writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1) 247 | print("=" * 60, flush=True) 248 | print('Epoch: {}/{}\t' 249 | 'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 250 | 'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 251 | 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 252 | epoch + 1, self.dul_args.num_epoch, loss = losses, top1 = top1, top5 = top5), flush=True) 253 | 254 | # ----- save model 255 | if epoch==4 or epoch==7 or epoch==12 or epoch>17: 256 | print("=" * 60, flush=True) 257 | print('Saving NO.EPOCH {} trained model'.format(epoch+1), flush=True) 258 | if self.dul_args.multi_gpu: 259 | torch.save(BACKBONE.module.state_dict(), os.path.join(self.dul_args.model_save_folder, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.backbone_name, epoch + 1, batch, get_time()))) 260 | torch.save(HEAD.state_dict(), os.path.join(self.dul_args.model_save_folder, "Head_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.head_name, epoch + 1, batch, get_time()))) 261 | else: 262 | torch.save(BACKBONE.state_dict(), os.path.join(self.dul_args.model_save_folder, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.backbone_name, epoch + 1, batch, get_time()))) 263 | torch.save(HEAD.state_dict(), os.path.join(self.dul_args.model_save_folder, "Head_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(self.dul_args.head_name, epoch + 1, batch, get_time()))) 264 | print('=' * 60, flush=True) 265 | print('Training process finished!', flush=True) 266 | print('=' * 60, flush=True) 267 | 268 | 269 | if __name__ == '__main__': 270 | dul_train = DUL_Trainer(dul_args_func()) 271 | dul_train._dul_runner() 272 | -------------------------------------------------------------------------------- /head/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn import Parameter 7 | import math 8 | 9 | 10 | # Support: ['Softmax', 'ArcFace', 'CosFace', 'SphereFace', 'Am_softmax'] 11 | 12 | class CircleLoss(nn.Module): 13 | def __init__(self, in_features, out_features, device_id, s = 256.0, m = 0.35): 14 | super(CircleLoss, self).__init__() 15 | self.in_features = in_features 16 | self.out_features = out_features 17 | self.device_id = device_id 18 | 19 | self.s = s 20 | self.O_p = 1 + m 21 | self.O_n = - m 22 | self.delta_p = 1 - m 23 | self.delta_n = m 24 | 25 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 26 | nn.init.xavier_uniform_(self.weight) 27 | 28 | def forward(self, input, label): 29 | # --------------------------- cos(theta) & phi(theta) --------------------------- 30 | if self.device_id == None: 31 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 32 | else: 33 | x = input 34 | sub_weights = torch.chunk(self.weight, len(self.device_id), 
dim=0) 35 | temp_x = x.cuda(self.device_id[0]) 36 | weight = sub_weights[0].cuda(self.device_id[0]) 37 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 38 | for i in range(1, len(self.device_id)): 39 | temp_x = x.cuda(self.device_id[i]) 40 | weight = sub_weights[i].cuda(self.device_id[i]) 41 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 42 | 43 | scores = cosine 44 | # --------------------------- convert label to one-hot --------------------------- 45 | one_hot = torch.zeros(cosine.size()) 46 | 47 | alpha_p = (self.O_p - scores.detach()).clamp(min=0.) 48 | alpha_n = (scores.detach() - self.O_n).clamp(min=0.) 49 | 50 | one_hot = torch.zeros(scores.size()) 51 | if self.device_id != None: 52 | one_hot = one_hot.cuda(self.device_id[0]) 53 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 54 | 55 | output = (one_hot * (alpha_p * (scores - self.delta_p)) + (1.0 - one_hot) * (alpha_n * (scores - self.delta_n))) 56 | output *= self.s 57 | return output 58 | 59 | 60 | class Softmax(nn.Module): 61 | r"""Implement of Softmax (normal classification head): 62 | Args: 63 | in_features: size of each input sample 64 | out_features: size of each output sample 65 | device_id: the ID of GPU where the model will be trained by model parallel. 66 | if device_id=None, it will be trained on CPU without model parallel. 67 | """ 68 | def __init__(self, in_features, out_features, device_id): 69 | super(Softmax, self).__init__() 70 | self.in_features = in_features 71 | self.out_features = out_features 72 | self.device_id = device_id 73 | 74 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 75 | self.bias = Parameter(torch.FloatTensor(out_features)) 76 | nn.init.xavier_uniform_(self.weight) 77 | nn.init.zeros_(self.bias) 78 | 79 | def forward(self, x, label): 80 | if self.device_id == None: 81 | out = F.linear(x, self.weight, self.bias) 82 | else: 83 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 84 | sub_biases = torch.chunk(self.bias, len(self.device_id), dim=0) 85 | temp_x = x.cuda(self.device_id[0]) 86 | weight = sub_weights[0].cuda(self.device_id[0]) 87 | bias = sub_biases[0].cuda(self.device_id[0]) 88 | out = F.linear(temp_x, weight, bias) 89 | for i in range(1, len(self.device_id)): 90 | temp_x = x.cuda(self.device_id[i]) 91 | weight = sub_weights[i].cuda(self.device_id[i]) 92 | bias = sub_biases[i].cuda(self.device_id[i]) 93 | out = torch.cat((out, F.linear(temp_x, weight, bias).cuda(self.device_id[0])), dim=1) 94 | return out 95 | 96 | def _initialize_weights(self): 97 | for m in self.modules(): 98 | if isinstance(m, nn.Conv2d): 99 | nn.init.xavier_uniform_(m.weight.data) 100 | if m.bias is not None: 101 | m.bias.data.zeros_() 102 | elif isinstance(m, nn.BatchNorm2d): 103 | m.weight.data.fill_(1) 104 | m.bias.data.zeros_() 105 | elif isinstance(m, nn.BatchNorm1d): 106 | m.weight.data.fill_(1) 107 | m.bias.data.zeros_() 108 | elif isinstance(m, nn.Linear): 109 | nn.init.xavier_uniform_(m.weight.data) 110 | if m.bias is not None: 111 | m.bias.data.zeros_() 112 | 113 | 114 | class ArcFace(nn.Module): 115 | r"""Implement of ArcFace (https://arxiv.org/pdf/1801.07698v1.pdf): 116 | Args: 117 | in_features: size of each input sample 118 | out_features: size of each output sample 119 | device_id: the ID of GPU where the model will be trained by model parallel. 120 | if device_id=None, it will be trained on CPU without model parallel. 
121 | s: norm of input feature 122 | m: margin 123 | cos(theta+m) 124 | """ 125 | def __init__(self, in_features, out_features, device_id, s = 64.0, m = 0.50, easy_margin = False): 126 | super(ArcFace, self).__init__() 127 | self.in_features = in_features 128 | self.out_features = out_features 129 | self.device_id = device_id 130 | 131 | self.s = s 132 | self.m = m 133 | 134 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 135 | nn.init.xavier_uniform_(self.weight) 136 | 137 | self.easy_margin = easy_margin 138 | self.cos_m = math.cos(m) 139 | self.sin_m = math.sin(m) 140 | self.th = math.cos(math.pi - m) 141 | self.mm = math.sin(math.pi - m) * m 142 | 143 | def forward(self, input, label): 144 | # --------------------------- cos(theta) & phi(theta) --------------------------- 145 | if self.device_id == None: 146 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 147 | else: 148 | x = input 149 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 150 | temp_x = x.cuda(self.device_id[0]) 151 | weight = sub_weights[0].cuda(self.device_id[0]) 152 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 153 | for i in range(1, len(self.device_id)): 154 | temp_x = x.cuda(self.device_id[i]) 155 | weight = sub_weights[i].cuda(self.device_id[i]) 156 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 157 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 158 | phi = cosine * self.cos_m - sine * self.sin_m 159 | if self.easy_margin: 160 | phi = torch.where(cosine > 0, phi, cosine) 161 | else: 162 | phi = torch.where(cosine > self.th, phi, cosine - self.mm) 163 | # --------------------------- convert label to one-hot --------------------------- 164 | one_hot = torch.zeros(cosine.size()) 165 | if self.device_id != None: 166 | one_hot = one_hot.cuda(self.device_id[0]) 167 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 168 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 169 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4 170 | output *= self.s 171 | 172 | return output 173 | 174 | 175 | class CosFace(nn.Module): 176 | r"""Implement of CosFace (https://arxiv.org/pdf/1801.09414.pdf): 177 | Args: 178 | in_features: size of each input sample 179 | out_features: size of each output sample 180 | device_id: the ID of GPU where the model will be trained by model parallel. 181 | if device_id=None, it will be trained on CPU without model parallel. 
182 | s: norm of input feature 183 | m: margin 184 | cos(theta)-m 185 | """ 186 | def __init__(self, in_features, out_features, device_id, s = 64.0, m = 0.35): 187 | super(CosFace, self).__init__() 188 | self.in_features = in_features 189 | self.out_features = out_features 190 | self.device_id = device_id 191 | self.s = s 192 | self.m = m 193 | 194 | self.weight = Parameter(torch.FloatTensor(out_features, in_features)) 195 | nn.init.xavier_uniform_(self.weight) 196 | 197 | def forward(self, input, label): 198 | # --------------------------- cos(theta) & phi(theta) --------------------------- 199 | if self.device_id == None: 200 | cosine = F.linear(F.normalize(input), F.normalize(self.weight)) 201 | else: 202 | x = input 203 | sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0) 204 | temp_x = x.cuda(self.device_id[0]) 205 | weight = sub_weights[0].cuda(self.device_id[0]) 206 | cosine = F.linear(F.normalize(temp_x), F.normalize(weight)) 207 | for i in range(1, len(self.device_id)): 208 | temp_x = x.cuda(self.device_id[i]) 209 | weight = sub_weights[i].cuda(self.device_id[i]) 210 | cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 211 | phi = cosine - self.m 212 | # --------------------------- convert label to one-hot --------------------------- 213 | one_hot = torch.zeros(cosine.size()) 214 | if self.device_id != None: 215 | one_hot = one_hot.cuda(self.device_id[0]) 216 | # one_hot = one_hot.cuda() if cosine.is_cuda else one_hot 217 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 218 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 219 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4 220 | output *= self.s 221 | 222 | return output 223 | 224 | def __repr__(self): 225 | return self.__class__.__name__ + '(' \ 226 | + 'in_features = ' + str(self.in_features) \ 227 | + ', out_features = ' + str(self.out_features) \ 228 | + ', s = ' + str(self.s) \ 229 | + ', m = ' + str(self.m) + ')' 230 | 231 | class SphereFace(nn.Module): 232 | r"""Implement of SphereFace (https://arxiv.org/pdf/1704.08063.pdf): 233 | Args: 234 | in_features: size of each input sample 235 | out_features: size of each output sample 236 | device_id: the ID of GPU where the model will be trained by model parallel. 237 | if device_id=None, it will be trained on CPU without model parallel. 

class SphereFace(nn.Module):
    r"""Implementation of SphereFace (https://arxiv.org/pdf/1704.08063.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the list of GPU ids used for model-parallel training;
                   if device_id is None, the head runs on the CPU without model parallelism.
        m: margin
        cos(m * theta)
    """
    def __init__(self, in_features, out_features, device_id, m = 4):
        super(SphereFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.m = m
        self.base = 1000.0
        self.gamma = 0.12
        self.power = 1
        self.LambdaMin = 5.0
        self.iter = 0
        self.device_id = device_id

        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        # multiple-angle formulas for cos(m * theta) (Chebyshev polynomials)
        self.mlambda = [
            lambda x: x ** 0,
            lambda x: x ** 1,
            lambda x: 2 * x ** 2 - 1,
            lambda x: 4 * x ** 3 - 3 * x,
            lambda x: 8 * x ** 4 - 8 * x ** 2 + 1,
            lambda x: 16 * x ** 5 - 20 * x ** 3 + 5 * x
        ]

    def forward(self, input, label):
        # lambda = max(lambda_min, base * (1 + gamma * iteration) ** (-power))
        self.iter += 1
        self.lamb = max(self.LambdaMin, self.base * (1 + self.gamma * self.iter) ** (-1 * self.power))

        # --------------------------- cos(theta) & phi(theta) ---------------------------
        if self.device_id is None:
            cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cos_theta = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cos_theta = torch.cat((cos_theta, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1)

        cos_theta = cos_theta.clamp(-1, 1)
        cos_m_theta = self.mlambda[self.m](cos_theta)
        theta = cos_theta.data.acos()
        k = (self.m * theta / 3.14159265).floor()
        phi_theta = ((-1.0) ** k) * cos_m_theta - 2 * k
        NormOfFeature = torch.norm(input, 2, 1)

        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cos_theta.size())
        if self.device_id is not None:
            one_hot = one_hot.cuda(self.device_id[0])
        one_hot.scatter_(1, label.view(-1, 1), 1)

        # --------------------------- calculate output ---------------------------
        output = (one_hot * (phi_theta - cos_theta) / (1 + self.lamb)) + cos_theta
        output *= NormOfFeature.view(-1, 1)

        return output

    def __repr__(self):
        return self.__class__.__name__ + '(' \
               + 'in_features = ' + str(self.in_features) \
               + ', out_features = ' + str(self.out_features) \
               + ', m = ' + str(self.m) + ')'


def l2_norm(input, axis = 1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class Am_softmax(nn.Module):
    r"""Implementation of Am_softmax (https://arxiv.org/pdf/1801.05599.pdf):
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        device_id: the list of GPU ids used for model-parallel training;
                   if device_id is None, the head runs on the CPU without model parallelism.
        m: margin
        s: scale of outputs
    """
    def __init__(self, in_features, out_features, device_id, m = 0.35, s = 30.0):
        super(Am_softmax, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.m = m
        self.s = s
        self.device_id = device_id

        self.kernel = Parameter(torch.Tensor(in_features, out_features))
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)  # initialize kernel

    def forward(self, embeddings, label):
        if self.device_id is None:
            kernel_norm = l2_norm(self.kernel, axis = 0)
            cos_theta = torch.mm(embeddings, kernel_norm)
        else:
            x = embeddings
            sub_kernels = torch.chunk(self.kernel, len(self.device_id), dim=1)
            temp_x = x.cuda(self.device_id[0])
            kernel_norm = l2_norm(sub_kernels[0], axis = 0).cuda(self.device_id[0])
            cos_theta = torch.mm(temp_x, kernel_norm)
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                kernel_norm = l2_norm(sub_kernels[i], axis = 0).cuda(self.device_id[i])
                cos_theta = torch.cat((cos_theta, torch.mm(temp_x, kernel_norm).cuda(self.device_id[0])), dim=1)

        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        phi = cos_theta - self.m
        label = label.view(-1, 1)           # size = (B, 1)
        index = cos_theta.data * 0.0        # size = (B, Classnum)
        index.scatter_(1, label.data.view(-1, 1), 1)
        index = index.bool()                # boolean mask of the ground-truth positions (.byte() indexing is deprecated)
        output = cos_theta * 1.0
        output[index] = phi[index]          # apply the margin only at the ground-truth class
        output *= self.s                    # scale up so that softmax works, first introduced in NormFace

        return output

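# SphereFace and Am_softmax used side by side (illustrative only; the 512-d embeddings and
# 10 identities are assumptions). Both heads take the same constructor arguments and return
# (batch, num_classes) logits, so they are drop-in alternatives from the caller's point of view.
def _sphereface_and_am_softmax_usage_sketch():
    feats = torch.randn(4, 512)
    labels = torch.randint(high=10, size=(4,))
    sphere_head = SphereFace(in_features=512, out_features=10, device_id=None)  # multiplicative angular margin
    am_head = Am_softmax(in_features=512, out_features=10, device_id=None)      # additive cosine margin
    sphere_logits = sphere_head(feats, labels)
    am_logits = am_head(feats, labels)
    assert sphere_logits.shape == am_logits.shape == (4, 10)
    return sphere_logits, am_logits
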

if __name__ == "__main__":
    feat = F.normalize(torch.rand(256, 64, requires_grad=True))
    lbl = torch.randint(high=10, size=(256,))

    inp_sp, inp_sn = convert_label_to_similarity(feat, lbl)

    criterion = CircleLoss(m=0.25, gamma=256)
    circle_loss = criterion(inp_sp, inp_sn)

    print(circle_loss)

--------------------------------------------------------------------------------