├── LICENSE ├── README.md ├── data ├── dataloader.py └── test_fileList.txt ├── environment.yml ├── face_alignment ├── README.md ├── figure │ ├── figure1.png │ └── figure2.png ├── models │ ├── ZF_deploy.prototxt │ ├── ZF_local_solver.prototxt │ ├── ZF_local_train.prototxt │ ├── ZF_solver.prototxt │ ├── ZF_train.prototxt │ ├── list_train_global_front.txt │ ├── list_train_global_left.txt │ ├── list_train_global_right.txt │ ├── list_train_init_semifront.txt │ ├── mean_shapes.txt │ ├── shape_parameter_U_front.txt │ ├── shape_parameter_U_left.txt │ ├── shape_parameter_U_right.txt │ ├── shape_parameter_U_wild.txt │ ├── shape_parameter_s_front.txt │ ├── shape_parameter_s_left.txt │ ├── shape_parameter_s_right.txt │ ├── shape_parameter_s_wild.txt │ ├── warped_mean_front.bmp │ ├── warped_mean_left.bmp │ └── warped_mean_right.bmp └── python │ ├── fa_util.py │ ├── fa_util_train.py │ ├── face_alignment.py │ ├── make_wild_input.py │ └── test_300w_public.py ├── face_detection ├── .gitignore ├── LICENSE.MIT ├── NOTICE ├── README.md ├── convert_to_onnx.py ├── data │ ├── FDDB │ │ └── img_list.txt │ ├── __init__.py │ ├── config.py │ ├── data_augment.py │ └── wider_face.py ├── detect.py ├── environment.yml ├── model │ ├── multibox_loss.py │ ├── networks.py │ ├── prior_box.py │ └── retinaface.py ├── test_fddb.py ├── test_widerface.py ├── train_detector.py ├── utils │ ├── __init__.py │ ├── box_utils.py │ ├── misc.py │ └── timer.py ├── webcam_demo.py ├── weights │ ├── mobilenet0.25_final.pt │ └── mobilenet0.25_pretrain.pt └── widerface_evaluate │ ├── README.md │ ├── box_overlaps.pyx │ ├── evaluation.py │ ├── ground_truth │ ├── wider_easy_val.mat │ ├── wider_face_val.mat │ ├── wider_hard_val.mat │ └── wider_medium_val.mat │ ├── setup.py │ └── widerface_txt │ ├── 24--Soldier_Firing │ ├── 24_Soldier_Firing_Soldier_Firing_24_10.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_1037.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_115.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_129.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_133.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_15.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_254.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_264.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_268.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_281.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_315.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_329.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_368.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_372.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_405.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_431.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_523.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_540.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_601.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_633.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_644.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_67.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_691.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_702.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_703.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_763.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_812.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_824.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_887.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_890.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_901.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_904.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_931.txt │ └── 24_Soldier_Firing_Soldier_Firing_24_95.txt │ └── 40--Gymnastics │ ├── 40_Gymnastics_Gymnastics_40_1022.txt │ ├── 
40_Gymnastics_Gymnastics_40_1035.txt │ ├── 40_Gymnastics_Gymnastics_40_1043.txt │ ├── 40_Gymnastics_Gymnastics_40_1044.txt │ ├── 40_Gymnastics_Gymnastics_40_108.txt │ ├── 40_Gymnastics_Gymnastics_40_115.txt │ ├── 40_Gymnastics_Gymnastics_40_138.txt │ ├── 40_Gymnastics_Gymnastics_40_156.txt │ ├── 40_Gymnastics_Gymnastics_40_161.txt │ ├── 40_Gymnastics_Gymnastics_40_171.txt │ ├── 40_Gymnastics_Gymnastics_40_175.txt │ ├── 40_Gymnastics_Gymnastics_40_197.txt │ ├── 40_Gymnastics_Gymnastics_40_24.txt │ ├── 40_Gymnastics_Gymnastics_40_255.txt │ ├── 40_Gymnastics_Gymnastics_40_260.txt │ ├── 40_Gymnastics_Gymnastics_40_273.txt │ ├── 40_Gymnastics_Gymnastics_40_274.txt │ ├── 40_Gymnastics_Gymnastics_40_285.txt │ ├── 40_Gymnastics_Gymnastics_40_331.txt │ ├── 40_Gymnastics_Gymnastics_40_361.txt │ ├── 40_Gymnastics_Gymnastics_40_364.txt │ ├── 40_Gymnastics_Gymnastics_40_389.txt │ └── 40_Gymnastics_Gymnastics_40_401.txt ├── face_recognition ├── config.py ├── model_atari.py ├── test.py └── train.py ├── gaze_estimation ├── README.md ├── example_movie │ └── media2_slow.avi ├── v1_caffe_model │ ├── ir_gaze_deploy.prototxt │ ├── ir_gaze_solver.prototxt │ └── ir_gaze_train_val.prototxt ├── v2_tensorflow_model │ ├── model.py │ ├── opt.py │ ├── test_sequences.py │ └── train.py └── v3_pytorch_model │ ├── config.py │ ├── gaze_model_heavy_ver.py │ ├── gaze_model_light_ver.py │ ├── ir_data.py │ ├── train.py │ └── utils.py ├── test.py └── webcam_demo.py /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 
43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PIMNet_Internal_Environment_Recognition 2 | ## Overview 3 | This project is open software for internal environment recognition for ADAS. 
4 | This project includes: 5 | - Face Detection 6 | - Face Landmarks Detection / Face Alignment 7 | - Gaze Estimation 8 | - Face Recognition 9 | 10 | ### 11 | Project page : http://imlab.postech.ac.kr/opensw.htm 12 | -------------------------------------------------------------------------------- /data/dataloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.utils.data as data 4 | from os import listdir 5 | import os 6 | from os.path import join 7 | from PIL import Image, ImageOps 8 | import random 9 | import torchvision.transforms as transforms 10 | import cv2 11 | import numpy as np 12 | from torch.autograd import Variable 13 | import matplotlib.pyplot as plt 14 | 15 | 16 | def loadFromFile(path, datasize): 17 | if path is None: 18 | return None, None 19 | 20 | # print("Load from file %s" % path) 21 | f = open(path) 22 | data = [] 23 | for idx in range(0, datasize): 24 | line = f.readline() 25 | line = line[:-1] 26 | data.append(line) 27 | f.close() 28 | return data 29 | 30 | 31 | def load_lr_hr_prior(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False, 32 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True): 33 | if input_width is None: 34 | input_width = input_height 35 | if output_width is None: 36 | output_width = output_height 37 | 38 | img = cv2.imread(file_path) 39 | # img = Image.open(file_path) 40 | 41 | if is_gray is False: 42 | b, g, r = cv2.split(img) 43 | img = cv2.merge([r, g, b]) 44 | if is_gray is True: 45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 46 | 47 | if is_mirror and random.randint(0, 1) is 0: 48 | img = ImageOps.mirror(img) 49 | 50 | if input_height is not None: 51 | img = cv2.resize(img, (input_width, input_height), interpolation=cv2.INTER_CUBIC) 52 | 53 | if is_parsing_map: 54 | str = ['skin.png','lbrow.png','rbrow.png','leye.png','reye.png','lear.png','rear.png','nose.png','mouth.png','ulip.png','llip.png'] 55 | 56 | hms = np.zeros((64, 64, 128)) 57 | 58 | for i in range(len(str)): 59 | (onlyfilePath, img_name) = os.path.split(file_path) 60 | full_name = onlyfilePath + "/Parsing_Maps/" + img_name[:-4] + "_"+ str[i] 61 | hm = cv2.imread(full_name, cv2.IMREAD_GRAYSCALE) 62 | hm_resized = cv2.resize(hm, (64, 64), interpolation=cv2.INTER_CUBIC) / 255.0 63 | hms[:, :, i] = hm_resized 64 | hms[:, :, i+11] = hm_resized 65 | hms[:, :, i+22] = hm_resized 66 | hms[:, :, i+33] = hm_resized 67 | hms[:, :, i+44] = hm_resized 68 | hms[:, :, i+55] = hm_resized 69 | hms[:, :, i+66] = hm_resized 70 | hms[:, :, i+77] = hm_resized 71 | hms[:, :, i+88] = hm_resized 72 | hms[:, :, i+99] = hm_resized 73 | hms[:, :, i+110] = hm_resized 74 | is_bigger = i+121 < 128 75 | if is_bigger: 76 | hms[:, :, i+121] = hm_resized 77 | 78 | 79 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 80 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC) 81 | 82 | if is_scale_back: 83 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 84 | return img_lr, img, hms 85 | else: 86 | return img_lr, img, hms 87 | 88 | def load_lr(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False, 89 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True): 90 | if input_width is None: 91 | input_width = input_height 92 | if output_width is None: 93 | output_width = 
output_height 94 | 95 | img = cv2.imread(file_path) 96 | # img = Image.open(file_path) 97 | 98 | if is_gray is False: 99 | b, g, r = cv2.split(img) 100 | img = cv2.merge([r, g, b]) 101 | if is_gray is True: 102 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 103 | 104 | if is_mirror and random.randint(0, 1) is 0: 105 | img = ImageOps.mirror(img) 106 | 107 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 108 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC) 109 | hms = np.zeros((64, 64, 128)) 110 | 111 | if is_scale_back: 112 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 113 | return img_lr, img, hms 114 | else: 115 | return img_lr, img, hms 116 | 117 | 118 | class ImageDatasetFromFile(data.Dataset): 119 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128, 120 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True): 121 | super(ImageDatasetFromFile, self).__init__() 122 | 123 | self.image_filenames = image_list 124 | self.upscale = upscale 125 | self.is_mirror = is_mirror 126 | self.img_path = img_path 127 | self.input_height = input_height 128 | self.input_width = input_width 129 | self.output_height = output_height 130 | self.output_width = output_width 131 | self.is_scale_back = is_scale_back 132 | self.is_gray = is_gray 133 | self.is_parsing_map = is_parsing_map 134 | 135 | self.input_transform = transforms.Compose([ 136 | transforms.ToTensor()]) 137 | 138 | def __getitem__(self, idx): 139 | 140 | if self.is_mirror: 141 | is_mirror = random.randint(0, 1) is 0 142 | else: 143 | is_mirror = False 144 | 145 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines())) 146 | fullpath = join(self.img_path, image_filenames[idx]) 147 | 148 | lr, hr, pm = load_lr_hr_prior(fullpath, 149 | self.input_height, self.input_width, self.output_height, self.output_width, 150 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back, 151 | self.is_parsing_map) 152 | 153 | input = self.input_transform(lr) 154 | target = self.input_transform(hr) 155 | parsing_map = self.input_transform(pm) 156 | 157 | return input, target, parsing_map 158 | 159 | def __len__(self): 160 | return len(open(self.image_filenames, 'rU').readlines()) 161 | 162 | 163 | class TestDatasetFromFile(data.Dataset): 164 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128, 165 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True): 166 | super(TestDatasetFromFile, self).__init__() 167 | 168 | self.image_filenames = image_list 169 | self.upscale = upscale 170 | self.is_mirror = is_mirror 171 | self.img_path = img_path 172 | self.input_height = input_height 173 | self.input_width = input_width 174 | self.output_height = output_height 175 | self.output_width = output_width 176 | self.is_scale_back = is_scale_back 177 | self.is_gray = is_gray 178 | self.is_parsing_map = is_parsing_map 179 | 180 | self.input_transform = transforms.Compose([ 181 | transforms.ToTensor()]) 182 | 183 | def __getitem__(self, idx): 184 | 185 | if self.is_mirror: 186 | is_mirror = random.randint(0, 1) is 0 187 | else: 188 | is_mirror = False 189 | 190 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines())) 191 | fullpath = 
join(self.img_path, image_filenames[idx]) 192 | 193 | lr, hr, pm = load_lr(fullpath, 194 | self.input_height, self.input_width, self.output_height, self.output_width, 195 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back, 196 | self.is_parsing_map) 197 | 198 | input = self.input_transform(lr) 199 | target = self.input_transform(hr) 200 | parsing_map = self.input_transform(pm) 201 | 202 | 203 | return input, target, parsing_map 204 | 205 | def __len__(self): 206 | return len(open(self.image_filenames, 'rU').readlines()) 207 | 208 | 209 | # demo_dataset = ImageDatasetFromFile("/home/cydia/文档/毕业设计/make_Face_boundary/81_landmarks/fileList.txt", 210 | # "/home/cydia/图片/sample/") 211 | # 212 | # train_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=1, num_workers=8) 213 | 214 | if __name__ == '__main__': 215 | for titer, batch in enumerate(train_data_loader): 216 | input, target, heatmaps = Variable(batch[0]), Variable(batch[1]), Variable(batch[2]) 217 | 218 | Input = input.permute(0, 2, 3, 1).cpu().data.numpy() 219 | Target = target.permute(0, 2, 3, 1).cpu().data.numpy() 220 | Parsing_maps = heatmaps.permute(0, 2, 3, 1).cpu().data.numpy() 221 | 222 | plt.figure("Input Image") 223 | plt.imshow(Input[0, :, :, :]) 224 | plt.axis('on') 225 | plt.title('image') 226 | plt.show() 227 | 228 | plt.figure("Target Image") 229 | plt.imshow(Target[0, :, :, :]) 230 | plt.axis('on') 231 | plt.title('Target') 232 | plt.show() 233 | 234 | plt.figure("HMS") 235 | plt.imshow(Parsing_maps[0, :, :, 0]) 236 | plt.axis('on') 237 | plt.title('OMS') 238 | plt.show() 239 | -------------------------------------------------------------------------------- /data/test_fileList.txt: -------------------------------------------------------------------------------- 1 | 0.jpg 2 | 1.jpg 3 | 2.jpg 4 | 3.jpg 5 | 4.jpg 6 | 5.jpg 7 | 6.jpg 8 | 7.jpg 9 | 8.jpg 10 | 9.jpg 11 | 10.jpg 12 | 11.jpg 13 | 12.jpg 14 | 13.jpg 15 | 14.jpg 16 | 15.jpg 17 | 16.jpg 18 | 17.jpg 19 | 18.jpg 20 | 19.jpg 21 | 20.jpg 22 | 21.jpg 23 | 22.jpg 24 | 23.jpg 25 | 24.jpg 26 | 25.jpg 27 | 26.jpg 28 | 27.jpg 29 | 28.jpg 30 | 29.jpg 31 | 30.jpg 32 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pimnet 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - cudatoolkit=11.3 6 | - matplotlib 7 | - pip 8 | - python=3.9 9 | - pytorch::pytorch=1.10.1 10 | - pytorch::torchvision 11 | - scikit-image 12 | - scipy 13 | - tqdm 14 | - pip: 15 | - opencv-python -------------------------------------------------------------------------------- /face_alignment/README.md: -------------------------------------------------------------------------------- 1 | # Stage-wise Face Alignment using Global and Local Regressors 2 | 3 | This is a caffe-python implementation on Windows 10 for face alignment. 4 | 5 | We implemented two-kind of methods.
6 | 7 | Method 1 repeats global and local regression after the initialization regression
8 | ![Method 1](figure/figure1.png)

9 | 10 | Method 2 repeats local refinement regression after the initialization regression
11 | ![Method 2](figure/figure2.png)

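The two methods differ only in which regressors are repeated once the shape has been initialized. The sketch below illustrates the two inference loops under that reading; `global_reg` and `local_reg` stand for callables wrapping the trained global and local Caffe networks and are hypothetical names, not this repository's actual API (the implementation lives under `python/`).

```
# Rough sketch of the two inference loops (hypothetical helper interface).
# `global_reg(image, shape)` and `local_reg(image, shape)` are assumed to be
# callables that wrap the trained Caffe regressors and return an updated
# 68-point shape; they are not actual function names from this repository.

def align_face_method1(image, init_shape, global_reg, local_reg, num_stages=3):
    """Method 1: alternate global and local regression after initialization."""
    shape = init_shape
    for _ in range(num_stages):
        shape = global_reg(image, shape)  # whole-shape update from the face crop
        shape = local_reg(image, shape)   # per-landmark refinement from local patches
    return shape

def align_face_method2(image, init_shape, local_reg, num_stages=3):
    """Method 2: repeat only the local refinement after initialization."""
    shape = init_shape
    for _ in range(num_stages):
        shape = local_reg(image, shape)
    return shape
```

Three global/local rounds correspond to the Global1–3 / Local1–3 stages reported in the evaluation table below.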
12 | 13 | ## Evaluation on the 300-W public test set 14 |
15 | 16 | | Method | Common | Challenging | Full | 17 | |:-------|:--------:|:-----:|:-------:| 18 | | Stage(Projection) | 8.24 | 12.56 | 9.07 | 19 | | Stage(Adjustment) | 6.25 | 10.16 | 7.02 | 20 | | Stage(Global1) | 4.66 | 8.20 | 5.35 | 21 | | Stage(Local1) | 3.45 | 6.49 | 4.05 | 22 | | Stage(Global2) | 3.59 | 6.62 | 4.18 | 23 | | Stage(Local2) | 3.29 | 6.14 | 3.85 | 24 | | Stage(Global3) | 3.48 | 6.37 | 4.05 | 25 | | Stage(Local3) | 3.28 | 6.09 | 3.83 | 26 | | Regression(Wild, simple net) | 4.07 | 6.90 | 4.62 | 27 | | Regression(Wild, ResNet50) | 3.72 | 6.44 | 4.25 | 28 |
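The table reports landmark localization error at each stage (lower is better); the stage-wise rows show the error decreasing from the initial projection to the final local stage. The README does not spell out the metric, so as an assumption for orientation only: 300-W results like these are usually the point-to-point error averaged over the 68 landmarks and normalized by an inter-ocular (or inter-pupil) distance, i.e.

$$\mathrm{err} = \frac{100}{N}\sum_{i=1}^{N}\frac{\lVert p_i - \hat{p}_i\rVert_2}{d_{\mathrm{norm}}},\qquad N = 68,$$

where $p_i$ and $\hat{p}_i$ are the ground-truth and predicted landmark positions and $d_{\mathrm{norm}}$ is the chosen normalization distance.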
29 | 30 | ## Usage 31 | 32 | ### For Training 33 | 1. Clone the repository 34 | ``` 35 | git clone https://github.com/hyunsungP/facelignmentregression 36 | ``` 37 | 38 | 2. make data files (.h5) 39 | ``` 40 | make_wild_input.py 41 | ``` 42 | and so on. 43 | 44 | 3. make data file list \ 45 | Refer to models/list_train_*.txt 46 | 47 | 4. training \ 48 | On console window with caffe 49 | ``` 50 | caffe train --solver=models/ZF_solver.prototxt --gpu=0 51 | ``` 52 | 53 | Other network are same. 54 | 55 | ### For Testing 56 | Change prototxt path in the source code. 57 | ``` 58 | test_300w_public.py 59 | ``` 60 | 61 | Other models will be uploaded. 62 | 63 | -------------------------------------------------------------------------------- /face_alignment/figure/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure1.png -------------------------------------------------------------------------------- /face_alignment/figure/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure2.png -------------------------------------------------------------------------------- /face_alignment/models/ZF_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_68" 2 | 3 | #------------------------------- input ---------------------------- 4 | input: "img" 5 | input_dim: 1 6 | input_dim: 3 7 | input_dim: 224 8 | input_dim: 224 9 | 10 | layer { 11 | name: "scale_and_shift" 12 | bottom: "img" 13 | top: "scale_and_shift" 14 | type: "Scale" 15 | param{ 16 | lr_mult: 0 17 | decay_mult: 0 18 | } 19 | param{ 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | scale_param{ 24 | filler{ 25 | type: "constant" 26 | value: 0.00392156862745 27 | } 28 | bias_term: true 29 | bias_filler { 30 | type: "constant" 31 | value: 0 32 | } 33 | } 34 | } 35 | # ----------------------- ZF ------------------- 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "scale_and_shift" 40 | top: "conv1" 41 | param { 42 | #learning rate 43 | lr_mult: 1.0 44 | } 45 | param { 46 | lr_mult: 2.0 47 | } 48 | convolution_param { 49 | num_output: 96 50 | kernel_size: 7 51 | pad: 3 52 | stride: 2 53 | weight_filler { 54 | type: "gaussian" 55 | std: 0.01 56 | } 57 | bias_filler { 58 | type: "constant" 59 | value: 0 60 | } 61 | } 62 | } 63 | 64 | layer { 65 | name: "relu1" 66 | type: "ReLU" 67 | bottom: "conv1" 68 | top: "conv1" 69 | } 70 | 71 | layer { 72 | name: "norm1" 73 | type: "LRN" 74 | bottom: "conv1" 75 | top: "norm1" 76 | lrn_param { 77 | local_size: 3 78 | alpha: 0.00005 79 | beta: 0.75 80 | norm_region: WITHIN_CHANNEL 81 | } 82 | } 83 | 84 | layer { 85 | name: "pool1" 86 | type: "Pooling" 87 | bottom: "norm1" 88 | top: "pool1" 89 | pooling_param { 90 | kernel_size: 3 91 | stride: 2 92 | pad: 1 93 | pool: MAX 94 | } 95 | } 96 | 97 | layer { 98 | name: "conv2" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2" 102 | param { 103 | lr_mult: 1.0 104 | } 105 | param { 106 | lr_mult: 2.0 107 | } 108 | convolution_param { 109 | num_output: 256 110 | kernel_size: 5 111 | pad: 2 112 | stride: 2 113 | weight_filler { 114 | type: "gaussian" 115 | std: 0.01 116 | } 117 | bias_filler { 118 | 
type: "constant" 119 | value: 1 120 | } 121 | } 122 | } 123 | 124 | layer { 125 | name: "relu2" 126 | type: "ReLU" 127 | bottom: "conv2" 128 | top: "conv2" 129 | } 130 | 131 | layer { 132 | name: "norm2" 133 | type: "LRN" 134 | bottom: "conv2" 135 | top: "norm2" 136 | lrn_param { 137 | local_size: 3 138 | alpha: 0.00005 139 | beta: 0.75 140 | norm_region: WITHIN_CHANNEL 141 | } 142 | } 143 | 144 | layer { 145 | name: "pool2" 146 | type: "Pooling" 147 | bottom: "norm2" 148 | top: "pool2" 149 | pooling_param { 150 | kernel_size: 3 151 | stride: 2 152 | pad: 1 153 | pool: MAX 154 | } 155 | } 156 | 157 | layer { 158 | name: "conv3" 159 | type: "Convolution" 160 | bottom: "pool2" 161 | top: "conv3" 162 | param { 163 | lr_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | } 168 | convolution_param { 169 | num_output: 384 170 | kernel_size: 3 171 | pad: 1 172 | stride: 1 173 | weight_filler { 174 | type: "gaussian" 175 | std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | value: 0 180 | } 181 | } 182 | } 183 | 184 | layer { 185 | name: "relu3" 186 | type: "ReLU" 187 | bottom: "conv3" 188 | top: "conv3" 189 | } 190 | 191 | layer { 192 | name: "conv4" 193 | type: "Convolution" 194 | bottom: "conv3" 195 | top: "conv4" 196 | param { 197 | lr_mult: 1.0 198 | } 199 | param { 200 | lr_mult: 2.0 201 | } 202 | convolution_param { 203 | num_output: 384 204 | kernel_size: 3 205 | pad: 1 206 | stride: 1 207 | weight_filler { 208 | type: "gaussian" 209 | std: 0.01 210 | } 211 | bias_filler { 212 | type: "constant" 213 | value: 1 214 | } 215 | } 216 | } 217 | 218 | layer { 219 | name: "relu4" 220 | type: "ReLU" 221 | bottom: "conv4" 222 | top: "conv4" 223 | } 224 | 225 | layer { 226 | name: "conv5" 227 | type: "Convolution" 228 | bottom: "conv4" 229 | top: "conv5" 230 | param { 231 | lr_mult: 1.0 232 | } 233 | param { 234 | lr_mult: 2.0 235 | } 236 | convolution_param { 237 | num_output: 256 238 | kernel_size: 3 239 | pad: 1 240 | stride: 1 241 | weight_filler { 242 | type: "gaussian" 243 | std: 0.01 244 | } 245 | bias_filler { 246 | type: "constant" 247 | value: 1 248 | } 249 | } 250 | } 251 | 252 | layer { 253 | name: "relu5" 254 | type: "ReLU" 255 | bottom: "conv5" 256 | top: "conv5" 257 | } 258 | 259 | #-----------------------layer +------------------------- 260 | 261 | layer { 262 | name: "ip1" 263 | type: "InnerProduct" 264 | bottom: "conv5" 265 | top: "ip1" 266 | inner_product_param { 267 | num_output: 1024 268 | weight_filler { 269 | type: "xavier" 270 | } 271 | } 272 | } 273 | layer { 274 | name: "relu1" 275 | type: "ReLU" 276 | bottom: "ip1" 277 | top: "ip1" 278 | } 279 | layer { 280 | name: "ip2" 281 | type: "InnerProduct" 282 | bottom: "ip1" 283 | top: "ip2" 284 | inner_product_param { 285 | num_output: 1024 286 | weight_filler { 287 | type: "xavier" 288 | } 289 | } 290 | } 291 | layer { 292 | name: "relu2" 293 | type: "ReLU" 294 | bottom: "ip2" 295 | top: "ip2" 296 | } 297 | 298 | 299 | layer { 300 | name: "fc136" 301 | type: "InnerProduct" 302 | bottom: "ip2" 303 | top: "fc136" 304 | inner_product_param { 305 | num_output: 136 306 | weight_filler { 307 | type: "xavier" 308 | } 309 | } 310 | } 311 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_local_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/ZF_local_train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 70000 6 | display: 20 7 | max_iter: 200000 8 | momentum: 0.9 
9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 10000 13 | snapshot_prefix: "E:/FA/FA_CNN_HS22/caffemodels/FA_ZF_local" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_local_train.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_local" 2 | 3 | #------------------------------- input ---------------------------- 4 | layer { 5 | name: "data" 6 | type: "HDF5Data" 7 | top: "patch" 8 | top: "move" 9 | hdf5_data_param { 10 | source: "models/list_train_local.txt" 11 | batch_size: 24 12 | } 13 | } 14 | 15 | layer { 16 | name: "scale_and_shift" 17 | bottom: "patch" 18 | top: "scale_and_shift" 19 | type: "Scale" 20 | param{ 21 | lr_mult: 0 22 | decay_mult: 0 23 | } 24 | param{ 25 | lr_mult: 0 26 | decay_mult: 0 27 | } 28 | scale_param{ 29 | filler{ 30 | type: "constant" 31 | value: 1 32 | } 33 | bias_term: true 34 | bias_filler { 35 | type: "constant" 36 | value: -128 37 | } 38 | } 39 | } 40 | 41 | layer { 42 | name: "flatdata" 43 | type: "Flatten" 44 | bottom: "move" 45 | top: "flatdata" 46 | } 47 | 48 | 49 | #------------------------------- split ---------------------------- 50 | layer { 51 | name: "slicer" 52 | type: "Slice" 53 | bottom: "scale_and_shift" 54 | top: "patch_slice_1" 55 | top: "patch_slice_2" 56 | top: "patch_slice_3" 57 | top: "patch_slice_4" 58 | top: "patch_slice_5" 59 | top: "patch_slice_6" 60 | top: "patch_slice_7" 61 | top: "patch_slice_8" 62 | top: "patch_slice_9" 63 | top: "patch_slice_10" 64 | top: "patch_slice_11" 65 | top: "patch_slice_12" 66 | top: "patch_slice_13" 67 | top: "patch_slice_14" 68 | top: "patch_slice_15" 69 | top: "patch_slice_16" 70 | top: "patch_slice_17" 71 | top: "patch_slice_18" 72 | top: "patch_slice_19" 73 | top: "patch_slice_20" 74 | top: "patch_slice_21" 75 | top: "patch_slice_22" 76 | top: "patch_slice_23" 77 | top: "patch_slice_24" 78 | top: "patch_slice_25" 79 | top: "patch_slice_26" 80 | top: "patch_slice_27" 81 | top: "patch_slice_28" 82 | top: "patch_slice_29" 83 | top: "patch_slice_30" 84 | top: "patch_slice_31" 85 | top: "patch_slice_32" 86 | top: "patch_slice_33" 87 | top: "patch_slice_34" 88 | top: "patch_slice_35" 89 | top: "patch_slice_36" 90 | top: "patch_slice_37" 91 | top: "patch_slice_38" 92 | top: "patch_slice_39" 93 | top: "patch_slice_40" 94 | top: "patch_slice_41" 95 | top: "patch_slice_42" 96 | top: "patch_slice_43" 97 | top: "patch_slice_44" 98 | top: "patch_slice_45" 99 | top: "patch_slice_46" 100 | top: "patch_slice_47" 101 | top: "patch_slice_48" 102 | top: "patch_slice_49" 103 | top: "patch_slice_50" 104 | top: "patch_slice_51" 105 | top: "patch_slice_52" 106 | top: "patch_slice_53" 107 | top: "patch_slice_54" 108 | top: "patch_slice_55" 109 | top: "patch_slice_56" 110 | top: "patch_slice_57" 111 | top: "patch_slice_58" 112 | top: "patch_slice_59" 113 | top: "patch_slice_60" 114 | top: "patch_slice_61" 115 | top: "patch_slice_62" 116 | top: "patch_slice_63" 117 | top: "patch_slice_64" 118 | top: "patch_slice_65" 119 | top: "patch_slice_66" 120 | top: "patch_slice_67" 121 | top: "patch_slice_68" 122 | slice_param { 123 | axis:1 124 | slice_point: 3 125 | slice_point: 6 126 | slice_point: 9 127 | slice_point: 12 128 | slice_point: 15 129 | slice_point: 18 130 | slice_point: 21 131 | slice_point: 24 132 | slice_point: 27 133 | slice_point: 30 134 | slice_point: 33 135 | slice_point: 36 
136 | slice_point: 39 137 | slice_point: 42 138 | slice_point: 45 139 | slice_point: 48 140 | slice_point: 51 141 | slice_point: 54 142 | slice_point: 57 143 | slice_point: 60 144 | slice_point: 63 145 | slice_point: 66 146 | slice_point: 69 147 | slice_point: 72 148 | slice_point: 75 149 | slice_point: 78 150 | slice_point: 81 151 | slice_point: 84 152 | slice_point: 87 153 | slice_point: 90 154 | slice_point: 93 155 | slice_point: 96 156 | slice_point: 99 157 | slice_point: 102 158 | slice_point: 105 159 | slice_point: 108 160 | slice_point: 111 161 | slice_point: 114 162 | slice_point: 117 163 | slice_point: 120 164 | slice_point: 123 165 | slice_point: 126 166 | slice_point: 129 167 | slice_point: 132 168 | slice_point: 135 169 | slice_point: 138 170 | slice_point: 141 171 | slice_point: 144 172 | slice_point: 147 173 | slice_point: 150 174 | slice_point: 153 175 | slice_point: 156 176 | slice_point: 159 177 | slice_point: 162 178 | slice_point: 165 179 | slice_point: 168 180 | slice_point: 171 181 | slice_point: 174 182 | slice_point: 177 183 | slice_point: 180 184 | slice_point: 183 185 | slice_point: 186 186 | slice_point: 189 187 | slice_point: 192 188 | slice_point: 195 189 | slice_point: 198 190 | slice_point: 201 191 | } 192 | } 193 | 194 | # ----------------------- ZF ------------------- 195 | layer { 196 | name: "conv1" 197 | type: "Convolution" 198 | bottom: "scale_and_shift" 199 | top: "conv1" 200 | param { 201 | #learning rate 202 | lr_mult: 1.0 203 | } 204 | param { 205 | lr_mult: 2.0 206 | } 207 | convolution_param { 208 | num_output: 96 209 | kernel_size: 7 210 | pad: 3 211 | stride: 2 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.001 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | } 222 | 223 | layer { 224 | name: "relu1" 225 | type: "ReLU" 226 | bottom: "conv1" 227 | top: "conv1" 228 | } 229 | 230 | layer { 231 | name: "norm1" 232 | type: "LRN" 233 | bottom: "conv1" 234 | top: "norm1" 235 | lrn_param { 236 | local_size: 3 237 | alpha: 0.00005 238 | beta: 0.75 239 | norm_region: WITHIN_CHANNEL 240 | } 241 | } 242 | 243 | layer { 244 | name: "pool1" 245 | type: "Pooling" 246 | bottom: "norm1" 247 | top: "pool1" 248 | pooling_param { 249 | kernel_size: 3 250 | stride: 2 251 | pad: 1 252 | pool: MAX 253 | } 254 | } 255 | 256 | layer { 257 | name: "conv2" 258 | type: "Convolution" 259 | bottom: "pool1" 260 | top: "conv2" 261 | param { 262 | lr_mult: 1.0 263 | } 264 | param { 265 | lr_mult: 2.0 266 | } 267 | convolution_param { 268 | num_output: 256 269 | kernel_size: 5 270 | pad: 2 271 | stride: 2 272 | weight_filler { 273 | type: "gaussian" 274 | std: 0.001 275 | } 276 | bias_filler { 277 | type: "constant" 278 | value: 1 279 | } 280 | } 281 | } 282 | 283 | layer { 284 | name: "relu2" 285 | type: "ReLU" 286 | bottom: "conv2" 287 | top: "conv2" 288 | } 289 | 290 | layer { 291 | name: "norm2" 292 | type: "LRN" 293 | bottom: "conv2" 294 | top: "norm2" 295 | lrn_param { 296 | local_size: 3 297 | alpha: 0.00005 298 | beta: 0.75 299 | norm_region: WITHIN_CHANNEL 300 | } 301 | } 302 | 303 | layer { 304 | name: "pool2" 305 | type: "Pooling" 306 | bottom: "norm2" 307 | top: "pool2" 308 | pooling_param { 309 | kernel_size: 3 310 | stride: 2 311 | pad: 1 312 | pool: MAX 313 | } 314 | } 315 | 316 | layer { 317 | name: "conv3" 318 | type: "Convolution" 319 | bottom: "pool2" 320 | top: "conv3" 321 | param { 322 | lr_mult: 1.0 323 | } 324 | param { 325 | lr_mult: 2.0 326 | } 327 | convolution_param { 328 | num_output: 384 329 | 
kernel_size: 3 330 | pad: 1 331 | stride: 1 332 | weight_filler { 333 | type: "gaussian" 334 | std: 0.001 335 | } 336 | bias_filler { 337 | type: "constant" 338 | value: 0 339 | } 340 | } 341 | } 342 | 343 | layer { 344 | name: "relu3" 345 | type: "ReLU" 346 | bottom: "conv3" 347 | top: "conv3" 348 | } 349 | 350 | layer { 351 | name: "conv4" 352 | type: "Convolution" 353 | bottom: "conv3" 354 | top: "conv4" 355 | param { 356 | lr_mult: 1.0 357 | } 358 | param { 359 | lr_mult: 2.0 360 | } 361 | convolution_param { 362 | num_output: 384 363 | kernel_size: 3 364 | pad: 1 365 | stride: 1 366 | weight_filler { 367 | type: "gaussian" 368 | std: 0.001 369 | } 370 | bias_filler { 371 | type: "constant" 372 | value: 1 373 | } 374 | } 375 | } 376 | 377 | layer { 378 | name: "relu4" 379 | type: "ReLU" 380 | bottom: "conv4" 381 | top: "conv4" 382 | } 383 | 384 | layer { 385 | name: "conv5" 386 | type: "Convolution" 387 | bottom: "conv4" 388 | top: "conv5" 389 | param { 390 | lr_mult: 1.0 391 | } 392 | param { 393 | lr_mult: 2.0 394 | } 395 | convolution_param { 396 | num_output: 256 397 | kernel_size: 3 398 | pad: 1 399 | stride: 1 400 | weight_filler { 401 | type: "gaussian" 402 | std: 0.001 403 | } 404 | bias_filler { 405 | type: "constant" 406 | value: 1 407 | } 408 | } 409 | } 410 | 411 | layer { 412 | name: "relu5" 413 | type: "ReLU" 414 | bottom: "conv5" 415 | top: "conv5" 416 | } 417 | 418 | #-----------------------layer +------------------------- 419 | 420 | layer { 421 | name: "ip1" 422 | type: "InnerProduct" 423 | bottom: "conv5" 424 | top: "ip1" 425 | inner_product_param { 426 | num_output: 1024 427 | weight_filler { 428 | type: "xavier" 429 | } 430 | } 431 | } 432 | layer { 433 | name: "relu1" 434 | type: "ReLU" 435 | bottom: "ip1" 436 | top: "ip1" 437 | } 438 | layer { 439 | name: "ip2" 440 | type: "InnerProduct" 441 | bottom: "ip1" 442 | top: "ip2" 443 | inner_product_param { 444 | num_output: 1024 445 | weight_filler { 446 | type: "xavier" 447 | } 448 | } 449 | } 450 | layer { 451 | name: "relu2" 452 | type: "ReLU" 453 | bottom: "ip2" 454 | top: "ip2" 455 | } 456 | 457 | 458 | layer { 459 | name: "fc136" 460 | type: "InnerProduct" 461 | bottom: "ip2" 462 | top: "fc136" 463 | inner_product_param { 464 | num_output: 136 465 | weight_filler { 466 | type: "xavier" 467 | } 468 | } 469 | } 470 | 471 | #------------------------------- loss ---------------------------- 472 | 473 | layer { 474 | name: "out" 475 | type: "EuclideanLoss" 476 | bottom: "fc136" 477 | bottom: "flatdata" 478 | top: "out" 479 | loss_weight: 1 480 | } 481 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/ZF_train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 70000 6 | display: 20 7 | max_iter: 200000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 10000 13 | snapshot_prefix: "E:/FA/FA_CNN_HS20/caffemodels/FA_ZF_baseline" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_train.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_68" 2 | 3 | #------------------------------- input ---------------------------- 4 | layer { 5 | name: "data" 6 | type: "HDF5Data" 7 | top: 
"img" 8 | top: "pts" 9 | hdf5_data_param { 10 | source: "models/list_train_wild.txt" 11 | batch_size: 32 12 | } 13 | } 14 | 15 | 16 | layer { 17 | name: "scale_and_shift" 18 | bottom: "img" 19 | top: "scale_and_shift" 20 | type: "Scale" 21 | param{ 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param{ 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | scale_param{ 30 | filler{ 31 | type: "constant" 32 | value: 0.00392156862745 33 | } 34 | bias_term: true 35 | bias_filler { 36 | type: "constant" 37 | value: 0 38 | } 39 | } 40 | } 41 | 42 | layer { 43 | name: "flatdata" 44 | type: "Flatten" 45 | bottom: "pts" 46 | top: "flatdata" 47 | } 48 | # ----------------------- ZF ------------------- 49 | layer { 50 | name: "conv1" 51 | type: "Convolution" 52 | bottom: "scale_and_shift" 53 | top: "conv1" 54 | param { 55 | #learning rate 56 | lr_mult: 1.0 57 | } 58 | param { 59 | lr_mult: 2.0 60 | } 61 | convolution_param { 62 | num_output: 96 63 | kernel_size: 7 64 | pad: 3 65 | stride: 2 66 | weight_filler { 67 | type: "gaussian" 68 | std: 0.001 69 | } 70 | bias_filler { 71 | type: "constant" 72 | value: 0 73 | } 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu1" 79 | type: "ReLU" 80 | bottom: "conv1" 81 | top: "conv1" 82 | } 83 | 84 | layer { 85 | name: "norm1" 86 | type: "LRN" 87 | bottom: "conv1" 88 | top: "norm1" 89 | lrn_param { 90 | local_size: 3 91 | alpha: 0.00005 92 | beta: 0.75 93 | norm_region: WITHIN_CHANNEL 94 | } 95 | } 96 | 97 | layer { 98 | name: "pool1" 99 | type: "Pooling" 100 | bottom: "norm1" 101 | top: "pool1" 102 | pooling_param { 103 | kernel_size: 3 104 | stride: 2 105 | pad: 1 106 | pool: MAX 107 | } 108 | } 109 | 110 | layer { 111 | name: "conv2" 112 | type: "Convolution" 113 | bottom: "pool1" 114 | top: "conv2" 115 | param { 116 | lr_mult: 1.0 117 | } 118 | param { 119 | lr_mult: 2.0 120 | } 121 | convolution_param { 122 | num_output: 256 123 | kernel_size: 5 124 | pad: 2 125 | stride: 2 126 | weight_filler { 127 | type: "gaussian" 128 | std: 0.001 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 1 133 | } 134 | } 135 | } 136 | 137 | layer { 138 | name: "relu2" 139 | type: "ReLU" 140 | bottom: "conv2" 141 | top: "conv2" 142 | } 143 | 144 | layer { 145 | name: "norm2" 146 | type: "LRN" 147 | bottom: "conv2" 148 | top: "norm2" 149 | lrn_param { 150 | local_size: 3 151 | alpha: 0.00005 152 | beta: 0.75 153 | norm_region: WITHIN_CHANNEL 154 | } 155 | } 156 | 157 | layer { 158 | name: "pool2" 159 | type: "Pooling" 160 | bottom: "norm2" 161 | top: "pool2" 162 | pooling_param { 163 | kernel_size: 3 164 | stride: 2 165 | pad: 1 166 | pool: MAX 167 | } 168 | } 169 | 170 | layer { 171 | name: "conv3" 172 | type: "Convolution" 173 | bottom: "pool2" 174 | top: "conv3" 175 | param { 176 | lr_mult: 1.0 177 | } 178 | param { 179 | lr_mult: 2.0 180 | } 181 | convolution_param { 182 | num_output: 384 183 | kernel_size: 3 184 | pad: 1 185 | stride: 1 186 | weight_filler { 187 | type: "gaussian" 188 | std: 0.001 189 | } 190 | bias_filler { 191 | type: "constant" 192 | value: 0 193 | } 194 | } 195 | } 196 | 197 | layer { 198 | name: "relu3" 199 | type: "ReLU" 200 | bottom: "conv3" 201 | top: "conv3" 202 | } 203 | 204 | layer { 205 | name: "conv4" 206 | type: "Convolution" 207 | bottom: "conv3" 208 | top: "conv4" 209 | param { 210 | lr_mult: 1.0 211 | } 212 | param { 213 | lr_mult: 2.0 214 | } 215 | convolution_param { 216 | num_output: 384 217 | kernel_size: 3 218 | pad: 1 219 | stride: 1 220 | weight_filler { 221 | type: "gaussian" 222 | std: 0.001 223 | } 224 | bias_filler { 225 | 
type: "constant" 226 | value: 1 227 | } 228 | } 229 | } 230 | 231 | layer { 232 | name: "relu4" 233 | type: "ReLU" 234 | bottom: "conv4" 235 | top: "conv4" 236 | } 237 | 238 | layer { 239 | name: "conv5" 240 | type: "Convolution" 241 | bottom: "conv4" 242 | top: "conv5" 243 | param { 244 | lr_mult: 1.0 245 | } 246 | param { 247 | lr_mult: 2.0 248 | } 249 | convolution_param { 250 | num_output: 256 251 | kernel_size: 3 252 | pad: 1 253 | stride: 1 254 | weight_filler { 255 | type: "gaussian" 256 | std: 0.001 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 1 261 | } 262 | } 263 | } 264 | 265 | layer { 266 | name: "relu5" 267 | type: "ReLU" 268 | bottom: "conv5" 269 | top: "conv5" 270 | } 271 | 272 | #-----------------------layer +------------------------- 273 | 274 | layer { 275 | name: "ip1" 276 | type: "InnerProduct" 277 | bottom: "conv5" 278 | top: "ip1" 279 | inner_product_param { 280 | num_output: 1024 281 | weight_filler { 282 | type: "xavier" 283 | } 284 | } 285 | } 286 | layer { 287 | name: "relu1" 288 | type: "ReLU" 289 | bottom: "ip1" 290 | top: "ip1" 291 | } 292 | layer { 293 | name: "ip2" 294 | type: "InnerProduct" 295 | bottom: "ip1" 296 | top: "ip2" 297 | inner_product_param { 298 | num_output: 1024 299 | weight_filler { 300 | type: "xavier" 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu2" 306 | type: "ReLU" 307 | bottom: "ip2" 308 | top: "ip2" 309 | } 310 | 311 | 312 | layer { 313 | name: "fc136" 314 | type: "InnerProduct" 315 | bottom: "ip2" 316 | top: "fc136" 317 | inner_product_param { 318 | num_output: 136 319 | weight_filler { 320 | type: "xavier" 321 | } 322 | } 323 | } 324 | 325 | #------------------------------- loss ---------------------------- 326 | 327 | layer { 328 | name: "out" 329 | type: "EuclideanLoss" 330 | bottom: "fc136" 331 | bottom: "flatdata" 332 | top: "out" 333 | loss_weight: 1 334 | } 335 | -------------------------------------------------------------------------------- /face_alignment/models/mean_shapes.txt: -------------------------------------------------------------------------------- 1 | -0.775420 -0.352592 -0.005021 -0.766519 -0.157355 -0.037416 -0.743629 0.036671 -0.055012 -0.701062 0.227707 -0.087501 -0.624895 0.409159 -0.181433 -0.507021 0.568253 -0.314666 -0.358238 0.700246 -0.461675 -0.188625 0.803156 -0.616360 0.000000 0.841022 -0.715031 0.188625 0.803156 -0.616360 0.358238 0.700246 -0.461675 0.507021 0.568253 -0.314666 0.624895 0.409159 -0.181433 0.701062 0.227707 -0.087501 0.743629 0.036671 -0.055012 0.766519 -0.157355 -0.037416 0.775420 -0.352592 -0.005021 -0.582617 -0.575646 -0.655455 -0.492129 -0.636310 -0.737820 -0.379689 -0.656483 -0.823215 -0.263923 -0.643754 -0.906170 -0.153347 -0.607971 -0.982801 0.153347 -0.607971 -0.982801 0.263923 -0.643754 -0.906170 0.379689 -0.656483 -0.823215 0.492129 -0.636310 -0.737820 0.582617 -0.575646 -0.655455 0.000000 -0.412566 -0.988062 0.000000 -0.291699 -1.070349 0.000000 -0.171456 -1.154743 0.000000 -0.051514 -1.235696 -0.122787 0.047974 -0.991719 -0.059097 0.070544 -1.029210 0.000000 0.080821 -1.063730 0.059097 0.070544 -1.029210 0.122787 0.047974 -0.991719 -0.430035 -0.396099 -0.732915 -0.358837 -0.444566 -0.761839 -0.276830 -0.446105 -0.775307 -0.204942 -0.396248 -0.783813 -0.279006 -0.333817 -0.768284 -0.357969 -0.331697 -0.754261 0.204942 -0.396248 -0.783813 0.276830 -0.446105 -0.775307 0.358837 -0.444566 -0.761839 0.430035 -0.396099 -0.732915 0.357969 -0.331697 -0.754261 0.279006 -0.333817 -0.768284 -0.262923 0.307414 -0.744026 -0.185044 0.253144 -0.863426 -0.091584 
0.216906 -0.945248 0.000000 0.228164 -0.997522 0.091584 0.216906 -0.945248 0.185044 0.253144 -0.863426 0.262923 0.307414 -0.744026 0.190844 0.384972 -0.812714 0.097535 0.434418 -0.869535 0.000000 0.449659 -0.913157 -0.097535 0.434418 -0.869535 -0.190844 0.384972 -0.812714 -0.198999 0.307340 -0.799310 -0.095989 0.288185 -0.897865 0.000000 0.291515 -0.964643 0.095989 0.288185 -0.897865 0.198999 0.307340 -0.799310 0.097949 0.335492 -0.870915 0.000000 0.350547 -0.928230 -0.097949 0.335492 -0.870915 2 | -0.775420 -0.352592 -0.766519 -0.157355 -0.743629 0.036671 -0.701062 0.227707 -0.624895 0.409159 -0.507021 0.568253 -0.358238 0.700246 -0.188625 0.803156 0.000000 0.841022 0.188625 0.803156 0.358238 0.700246 0.507021 0.568253 0.624895 0.409159 0.701062 0.227707 0.743629 0.036671 0.766519 -0.157355 0.775420 -0.352592 -0.582617 -0.575646 -0.492129 -0.636310 -0.379689 -0.656483 -0.263923 -0.643754 -0.153347 -0.607971 0.153347 -0.607971 0.263923 -0.643754 0.379689 -0.656483 0.492129 -0.636310 0.582617 -0.575646 0.000000 -0.412566 0.000000 -0.291699 0.000000 -0.171456 0.000000 -0.051514 -0.122787 0.047974 -0.059097 0.070544 0.000000 0.080821 0.059097 0.070544 0.122787 0.047974 -0.430035 -0.396099 -0.358837 -0.444566 -0.276830 -0.446105 -0.204942 -0.396248 -0.279006 -0.333817 -0.357969 -0.331697 0.204942 -0.396248 0.276830 -0.446105 0.358837 -0.444566 0.430035 -0.396099 0.357969 -0.331697 0.279006 -0.333817 -0.262923 0.307414 -0.185044 0.253144 -0.091584 0.216906 0.000000 0.228164 0.091584 0.216906 0.185044 0.253144 0.262923 0.307414 0.190844 0.384972 0.097535 0.434418 0.000000 0.449659 -0.097535 0.434418 -0.190844 0.384972 -0.198999 0.307340 -0.095989 0.288185 0.000000 0.291515 0.095989 0.288185 0.198999 0.307340 0.097949 0.335492 0.000000 0.350547 -0.097949 0.335492 3 | -0.750298 -0.352592 -0.750084 -0.157355 -0.732529 0.036671 -0.699820 0.227707 -0.650560 0.409159 -0.571186 0.568253 -0.465522 0.700246 -0.341724 0.803156 -0.185064 0.841022 0.022672 0.803156 0.226541 0.700246 0.408303 0.568253 0.556644 0.409159 0.654527 0.227707 0.704052 0.036671 0.730716 -0.157355 0.747699 -0.352592 -0.832288 -0.575646 -0.795106 -0.636310 -0.740428 -0.656483 -0.681649 -0.643754 -0.624203 -0.607971 -0.358599 -0.607971 -0.224521 -0.643754 -0.082787 -0.656483 0.057286 -0.636310 0.176834 -0.575646 -0.494031 -0.412566 -0.535174 -0.291699 -0.577372 -0.171456 -0.617848 -0.051514 -0.602196 0.047974 -0.565784 0.070544 -0.531865 0.080821 -0.463426 0.070544 -0.389523 0.047974 -0.738879 -0.396099 -0.691681 -0.444566 -0.627395 -0.446105 -0.569392 -0.396248 -0.625768 -0.333817 -0.687140 -0.331697 -0.214421 -0.396248 -0.147911 -0.446105 -0.070158 -0.444566 0.005964 -0.396099 -0.067120 -0.331697 -0.142516 -0.333817 -0.599711 0.307414 -0.591965 0.253144 -0.551938 0.216906 -0.498761 0.228164 -0.393310 0.216906 -0.271460 0.253144 -0.144315 0.307414 -0.241081 0.384972 -0.350300 0.434418 -0.456579 0.449659 -0.519235 0.434418 -0.571632 0.384972 -0.571993 0.307340 -0.532062 0.288185 -0.482321 0.291515 -0.365803 0.288185 -0.227317 0.307340 -0.350631 0.335492 -0.464115 0.350547 -0.520284 0.335492 4 | -0.747699 -0.352592 -0.730716 -0.157355 -0.704052 0.036671 -0.654527 0.227707 -0.556644 0.409159 -0.408303 0.568253 -0.226541 0.700246 -0.022672 0.803156 0.185064 0.841022 0.341724 0.803156 0.465522 0.700246 0.571186 0.568253 0.650560 0.409159 0.699820 0.227707 0.732529 0.036671 0.750084 -0.157355 0.750298 -0.352592 -0.176834 -0.575646 -0.057286 -0.636310 0.082787 -0.656483 0.224521 -0.643754 0.358599 -0.607971 0.624203 -0.607971 0.681649 
-0.643754 0.740428 -0.656483 0.795106 -0.636310 0.832288 -0.575646 0.494031 -0.412566 0.535174 -0.291699 0.577372 -0.171456 0.617848 -0.051514 0.389523 0.047974 0.463426 0.070544 0.531865 0.080821 0.565784 0.070544 0.602196 0.047974 -0.005964 -0.396099 0.070158 -0.444566 0.147911 -0.446105 0.214421 -0.396248 0.142516 -0.333817 0.067120 -0.331697 0.569392 -0.396248 0.627395 -0.446105 0.691681 -0.444566 0.738879 -0.396099 0.687140 -0.331697 0.625768 -0.333817 0.144315 0.307414 0.271460 0.253144 0.393310 0.216906 0.498761 0.228164 0.551938 0.216906 0.591965 0.253144 0.599711 0.307414 0.571632 0.384972 0.519235 0.434418 0.456579 0.449659 0.350300 0.434418 0.241081 0.384972 0.227317 0.307340 0.365803 0.288185 0.482321 0.291515 0.532062 0.288185 0.571993 0.307340 0.520284 0.335492 0.464115 0.350547 0.350631 0.335492 5 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_front.txt: -------------------------------------------------------------------------------- 1 | 1.031828079223632812e+02 2 | 9.566854095458984375e+01 3 | 6.069260025024414062e+01 4 | 4.313542938232421875e+01 5 | 3.479409408569335938e+01 6 | 2.776439666748046875e+01 7 | 2.766326141357421875e+01 8 | 2.420671272277832031e+01 9 | 2.100972938537597656e+01 10 | 1.877184486389160156e+01 11 | 1.825231742858886719e+01 12 | 1.696853065490722656e+01 13 | 1.407497215270996094e+01 14 | 1.345866584777832031e+01 15 | 1.124495315551757812e+01 16 | 1.050634860992431641e+01 17 | 1.045322513580322266e+01 18 | 1.016666221618652344e+01 19 | 9.403193473815917969e+00 20 | 8.697093963623046875e+00 21 | 8.366784095764160156e+00 22 | 7.573175430297851562e+00 23 | 7.421993732452392578e+00 24 | 7.175876617431640625e+00 25 | 6.694856166839599609e+00 26 | 6.609914302825927734e+00 27 | 6.394573211669921875e+00 28 | 6.032481670379638672e+00 29 | 6.031355381011962891e+00 30 | 5.338684558868408203e+00 31 | 5.145238876342773438e+00 32 | 5.112681388854980469e+00 33 | 5.004620075225830078e+00 34 | 4.974018573760986328e+00 35 | 4.814919948577880859e+00 36 | 4.734435558319091797e+00 37 | 4.285939216613769531e+00 38 | 3.971984148025512695e+00 39 | 3.971856117248535156e+00 40 | 3.753386497497558594e+00 41 | 3.669133424758911133e+00 42 | 3.578326463699340820e+00 43 | 3.483742237091064453e+00 44 | 3.311911344528198242e+00 45 | 3.139082670211791992e+00 46 | 3.108502388000488281e+00 47 | 3.022727251052856445e+00 48 | 2.984299659729003906e+00 49 | 2.859831809997558594e+00 50 | 2.833900928497314453e+00 51 | 2.755693197250366211e+00 52 | 2.742290258407592773e+00 53 | 2.523193836212158203e+00 54 | 2.451685428619384766e+00 55 | 2.441256761550903320e+00 56 | 2.379939079284667969e+00 57 | 2.357637166976928711e+00 58 | 2.252062797546386719e+00 59 | 2.231155633926391602e+00 60 | 2.168044567108154297e+00 61 | 2.127068758010864258e+00 62 | 2.045018196105957031e+00 63 | 2.041277647018432617e+00 64 | 2.012953281402587891e+00 65 | 2.005952835083007812e+00 66 | 1.955849528312683105e+00 67 | 1.935137510299682617e+00 68 | 1.874186635017395020e+00 69 | 1.829447269439697266e+00 70 | 1.807976007461547852e+00 71 | 1.798697710037231445e+00 72 | 1.765719175338745117e+00 73 | 1.662169933319091797e+00 74 | 1.660003185272216797e+00 75 | 1.633037924766540527e+00 76 | 1.625466108322143555e+00 77 | 1.608945488929748535e+00 78 | 1.607636570930480957e+00 79 | 1.600903630256652832e+00 80 | 1.565548300743103027e+00 81 | 1.555794477462768555e+00 82 | 1.520662426948547363e+00 83 | 1.516777276992797852e+00 84 | 1.480778694152832031e+00 85 
| 1.462243556976318359e+00 86 | 1.427065491676330566e+00 87 | 1.411217451095581055e+00 88 | 1.398631095886230469e+00 89 | 1.364845037460327148e+00 90 | 1.355186700820922852e+00 91 | 1.346644043922424316e+00 92 | 1.338635683059692383e+00 93 | 1.327934265136718750e+00 94 | 1.310287356376647949e+00 95 | 1.287073850631713867e+00 96 | 1.259063243865966797e+00 97 | 1.218294143676757812e+00 98 | 1.190768599510192871e+00 99 | 1.139584541320800781e+00 100 | 1.127703666687011719e+00 101 | 1.127283215522766113e+00 102 | 1.092749476432800293e+00 103 | 1.061315417289733887e+00 104 | 1.040784716606140137e+00 105 | 1.030719995498657227e+00 106 | 1.003454208374023438e+00 107 | 1.000035881996154785e+00 108 | 9.665775299072265625e-01 109 | 9.630764126777648926e-01 110 | 9.550484418869018555e-01 111 | 9.314393401145935059e-01 112 | 9.235842823982238770e-01 113 | 9.105998873710632324e-01 114 | 8.669779896736145020e-01 115 | 8.544918298721313477e-01 116 | 8.450148105621337891e-01 117 | 8.216010928153991699e-01 118 | 8.042898178100585938e-01 119 | 7.873371839523315430e-01 120 | 7.616593241691589355e-01 121 | 7.413730621337890625e-01 122 | 7.263383865356445312e-01 123 | 7.158536911010742188e-01 124 | 7.149648666381835938e-01 125 | 6.883103251457214355e-01 126 | 6.826061010360717773e-01 127 | 6.503386497497558594e-01 128 | 5.366221070289611816e-01 129 | 5.077308416366577148e-01 130 | 2.871714234352111816e-01 131 | 2.232837432529777288e-04 132 | 1.937306515173986554e-04 133 | 4.919727507513016462e-05 134 | 4.453564542927779257e-05 135 | 3.619944982347078621e-05 136 | 2.667792978172656149e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_left.txt: -------------------------------------------------------------------------------- 1 | 7.763617706298828125e+01 2 | 5.448361206054687500e+01 3 | 3.448307800292968750e+01 4 | 1.991018867492675781e+01 5 | 1.596661090850830078e+01 6 | 1.414914703369140625e+01 7 | 1.229103565216064453e+01 8 | 1.144200325012207031e+01 9 | 1.003643321990966797e+01 10 | 9.386501312255859375e+00 11 | 8.581890106201171875e+00 12 | 8.169677734375000000e+00 13 | 7.906897544860839844e+00 14 | 6.675380229949951172e+00 15 | 6.112782001495361328e+00 16 | 5.862775802612304688e+00 17 | 5.286133289337158203e+00 18 | 4.913509845733642578e+00 19 | 4.832731246948242188e+00 20 | 4.741940498352050781e+00 21 | 4.589621067047119141e+00 22 | 4.137164592742919922e+00 23 | 4.037960052490234375e+00 24 | 3.861081600189208984e+00 25 | 3.779168367385864258e+00 26 | 3.620183229446411133e+00 27 | 3.475615978240966797e+00 28 | 3.316045284271240234e+00 29 | 3.153186798095703125e+00 30 | 3.043802976608276367e+00 31 | 2.927801609039306641e+00 32 | 2.870085954666137695e+00 33 | 2.832670450210571289e+00 34 | 2.724978208541870117e+00 35 | 2.613666296005249023e+00 36 | 2.461195468902587891e+00 37 | 2.366128683090209961e+00 38 | 2.293519973754882812e+00 39 | 2.214362859725952148e+00 40 | 2.146535158157348633e+00 41 | 1.907979846000671387e+00 42 | 1.876132249832153320e+00 43 | 1.859354138374328613e+00 44 | 1.775403857231140137e+00 45 | 1.764379143714904785e+00 46 | 1.694374799728393555e+00 47 | 1.665422201156616211e+00 48 | 1.622999191284179688e+00 49 | 1.610870122909545898e+00 50 | 1.546877861022949219e+00 51 | 1.523749709129333496e+00 52 | 1.483136296272277832e+00 53 | 1.481248021125793457e+00 54 | 1.423740625381469727e+00 55 | 1.406941294670104980e+00 56 | 1.378324389457702637e+00 57 | 1.357655882835388184e+00 58 | 1.335111260414123535e+00 59 | 
1.306033492088317871e+00 60 | 1.282203078269958496e+00 61 | 1.257453680038452148e+00 62 | 1.242352485656738281e+00 63 | 1.201884031295776367e+00 64 | 1.184469342231750488e+00 65 | 1.166077256202697754e+00 66 | 1.114611506462097168e+00 67 | 1.102498888969421387e+00 68 | 1.085692048072814941e+00 69 | 1.060934782028198242e+00 70 | 1.029542326927185059e+00 71 | 1.017418980598449707e+00 72 | 1.005733728408813477e+00 73 | 9.654799103736877441e-01 74 | 9.343039393424987793e-01 75 | 9.260154366493225098e-01 76 | 9.126370549201965332e-01 77 | 8.995376825332641602e-01 78 | 8.933218717575073242e-01 79 | 8.765093088150024414e-01 80 | 8.631937503814697266e-01 81 | 8.594997525215148926e-01 82 | 8.442590236663818359e-01 83 | 8.324881792068481445e-01 84 | 8.141325116157531738e-01 85 | 8.030978441238403320e-01 86 | 7.934148907661437988e-01 87 | 7.738255858421325684e-01 88 | 7.693558931350708008e-01 89 | 7.474056482315063477e-01 90 | 7.435721158981323242e-01 91 | 7.271158099174499512e-01 92 | 7.163758873939514160e-01 93 | 7.030839323997497559e-01 94 | 6.789638996124267578e-01 95 | 6.737074851989746094e-01 96 | 6.597926020622253418e-01 97 | 6.343482136726379395e-01 98 | 6.245849728584289551e-01 99 | 6.192614436149597168e-01 100 | 6.046380400657653809e-01 101 | 5.935505032539367676e-01 102 | 5.786783099174499512e-01 103 | 5.713734626770019531e-01 104 | 5.640091300010681152e-01 105 | 5.604548454284667969e-01 106 | 5.492605566978454590e-01 107 | 5.269949436187744141e-01 108 | 5.183300971984863281e-01 109 | 5.088832378387451172e-01 110 | 4.984530508518218994e-01 111 | 4.879687726497650146e-01 112 | 4.835968017578125000e-01 113 | 4.738907814025878906e-01 114 | 4.641085565090179443e-01 115 | 4.461972415447235107e-01 116 | 4.422465264797210693e-01 117 | 4.404929280281066895e-01 118 | 4.266946017742156982e-01 119 | 4.236666858196258545e-01 120 | 4.139477312564849854e-01 121 | 4.108542203903198242e-01 122 | 4.007500708103179932e-01 123 | 3.902464807033538818e-01 124 | 3.863844573497772217e-01 125 | 3.740646839141845703e-01 126 | 3.641172349452972412e-01 127 | 3.496397733688354492e-01 128 | 3.446161448955535889e-01 129 | 3.107274472713470459e-01 130 | 2.185715436935424805e-01 131 | 1.521436497569084167e-04 132 | 1.100038352888077497e-04 133 | 2.406101702945306897e-05 134 | 2.068835783575195819e-05 135 | 1.945491385413333774e-05 136 | 1.268230789719382301e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_right.txt: -------------------------------------------------------------------------------- 1 | 7.763619232177734375e+01 2 | 5.448361206054687500e+01 3 | 3.448307800292968750e+01 4 | 1.991018676757812500e+01 5 | 1.596661090850830078e+01 6 | 1.414914703369140625e+01 7 | 1.229103469848632812e+01 8 | 1.144200229644775391e+01 9 | 1.003643226623535156e+01 10 | 9.386501312255859375e+00 11 | 8.581891059875488281e+00 12 | 8.169676780700683594e+00 13 | 7.906896114349365234e+00 14 | 6.675380229949951172e+00 15 | 6.112782478332519531e+00 16 | 5.862775802612304688e+00 17 | 5.286133289337158203e+00 18 | 4.913509845733642578e+00 19 | 4.832731246948242188e+00 20 | 4.741940498352050781e+00 21 | 4.589621067047119141e+00 22 | 4.137164592742919922e+00 23 | 4.037960052490234375e+00 24 | 3.861081600189208984e+00 25 | 3.779168128967285156e+00 26 | 3.620183229446411133e+00 27 | 3.475615978240966797e+00 28 | 3.316045522689819336e+00 29 | 3.153186798095703125e+00 30 | 3.043802738189697266e+00 31 | 2.927801609039306641e+00 32 | 2.870085716247558594e+00 33 | 
2.832670450210571289e+00 34 | 2.724978208541870117e+00 35 | 2.613666296005249023e+00 36 | 2.461195707321166992e+00 37 | 2.366128444671630859e+00 38 | 2.293519973754882812e+00 39 | 2.214362859725952148e+00 40 | 2.146535158157348633e+00 41 | 1.907979846000671387e+00 42 | 1.876132249832153320e+00 43 | 1.859354138374328613e+00 44 | 1.775403857231140137e+00 45 | 1.764379262924194336e+00 46 | 1.694374799728393555e+00 47 | 1.665422201156616211e+00 48 | 1.622999191284179688e+00 49 | 1.610870122909545898e+00 50 | 1.546877861022949219e+00 51 | 1.523749589920043945e+00 52 | 1.483136296272277832e+00 53 | 1.481247901916503906e+00 54 | 1.423740625381469727e+00 55 | 1.406941294670104980e+00 56 | 1.378324389457702637e+00 57 | 1.357655882835388184e+00 58 | 1.335111260414123535e+00 59 | 1.306033611297607422e+00 60 | 1.282203078269958496e+00 61 | 1.257453799247741699e+00 62 | 1.242352604866027832e+00 63 | 1.201884031295776367e+00 64 | 1.184469461441040039e+00 65 | 1.166077256202697754e+00 66 | 1.114611506462097168e+00 67 | 1.102498888969421387e+00 68 | 1.085692048072814941e+00 69 | 1.060934782028198242e+00 70 | 1.029542207717895508e+00 71 | 1.017418980598449707e+00 72 | 1.005733728408813477e+00 73 | 9.654797911643981934e-01 74 | 9.343039393424987793e-01 75 | 9.260153770446777344e-01 76 | 9.126370549201965332e-01 77 | 8.995376825332641602e-01 78 | 8.933218717575073242e-01 79 | 8.765093088150024414e-01 80 | 8.631937503814697266e-01 81 | 8.594997525215148926e-01 82 | 8.442590236663818359e-01 83 | 8.324881196022033691e-01 84 | 8.141325116157531738e-01 85 | 8.030978441238403320e-01 86 | 7.934149503707885742e-01 87 | 7.738255858421325684e-01 88 | 7.693558931350708008e-01 89 | 7.474056482315063477e-01 90 | 7.435721158981323242e-01 91 | 7.271158099174499512e-01 92 | 7.163758873939514160e-01 93 | 7.030839323997497559e-01 94 | 6.789638996124267578e-01 95 | 6.737074851989746094e-01 96 | 6.597926020622253418e-01 97 | 6.343482136726379395e-01 98 | 6.245849728584289551e-01 99 | 6.192614436149597168e-01 100 | 6.046380400657653809e-01 101 | 5.935505628585815430e-01 102 | 5.786783099174499512e-01 103 | 5.713734626770019531e-01 104 | 5.640091300010681152e-01 105 | 5.604548454284667969e-01 106 | 5.492605566978454590e-01 107 | 5.269949436187744141e-01 108 | 5.183300971984863281e-01 109 | 5.088832378387451172e-01 110 | 4.984530508518218994e-01 111 | 4.879687726497650146e-01 112 | 4.835968017578125000e-01 113 | 4.738907516002655029e-01 114 | 4.641085267066955566e-01 115 | 4.461972415447235107e-01 116 | 4.422465264797210693e-01 117 | 4.404929280281066895e-01 118 | 4.266946017742156982e-01 119 | 4.236666858196258545e-01 120 | 4.139477312564849854e-01 121 | 4.108542203903198242e-01 122 | 4.007500708103179932e-01 123 | 3.902464807033538818e-01 124 | 3.863844573497772217e-01 125 | 3.740646839141845703e-01 126 | 3.641172349452972412e-01 127 | 3.496397733688354492e-01 128 | 3.446161448955535889e-01 129 | 3.107274472713470459e-01 130 | 2.185715138912200928e-01 131 | 1.266324252355843782e-04 132 | 1.034445594996213913e-04 133 | 2.327194488316308707e-05 134 | 2.060086262645199895e-05 135 | 1.640349546505603939e-05 136 | 1.241218888026196510e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_wild.txt: -------------------------------------------------------------------------------- 1 | 1.970495758056640625e+02 2 | 1.147242965698242188e+02 3 | 7.450263214111328125e+01 4 | 4.619513702392578125e+01 5 | 3.563884353637695312e+01 6 | 2.953556442260742188e+01 7 | 
2.518301963806152344e+01 8 | 2.209227180480957031e+01 9 | 1.681234169006347656e+01 10 | 1.483633422851562500e+01 11 | 1.392493820190429688e+01 12 | 1.294036865234375000e+01 13 | 1.165308284759521484e+01 14 | 1.114840412139892578e+01 15 | 9.776822090148925781e+00 16 | 8.556041717529296875e+00 17 | 8.135818481445312500e+00 18 | 8.057154655456542969e+00 19 | 7.755284786224365234e+00 20 | 7.051324844360351562e+00 21 | 6.971053600311279297e+00 22 | 6.026376247406005859e+00 23 | 5.712540626525878906e+00 24 | 5.444037437438964844e+00 25 | 5.380769252777099609e+00 26 | 5.245779514312744141e+00 27 | 5.084469318389892578e+00 28 | 4.777353763580322266e+00 29 | 4.659717559814453125e+00 30 | 4.281981468200683594e+00 31 | 4.220893383026123047e+00 32 | 4.155749320983886719e+00 33 | 4.146553993225097656e+00 34 | 3.788559436798095703e+00 35 | 3.753429412841796875e+00 36 | 3.653527259826660156e+00 37 | 3.541052818298339844e+00 38 | 3.238025188446044922e+00 39 | 3.161108016967773438e+00 40 | 2.992911100387573242e+00 41 | 2.964578628540039062e+00 42 | 2.928684711456298828e+00 43 | 2.812888145446777344e+00 44 | 2.672780752182006836e+00 45 | 2.297840595245361328e+00 46 | 2.162400960922241211e+00 47 | 2.125375986099243164e+00 48 | 2.069871425628662109e+00 49 | 2.011986732482910156e+00 50 | 1.985482931137084961e+00 51 | 1.916026353836059570e+00 52 | 1.875906825065612793e+00 53 | 1.837199926376342773e+00 54 | 1.768927335739135742e+00 55 | 1.732427358627319336e+00 56 | 1.714664340019226074e+00 57 | 1.667707800865173340e+00 58 | 1.633123993873596191e+00 59 | 1.602980017662048340e+00 60 | 1.585176110267639160e+00 61 | 1.543024063110351562e+00 62 | 1.507443666458129883e+00 63 | 1.486185431480407715e+00 64 | 1.423241972923278809e+00 65 | 1.388038516044616699e+00 66 | 1.372025370597839355e+00 67 | 1.328630685806274414e+00 68 | 1.319206714630126953e+00 69 | 1.295352339744567871e+00 70 | 1.281113266944885254e+00 71 | 1.258739233016967773e+00 72 | 1.223977208137512207e+00 73 | 1.198363065719604492e+00 74 | 1.166336297988891602e+00 75 | 1.107284784317016602e+00 76 | 1.077126860618591309e+00 77 | 1.055412054061889648e+00 78 | 1.015959739685058594e+00 79 | 9.891035556793212891e-01 80 | 9.715236425399780273e-01 81 | 9.593613147735595703e-01 82 | 9.326089024543762207e-01 83 | 9.173798561096191406e-01 84 | 9.023692011833190918e-01 85 | 8.879134654998779297e-01 86 | 8.786404728889465332e-01 87 | 8.638746142387390137e-01 88 | 8.559817671775817871e-01 89 | 8.491606116294860840e-01 90 | 8.322493433952331543e-01 91 | 8.189594745635986328e-01 92 | 8.083713650703430176e-01 93 | 7.968765497207641602e-01 94 | 7.838517427444458008e-01 95 | 7.731590270996093750e-01 96 | 7.560074925422668457e-01 97 | 7.524335980415344238e-01 98 | 7.510975599288940430e-01 99 | 7.457538247108459473e-01 100 | 7.109788060188293457e-01 101 | 6.914134025573730469e-01 102 | 6.808288097381591797e-01 103 | 6.761116385459899902e-01 104 | 6.582429409027099609e-01 105 | 6.515301465988159180e-01 106 | 6.346591114997863770e-01 107 | 6.292785406112670898e-01 108 | 6.262359619140625000e-01 109 | 6.150320768356323242e-01 110 | 6.042692661285400391e-01 111 | 5.963109135627746582e-01 112 | 5.879486799240112305e-01 113 | 5.791180729866027832e-01 114 | 5.688433647155761719e-01 115 | 5.627683997154235840e-01 116 | 5.566114783287048340e-01 117 | 5.424736738204956055e-01 118 | 5.290962457656860352e-01 119 | 5.219849944114685059e-01 120 | 5.168567895889282227e-01 121 | 5.056280493736267090e-01 122 | 4.916095137596130371e-01 123 | 4.908132255077362061e-01 124 | 
4.677435755729675293e-01 125 | 4.633058011531829834e-01 126 | 4.573886096477508545e-01 127 | 4.397208690643310547e-01 128 | 4.285275936126708984e-01 129 | 4.175726473331451416e-01 130 | 4.158701598644256592e-01 131 | 3.877090513706207275e-01 132 | 3.767875134944915771e-01 133 | 3.591192364692687988e-01 134 | 3.540259003639221191e-01 135 | 3.483022153377532959e-01 136 | 2.917855679988861084e-01 137 | -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_front.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_front.bmp -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_left.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_left.bmp -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_right.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_right.bmp -------------------------------------------------------------------------------- /face_alignment/python/make_wild_input.py: -------------------------------------------------------------------------------- 1 | # make input of initialization network 2 | import glob 3 | import numpy as np 4 | import random 5 | from datetime import datetime 6 | import matplotlib.pyplot as plt 7 | import fa_util as fu 8 | import fa_util_train as fut 9 | import h5py 10 | import cv2 11 | 12 | 13 | list_file_name = 'K:/VGG_list/vgg_list_all_000.txt' 14 | output_prefix = 'K:/VGG_hdf5/init/VGG_wild_000' 15 | # img_folders = ['../sample_data'] 16 | img_folders = ['D:/DB/FaceAlignment/HS_distribution/front', 'D:/DB/FaceAlignment/HS_distribution/left', 'D:/DB/FaceAlignment/HS_distribution/right'] 17 | # output_prefix = 'M:/HS_hdf5/wild/HS_wild' 18 | jittering_size = 1 # should be changed 32*128 19 | chunk_size = 1536 # should be changed 20 | 21 | 22 | def get_part_pts(gt_pts, warp_mat_inv): 23 | part_centers = fu.get_part_centers(gt_pts) 24 | part_gt_pts = np.hstack((part_centers, np.ones((len(part_centers), 1), np.float32))) 25 | part_gt_pts_t = np.transpose(part_gt_pts) 26 | part_pts3 = np.dot(warp_mat_inv, part_gt_pts_t) 27 | return np.transpose(part_pts3)[:, 0:2] 28 | 29 | 30 | def main(): 31 | # files = fut.make_file_list_by_folder(img_folders, ['png', 'jpg']) # get image file list by folder 32 | files = fut.make_file_list_by_text(list_file_name) # get image file list by text file 33 | n_samples = len(files) 34 | random.seed(1234) # set random seed 35 | random.shuffle(files) # random shuffle 36 | image_data_sets = fut.make_chunk_set(files, chunk_size) # get image file chunk set 37 | print('Total number of samples: ' + str(n_samples)) 38 | 39 | cnt_all = 0 # cnt for sample images 40 | for i in range(len(image_data_sets)): 41 | current_num_img_files = len(image_data_sets[i]) 42 | img_all = np.zeros((current_num_img_files * jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8) 43 | pts_all = 
np.zeros((current_num_img_files * jittering_size, fu.n_points, 2), np.float32) 44 | # part_all = np.zeros((current_num_img_files * jittering_size, fu.n_parts, 2), np.float32) 45 | 46 | # generate data 47 | cnt = 0 # cnt for total samples with jittering 48 | for x in image_data_sets[i]: 49 | current_img_set = np.zeros((jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8) 50 | current_pts_set = np.zeros((jittering_size, fu.n_points, 2), np.float32) 51 | # current_part_set = np.zeros((jittering_size, fu.n_parts, 2), np.float32) 52 | cnt_all = cnt_all + 1 53 | print(str(datetime.now()) + ' (' + str(cnt_all) + '/' + str(n_samples) + ') ' + x) 54 | img, gt_pts = fut.load_img_pts(x) 55 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts) 56 | for k in range(jittering_size): 57 | if k == 0: 58 | face_box3_jittered = face_box3 59 | else: 60 | face_box3_jittered = fut.get_jittered_bounding_box3(face_box3) 61 | 62 | 63 | img_face, M_inv, M = fu.get_cropped_face_cv(img, face_box3_jittered) 64 | 65 | 66 | 67 | pts = cv2.transform(gt_pts.reshape((fu.n_points, 1, 2)), M) 68 | 69 | pts = pts.reshape((fu.n_points, 2)) 70 | 71 | # warp_mat_inv = np.linalg.inv(warp_mat) 72 | # pts = fu.get_warped_pts(gt_pts, warp_mat_inv.transpose()) 73 | # part_pts = get_part_pts(gt_pts, warp_mat_inv) 74 | 75 | current_img_set[k, :, :, :] = img_face 76 | current_pts_set[k, :, :] = pts 77 | # current_part_set[k, :, :] = part_pts 78 | 79 | # # draw 80 | # plt.figure(1) 81 | # plt.gcf().clear() 82 | # plt.imshow(img_face) 83 | # plt.scatter(pts[:, 0], pts[:, 1], c='b') 84 | # # plt.scatter(part_pts[:, 0], part_pts[:, 1], c='r') 85 | # plt.draw() 86 | # plt.pause(0.001) 87 | # z = 0 88 | img_all[cnt:cnt + jittering_size, :, :, :] = current_img_set 89 | pts_all[cnt:cnt + jittering_size, :, :] = current_pts_set 90 | # part_all[cnt:cnt + jittering_size, :, :] = current_part_set 91 | cnt = cnt + jittering_size 92 | img_all = img_all.transpose((0, 3, 1, 2)) # order: sample, c, m, n 93 | pts_all[:, :, 0] = pts_all[:, :, 0] / fu.init_w # normalize to 0~1 94 | pts_all[:, :, 1] = pts_all[:, :, 1] / fu.init_h # normalize to 0~1 95 | # part_all[:, :, 0] = part_all[:, :, 0] / fu.init_w # normalize to 0~1 96 | # part_all[:, :, 1] = part_all[:, :, 1] / fu.init_h # normalize to 0~1 97 | 98 | suffle_idx = np.random.permutation(current_num_img_files * jittering_size) # suffle 99 | img_all = img_all[suffle_idx, :, :, :] 100 | pts_all = pts_all[suffle_idx, :, :] 101 | # part_all = part_all[suffle_idx, :, :] 102 | 103 | current_output_path = "%s_%03d.h5" % (output_prefix, i) 104 | hf = h5py.File(current_output_path, 'w') 105 | input_face_img_name = "img" 106 | warped_img_set = hf.create_dataset(input_face_img_name, data=img_all) 107 | input_pts_name = "pts" 108 | pts_set = hf.create_dataset(input_pts_name, data=pts_all) 109 | # input_part_name = "part" 110 | # part_set = hf.create_dataset(input_part_name, data=part_all) 111 | hf.close() 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /face_alignment/python/test_300w_public.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from datetime import datetime 5 | import face_alignment as fa 6 | import fa_util_train as fut 7 | 8 | img_folders = ['N:\DB\FaceAlignment\\300W_public_test\lfpw', 'N:\DB\FaceAlignment\\300W_public_test\helen', 'N:\DB\FaceAlignment\\300W_public_test\ibug'] 
9 | # img_folders = ['D:\DB\FaceAlignment\\300W_public_test\ibug'] 10 | # img_folders = ['../sample_data2'] 11 | img_extension = ['png', 'jpg'] 12 | output_folder = '../result' 13 | max_iter = 21 14 | # max_iter = 1 15 | 16 | def main(): 17 | # fa.fa_init([[1], [0, 0], [0, 0]]) 18 | fa.fa_init([[1], [1], [0, 0], [1, 0]]) 19 | files = [] 20 | current_pts = np.zeros((max_iter+1, 68, 2), np.float32) 21 | error_IOD = np.zeros((max_iter+1, 1), np.float32) 22 | error_BOX = np.zeros((max_iter+1, 1), np.float32) 23 | for folder in img_folders: 24 | for ext in img_extension: 25 | current_files = glob.glob(folder + '/*.' + ext) 26 | files.extend(current_files) 27 | 28 | cnt = 0 29 | n_samples = len(files) 30 | for x in files: 31 | cnt += 1 32 | 33 | img, gt_pts = fut.load_img_pts(x) 34 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts) 35 | # current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 36 | # current_pts[0, :, :], _, current_pts[1, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 37 | current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 38 | 39 | for i in range(2, max_iter): 40 | current_pts[i+1, :, :], pose_idx = fa.face_alignment_detection_step(img, current_pts[i, :, :], pose_idx) 41 | 42 | print(str(datetime.now()) + ' ' + str(cnt) + '/' + str(n_samples) + ' ' + x) 43 | for i in range(0, max_iter): 44 | output_path = fut.get_output_path(x, output_folder, 'pt%d' % i) 45 | fut.save_pts(output_path, current_pts[i, :, :]) 46 | error_IOD[i, 0], error_BOX[i, 0] = fut.measurement(gt_pts, current_pts[i, :, :]) 47 | print('Error%d :' % i + str(error_IOD[i, 0])) 48 | 49 | # draw 50 | draw_idx = [0, 1, 2, 3, 4] 51 | # draw_idx = [0] 52 | plt.figure(1) 53 | plt.gcf().clear() 54 | draw_cnt = 1 55 | for i in draw_idx: 56 | plt.subplot(1, len(draw_idx), draw_cnt) 57 | plt.imshow(img) 58 | plt.scatter(current_pts[i, :, 0], current_pts[i, :, 1], s=3, c='r') 59 | draw_cnt += 1 60 | plt.draw() 61 | plt.pause(0.001) 62 | z = 1 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /face_detection/.gitignore: -------------------------------------------------------------------------------- 1 | data/FDDB/images/ 2 | data/widerface/ 3 | eval/ 4 | results/ 5 | -------------------------------------------------------------------------------- /face_detection/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /face_detection/NOTICE: -------------------------------------------------------------------------------- 1 | This project contains subcomponents with separate copyright notices and license terms. 2 | Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses. 3 | 4 | ===== 5 | 6 | biubug6/Pytorch_Retinaface 7 | https://github.com/biubug6/Pytorch_Retinaface 8 | 9 | 10 | MIT License 11 | 12 | Copyright (c) 2019 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in all 22 | copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | -------------------------------------------------------------------------------- /face_detection/README.md: -------------------------------------------------------------------------------- 1 | # Face Detection (work in progress) 2 | The code and checkpoints contained in this repository were adopted from the [biubug6/Pytorch_Retinaface](https://github.com/biubug6/Pytorch_Retinaface) repository. 3 | 4 | 5 | ## Getting Started 6 | 7 | ### Requirements / Installation 8 | - [Anaconda](https://www.anaconda.com/) 9 | - Nvidia GPU (for GPU utilization) 10 | 11 | Use the following commands to install the necessary packages and activate the environment: 12 | ```sh 13 | conda env create -f environment.yml 14 | conda activate retinaface 15 | ``` 16 | 17 | ### Data 18 | 1. Download the [WiderFace](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) dataset. 19 | 20 | 2. Download annotations (face bounding boxes & five facial landmarks) from [baidu cloud](https://pan.baidu.com/s/1Laby0EctfuJGgGMgRRgykA). 21 | 22 | 3. Organise the dataset directory as follows: 23 | 24 | ``` 25 | ./data/widerface/ 26 | ├─train/ 27 | │ ├─images/ 28 | │ └─label.txt 29 | └─val/ 30 | ├─images/ 31 | └─wider_val.txt 32 | ``` 33 | 34 | ps: wider_val.txt only include val file names but not label information. 35 | 36 | 37 | ### Test 38 | You can use the following command to detect faces in a photo and save the result as an image: 39 | ```sh 40 | python detect.py --image -s 41 | ``` 42 | See [detect.py](detect.py#L16) for available arguments. 
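### Loading the WiderFace data in Python
The WiderFace annotations organised above are consumed through the `WiderFaceDetection` dataset and the `preproc` augmentation pipeline in `data/`. The snippet below is an illustrative sketch only (it is not a script from this repository): it shows how those pieces fit together in a `DataLoader`, assuming the directory layout above exists and that it is run from the `face_detection` directory; the training script is expected to do essentially the same thing.
```python
# Illustrative sketch: build a DataLoader over the WiderFace training annotations.
# Assumes ./data/widerface/train/label.txt and the images/ folder exist as described above.
from torch.utils.data import DataLoader

from data import cfg_mnet, preproc, WiderFaceDetection

rgb_mean = (104, 117, 123)  # BGR means; the same values are subtracted in detect.py / test_fddb.py
dataset = WiderFaceDetection("./data/widerface/train/label.txt",
                             preproc(cfg_mnet["image_size"], rgb_mean))
loader = DataLoader(dataset, batch_size=cfg_mnet["batch_size"], shuffle=True,
                    collate_fn=WiderFaceDetection.collate)

images, targets = next(iter(loader))
# images: (batch, 3, 640, 640) float tensor
# targets: list of (num_faces, 15) tensors -> 4 box coords, 10 landmark coords, 1 label per face
```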
43 | 44 | 45 | ## Training 46 | We provide resnet50 and mobilenet0.25 as backbone networks for training the detector. 47 | We trained MobileNet0.25 on the ImageNet dataset and obtained 46.58% top-1 accuracy. If you do not wish to train the model yourself, we also provide trained models. The pretrained and trained models are available on [google cloud](https://drive.google.com/open?id=1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1) and [baidu cloud](https://pan.baidu.com/s/12h97Fy1RYuqMMIV-RpzdPg) (password: fstq). Place the models as follows: 48 | ```bash 49 | ./weights/ 50 | ├─mobilenet0.25_final.pt 51 | └─mobilenet0.25_pretrain.pt 52 | ``` 53 | 1. Before training, you can check the network configuration (e.g. batch_size, min_sizes, steps, etc.) in ``data/config.py`` and ``train_detector.py``. 54 | 55 | 2. Train the model on WiderFace with either of the following commands: 56 | ```Shell 57 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_detector.py --network resnet50 58 | CUDA_VISIBLE_DEVICES=0 python train_detector.py --network mobilenet0.25 59 | ``` 60 | 61 | 62 | ## Evaluation 63 | 64 | ### Evaluation on WiderFace val 65 | 1. Generate the txt result files: 66 | ```Shell 67 | python test_widerface.py --trained-model --network mobilenet0.25 or resnet50 68 | ``` 69 | 2. Evaluate the txt results. The evaluation code comes from [WiderFace-Evaluation](https://github.com/wondervictor/WiderFace-Evaluation): 70 | ```Shell 71 | cd ./widerface_evaluate 72 | python setup.py build_ext --inplace 73 | python evaluation.py 74 | ``` 75 | 3. You can also use the official WiderFace Matlab evaluation demo from [here](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html). 76 | 77 | ### Evaluation on FDDB 78 | 79 | 1. Download the [FDDB](https://drive.google.com/open?id=17t4WULUDgZgiSy5kpCax4aooyPaz3GQH) images to: 80 | ```Shell 81 | ./data/FDDB/images/ 82 | ``` 83 | 84 | 2. Evaluate the trained model using: 85 | ```Shell 86 | python test_fddb.py --checkpoint ./weights/mobilenet0.25_final.pt 87 | ``` 88 | 89 | 3. ~~Download [eval_tool](https://bitbucket.org/marcopede/face-eval) to evaluate the performance.~~ This link no longer seems to work. We found [this](https://github.com/RuisongZhou/FDDB_Evaluation) repository, but have not tested it yet.
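### Loading the trained detector in Python
All of the scripts above read the trained weights from a single checkpoint file that stores both the network configuration and the state dict. Below is a minimal loading sketch that mirrors what `detect.py` and `test_fddb.py` do; the weights path is only an example.
```python
# Minimal sketch: restore the trained RetinaFace model from a checkpoint.
# The "config" / "net_state_dict" keys follow detect.py and test_fddb.py.
import torch

from model.retinaface import RetinaFace

checkpoint = torch.load("./weights/mobilenet0.25_final.pt", map_location="cpu")
cfg = checkpoint["config"]                    # backbone, in_channel, out_channel, ...
net = RetinaFace(**cfg)
net.load_state_dict(checkpoint["net_state_dict"], strict=False)
net.eval().requires_grad_(False)              # inference mode, no gradients
```
From here the raw network outputs still need the prior-box decoding and NMS steps shown in `detect.py`.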
90 | 91 | 92 | ## References and Citation 93 | - [RetinaFace in PyTorch](https://github.com/biubug6/Pytorch_Retinaface) 94 | - [FaceBoxes](https://github.com/zisianw/FaceBoxes.PyTorch) 95 | - [Retinaface (mxnet)](https://github.com/deepinsight/insightface/tree/master/RetinaFace) 96 | 97 | ``` 98 | @inproceedings{deng2020retinaface, 99 | title={RetinaFace: Single-Shot Multi-Level Face Localisation in the wild}, 100 | author={Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos}, 101 | booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, 102 | pages={5203--5212}, 103 | year={2020} 104 | } 105 | -------------------------------------------------------------------------------- /face_detection/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import torch 6 | from model.retinaface import RetinaFace 7 | 8 | parser = argparse.ArgumentParser(description='Convert to ONNX') 9 | parser.add_argument( 10 | '--checkpoint', type=str, 11 | default='./weights/mobilenet0.25_final.pt', 12 | help='Trained state_dict file path to open' 13 | ) 14 | parser.add_argument( 15 | '--long-side', type=int, default=640, 16 | help='when origin_size is false, long_side is scaled size(320 or 640 for long side)' 17 | ) 18 | parser.add_argument( 19 | '--cpu', action="store_true", 20 | help='Use cpu inference' 21 | ) 22 | 23 | 24 | def main(): 25 | args = parser.parse_args() 26 | assert os.path.isfile(args.checkpoint) 27 | 28 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 29 | cfg = checkpoint["config"] 30 | device = torch.device("cpu" if args.cpu else "cuda") 31 | 32 | # net and model 33 | net = RetinaFace(**cfg) 34 | net.load_state_dict(checkpoint["net_state_dict"], strict=False) 35 | net.eval().requires_grad_(False) 36 | net.to(device) 37 | print('Finished loading model!') 38 | 39 | # ------------------------ export ----------------------------- 40 | output_onnx = 'face_detector.onnx' 41 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 42 | input_names = ["input0"] 43 | output_names = ["output0"] 44 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device) 45 | 46 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 47 | input_names=input_names, output_names=output_names) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /face_detection/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | from .data_augment import * 3 | from .wider_face import WiderFaceDetection 4 | -------------------------------------------------------------------------------- /face_detection/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'backbone': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'batch_size': 32, 11 | 'epoch': 250, 12 | 'decay1': 190, 13 | 'decay2': 220, 14 | 'image_size': 640, 15 | 'pretrain': True, 16 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 17 | 'in_channel': 32, 18 | 'out_channel': 64 19 | } 20 | 21 | cfg_re50 = { 22 | 'backbone': 'Resnet50', 23 | 
'min_sizes': [[16, 32], [64, 128], [256, 512]], 24 | 'steps': [8, 16, 32], 25 | 'variance': [0.1, 0.2], 26 | 'clip': False, 27 | 'loc_weight': 2.0, 28 | 'batch_size': 24, 29 | 'epoch': 100, 30 | 'decay1': 70, 31 | 'decay2': 90, 32 | 'image_size': 840, 33 | 'pretrain': True, 34 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 35 | 'in_channel': 256, 36 | 'out_channel': 256 37 | } 38 | 39 | -------------------------------------------------------------------------------- /face_detection/data/data_augment.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import cv2 4 | import numpy as np 5 | from utils.box_utils import matrix_iof 6 | 7 | 8 | def _crop(image, boxes, labels, landm, img_dim): 9 | height, width, _ = image.shape 10 | pad_image_flag = True 11 | 12 | for _ in range(250): 13 | """ 14 | if random.uniform(0, 1) <= 0.2: 15 | scale = 1.0 16 | else: 17 | scale = random.uniform(0.3, 1.0) 18 | """ 19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] 20 | scale = random.choice(PRE_SCALES) 21 | short_side = min(width, height) 22 | w = int(scale * short_side) 23 | h = w 24 | 25 | if width == w: 26 | l = 0 27 | else: 28 | l = random.randrange(width - w) 29 | if height == h: 30 | t = 0 31 | else: 32 | t = random.randrange(height - h) 33 | roi = np.array((l, t, l + w, t + h)) 34 | 35 | value = matrix_iof(boxes, roi[np.newaxis]) 36 | flag = (value >= 1) 37 | if not flag.any(): 38 | continue 39 | 40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 42 | boxes_t = boxes[mask_a].copy() 43 | labels_t = labels[mask_a].copy() 44 | landms_t = landm[mask_a].copy() 45 | landms_t = landms_t.reshape([-1, 5, 2]) 46 | 47 | if boxes_t.shape[0] == 0: 48 | continue 49 | 50 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 51 | 52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 53 | boxes_t[:, :2] -= roi[:2] 54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 55 | boxes_t[:, 2:] -= roi[:2] 56 | 57 | # landm 58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] 59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) 60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) 61 | landms_t = landms_t.reshape([-1, 10]) 62 | 63 | 64 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale 65 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim 66 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim 67 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0 68 | boxes_t = boxes_t[mask_b] 69 | labels_t = labels_t[mask_b] 70 | landms_t = landms_t[mask_b] 71 | 72 | if boxes_t.shape[0] == 0: 73 | continue 74 | 75 | pad_image_flag = False 76 | 77 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag 78 | return image, boxes, labels, landm, pad_image_flag 79 | 80 | 81 | def _distort(image): 82 | 83 | def _convert(image, alpha=1, beta=0): 84 | tmp = image.astype(float) * alpha + beta 85 | tmp[tmp < 0] = 0 86 | tmp[tmp > 255] = 255 87 | image[:] = tmp 88 | 89 | image = image.copy() 90 | 91 | if random.randrange(2): 92 | 93 | #brightness distortion 94 | if random.randrange(2): 95 | _convert(image, beta=random.uniform(-32, 32)) 96 | 97 | #contrast distortion 98 | if random.randrange(2): 99 | _convert(image, alpha=random.uniform(0.5, 1.5)) 100 | 101 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 102 | 103 | #saturation distortion 104 | if random.randrange(2): 105 | _convert(image[:, :, 1], 
alpha=random.uniform(0.5, 1.5)) 106 | 107 | #hue distortion 108 | if random.randrange(2): 109 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 110 | tmp %= 180 111 | image[:, :, 0] = tmp 112 | 113 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 114 | 115 | else: 116 | 117 | #brightness distortion 118 | if random.randrange(2): 119 | _convert(image, beta=random.uniform(-32, 32)) 120 | 121 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 122 | 123 | #saturation distortion 124 | if random.randrange(2): 125 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 126 | 127 | #hue distortion 128 | if random.randrange(2): 129 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 130 | tmp %= 180 131 | image[:, :, 0] = tmp 132 | 133 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 134 | 135 | #contrast distortion 136 | if random.randrange(2): 137 | _convert(image, alpha=random.uniform(0.5, 1.5)) 138 | 139 | return image 140 | 141 | 142 | def _expand(image, boxes, fill, p): 143 | if random.randrange(2): 144 | return image, boxes 145 | 146 | height, width, depth = image.shape 147 | 148 | scale = random.uniform(1, p) 149 | w = int(scale * width) 150 | h = int(scale * height) 151 | 152 | left = random.randint(0, w - width) 153 | top = random.randint(0, h - height) 154 | 155 | boxes_t = boxes.copy() 156 | boxes_t[:, :2] += (left, top) 157 | boxes_t[:, 2:] += (left, top) 158 | expand_image = np.empty( 159 | (h, w, depth), 160 | dtype=image.dtype) 161 | expand_image[:, :] = fill 162 | expand_image[top:top + height, left:left + width] = image 163 | image = expand_image 164 | 165 | return image, boxes_t 166 | 167 | 168 | def _mirror(image, boxes, landms): 169 | _, width, _ = image.shape 170 | if random.randrange(2): 171 | image = image[:, ::-1] 172 | boxes = boxes.copy() 173 | boxes[:, 0::2] = width - boxes[:, 2::-2] 174 | 175 | # landm 176 | landms = landms.copy() 177 | landms = landms.reshape([-1, 5, 2]) 178 | landms[:, :, 0] = width - landms[:, :, 0] 179 | tmp = landms[:, 1, :].copy() 180 | landms[:, 1, :] = landms[:, 0, :] 181 | landms[:, 0, :] = tmp 182 | tmp1 = landms[:, 4, :].copy() 183 | landms[:, 4, :] = landms[:, 3, :] 184 | landms[:, 3, :] = tmp1 185 | landms = landms.reshape([-1, 10]) 186 | 187 | return image, boxes, landms 188 | 189 | 190 | def _pad_to_square(image, rgb_mean, pad_image_flag): 191 | if not pad_image_flag: 192 | return image 193 | height, width, _ = image.shape 194 | long_side = max(width, height) 195 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) 196 | image_t[:, :] = rgb_mean 197 | image_t[0:0 + height, 0:0 + width] = image 198 | return image_t 199 | 200 | 201 | def _resize_subtract_mean(image, insize, rgb_mean): 202 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 203 | interp_method = interp_methods[random.randrange(5)] 204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method) 205 | image = image.astype(np.float32) 206 | image -= rgb_mean 207 | return image.transpose(2, 0, 1) 208 | 209 | 210 | class preproc(object): 211 | 212 | def __init__(self, img_dim, rgb_means): 213 | self.img_dim = img_dim 214 | self.rgb_means = rgb_means 215 | 216 | def __call__(self, image, targets): 217 | assert targets.shape[0] > 0, "this image does not have gt" 218 | 219 | boxes = targets[:, :4].copy() 220 | labels = targets[:, -1].copy() 221 | landm = targets[:, 4:-1].copy() 222 | 223 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, 
self.img_dim) 224 | image_t = _distort(image_t) 225 | image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag) 226 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) 227 | height, width, _ = image_t.shape 228 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) 229 | boxes_t[:, 0::2] /= width 230 | boxes_t[:, 1::2] /= height 231 | 232 | landm_t[:, 0::2] /= width 233 | landm_t[:, 1::2] /= height 234 | 235 | labels_t = np.expand_dims(labels_t, 1) 236 | targets_t = np.hstack((boxes_t, landm_t, labels_t)) 237 | 238 | return image_t, targets_t 239 | -------------------------------------------------------------------------------- /face_detection/data/wider_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torch.utils.data as data 5 | 6 | 7 | class WiderFaceDetection(data.Dataset): 8 | def __init__(self, txt_path, preproc=None): 9 | self.preproc = preproc 10 | self.imgs_path = [] 11 | self.words = [] 12 | f = open(txt_path,'r') 13 | lines = f.readlines() 14 | isFirst = True 15 | labels = [] 16 | for line in lines: 17 | line = line.rstrip() 18 | if line.startswith('#'): 19 | if isFirst is True: 20 | isFirst = False 21 | else: 22 | labels_copy = labels.copy() 23 | self.words.append(labels_copy) 24 | labels.clear() 25 | path = line[2:] 26 | path = txt_path.replace('label.txt','images/') + path 27 | self.imgs_path.append(path) 28 | else: 29 | line = line.split(' ') 30 | label = [float(x) for x in line] 31 | labels.append(label) 32 | 33 | self.words.append(labels) 34 | 35 | def __len__(self): 36 | return len(self.imgs_path) 37 | 38 | def __getitem__(self, index): 39 | img = cv2.imread(self.imgs_path[index]) 40 | height, width, _ = img.shape 41 | 42 | labels = self.words[index] 43 | annotations = np.zeros((0, 15)) 44 | if len(labels) == 0: 45 | return annotations 46 | for idx, label in enumerate(labels): 47 | annotation = np.zeros((1, 15)) 48 | # bbox 49 | annotation[0, 0] = label[0] # x1 50 | annotation[0, 1] = label[1] # y1 51 | annotation[0, 2] = label[0] + label[2] # x2 52 | annotation[0, 3] = label[1] + label[3] # y2 53 | 54 | # landmarks 55 | annotation[0, 4] = label[4] # l0_x 56 | annotation[0, 5] = label[5] # l0_y 57 | annotation[0, 6] = label[7] # l1_x 58 | annotation[0, 7] = label[8] # l1_y 59 | annotation[0, 8] = label[10] # l2_x 60 | annotation[0, 9] = label[11] # l2_y 61 | annotation[0, 10] = label[13] # l3_x 62 | annotation[0, 11] = label[14] # l3_y 63 | annotation[0, 12] = label[16] # l4_x 64 | annotation[0, 13] = label[17] # l4_y 65 | if (annotation[0, 4]<0): 66 | annotation[0, 14] = -1 67 | else: 68 | annotation[0, 14] = 1 69 | 70 | annotations = np.append(annotations, annotation, axis=0) 71 | target = np.array(annotations) 72 | if self.preproc is not None: 73 | img, target = self.preproc(img, target) 74 | 75 | return torch.from_numpy(img), target 76 | 77 | @staticmethod 78 | def collate(batch): 79 | """Custom collate fn for dealing with batches of images that have a different 80 | number of associated object annotations (bounding boxes). 
81 | 82 | Arguments: 83 | batch: (tuple) A tuple of tensor images and lists of annotations 84 | 85 | Return: 86 | A tuple containing: 87 | 1) (tensor) batch of images stacked on their 0 dim 88 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 89 | """ 90 | targets = [] 91 | imgs = [] 92 | for _, sample in enumerate(batch): 93 | for _, tup in enumerate(sample): 94 | if torch.is_tensor(tup): 95 | imgs.append(tup) 96 | elif isinstance(tup, type(np.empty(0))): 97 | annos = torch.from_numpy(tup).float() 98 | targets.append(annos) 99 | 100 | return torch.stack(imgs, 0), targets 101 | -------------------------------------------------------------------------------- /face_detection/detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import torch 9 | from torchvision.ops import nms 10 | 11 | from model.prior_box import PriorBox 12 | from model.retinaface import RetinaFace 13 | from utils.box_utils import decode, decode_landm 14 | from utils.misc import draw_keypoint 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | '--checkpoint', 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument( 23 | '--image', 24 | help='Input image file to detect' 25 | ) 26 | parser.add_argument( 27 | '--cpu', action="store_true", default=False, 28 | help='Use cpu inference' 29 | ) 30 | parser.add_argument( 31 | '--confidence-threshold', type=float, default=0.02, 32 | help='confidence_threshold' 33 | ) 34 | parser.add_argument( 35 | '--top-k', type=int, default=5000, 36 | help='top_k' 37 | ) 38 | parser.add_argument( 39 | '--nms-threshold', type=float, default=0.4, 40 | help='NMS threshold' 41 | ) 42 | parser.add_argument( 43 | '--keep-top-k', type=int, default=750, 44 | help='keep top k' 45 | ) 46 | parser.add_argument( 47 | '-s', '--save-image', action="store_true", default=False, 48 | help='show detection results' 49 | ) 50 | parser.add_argument( 51 | '--vis-thres', type=float, default=0.6, 52 | help='visualization_threshold' 53 | ) 54 | 55 | 56 | @torch.no_grad() 57 | def main(): 58 | args = parser.parse_args() 59 | assert os.path.isfile(args.checkpoint) 60 | 61 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 62 | cfg = checkpoint["config"] 63 | device = torch.device("cpu" if args.cpu else "cuda") 64 | 65 | # net and model 66 | net = RetinaFace(**cfg) 67 | net.load_state_dict(checkpoint["net_state_dict"], strict=False) 68 | net.eval().requires_grad_(False) 69 | net.to(device) 70 | print('Finished loading model!') 71 | 72 | resize = 1 73 | 74 | # testing begin 75 | img_raw = cv2.imread(args.image, cv2.IMREAD_COLOR) 76 | 77 | img = np.float32(img_raw) 78 | 79 | im_height, im_width, _ = img.shape 80 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 81 | img -= (104, 117, 123) 82 | img = img.transpose(2, 0, 1) 83 | img = torch.from_numpy(img).unsqueeze(0) 84 | img = img.to(device) 85 | scale = scale.to(device) 86 | 87 | tic = time.time() 88 | loc, conf, landms = net(img) # forward pass 89 | print('net forward time: {:.4f}'.format(time.time() - tic)) 90 | 91 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 92 | priors = priorbox.forward() 93 | priors = priors.to(device) 94 | prior_data = priors.data 95 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 96 | boxes = boxes 
* scale / resize 97 | scores = conf.squeeze(0)[:, 1] 98 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 99 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 100 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 101 | img.shape[3], img.shape[2]]) 102 | scale1 = scale1.to(device) 103 | landms = landms * scale1 / resize 104 | 105 | # ignore low scores 106 | inds = torch.where(scores > args.confidence_threshold)[0] 107 | boxes = boxes[inds] 108 | landms = landms[inds] 109 | scores = scores[inds] 110 | 111 | # keep top-K before NMS 112 | order = scores.argsort() 113 | boxes = boxes[order][:args.top_k] 114 | landms = landms[order][:args.top_k] 115 | scores = scores[order][:args.top_k] 116 | 117 | # do NMS 118 | keep = nms(boxes, scores, args.nms_threshold) 119 | 120 | boxes = boxes[keep] 121 | scores = scores[keep] 122 | landms = landms[keep] 123 | 124 | boxes = boxes.cpu().numpy() 125 | scores = scores.cpu().numpy() 126 | landms = landms.cpu().numpy() 127 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 128 | dets = np.concatenate((dets, landms), axis=1) 129 | 130 | # save image 131 | if args.save_image: 132 | draw_keypoint(img_raw, dets, args.vis_thres) 133 | 134 | splits = args.image.split(".") 135 | name = ".".join(splits[:-1]) 136 | ext = splits[-1] 137 | output = f"{name}_results.{ext}" 138 | cv2.imwrite(output, img_raw) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /face_detection/environment.yml: -------------------------------------------------------------------------------- 1 | name: retinaface 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - cudatoolkit=11.3 6 | - matplotlib 7 | - pip 8 | - python=3.9 9 | - pytorch::pytorch=1.10.1 10 | - pytorch::torchvision 11 | - scipy 12 | - tqdm 13 | - pip: 14 | - opencv-python-headless 15 | -------------------------------------------------------------------------------- /face_detection/model/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from utils.box_utils import match, log_sum_exp 5 | 6 | 7 | class MultiBoxLoss(nn.Module): 8 | """SSD Weighted Loss Function 9 | Compute Targets: 10 | 1) Produce Confidence Target Indices by matching ground truth boxes 11 | with (default) 'priorboxes' that have jaccard index > threshold parameter 12 | (default threshold: 0.5). 13 | 2) Produce localization target by 'encoding' variance into offsets of ground 14 | truth boxes and their matched 'priorboxes'. 15 | 3) Hard negative mining to filter the excessive number of negative examples 16 | that comes with using a large number of default bounding boxes. 17 | (default negative:positive ratio 3:1) 18 | Objective Loss: 19 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 20 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 21 | weighted by α which is set to 1 by cross val. 22 | Args: 23 | c: class confidences, 24 | l: predicted boxes, 25 | g: ground truth boxes 26 | N: number of matched default boxes 27 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
28 | """ 29 | 30 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target): 31 | super().__init__() 32 | self.num_classes = num_classes 33 | self.threshold = overlap_thresh 34 | self.background_label = bkg_label 35 | self.encode_target = encode_target 36 | self.use_prior_for_matching = prior_for_matching 37 | self.do_neg_mining = neg_mining 38 | self.negpos_ratio = neg_pos 39 | self.neg_overlap = neg_overlap 40 | self.variance = [0.1, 0.2] 41 | 42 | def forward(self, predictions, priors, targets): 43 | """Multibox Loss 44 | Args: 45 | predictions (tuple): A tuple containing loc preds, conf preds, 46 | and prior boxes from SSD net. 47 | loc shape: torch.size(batch_size,num_priors,4) 48 | conf shape: torch.size(batch_size,num_priors,num_classes) 49 | landm shape: torch.size(batch_size,num_priors,10) 50 | priors shape: torch.size(num_priors,4) 51 | 52 | ground_truth (tensor): Ground truth boxes and labels for a batch, 53 | shape: [batch_size,num_objs,5] (last idx is the label). 54 | """ 55 | 56 | loc_data, conf_data, landm_data = predictions 57 | priors = priors 58 | num = loc_data.size(0) 59 | num_priors = (priors.size(0)) 60 | device = loc_data.device 61 | 62 | # match priors (default boxes) and ground truth boxes 63 | loc_t = torch.Tensor(num, num_priors, 4) 64 | landm_t = torch.Tensor(num, num_priors, 10) 65 | conf_t = torch.LongTensor(num, num_priors) 66 | for idx in range(num): 67 | truths = targets[idx][:, :4].data 68 | labels = targets[idx][:, -1].data 69 | landms = targets[idx][:, 4:14].data 70 | defaults = priors.data 71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) 72 | 73 | loc_t = loc_t.to(device) 74 | conf_t = conf_t.to(device) 75 | landm_t = landm_t.to(device) 76 | zeros = torch.tensor(0, device=device) 77 | 78 | # NOTE: landm Loss (Smooth L1) 79 | # Shape: [batch,num_priors,10] 80 | pos1 = conf_t > zeros 81 | num_pos_landm = pos1.long().sum(1, keepdim=True) 82 | N1 = max(num_pos_landm.data.sum().float(), 1) 83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 84 | landm_p = landm_data[pos_idx1].view(-1, 10) 85 | landm_t = landm_t[pos_idx1].view(-1, 10) 86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') 87 | 88 | 89 | pos = conf_t != zeros 90 | conf_t[pos] = 1 91 | 92 | # NOTE: Localization Loss (Smooth L1) 93 | # Shape: [batch,num_priors,4] 94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 95 | loc_p = loc_data[pos_idx].view(-1, 4) 96 | loc_t = loc_t[pos_idx].view(-1, 4) 97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 98 | 99 | # Compute max conf across batch for hard negative mining 100 | batch_conf = conf_data.view(-1, self.num_classes) 101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 102 | 103 | # NOTE: Hard Negative Mining 104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 105 | loss_c = loss_c.view(num, -1) 106 | _, loss_idx = loss_c.sort(1, descending=True) 107 | _, idx_rank = loss_idx.sort(1) 108 | num_pos = pos.long().sum(1, keepdim=True) 109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 110 | neg = idx_rank < num_neg.expand_as(idx_rank) 111 | 112 | # Confidence Loss Including Positive and Negative Examples 113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 116 | targets_weighted = 
conf_t[(pos+neg).gt(0)] 117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 118 | 119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 120 | N = max(num_pos.data.sum().float(), 1) 121 | loss_l /= N 122 | loss_c /= N 123 | loss_landm /= N1 124 | 125 | return loss_l, loss_c, loss_landm 126 | -------------------------------------------------------------------------------- /face_detection/model/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3_bn(inp, oup, stride): 7 | return nn.Sequential( 8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 9 | nn.BatchNorm2d(oup), 10 | ) 11 | 12 | 13 | def conv3_bn_lrelu(inp, oup, stride=1, leaky=0): 14 | return nn.Sequential( 15 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 16 | nn.BatchNorm2d(oup), 17 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 18 | ) 19 | 20 | 21 | def conv1_bn(inp, oup, stride, leaky=0): 22 | return nn.Sequential( 23 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 24 | nn.BatchNorm2d(oup), 25 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 26 | ) 27 | 28 | 29 | def conv_dw(inp, oup, stride, leaky=0.1): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 32 | nn.BatchNorm2d(inp), 33 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 34 | 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(oup), 37 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 38 | ) 39 | 40 | 41 | class SSH(nn.Module): 42 | # SSH: Single Stage Headless Face Detector 43 | # https://arxiv.org/abs/1708.03979 44 | def __init__(self, in_channels, out_channels): 45 | super().__init__() 46 | assert out_channels % 4 == 0 47 | leaky = 0 48 | if (out_channels <= 64): 49 | leaky = 0.1 50 | self.conv3X3 = conv3_bn(in_channels, out_channels//2, stride=1) 51 | 52 | self.conv5X5_1 = conv3_bn_lrelu(in_channels, out_channels//4, stride=1, leaky=leaky) 53 | self.conv5X5_2 = conv3_bn(out_channels//4, out_channels//4, stride=1) 54 | 55 | self.conv7X7_2 = conv3_bn_lrelu(out_channels//4, out_channels//4, stride=1, leaky=leaky) 56 | self.conv7x7_3 = conv3_bn(out_channels//4, out_channels//4, stride=1) 57 | 58 | def forward(self, input): 59 | conv3X3 = self.conv3X3(input) 60 | 61 | conv5X5_1 = self.conv5X5_1(input) 62 | conv5X5 = self.conv5X5_2(conv5X5_1) 63 | 64 | conv7X7_2 = self.conv7X7_2(conv5X5_1) 65 | conv7X7 = self.conv7x7_3(conv7X7_2) 66 | 67 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 68 | out = F.relu(out) 69 | return out 70 | 71 | 72 | class MobileNetV1(nn.Module): 73 | def __init__(self, num_classes=1000, width=0.25): 74 | super().__init__() 75 | self.stage1 = nn.Sequential( 76 | conv3_bn_lrelu(3, round(width*32), 2, leaky=0.1), # 3 77 | conv_dw(round(width*32), round(width*64), 1), # 7 78 | conv_dw(round(width*64), round(width*128), 2), # 11 79 | conv_dw(round(width*128), round(width*128), 1), # 19 80 | conv_dw(round(width*128), round(width*256), 2), # 27 81 | conv_dw(round(width*256), round(width*256), 1), # 43 82 | ) 83 | self.stage2 = nn.Sequential( 84 | conv_dw(round(width*256), round(width*512), 2), # 43 + 16 = 59 85 | conv_dw(round(width*512), round(width*512), 1), # 59 + 32 = 91 86 | conv_dw(round(width*512), round(width*512), 1), # 91 + 32 = 123 87 | conv_dw(round(width*512), round(width*512), 1), # 123 + 32 = 155 88 | conv_dw(round(width*512), round(width*512), 1), # 155 + 32 = 187 89 | 
conv_dw(round(width*512), round(width*512), 1), # 187 + 32 = 219 90 | ) 91 | self.stage3 = nn.Sequential( 92 | conv_dw(round(width*512), round(width*1024), 2), # 219 +3 2 = 241 93 | conv_dw(round(width*1024), round(width*1024), 1), # 241 + 64 = 301 94 | ) 95 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 96 | self.fc = nn.Linear(256, num_classes) 97 | 98 | def forward(self, x): 99 | x = self.stage1(x) 100 | x = self.stage2(x) 101 | x = self.stage3(x) 102 | x = self.avg(x).view(-1, 256) 103 | x = self.fc(x) 104 | return x 105 | -------------------------------------------------------------------------------- /face_detection/model/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from math import ceil 3 | 4 | import torch 5 | 6 | 7 | class PriorBox: 8 | def __init__(self, cfg, image_size=None): 9 | self.min_sizes = cfg['min_sizes'] 10 | self.steps = cfg['steps'] 11 | self.clip = cfg['clip'] 12 | self.image_size = image_size 13 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 14 | self.name = "s" 15 | 16 | def forward(self): 17 | anchors = [] 18 | for k, f in enumerate(self.feature_maps): 19 | min_sizes = self.min_sizes[k] 20 | for i, j in product(range(f[0]), range(f[1])): 21 | for min_size in min_sizes: 22 | s_kx = min_size / self.image_size[1] 23 | s_ky = min_size / self.image_size[0] 24 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 25 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 26 | for cy, cx in product(dense_cy, dense_cx): 27 | anchors += [cx, cy, s_kx, s_ky] 28 | 29 | # back to torch land 30 | output = torch.Tensor(anchors).view(-1, 4) 31 | if self.clip: 32 | output.clamp_(min=0, max=1) 33 | return output 34 | -------------------------------------------------------------------------------- /face_detection/model/retinaface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision.models.feature_extraction import create_feature_extractor 5 | from torchvision.models import quantization 6 | from torchvision.ops import FeaturePyramidNetwork 7 | 8 | from .networks import SSH, MobileNetV1 9 | 10 | 11 | class ClassHead(nn.Conv2d): 12 | def __init__(self, in_channels=512, num_anchors=3): 13 | super().__init__(in_channels, num_anchors*2, kernel_size=1) 14 | self.num_anchors = num_anchors 15 | 16 | def forward(self, input): 17 | out = self._conv_forward(input, self.weight, self.bias) 18 | out = out.permute(0, 2, 3, 1).contiguous() 19 | return out.view(out.size(0), -1, 2) 20 | 21 | 22 | class BboxHead(nn.Conv2d): 23 | def __init__(self, in_channels=512, num_anchors=3): 24 | super().__init__(in_channels, num_anchors*4, kernel_size=1) 25 | 26 | def forward(self, input): 27 | out = self._conv_forward(input, self.weight, self.bias) 28 | out = out.permute(0, 2, 3, 1).contiguous() 29 | return out.view(out.size(0), -1, 4) 30 | 31 | 32 | class LandmarkHead(nn.Conv2d): 33 | def __init__(self, in_channels=512, num_anchors=3): 34 | super().__init__(in_channels, num_anchors*10, kernel_size=1) 35 | 36 | def forward(self, input): 37 | out = self._conv_forward(input, self.weight, self.bias) 38 | out = out.permute(0, 2, 3, 1).contiguous() 39 | return out.view(out.size(0), -1, 10) 40 | 41 | 42 | class RetinaFace(nn.Module): 43 | def __init__(self, backbone, in_channel, out_channel, **kwargs): 44 | 
super().__init__() 45 | assert backbone in ("mobilenet0.25", "resnet50") 46 | if backbone == "mobilenet0.25": 47 | model = MobileNetV1() 48 | ckpt_file = "./weights/mobilenet0.25_pretrain.pt" 49 | try: 50 | checkpoint = torch.load(ckpt_file, map_location="cpu") 51 | from collections import OrderedDict 52 | new_state_dict = OrderedDict() 53 | for k, v in checkpoint['state_dict'].items(): 54 | name = k[7:] # remove module. 55 | new_state_dict[name] = v 56 | # load params 57 | model.load_state_dict(new_state_dict) 58 | except: 59 | print(f"{ckpt_file} not found!") 60 | return_nodes={ 61 | "stage1": "feat0", 62 | "stage2": "feat1", 63 | "stage3": "feat2", 64 | } 65 | else: 66 | import torchvision.models as models 67 | model = models.resnet50(pretrained=True) 68 | return_nodes={ 69 | "layer2": "feat0", 70 | "layer3": "feat1", 71 | "layer4": "feat2", 72 | } 73 | 74 | self.body = create_feature_extractor(model, return_nodes=return_nodes) 75 | in_channels_stage2 = in_channel 76 | in_channels_list = [ 77 | in_channels_stage2 * 2, 78 | in_channels_stage2 * 4, 79 | in_channels_stage2 * 8, 80 | ] 81 | out_channels = out_channel 82 | self.fpn = FeaturePyramidNetwork(in_channels_list, out_channels) 83 | self.ssh1 = SSH(out_channels, out_channels) 84 | self.ssh2 = SSH(out_channels, out_channels) 85 | self.ssh3 = SSH(out_channels, out_channels) 86 | 87 | fpn_num = len(in_channels_list) 88 | self.class_head = self._make_class_head(fpn_num=fpn_num, in_channels=out_channels) 89 | self.bbox_head = self._make_bbox_head(fpn_num=fpn_num, in_channels=out_channels) 90 | self.landmark_head = self._make_landmark_head(fpn_num=fpn_num, in_channels=out_channels) 91 | 92 | def _make_class_head(self, fpn_num=3, in_channels=64, anchor_num=2): 93 | classhead = nn.ModuleList() 94 | for i in range(fpn_num): 95 | classhead.append(ClassHead(in_channels, anchor_num)) 96 | return classhead 97 | 98 | def _make_bbox_head(self, fpn_num=3, in_channels=64, anchor_num=2): 99 | bboxhead = nn.ModuleList() 100 | for i in range(fpn_num): 101 | bboxhead.append(BboxHead(in_channels, anchor_num)) 102 | return bboxhead 103 | 104 | def _make_landmark_head(self, fpn_num=3, in_channels=64, anchor_num=2): 105 | landmarkhead = nn.ModuleList() 106 | for i in range(fpn_num): 107 | landmarkhead.append(LandmarkHead(in_channels, anchor_num)) 108 | return landmarkhead 109 | 110 | def forward(self, inputs): 111 | out = self.body(inputs) 112 | 113 | # FPN 114 | out = self.fpn(out) 115 | 116 | # SSH 117 | feature0 = self.ssh1(out["feat0"]) 118 | feature1 = self.ssh2(out["feat1"]) 119 | feature2 = self.ssh3(out["feat2"]) 120 | 121 | bbox_regressions = torch.cat([ 122 | self.bbox_head[0](feature0), 123 | self.bbox_head[1](feature1), 124 | self.bbox_head[2](feature2), 125 | ], dim=1) 126 | 127 | classifications = torch.cat([ 128 | self.class_head[0](feature0), 129 | self.class_head[1](feature1), 130 | self.class_head[2](feature2), 131 | ], dim=1) 132 | 133 | lm_regressions = torch.cat([ 134 | self.landmark_head[0](feature0), 135 | self.landmark_head[1](feature1), 136 | self.landmark_head[2](feature2), 137 | ], dim=1) 138 | 139 | if not self.training: 140 | classifications = F.softmax(classifications, dim=-1) 141 | return bbox_regressions, classifications, lm_regressions 142 | 143 | def fuse_model(self) -> None: 144 | for m in self.modules(): 145 | if type(m) == quantization.mobilenetv2.QuantizableMobileNetV2: 146 | m.fuse_model() 147 | elif type(m) == quantization.mobilenetv3.QuantizableMobileNetV3: 148 | m.fuse_model() 149 | 
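For orientation, a minimal usage sketch of this module (not part of the repository): in the test scripts the cfg dict comes from checkpoint["config"], so the in_channel/out_channel values below are assumptions chosen to match the mobilenet0.25 stage widths, not values read from data/config.py.

```python
import torch
from model.retinaface import RetinaFace

# assumed values; the real ones live in data/config.py (cfg_mnet)
cfg = {"backbone": "mobilenet0.25", "in_channel": 32, "out_channel": 64}

net = RetinaFace(**cfg).eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 640, 640)   # in practice a mean-subtracted BGR tensor
    loc, conf, landms = net(dummy)
# loc:    [1, num_priors, 4]  box regression offsets
# conf:   [1, num_priors, 2]  class scores (softmax-ed because the net is in eval mode)
# landms: [1, num_priors, 10] five (x, y) landmark offsets
```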
-------------------------------------------------------------------------------- /face_detection/test_fddb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from torchvision.ops import nms 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.box_utils import decode, decode_landm 13 | from utils.misc import draw_keypoint 14 | from utils.timer import Timer 15 | 16 | parser = argparse.ArgumentParser(description='Retinaface') 17 | parser.add_argument( 18 | '--checkpoint', type=str, 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument('--save-folder', default='eval/', type=str, help='Dir to save results') 23 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 24 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT') 25 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold') 26 | parser.add_argument('--top-k', default=5000, type=int, help='top_k') 27 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold') 28 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k') 29 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results') 30 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold') 31 | 32 | 33 | def main(): 34 | args = parser.parse_args() 35 | assert os.path.isfile(args.checkpoint) 36 | 37 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 38 | cfg = checkpoint["config"] 39 | device = torch.device("cpu" if args.cpu else "cuda") 40 | 41 | # net and model 42 | net = RetinaFace(**cfg) 43 | net.load_state_dict(checkpoint["net_state_dict"]) 44 | net.eval().requires_grad_(False) 45 | net.to(device) 46 | if args.jit: 47 | net = torch.jit.script(net) 48 | print('Finished loading model!') 49 | torch.backends.cudnn.benchmark = True 50 | 51 | # save file 52 | os.makedirs(args.save_folder, exist_ok=True) 53 | fw = open(os.path.join(args.save_folder, 'FDDB_dets.txt'), 'w') 54 | 55 | # testing dataset 56 | testset_folder = 'data/FDDB/images/' 57 | testset_list = 'data/FDDB/img_list.txt' 58 | with open(testset_list, 'r') as fr: 59 | test_dataset = fr.read().split() 60 | num_images = len(test_dataset) 61 | 62 | # testing scale 63 | resize = 1 64 | 65 | _t = { 66 | "preprocess": Timer(), 67 | "forward": Timer(), 68 | "postprocess": Timer(), 69 | "misc": Timer(), 70 | } 71 | 72 | # testing begin 73 | for i, img_name in enumerate(test_dataset): 74 | image_path = testset_folder + img_name + '.jpg' 75 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 76 | 77 | # NOTE preprocessing. 78 | _t["preprocess"].tic() 79 | img = img_raw - (104, 117, 123) 80 | if resize != 1: 81 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 82 | im_height, im_width, _ = img.shape 83 | scale = torch.as_tensor( 84 | [im_width, im_height, im_width, im_height], 85 | dtype=torch.float, device=device 86 | ) 87 | img = img.transpose(2, 0, 1) 88 | img = np.float32(img) 89 | img = torch.from_numpy(img).unsqueeze(0) 90 | img = img.to(device) 91 | _t["preprocess"].toc() 92 | 93 | # NOTE forward. 
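# net(img) returns three tensors shaped by the heads in model/retinaface.py:
# loc [1, num_priors, 4] box offsets, conf [1, num_priors, 2] face scores
# (already softmax-ed since the net is in eval mode), and landms
# [1, num_priors, 10] landmark offsets; all are decoded against the PriorBox
# anchors in the postprocessing step below.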
94 | _t["forward"].tic() 95 | loc, conf, landms = net(img) # forward pass 96 | _t["forward"].toc() 97 | 98 | # NOTE misc. 99 | _t["postprocess"].tic() 100 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 101 | priors = priorbox.forward() 102 | priors = priors.to(device) 103 | prior_data = priors.data 104 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 105 | boxes = boxes * scale / resize 106 | scores = conf.squeeze(0)[:, 1] 107 | 108 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 109 | scale1 = torch.as_tensor( 110 | [im_width, im_height] * 5, dtype=torch.float, device=device 111 | ) 112 | scale1 = scale1.to(device) 113 | landms = landms * scale1 / resize 114 | 115 | # ignore low scores 116 | inds = torch.where(scores > args.confidence_threshold)[0] 117 | boxes = boxes[inds] 118 | landms = landms[inds] 119 | scores = scores[inds] 120 | 121 | # keep top-K before NMS 122 | order = scores.argsort() 123 | boxes = boxes[order][:args.top_k] 124 | landms = landms[order][:args.top_k] 125 | scores = scores[order][:args.top_k] 126 | _t["postprocess"].toc() 127 | 128 | # do NMS 129 | _t["misc"].tic() 130 | keep = nms(boxes, scores, args.nms_threshold) 131 | boxes = boxes[keep] 132 | scores = scores[keep] 133 | landms = landms[keep] 134 | 135 | boxes = boxes.cpu().numpy() 136 | scores = scores.cpu().numpy() 137 | landms = landms.cpu().numpy() 138 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 139 | dets = np.concatenate((dets, landms), axis=1) 140 | _t["misc"].toc() 141 | 142 | # save dets 143 | fw.write(f'{img_name:s}\n') 144 | fw.write(f'{dets.shape[0]:.1f}\n') 145 | for k in range(dets.shape[0]): 146 | xmin, ymin, xmax, ymax = dets[k, :4] 147 | score = dets[k, 4] 148 | w = xmax - xmin + 1 149 | h = ymax - ymin + 1 150 | # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score)) 151 | fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score)) 152 | 153 | print( 154 | f"im_detect: {i+1:d}/{num_images:d}\t" 155 | f"preprocess_time: {_t['preprocess'].average_time:.4f}s\t" 156 | f"forward_time: {_t['forward'].average_time:.4f}s\t" 157 | f"postprocess_time: {_t['postprocess'].average_time:.4f}s\t" 158 | f"misc_time: {_t['misc'].average_time:.4f}s" 159 | ) 160 | 161 | # show image 162 | if args.save_image: 163 | draw_keypoint(img_raw, dets, args.vis_thres) 164 | # save image 165 | if not os.path.exists("./results/"): 166 | os.makedirs("./results/") 167 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw) 168 | 169 | fw.close() 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /face_detection/test_widerface.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from torchvision.ops import nms 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.box_utils import decode, decode_landm 13 | from utils.misc import draw_keypoint 14 | from utils.timer import Timer 15 | 16 | parser = argparse.ArgumentParser(description='Retinaface') 17 | parser.add_argument( 18 | '--checkpoint', type=str, 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument('--origin-size', default=True, type=str, 
help='Whether use origin image size to evaluate') 23 | parser.add_argument('--save-folder', default='./widerface_evaluate/widerface_txt/', type=str, help='Dir to save txt results') 24 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 25 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT') 26 | parser.add_argument('--dataset-folder', default='./data/widerface/val/images/', type=str, help='dataset path') 27 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold') 28 | parser.add_argument('--top-k', default=5000, type=int, help='top_k') 29 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold') 30 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k') 31 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results') 32 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold') 33 | 34 | 35 | def main(): 36 | args = parser.parse_args() 37 | assert os.path.isfile(args.checkpoint) 38 | 39 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 40 | cfg = checkpoint["config"] 41 | device = torch.device("cpu" if args.cpu else "cuda") 42 | 43 | # net and model 44 | net = RetinaFace(**cfg) 45 | net.load_state_dict(checkpoint["net_state_dict"]) 46 | net.eval().requires_grad_(False) 47 | net.to(device) 48 | if args.jit: 49 | net = torch.jit.script(net) 50 | print('Finished loading model!') 51 | torch.backends.cudnn.benchmark = True 52 | 53 | # testing dataset 54 | testset_folder = args.dataset_folder 55 | testset_list = args.dataset_folder[:-7] + "wider_val.txt" 56 | 57 | with open(testset_list, 'r') as fr: 58 | test_dataset = fr.read().split() 59 | num_images = len(test_dataset) 60 | os.makedirs("./results/", exist_ok=True) 61 | 62 | target_size = 1600.0 63 | max_size = 2150.0 64 | _t = {'forward_pass': Timer(), 'misc': Timer()} 65 | # testing begin 66 | for i, img_name in enumerate(test_dataset): 67 | image_path = testset_folder + img_name 68 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 69 | img = np.float32(img_raw) 70 | 71 | # testing scale 72 | im_shape = img.shape 73 | im_size_min = np.min(im_shape[0:2]) 74 | im_size_max = np.max(im_shape[0:2]) 75 | resize = target_size / im_size_min 76 | # prevent bigger axis from being more than max_size: 77 | if np.round(resize * im_size_max) > max_size: 78 | resize = float(max_size) / float(im_size_max) 79 | if args.origin_size: 80 | resize = 1 81 | 82 | if resize != 1: 83 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 84 | im_height, im_width, _ = img.shape 85 | scale = torch.as_tensor( 86 | [im_width, im_height, im_width, im_height], 87 | dtype=torch.float, device=device 88 | ) 89 | img -= (104, 117, 123) 90 | img = img.transpose(2, 0, 1) 91 | img = torch.from_numpy(img).unsqueeze(0) 92 | img = img.to(device) 93 | 94 | _t['forward_pass'].tic() 95 | loc, conf, landms = net(img) # forward pass 96 | _t['forward_pass'].toc() 97 | _t['misc'].tic() 98 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 99 | priors = priorbox.forward() 100 | priors = priors.to(device) 101 | prior_data = priors.data 102 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 103 | boxes = boxes * scale / resize 104 | scores = conf.squeeze(0)[:, 1] 105 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 106 | scale1 = 
torch.as_tensor( 107 | [im_width, im_height] * 5, dtype=torch.float, device=device 108 | ) 109 | landms = landms * scale1 / resize 110 | 111 | # ignore low scores 112 | inds = torch.where(scores > args.confidence_threshold)[0] 113 | boxes = boxes[inds] 114 | landms = landms[inds] 115 | scores = scores[inds] 116 | 117 | # keep top-K before NMS 118 | order = scores.argsort() 119 | boxes = boxes[order][:args.top_k] 120 | landms = landms[order][:args.top_k] 121 | scores = scores[order][:args.top_k] 122 | 123 | # do NMS 124 | keep = nms(boxes, scores, args.nms_threshold) 125 | boxes = boxes[keep] 126 | scores = scores[keep] 127 | landms = landms[keep] 128 | 129 | boxes = boxes.cpu().numpy() 130 | scores = scores.cpu().numpy() 131 | landms = landms.cpu().numpy() 132 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 133 | dets = np.concatenate((dets, landms), axis=1) 134 | _t['misc'].toc() 135 | 136 | # -------------------------------------------------------------------- 137 | save_name = args.save_folder + img_name[:-4] + ".txt" 138 | dirname = os.path.dirname(save_name) 139 | if not os.path.isdir(dirname): 140 | os.makedirs(dirname) 141 | with open(save_name, "w") as fd: 142 | bboxs = dets 143 | file_name = os.path.basename(save_name)[:-4] + "\n" 144 | bboxs_num = str(len(bboxs)) + "\n" 145 | fd.write(file_name) 146 | fd.write(bboxs_num) 147 | for box in bboxs: 148 | x = int(box[0]) 149 | y = int(box[1]) 150 | w = int(box[2]) - int(box[0]) 151 | h = int(box[3]) - int(box[1]) 152 | confidence = str(box[4]) 153 | line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n" 154 | fd.write(line) 155 | 156 | print(f"im_detect: {i+1:d}/{num_images:d}" 157 | f"forward_pass_time: {_t['forward_pass'].average_time:.4f}s misc: {_t['misc'].average_time:.4f}s") 158 | 159 | # save image 160 | if args.save_image: 161 | draw_keypoint(img_raw, dets, args.vis_thres) 162 | 163 | # save image 164 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw) 165 | 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /face_detection/train_detector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import datetime 4 | import math 5 | import os 6 | import time 7 | 8 | import torch 9 | 10 | from data import WiderFaceDetection, cfg_mnet, cfg_re50, preproc 11 | from model.multibox_loss import MultiBoxLoss 12 | from model.prior_box import PriorBox 13 | from model.retinaface import RetinaFace 14 | 15 | parser = argparse.ArgumentParser(description='Retinaface Training') 16 | parser.add_argument('--dataset', default='./data/widerface/train/label.txt', help='Training dataset directory') 17 | parser.add_argument('--network', default='mobilenet0.25', choices={"mobilenet0.25", "resnet50"}) 18 | parser.add_argument('--batch-size', default=32, help='Batch size') 19 | parser.add_argument('--num-workers', default=4, type=int, help='Number of workers used in dataloading') 20 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') 21 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 22 | parser.add_argument('--resume-net', default=None, help='resume net for retraining') 23 | parser.add_argument('--resume-epoch', default=0, type=int, help='resume iter for retraining') 24 | parser.add_argument('--weight-decay', default=5e-4, type=float, help='Weight 
decay for SGD') 25 | parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD') 26 | parser.add_argument('--save-folder', default='./weights/', help='Location to save checkpoint models') 27 | args = parser.parse_args() 28 | 29 | 30 | os.makedirs(args.save_folder, exist_ok=True) 31 | if args.network == "mobilenet0.25": 32 | cfg = cfg_mnet 33 | elif args.network == "resnet50": 34 | cfg = cfg_re50 35 | 36 | RGB_MEAN = (104, 117, 123) # bgr order 37 | img_dim = cfg['image_size'] 38 | batch_size = cfg['batch_size'] 39 | max_epoch = cfg['epoch'] 40 | 41 | initial_lr = args.lr 42 | gamma = args.gamma 43 | training_dataset = args.dataset 44 | save_folder = args.save_folder 45 | 46 | 47 | def initialize_network(cfg, checkpoint=None, print_net=False): 48 | net = RetinaFace(**cfg) 49 | if print_net: 50 | print("Printing net...") 51 | print(net) 52 | if checkpoint is not None: 53 | print('Loading resume network...') 54 | net.load_state_dict(checkpoint["net_state_dict"]) 55 | 56 | if torch.cuda.is_available(): 57 | net.cuda() 58 | num_gpu = torch.cuda.device_count() 59 | if num_gpu > 1: 60 | net = torch.nn.DataParallel(net) 61 | return cfg, net 62 | 63 | 64 | def training_loop(net, optimizer, criterion, dataloader, cfg): 65 | assert isinstance(net, torch.nn.Module) 66 | assert isinstance(optimizer, torch.optim.Optimizer) 67 | assert isinstance(dataloader, torch.utils.data.DataLoader) 68 | assert isinstance(cfg, dict) 69 | 70 | priorbox = PriorBox(cfg, image_size=(cfg['image_size'],)*2) 71 | with torch.no_grad(): 72 | priors = priorbox.forward() 73 | priors = priors.cuda() 74 | 75 | net.train() 76 | epoch = 0 + args.resume_epoch 77 | print('Loading Dataset...') 78 | 79 | epoch_size = math.ceil(len(dataloader)) 80 | max_iter = max_epoch * epoch_size 81 | 82 | stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) 83 | step_index = 0 84 | 85 | start_iter = 0 86 | if args.resume_epoch > 0: 87 | start_iter += args.resume_epoch * epoch_size 88 | 89 | for iteration in range(start_iter, max_iter): 90 | load_t0 = time.perf_counter() 91 | if iteration in stepvalues: 92 | step_index += 1 93 | lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) 94 | 95 | # load train data 96 | try: 97 | images, targets = next(batch_iterator) 98 | except: 99 | batch_iterator = iter(dataloader) 100 | if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): 101 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict() 102 | torch.save( 103 | { 104 | "net_state_dict": net_state_dict, 105 | "epoch": epoch, 106 | "config": cfg, 107 | }, save_folder + f"{cfg['backbone']}_epoch{epoch:03d}.pt" 108 | ) 109 | epoch += 1 110 | images, targets = next(batch_iterator) 111 | 112 | images = images.cuda() 113 | targets = [anno.cuda() for anno in targets] 114 | 115 | # forward 116 | out = net(images) 117 | 118 | # backprop 119 | optimizer.zero_grad(set_to_none=True) 120 | loss_l, loss_c, loss_landm = criterion(out, priors, targets) 121 | loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm 122 | loss.backward() 123 | optimizer.step() 124 | 125 | load_t1 = time.perf_counter() 126 | if (iteration + 1) % 10 == 0: 127 | batch_time = load_t1 - load_t0 128 | eta = int(batch_time * (max_iter - iteration)) 129 | print( 130 | f"Epoch:{epoch:03d}/{max_epoch:03d} " 131 | f'|| Epochiter: {(iteration % epoch_size)+1}/{epoch_size} ' 132 | f'|| Iter: {iteration+1}/{max_iter} ' 133 | f'|| Loc: {loss_l.item():.3f} Cla: 
{loss_c.item():.3f} Landm: {loss_landm.item():.3f} ' 134 | f'|| LR: {lr:.8f} || Batchtime: {batch_time:.4f} s ' 135 | f'|| ETA: {str(datetime.timedelta(seconds=eta))}' 136 | ) 137 | 138 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict() 139 | torch.save( 140 | { 141 | "net_state_dict": net_state_dict, 142 | "epoch": epoch, 143 | "config": cfg, 144 | }, save_folder + f"{cfg['backbone']}_final.pt" 145 | ) 146 | 147 | 148 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 149 | """Sets the learning rate 150 | # Adapted from PyTorch Imagenet example: 151 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 152 | """ 153 | warmup_epoch = -1 154 | if epoch <= warmup_epoch: 155 | lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch) 156 | else: 157 | lr = initial_lr * (gamma ** (step_index)) 158 | for param_group in optimizer.param_groups: 159 | param_group['lr'] = lr 160 | return lr 161 | 162 | 163 | def main(): 164 | if args.resume_net is not None and os.path.isfile(args.resume_net): 165 | checkpoint = torch.load(args.resume_net, map_location="cpu") 166 | cfg = checkpoint["config"] 167 | else: 168 | checkpoint = None 169 | if args.network == "mobilenet0.25": 170 | cfg = cfg_mnet 171 | elif args.network == "resnet50": 172 | cfg = cfg_re50 173 | 174 | cfg, net = initialize_network(cfg, checkpoint) 175 | torch.backends.cudnn.benchmark = True 176 | 177 | optimizer = torch.optim.SGD( 178 | net.parameters(), lr=initial_lr, 179 | momentum=args.momentum, weight_decay=args.weight_decay, 180 | ) 181 | criterion = MultiBoxLoss(2, 0.35, True, 0, True, 7, 0.35, False) 182 | 183 | dataset = WiderFaceDetection(training_dataset, preproc(img_dim, RGB_MEAN)) 184 | dataloader = torch.utils.data.DataLoader( 185 | dataset, batch_size, shuffle=True, 186 | num_workers=args.num_workers, collate_fn=dataset.collate, 187 | ) 188 | 189 | training_loop(net, optimizer, criterion, dataloader, cfg) 190 | 191 | 192 | if __name__ == '__main__': 193 | main() 194 | -------------------------------------------------------------------------------- /face_detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/utils/__init__.py -------------------------------------------------------------------------------- /face_detection/utils/misc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from torchvision.ops import nms 5 | 6 | from .box_utils import decode, decode_landm 7 | 8 | 9 | def draw_keypoint(image, dets, threshold): 10 | for b in dets: 11 | if b[4] < threshold: 12 | continue 13 | text = f"{b[4]:.4f}" 14 | b = list(map(round, b)) 15 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 16 | cx = b[0] 17 | cy = b[1] + 12 18 | cv2.putText( 19 | image, text, (cx, cy), 20 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 21 | ) 22 | 23 | # landms 24 | cv2.circle(image, (b[5], b[6]), 1, (0, 0, 255), 4) 25 | cv2.circle(image, (b[7], b[8]), 1, (0, 255, 255), 4) 26 | cv2.circle(image, (b[9], b[10]), 1, (255, 0, 255), 4) 27 | cv2.circle(image, (b[11], b[12]), 1, (0, 255, 0), 4) 28 | cv2.circle(image, (b[13], b[14]), 1, (255, 0, 0), 4) 29 | 30 | 31 | def inference( 32 | network, image, scale, scale1, prior_data, 33 | cfg, 
confidence_threshold, nms_threshold, device 34 | ): 35 | img = image - (104, 117, 123) 36 | img = img.transpose(2, 0, 1) 37 | img = np.float32(img) 38 | img = torch.from_numpy(img).unsqueeze(0) 39 | img = img.to(device) 40 | 41 | loc, conf, landms = network(img) # forward pass 42 | 43 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 44 | boxes *= scale 45 | scores = conf.squeeze(0)[:, 1] 46 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 47 | landms *= scale1 48 | 49 | # ignore low scores 50 | inds = torch.where(scores > confidence_threshold)[0] 51 | boxes = boxes[inds] 52 | landms = landms[inds] 53 | scores = scores[inds] 54 | 55 | # do NMS 56 | keep = nms(boxes, scores, nms_threshold) 57 | boxes = boxes[keep] 58 | scores = scores[keep] 59 | landms = landms[keep] 60 | 61 | boxes = boxes.cpu().numpy() 62 | scores = scores.cpu().numpy() 63 | landms = landms.cpu().numpy() 64 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 65 | dets = np.concatenate((dets, landms), axis=1) 66 | return dets -------------------------------------------------------------------------------- /face_detection/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer: 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.perf_counter() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.perf_counter() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 
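Timer accumulates per-stage timings via the tic()/toc() pair in the test scripts; a minimal sketch of that pattern (the sleep is just a stand-in workload):

```python
import time

from utils.timer import Timer  # as imported by the face_detection test scripts

t = Timer()
for _ in range(3):
    t.tic()            # start timing a block, e.g. a forward pass
    time.sleep(0.01)   # stand-in workload
    t.toc()            # updates total_time, calls and average_time
print(f"average: {t.average_time:.4f}s over {t.calls} calls")
```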
41 | -------------------------------------------------------------------------------- /face_detection/webcam_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.misc import draw_keypoint, inference 13 | 14 | parser = argparse.ArgumentParser(description='Retinaface') 15 | parser.add_argument( 16 | '--checkpoint', type=str, 17 | default='./weights/mobilenet0.25_final.pt', 18 | help='Trained state_dict file path to open' 19 | ) 20 | parser.add_argument( 21 | '--cpu', action="store_true", default=False, 22 | help='Use cpu inference' 23 | ) 24 | parser.add_argument( 25 | '--jit', action="store_true", default=False, 26 | help='Use JIT' 27 | ) 28 | parser.add_argument( 29 | '--confidence-threshold', type=float, default=0.02, 30 | help='confidence_threshold' 31 | ) 32 | parser.add_argument( 33 | '--nms-threshold', type=float, default=0.4, 34 | help='nms_threshold' 35 | ) 36 | parser.add_argument( 37 | '--vis-thres', type=float, default=0.5, 38 | help='visualization_threshold' 39 | ) 40 | parser.add_argument( 41 | '-s', '--save-image', action="store_true", default=False, 42 | help='show detection results' 43 | ) 44 | parser.add_argument( 45 | '--save-dir', type=str, default='demo', 46 | help='Dir to save results' 47 | ) 48 | 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | assert os.path.isfile(args.checkpoint) 53 | 54 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 55 | cfg = checkpoint["config"] 56 | device = torch.device("cpu" if args.cpu else "cuda") 57 | 58 | # net and model 59 | net = RetinaFace(**cfg) 60 | net.load_state_dict(checkpoint["net_state_dict"]) 61 | net.eval().requires_grad_(False) 62 | net.to(device) 63 | print('Finished loading model!') 64 | cudnn.benchmark = True 65 | 66 | # prepare testing 67 | cap = cv2.VideoCapture(0) 68 | assert cap.isOpened() 69 | ret_val, img_tmp = cap.read() 70 | im_height, im_width, _ = img_tmp.shape 71 | scale = torch.Tensor([im_width, im_height, im_width, im_height]) 72 | scale = scale.to(device) 73 | 74 | scale1 = torch.Tensor([im_width, im_height] * 5) 75 | scale1 = scale1.to(device) 76 | 77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 78 | priors = priorbox.forward() 79 | priors = priors.to(device) 80 | prior_data = priors.data 81 | 82 | if args.jit: 83 | img_tmp = img_tmp.transpose(2, 0, 1) 84 | img_tmp = np.float32(img_tmp) 85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0) 86 | dummy = img_tmp.to(device) 87 | net = torch.jit.trace(net, example_inputs=dummy) 88 | 89 | if args.save_image: 90 | nframe = 0 91 | fname = os.path.join(args.save_dir, "{:06d}.jpg") 92 | os.makedirs(args.save_dir, exist_ok=True) 93 | 94 | # testing begin 95 | ret_val, img_raw = cap.read() 96 | while ret_val: 97 | start = cv2.getTickCount() 98 | 99 | # NOTE preprocessing. 
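# Preprocessing (subtracting the BGR mean (104, 117, 123), HWC -> CHW transpose,
# conversion to a float32 batch tensor) happens inside utils.misc.inference(),
# so the raw capture frame is passed in as-is.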
100 | dets = inference( 101 | net, img_raw, scale, scale1, prior_data, cfg, 102 | args.confidence_threshold, args.nms_threshold, device 103 | ) 104 | 105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start)) 106 | print(f"runtime: {fps:.1f} sec/iter") 107 | cv2.putText( 108 | img_raw, f"FPS: {fps:.1f}", (5, 15), 109 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 110 | ) 111 | 112 | # show image 113 | draw_keypoint(img_raw, dets, args.vis_thres) 114 | 115 | if args.save_image: 116 | cv2.imwrite(fname.format(nframe), img_raw) 117 | nframe += 1 118 | 119 | cv2.imshow("Face Detection Demo", img_raw) 120 | if cv2.waitKey(1) == 27: # Press ESC button to quit. 121 | break 122 | 123 | ret_val, img_raw = cap.read() 124 | 125 | cap.release() 126 | cv2.destroyAllWindows() 127 | 128 | 129 | if __name__ == "__main__": 130 | main() -------------------------------------------------------------------------------- /face_detection/weights/mobilenet0.25_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_final.pt -------------------------------------------------------------------------------- /face_detection/weights/mobilenet0.25_pretrain.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_pretrain.pt -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/README.md: -------------------------------------------------------------------------------- 1 | # WiderFace-Evaluation 2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) 3 | 4 | 5 | ## Usage 6 | 7 | 8 | ##### before evaluating .... 
9 | 10 | ```` 11 | python3 setup.py build_ext --inplace 12 | ```` 13 | 14 | ##### evaluating 15 | 16 | **GroungTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`,`wider_hard_val.mat` 17 | 18 | ```` 19 | python3 evaluation.py -p -g 20 | ```` 21 | 22 | ## Bugs & Problems 23 | please issue 24 | 25 | ## Acknowledgements 26 | 27 | some code borrowed from Sergey Karayev 28 | -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/evaluation.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | import os 9 | import tqdm 10 | import pickle 11 | import argparse 12 | import numpy as np 13 | from scipy.io import loadmat 14 | from bbox import bbox_overlaps 15 | from IPython import embed 16 | 17 | 18 | def get_gt_boxes(gt_dir): 19 | """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)""" 20 | 21 | gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) 22 | hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat')) 23 | medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat')) 24 | easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat')) 25 | 26 | facebox_list = gt_mat['face_bbx_list'] 27 | event_list = gt_mat['event_list'] 28 | file_list = gt_mat['file_list'] 29 | 30 | hard_gt_list = hard_mat['gt_list'] 31 | medium_gt_list = medium_mat['gt_list'] 32 | easy_gt_list = easy_mat['gt_list'] 33 | 34 | return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list 35 | 36 | 37 | def 
get_gt_boxes_from_txt(gt_path, cache_dir): 38 | 39 | cache_file = os.path.join(cache_dir, 'gt_cache.pkl') 40 | if os.path.exists(cache_file): 41 | f = open(cache_file, 'rb') 42 | boxes = pickle.load(f) 43 | f.close() 44 | return boxes 45 | 46 | f = open(gt_path, 'r') 47 | state = 0 48 | lines = f.readlines() 49 | lines = list(map(lambda x: x.rstrip('\r\n'), lines)) 50 | boxes = {} 51 | print(len(lines)) 52 | f.close() 53 | current_boxes = [] 54 | current_name = None 55 | for line in lines: 56 | if state == 0 and '--' in line: 57 | state = 1 58 | current_name = line 59 | continue 60 | if state == 1: 61 | state = 2 62 | continue 63 | 64 | if state == 2 and '--' in line: 65 | state = 1 66 | boxes[current_name] = np.array(current_boxes).astype('float32') 67 | current_name = line 68 | current_boxes = [] 69 | continue 70 | 71 | if state == 2: 72 | box = [float(x) for x in line.split(' ')[:4]] 73 | current_boxes.append(box) 74 | continue 75 | 76 | f = open(cache_file, 'wb') 77 | pickle.dump(boxes, f) 78 | f.close() 79 | return boxes 80 | 81 | 82 | def read_pred_file(filepath): 83 | 84 | with open(filepath, 'r') as f: 85 | lines = f.readlines() 86 | img_file = lines[0].rstrip('\n\r') 87 | lines = lines[2:] 88 | 89 | # b = lines[0].rstrip('\r\n').split(' ')[:-1] 90 | # c = float(b) 91 | # a = map(lambda x: [[float(a[0]), float(a[1]), float(a[2]), float(a[3]), float(a[4])] for a in x.rstrip('\r\n').split(' ')], lines) 92 | boxes = [] 93 | for line in lines: 94 | line = line.rstrip('\r\n').split(' ') 95 | if line[0] is '': 96 | continue 97 | # a = float(line[4]) 98 | boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])]) 99 | boxes = np.array(boxes) 100 | # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float') 101 | return img_file.split('/')[-1], boxes 102 | 103 | 104 | def get_preds(pred_dir): 105 | events = os.listdir(pred_dir) 106 | boxes = dict() 107 | pbar = tqdm.tqdm(events) 108 | 109 | for event in pbar: 110 | pbar.set_description('Reading Predictions ') 111 | event_dir = os.path.join(pred_dir, event) 112 | event_images = os.listdir(event_dir) 113 | current_event = dict() 114 | for imgtxt in event_images: 115 | imgname, _boxes = read_pred_file(os.path.join(event_dir, imgtxt)) 116 | current_event[imgname.rstrip('.jpg')] = _boxes 117 | boxes[event] = current_event 118 | return boxes 119 | 120 | 121 | def norm_score(pred): 122 | """ norm score 123 | pred {key: [[x1,y1,x2,y2,s]]} 124 | """ 125 | 126 | max_score = 0 127 | min_score = 1 128 | 129 | for _, k in pred.items(): 130 | for _, v in k.items(): 131 | if len(v) == 0: 132 | continue 133 | _min = np.min(v[:, -1]) 134 | _max = np.max(v[:, -1]) 135 | max_score = max(_max, max_score) 136 | min_score = min(_min, min_score) 137 | 138 | diff = max_score - min_score 139 | for _, k in pred.items(): 140 | for _, v in k.items(): 141 | if len(v) == 0: 142 | continue 143 | v[:, -1] = (v[:, -1] - min_score)/diff 144 | 145 | 146 | def image_eval(pred, gt, ignore, iou_thresh): 147 | """ single image evaluation 148 | pred: Nx5 149 | gt: Nx4 150 | ignore: 151 | """ 152 | 153 | _pred = pred.copy() 154 | _gt = gt.copy() 155 | pred_recall = np.zeros(_pred.shape[0]) 156 | recall_list = np.zeros(_gt.shape[0]) 157 | proposal_list = np.ones(_pred.shape[0]) 158 | 159 | _pred[:, 2] = _pred[:, 2] + _pred[:, 0] 160 | _pred[:, 3] = _pred[:, 3] + _pred[:, 1] 161 | _gt[:, 2] = _gt[:, 2] + _gt[:, 0] 162 | _gt[:, 3] = _gt[:, 3] + _gt[:, 1] 163 | 164 | overlaps = 
bbox_overlaps(_pred[:, :4], _gt) 165 | 166 | for h in range(_pred.shape[0]): 167 | 168 | gt_overlap = overlaps[h] 169 | max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() 170 | if max_overlap >= iou_thresh: 171 | if ignore[max_idx] == 0: 172 | recall_list[max_idx] = -1 173 | proposal_list[h] = -1 174 | elif recall_list[max_idx] == 0: 175 | recall_list[max_idx] = 1 176 | 177 | r_keep_index = np.where(recall_list == 1)[0] 178 | pred_recall[h] = len(r_keep_index) 179 | return pred_recall, proposal_list 180 | 181 | 182 | def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall): 183 | pr_info = np.zeros((thresh_num, 2)).astype('float') 184 | for t in range(thresh_num): 185 | 186 | thresh = 1 - (t+1)/thresh_num 187 | r_index = np.where(pred_info[:, 4] >= thresh)[0] 188 | if len(r_index) == 0: 189 | pr_info[t, 0] = 0 190 | pr_info[t, 1] = 0 191 | else: 192 | r_index = r_index[-1] 193 | p_index = np.where(proposal_list[:r_index+1] == 1)[0] 194 | pr_info[t, 0] = len(p_index) 195 | pr_info[t, 1] = pred_recall[r_index] 196 | return pr_info 197 | 198 | 199 | def dataset_pr_info(thresh_num, pr_curve, count_face): 200 | _pr_curve = np.zeros((thresh_num, 2)) 201 | for i in range(thresh_num): 202 | _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] 203 | _pr_curve[i, 1] = pr_curve[i, 1] / count_face 204 | return _pr_curve 205 | 206 | 207 | def voc_ap(rec, prec): 208 | 209 | # correct AP calculation 210 | # first append sentinel values at the end 211 | mrec = np.concatenate(([0.], rec, [1.])) 212 | mpre = np.concatenate(([0.], prec, [0.])) 213 | 214 | # compute the precision envelope 215 | for i in range(mpre.size - 1, 0, -1): 216 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 217 | 218 | # to calculate area under PR curve, look for points 219 | # where X axis (recall) changes value 220 | i = np.where(mrec[1:] != mrec[:-1])[0] 221 | 222 | # and sum (\Delta recall) * prec 223 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 224 | return ap 225 | 226 | 227 | def evaluation(pred, gt_path, iou_thresh=0.5): 228 | pred = get_preds(pred) 229 | norm_score(pred) 230 | facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path) 231 | event_num = len(event_list) 232 | thresh_num = 1000 233 | settings = ['easy', 'medium', 'hard'] 234 | setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list] 235 | aps = [] 236 | for setting_id in range(3): 237 | # different setting 238 | gt_list = setting_gts[setting_id] 239 | count_face = 0 240 | pr_curve = np.zeros((thresh_num, 2)).astype('float') 241 | # [hard, medium, easy] 242 | pbar = tqdm.tqdm(range(event_num)) 243 | for i in pbar: 244 | pbar.set_description('Processing {}'.format(settings[setting_id])) 245 | event_name = str(event_list[i][0][0]) 246 | img_list = file_list[i][0] 247 | pred_list = pred[event_name] 248 | sub_gt_list = gt_list[i][0] 249 | # img_pr_info_list = np.zeros((len(img_list), thresh_num, 2)) 250 | gt_bbx_list = facebox_list[i][0] 251 | 252 | for j in range(len(img_list)): 253 | pred_info = pred_list[str(img_list[j][0][0])] 254 | 255 | gt_boxes = gt_bbx_list[j][0].astype('float') 256 | keep_index = sub_gt_list[j][0] 257 | count_face += len(keep_index) 258 | 259 | if len(gt_boxes) == 0 or len(pred_info) == 0: 260 | continue 261 | ignore = np.zeros(gt_boxes.shape[0]) 262 | if len(keep_index) != 0: 263 | ignore[keep_index-1] = 1 264 | pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh) 265 | 266 | _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, 
pred_recall) 267 | 268 | pr_curve += _img_pr_info 269 | pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) 270 | 271 | propose = pr_curve[:, 0] 272 | recall = pr_curve[:, 1] 273 | 274 | ap = voc_ap(recall, propose) 275 | aps.append(ap) 276 | 277 | print("==================== Results ====================") 278 | print("Easy Val AP: {}".format(aps[0])) 279 | print("Medium Val AP: {}".format(aps[1])) 280 | print("Hard Val AP: {}".format(aps[2])) 281 | print("=================================================") 282 | 283 | 284 | if __name__ == '__main__': 285 | 286 | parser = argparse.ArgumentParser() 287 | parser.add_argument('-p', '--pred', default="./widerface_txt/") 288 | parser.add_argument('-g', '--gt', default='./ground_truth/') 289 | 290 | args = parser.parse_args() 291 | evaluation(args.pred, args.gt) 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_face_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_face_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | from distutils.core import setup, Extension 9 | from Cython.Build import cythonize 10 | import numpy 11 | 12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) 13 | setup(ext_modules=cythonize([package])) 14 | -------------------------------------------------------------------------------- /face_recognition/config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | configurations = { 4 | 1: dict( 5 | SEED = 1993, # random seed for reproduce results 6 | 7 | DATA_ROOT = '../DATA', # the parent root 
where your train/val/test data are stored 8 | MODEL_ROOT = '../CHECKPOINT', # the root to buffer your checkpoints 9 | LOG_ROOT = '../LOG', # the root to log your train/val status 10 | BACKBONE_RESUME_ROOT = '../CHECKPOINT/Backbone_IR_152_Epoch_112.pth', # the root to resume training from a saved checkpoint 11 | HEAD_RESUME_ROOT = '../CHECKPOINT/Head_ArcFace_Epoch_112.pth', # the root to resume training from a saved checkpoint 12 | 13 | BACKBONE_NAME = 'IR_50', # support: ['ResNet_50', 'ResNet_101', 'ResNet_152', 'IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152'] 14 | HEAD_NAME = 'ArcFace', # support: ['Softmax', 'ArcFace', 'CosFace', 'SphereFace', 'Am_softmax'] 15 | LOSS_NAME = 'Focal', # support: ['Focal', 'Softmax'] 16 | 17 | INPUT_SIZE = [112, 112], # support: [112, 112] and [224, 224] 18 | RGB_MEAN = [0.5, 0.5, 0.5], # to normalize inputs to [-1, 1] 19 | RGB_STD = [0.5, 0.5, 0.5], 20 | EMBEDDING_SIZE = 1024, # feature dimension 21 | BATCH_SIZE = 256*8, 22 | DROP_LAST = True, # whether to drop the last batch to ensure consistent batch_norm statistics 23 | LR = 0.1, # initial LR 24 | NUM_EPOCH = 125, # total epoch number (use the first 1/25 epochs to warm up) 25 | WEIGHT_DECAY = 5e-4, # do not apply to batch_norm parameters 26 | MOMENTUM = 0.9, 27 | STAGES = [35, 65, 95], # epoch stages to decay learning rate 28 | 29 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), 30 | MULTI_GPU = True, # flag to use multiple GPUs; if you choose to train with a single GPU, first run "export CUDA_VISIBLE_DEVICES=device_id" to specify the GPU card you want to use 31 | GPU_ID = [0, 1, 2, 3, 4, 5, 6, 7], # specify your GPU ids 32 | #GPU_ID = [0], # specify your GPU ids 33 | PIN_MEMORY = True, 34 | NUM_WORKERS = 0 35 | ) 36 | } 37 | -------------------------------------------------------------------------------- /gaze_estimation/README.md: -------------------------------------------------------------------------------- 1 | # IR_Driver_Gaze_Estimation 2 | 3 | Implementation of gaze estimation from IR camera images with a CNN. 4 | 5 | This repository provides a light model version of gaze estimation (Caffe, TensorFlow and PyTorch) and a heavy model version. 6 | 7 | * input : 120 x 100 grayscale face image 8 | * Light version : uses the 120 x 100 grayscale image for the global estimator 9 | * Heavy version : uses the 120 x 100 grayscale image for the global estimator and crops it to an 80 x 100 image for the local estimator 10 | * Heavy+Att version : adds an attention mask to the heavy version 11 | 12 | 13 | ## CAFFE version 14 | The light model version is supported. 15 | 16 | -TRAINING from Scratch- 17 | > bin\caffe train --solver=ir_gaze_solver.prototxt --gpu=0 18 | 19 | -TRAINING from Weights- 20 | > bin\caffe train --solver=ir_gaze_solver.prototxt --weights=caffemodels/***.caffemodel --gpu=0 21 | 22 | 23 | 24 | ## TENSORFLOW version 25 | The light model version is supported. 26 | 27 | -TRAINING/EVALUATION from Scratch- 28 | > python train.py 29 | 30 | -PREDICT- 31 | > python test_sequences.py 32 | 33 | 34 | 35 | ## PYTORCH version 36 | Modify config.py for various options (such as batch size, GPU index, etc.).
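For example (the option names below are illustrative placeholders only, not taken from the repository — check config.py for the actual names):

```python
# hypothetical option names, for illustration only
batch_size = 64   # reduce if GPU memory is tight
gpu_index = 0     # CUDA device used for training
```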
37 | 38 | -TRAINING- 39 | > python train.py -------------------------------------------------------------------------------- /gaze_estimation/example_movie/media2_slow.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/gaze_estimation/example_movie/media2_slow.avi -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_deploy.prototxt: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 20172258 Cha Dongmin 3 | ################################################################################ 4 | 5 | name: "IR_GAZE_ESTIMATION" 6 | input: "data" 7 | input_dim: 1 # batch size 8 | input_dim: 1 9 | input_dim: 100 10 | input_dim: 120 11 | 12 | layer { 13 | name: "data" 14 | type: "HDF5Data" 15 | top: "data" 16 | top: "label" 17 | hdf5_data_param { 18 | source: "list_train.txt" 19 | batch_size: 32 20 | } 21 | } 22 | 23 | 24 | 25 | layer { 26 | name: "conv1" 27 | type: "Convolution" 28 | bottom: "data" 29 | top: "conv1" 30 | param { 31 | lr_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | } 36 | convolution_param { 37 | num_output: 40 38 | kernel_size: 7 39 | stride: 2 40 | } 41 | } 42 | 43 | 44 | 45 | layer { 46 | name: "relu1" 47 | type: "ReLU" 48 | bottom: "conv1" 49 | top: "conv1" 50 | } 51 | 52 | 53 | layer { 54 | name: "pool1" 55 | type: "Pooling" 56 | bottom: "conv1" 57 | top: "pool1" 58 | pooling_param { 59 | kernel_size: 3 60 | stride: 2 61 | pool: MAX 62 | } 63 | } 64 | 65 | 66 | layer { 67 | name: "conv2" 68 | type: "Convolution" 69 | bottom: "pool1" 70 | top: "conv2" 71 | param { 72 | lr_mult: 1.0 73 | } 74 | param { 75 | lr_mult: 2.0 76 | } 77 | convolution_param { 78 | num_output: 70 79 | kernel_size: 5 80 | pad: 1 81 | stride: 2 82 | } 83 | } 84 | 85 | 86 | layer { 87 | name: "relu2" 88 | type: "ReLU" 89 | bottom: "conv2" 90 | top: "conv2" 91 | } 92 | 93 | 94 | 95 | 96 | layer { 97 | name: "pool2" 98 | type: "Pooling" 99 | bottom: "conv2" 100 | top: "pool2" 101 | pooling_param { 102 | kernel_size: 2 103 | stride: 2 104 | pool: MAX 105 | } 106 | } 107 | 108 | 109 | layer { 110 | name: "conv3" 111 | type: "Convolution" 112 | bottom: "pool2" 113 | top: "conv3" 114 | param { 115 | lr_mult: 1.0 116 | } 117 | param { 118 | lr_mult: 2.0 119 | } 120 | convolution_param { 121 | num_output: 60 122 | kernel_size: 3 123 | pad: 1 124 | } 125 | } 126 | 127 | 128 | 129 | layer { 130 | name: "relu3" 131 | type: "ReLU" 132 | bottom: "conv3" 133 | top: "conv3" 134 | } 135 | 136 | layer { 137 | name: "pool3" 138 | type: "Pooling" 139 | bottom: "conv3" 140 | top: "pool3" 141 | pooling_param { 142 | kernel_size: 2 143 | stride: 2 144 | pool: MAX 145 | } 146 | } 147 | 148 | 149 | 150 | layer { 151 | name: "conv4" 152 | type: "Convolution" 153 | bottom: "pool3" 154 | top: "conv4" 155 | param { 156 | lr_mult: 1.0 157 | } 158 | param { 159 | lr_mult: 2.0 160 | } 161 | convolution_param { 162 | num_output: 80 163 | kernel_size: 3 164 | pad: 1 165 | } 166 | } 167 | 168 | 169 | layer { 170 | name: "relu4" 171 | type: "ReLU" 172 | bottom: "conv4" 173 | top: "conv4" 174 | } 175 | 176 | layer { 177 | name: "pool4" 178 | type: "Pooling" 179 | bottom: "conv4" 180 | top: "pool4" 181 | pooling_param { 182 | kernel_size: 2 183 | stride: 2 184 | pool: MAX 185 | } 186 | } 
187 | 188 | 189 | 190 | layer { 191 | name: "conv5" 192 | type: "Convolution" 193 | bottom: "pool4" 194 | top: "conv5" 195 | param { 196 | lr_mult: 1.0 197 | } 198 | param { 199 | lr_mult: 2.0 200 | } 201 | convolution_param { 202 | num_output: 100 203 | kernel_size: 3 204 | pad: 1 205 | } 206 | } 207 | 208 | 209 | layer { 210 | name: "relu5" 211 | type: "ReLU" 212 | bottom: "conv5" 213 | top: "conv5" 214 | } 215 | 216 | 217 | layer { 218 | name: "pool5" 219 | type: "Pooling" 220 | bottom: "conv5" 221 | top: "pool5" 222 | pooling_param { 223 | kernel_size: 2 224 | stride: 2 225 | pool: MAX 226 | } 227 | } 228 | 229 | 230 | layer { 231 | name: "concat1" 232 | bottom: "conv5" 233 | bottom: "pool4" 234 | top: "concat1" 235 | type: "Concat" 236 | concat_param { 237 | axis: 1 238 | } 239 | } 240 | 241 | 242 | 243 | layer { 244 | name: "fc1" 245 | type: "InnerProduct" 246 | bottom: "concat1" 247 | top: "fc1" 248 | inner_product_param { 249 | num_output: 4000 250 | } 251 | } 252 | 253 | 254 | layer { 255 | name: "relu6" 256 | type: "ReLU" 257 | bottom: "fc1" 258 | top: "fc1" 259 | } 260 | 261 | 262 | layer { 263 | name: "drop1" 264 | type: "Dropout" 265 | bottom: "fc1" 266 | top: "fc1" 267 | dropout_param { 268 | dropout_ratio: 0.5 269 | } 270 | 271 | 272 | } 273 | 274 | 275 | 276 | 277 | layer { 278 | name: "fc2" 279 | type: "InnerProduct" 280 | bottom: "fc1" 281 | top: "fc2" 282 | 283 | param{ 284 | lr_mult: 10 285 | decay_mult: 1 286 | } 287 | param{ 288 | lr_mult: 20 289 | decay_mult: 0 290 | } 291 | inner_product_param { 292 | num_output: 6 293 | weight_filler { 294 | type: "xavier" 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0.0 299 | } 300 | } 301 | } 302 | 303 | layer { 304 | name: "prob" 305 | type: "Softmax" 306 | bottom: "fc2" 307 | top: "prob" 308 | 309 | } 310 | -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "ir_gaze_train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | max_iter: 40000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 5000 13 | snapshot_prefix: "GAZE" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_train_val.prototxt: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 20172258 Cha Dongmin 3 | ################################################################################ 4 | 5 | name: "IR_GAZE_ESTIMATION" 6 | 7 | #input: "data" 8 | #input_dim: BATCH 9 | #input_dim: 1 10 | #input_dim: 100 11 | #input_dim: 120 12 | 13 | layer { 14 | name: "data" 15 | type: "HDF5Data" 16 | top: "data" 17 | top: "label" 18 | hdf5_data_param { 19 | source: "list_train.txt" 20 | batch_size: 32 21 | } 22 | } 23 | 24 | 25 | 26 | layer { 27 | name: "conv1" 28 | type: "Convolution" 29 | bottom: "data" 30 | top: "conv1" 31 | param { 32 | lr_mult: 1.0 33 | } 34 | param { 35 | lr_mult: 2.0 36 | } 37 | convolution_param { 38 | num_output: 40 39 | kernel_size: 7 40 | stride: 2 41 | } 42 | } 43 | 44 | 45 | 46 | layer { 47 | name: "relu1" 48 | type: "ReLU" 49 | bottom: "conv1" 50 | top: "conv1" 51 | } 52 | 53 
| 54 | layer { 55 | name: "pool1" 56 | type: "Pooling" 57 | bottom: "conv1" 58 | top: "pool1" 59 | pooling_param { 60 | kernel_size: 3 61 | stride: 2 62 | pool: MAX 63 | } 64 | } 65 | 66 | 67 | layer { 68 | name: "conv2" 69 | type: "Convolution" 70 | bottom: "pool1" 71 | top: "conv2" 72 | param { 73 | lr_mult: 1.0 74 | } 75 | param { 76 | lr_mult: 2.0 77 | } 78 | convolution_param { 79 | num_output: 70 80 | kernel_size: 5 81 | pad: 1 82 | stride: 2 83 | } 84 | } 85 | 86 | 87 | layer { 88 | name: "relu2" 89 | type: "ReLU" 90 | bottom: "conv2" 91 | top: "conv2" 92 | } 93 | 94 | 95 | 96 | 97 | layer { 98 | name: "pool2" 99 | type: "Pooling" 100 | bottom: "conv2" 101 | top: "pool2" 102 | pooling_param { 103 | kernel_size: 2 104 | stride: 2 105 | pool: MAX 106 | } 107 | } 108 | 109 | 110 | layer { 111 | name: "conv3" 112 | type: "Convolution" 113 | bottom: "pool2" 114 | top: "conv3" 115 | param { 116 | lr_mult: 1.0 117 | } 118 | param { 119 | lr_mult: 2.0 120 | } 121 | convolution_param { 122 | num_output: 60 123 | kernel_size: 3 124 | pad: 1 125 | } 126 | } 127 | 128 | 129 | 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | 137 | layer { 138 | name: "pool3" 139 | type: "Pooling" 140 | bottom: "conv3" 141 | top: "pool3" 142 | pooling_param { 143 | kernel_size: 2 144 | stride: 2 145 | pool: MAX 146 | } 147 | } 148 | 149 | 150 | 151 | layer { 152 | name: "conv4" 153 | type: "Convolution" 154 | bottom: "pool3" 155 | top: "conv4" 156 | param { 157 | lr_mult: 1.0 158 | } 159 | param { 160 | lr_mult: 2.0 161 | } 162 | convolution_param { 163 | num_output: 80 164 | kernel_size: 3 165 | pad: 1 166 | } 167 | } 168 | 169 | 170 | layer { 171 | name: "relu4" 172 | type: "ReLU" 173 | bottom: "conv4" 174 | top: "conv4" 175 | } 176 | 177 | layer { 178 | name: "pool4" 179 | type: "Pooling" 180 | bottom: "conv4" 181 | top: "pool4" 182 | pooling_param { 183 | kernel_size: 2 184 | stride: 2 185 | pool: MAX 186 | } 187 | } 188 | 189 | 190 | 191 | layer { 192 | name: "conv5" 193 | type: "Convolution" 194 | bottom: "pool4" 195 | top: "conv5" 196 | param { 197 | lr_mult: 1.0 198 | } 199 | param { 200 | lr_mult: 2.0 201 | } 202 | convolution_param { 203 | num_output: 100 204 | kernel_size: 3 205 | pad: 1 206 | } 207 | } 208 | 209 | 210 | layer { 211 | name: "relu5" 212 | type: "ReLU" 213 | bottom: "conv5" 214 | top: "conv5" 215 | } 216 | 217 | 218 | layer { 219 | name: "pool5" 220 | type: "Pooling" 221 | bottom: "conv5" 222 | top: "pool5" 223 | pooling_param { 224 | kernel_size: 2 225 | stride: 2 226 | pool: MAX 227 | } 228 | } 229 | 230 | 231 | layer { 232 | name: "concat1" 233 | bottom: "conv5" 234 | bottom: "pool4" 235 | top: "concat1" 236 | type: "Concat" 237 | concat_param { 238 | axis: 1 239 | } 240 | } 241 | 242 | 243 | 244 | layer { 245 | name: "fc1" 246 | type: "InnerProduct" 247 | bottom: "concat1" 248 | top: "fc1" 249 | inner_product_param { 250 | num_output: 4000 251 | } 252 | } 253 | 254 | 255 | layer { 256 | name: "relu6" 257 | type: "ReLU" 258 | bottom: "fc1" 259 | top: "fc1" 260 | } 261 | 262 | 263 | layer { 264 | name: "drop1" 265 | type: "Dropout" 266 | bottom: "fc1" 267 | top: "fc1" 268 | dropout_param { 269 | dropout_ratio: 0.5 270 | } 271 | 272 | 273 | } 274 | 275 | 276 | 277 | 278 | layer { 279 | name: "fc2" 280 | type: "InnerProduct" 281 | bottom: "fc1" 282 | top: "fc2" 283 | 284 | param{ 285 | lr_mult: 10 286 | decay_mult: 1 287 | } 288 | param{ 289 | lr_mult: 20 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 6 
294 |     weight_filler {
295 |       type: "xavier"
296 |     }
297 |     bias_filler {
298 |       type: "constant"
299 |       value: 0.0
300 |     }
301 |   }
302 | }
303 | 
304 | layer {
305 |   name: "loss"
306 |   type: "SoftmaxWithLoss"
307 |   bottom: "fc2"
308 |   bottom: "label"
309 |   top: "loss"
310 | }
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/model.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import numpy as np
4 | from opt import *
5 | 
6 | 
7 | 
8 | def gazenetwork(features, labels, mode):
9 | 
10 |     # image : [batch, 100, 120, 1]
11 |     input = tf.reshape(features["x"], [-1, 100, 120, 1])
12 | 
13 |     # dropout keep probability: 0.5 while training, 1.0 otherwise
14 |     if mode == tf.estimator.ModeKeys.TRAIN:
15 |         dropout = 0.5
16 |     else:
17 |         dropout = 1.0
18 | 
19 | 
20 | 
21 |     # * conv2d uses SAME padding by default
22 |     # H0
23 |     h0 = lrelu(conv2d(input, output_dim=40, ks=7, s=2, name='h0_conv'))
24 |     h0 = slim.max_pool2d(h0, kernel_size=3, stride=2, scope='h0_pool')
25 | 
26 |     # H1
27 |     h1 = lrelu(conv2d(h0, output_dim=70, ks=5, s=2, name='h1_conv'))
28 |     h1 = slim.max_pool2d(h1, kernel_size=2, stride=2, scope='h1_pool')
29 | 
30 |     # H2
31 |     h2 = lrelu(conv2d(h1, output_dim=60, ks=3, s=1, name='h2_conv'))
32 |     h2 = slim.max_pool2d(h2, kernel_size=2, stride=2, scope='h2_pool')
33 | 
34 |     # H3
35 |     h3 = lrelu(conv2d(h2, output_dim=80, ks=3, s=1, name='h3_conv'))
36 |     h3 = slim.max_pool2d(h3, kernel_size=2, stride=2, scope='h3_pool')
37 | 
38 |     # H4
39 |     h4 = lrelu(conv2d(h3, output_dim=100, ks=3, s=1, name='h4_conv'))
40 | 
41 |     # concatenate h3 & h4
42 |     h3_flat = slim.flatten(h3, scope="h3_flat")
43 |     h4_flat = slim.flatten(h4, scope="h4_flat")
44 |     h_concat = tf.concat([h3_flat, h4_flat], 1, name='h3_h4_concat')
45 | 
46 |     # start of fc
47 |     fc1 = slim.fully_connected(h_concat, 4000, scope="fc1")
48 |     fc1_dropout = slim.dropout(fc1, dropout)
49 |     logits = slim.fully_connected(fc1_dropout, 6, activation_fn=None, scope="logits")
50 |     class_logits = tf.argmax(input=logits, axis=1)
51 | 
52 |     # class ids and softmax probabilities
53 |     #softmax
54 |     predictions = {"classes" : tf.argmax(input=logits, axis=1),
55 |                    "probabilities" : tf.nn.softmax(logits, name="softmax_tensor")}
56 |     #predictions = tf.nn.softmax(logits, name='predictions')
57 | 
58 | 
59 | 
60 | 
61 |     if mode == tf.estimator.ModeKeys.PREDICT:
62 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
63 | 
64 |     loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
65 |     #accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
66 | 
67 | 
68 |     # in TRAIN mode,
69 |     if mode == tf.estimator.ModeKeys.TRAIN:
70 |         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.005)
71 |         train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
72 | 
73 |         # In TRAIN mode, return an EstimatorSpec that contains mode, loss and train_op.
74 |         # train_op minimizes the loss with the optimizer.
75 |         return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
76 | 
77 |     # in PREDICT mode (note: unreachable, the PREDICT case already returned above)
78 |     if mode == tf.estimator.ModeKeys.PREDICT:
79 |         out_predictions = {
80 |             "classes": tf.argmax(input=logits, axis=1),
81 |             "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
82 |         }
83 | 
84 |         #out_predictions = {"logits": logits}
85 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=out_predictions)
86 | 
87 |     # in EVAL mode
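    # (estimator.evaluate() reports each entry of eval_metric_ops under its dict key, together
    #  with loss and global_step, e.g. {'accuracy': 0.93, 'loss': 0.21, 'global_step': 50000};
    #  the numbers are purely illustrative.)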
88 |     print(labels)
89 |     print(class_logits)
90 |     eval_ops = {"accuracy" : tf.metrics.accuracy(labels=labels, predictions=class_logits)}
91 | 
92 |     # In EVAL mode, return an EstimatorSpec that contains mode, loss and eval_metric_ops.
93 |     # The evaluation metric is accuracy.
94 |     return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_ops)
95 | 
96 | 
97 | 
98 | 
99 | 
100 | 
101 | 
102 | 
103 | 
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/opt.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import random
4 | import math
5 | from glob import glob
6 | from PIL import Image
7 | 
8 | import tensorflow.contrib.slim as slim
9 | 
10 | 
11 | BATCH_SIZE = 256
12 | IMG_WIDTH = 120
13 | IMG_HEIGHT = 100
14 | CHANNEL_N = 1
15 | CLASS_N = 6
16 | 
17 | def load_img_and_label_from_npy(image_npy, label_npy):
18 |     images = load_np(image_npy)
19 |     labels = load_np(label_npy)
20 | 
21 |     return images, labels
22 | 
23 | def load_images(train_ratio=0.95, test_ratio=0.05):
24 |     print("Loading Images...")
25 | 
26 |     # read the images grouped by gaze-zone label
27 |     # 6 gaze zones
28 |     data_list_1 = glob('*part_1.jpg') #1
29 |     data_list_2 = glob('*part_3.jpg') #2
30 |     data_list_3 = glob('*part_6.jpg') #3
31 |     data_list_4 = glob('*part_8.jpg') #4
32 |     data_list_5 = glob('*part_10.jpg') #5
33 |     data_list_6 = glob('*part_12.jpg') #6
34 | 
35 | 
36 |     batch_tuple = []
37 | 
38 |     n = 0
39 |     #------------1
40 |     for i in range(len(data_list_1)):
41 |         path = data_list_1[i]
42 |         img = read_image(path)
43 | 
44 |         # store (path, label) for the loaded image
45 |         batch_tuple.append((path, 0))
46 | 
47 | 
48 |     #-------------- 2
49 |     for i in range(len(data_list_2)):
50 |         path = data_list_2[i]
51 |         img = read_image(path)
52 | 
53 |         # store (path, label) for the loaded image
54 |         batch_tuple.append((path, 1))
55 | 
56 |     #--------------- 3
57 |     for i in range(len(data_list_3)):
58 |         path = data_list_3[i]
59 |         img = read_image(path)
60 | 
61 |         # store (path, label) for the loaded image
62 |         batch_tuple.append((path, 2))
63 | 
64 |     # ---------------- 4
65 |     for i in range(len(data_list_4)):
66 |         path = data_list_4[i]
67 |         img = read_image(path)
68 | 
69 |         # store (path, label) for the loaded image
70 |         batch_tuple.append((path, 3))
71 | 
72 |     # ---------------- 5
73 |     for i in range(len(data_list_5)):
74 |         path = data_list_5[i]
75 |         img = read_image(path)
76 | 
77 |         # store (path, label) for the loaded image
78 |         batch_tuple.append((path, 4))
79 | 
80 |     # ----------------- 6
81 |     for i in range(len(data_list_6)):
82 |         path = data_list_6[i]
83 |         img = read_image(path)
84 | 
85 |         # store (path, label) for the loaded image
86 |         batch_tuple.append((path, 5))
87 | 
88 | 
89 |     # shuffle the stored (path, label) tuples before splitting
90 |     random.shuffle(batch_tuple)
91 |     #print(batch_tuple)
92 | 
93 |     # split into train : test
94 |     num = len(batch_tuple)
95 |     train_num = math.floor(train_ratio*num)
96 |     test_num = num - train_num
97 | 
98 | 
99 |     # train / test split
100 |     train_batch = batch_tuple[0:train_num]
101 |     test_batch = batch_tuple[train_num:num]
102 |     print(len(train_batch))
103 | 
104 |     # the images have to be converted to numpy arrays
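    # (Labels are written one-hot with shape [N, CLASS_N]; train.py later argmaxes them back to
    #  class indices, since the model's loss is tf.losses.sparse_softmax_cross_entropy.)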
105 |     # BATCH_SIZE = len(data_list)
106 | 
107 |     train_image = np.zeros((train_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
108 |     train_label = np.zeros((train_num, CLASS_N))
109 |     test_image = np.zeros((test_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
110 |     test_label = np.zeros((test_num, CLASS_N))
111 | 
112 |     # [TRAIN] convert to numpy
113 |     bat_idx = 0
114 |     for path, label in train_batch:
115 |         img = read_image(path)
116 |         train_image[bat_idx, :, :, :] = img
117 |         train_label[bat_idx, label] = 1
118 |         bat_idx += 1
119 | 
120 |     # [TEST] convert to numpy
121 |     bat_idx = 0
122 |     for path, label in test_batch:
123 |         img = read_image(path)
124 |         test_image[bat_idx, :, :, :] = img
125 |         test_label[bat_idx, label] = 1
126 |         bat_idx += 1
127 | 
128 |     print('[train_img]')
129 |     print(train_image.shape)
130 |     print('[test_img]')
131 |     print(test_image.shape)
132 |     print('[train_label]')
133 |     print(train_label.shape)
134 |     print('[test_label]')
135 |     print(test_label.shape)
136 | 
137 |     save_np('train_img', train_image)
138 |     save_np('train_label', train_label)
139 |     save_np('test_img', test_image)
140 |     save_np('test_label', test_label)
141 | 
142 | 
143 | 
144 | 
145 | def save_np(filename, data):
146 |     np.save(filename, data)
147 | 
148 | def load_np(filename):
149 |     print('loading ' + filename + '......')
150 |     return np.load(filename)
151 | 
152 | def read_image_and_label(path):
153 |     return read_image(path), read_label(path)  # note: read_label is not defined in this file; this helper appears unused
154 | 
155 | def read_image(path):
156 |     image = np.array(Image.open(path).convert('L'))
157 |     image = image.astype(np.float32)
158 |     image = image / 255.0
159 |     image = np.expand_dims(image, axis=2)
160 |     #image = image.reshape(IMG_HEIGHT, IMG_WIDTH, 1)
161 |     return image
162 | 
163 | 
164 | def instance_norm(input, name="instance_norm"):
165 |     with tf.variable_scope(name):
166 |         depth = input.get_shape()[3]
167 |         scale = tf.get_variable("scale", [depth], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
168 |         offset = tf.get_variable("offset", [depth], initializer=tf.constant_initializer(0.0))
169 |         mean, variance = tf.nn.moments(input, axes=[1,2], keep_dims=True)
170 |         epsilon = 1e-5
171 |         inv = tf.rsqrt(variance + epsilon)
172 |         normalized = (input-mean)*inv
173 |         return scale*normalized + offset
174 | 
175 | 
176 | # conv layer
177 | def conv2d(input_, output_dim, ks=4, s=2, stddev=0.02, padding='SAME', name="conv2d"):
178 |     with tf.variable_scope(name):
179 |         return slim.conv2d(input_, output_dim, ks, s, padding=padding, activation_fn=None,
180 |                            weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
181 |                            biases_initializer=None)
182 | 
183 | # leaky ReLU activation
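# (keeps a small slope for negative inputs: f(x) = max(x, leak * x), with leak = 0.2 by default)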
184 | def lrelu(x, leak=0.2, name="lrelu"):
185 |     return tf.maximum(x, leak*x)
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/test_sequences.py: --------------------------------------------------------------------------------
1 | #import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 | import random
5 | import math
6 | from glob import glob
7 | from PIL import Image, ImageDraw, ImageFont
8 | import matplotlib.pyplot as plt
9 | import time
10 | 
11 | 
12 | 
13 | BATCH_SIZE = 256
14 | IMG_WIDTH = 120
15 | IMG_HEIGHT = 100
16 | CHANNEL_N = 1
17 | CLASS_N = 6
18 | 
19 | 
20 | def predict_imgs():
21 |     tf.logging.set_verbosity(tf.logging.INFO)
22 |     # to avoid cuda memory out error
23 |     gpu_options = tf.GPUOptions(allow_growth=True)
24 |     config = tf.ConfigProto(gpu_options=gpu_options)
25 | 
26 |     # data load
27 |     face_npy, img_list = load_imgs()
28 |     IMG_NUM = len(img_list)
29 | 
30 |     # create the estimator
31 |     gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
32 |                                              config=tf.contrib.learn.RunConfig(session_config=config))
33 | 
34 | 
35 | 
36 |     # START
37 |     img_template = None
38 |     for i in range(IMG_NUM):
39 |         test_data = face_npy[i, :, :, :]
40 |         test_data = np.expand_dims(test_data, axis=0)
41 | 
42 |         # test
43 |         test_input_fn = tf.estimator.inputs.numpy_input_fn(
44 |             x={"x": test_data},
45 |             shuffle=False)
46 |         #test_spec = tf.estimator.EvalSpec(input_fn=test_input_fn)
47 | 
48 |         predictions = gaze_classifier.predict(input_fn=test_input_fn)
49 |         predictor = list(predictions)
50 |         label = predictor[0]['classes'] + 1
51 | 
52 |         #draw pic
53 |         draw_pic(img_template, img_list[i], label, i)
54 | 
55 |         #print(list(predictions)[0]['classes'])
56 | 
57 | 
58 | 
59 | 
60 | def draw_pic(img_template, img_path, text, frameidx):
61 | 
62 | 
63 |     plt.gcf().clear()
64 |     image = Image.open(img_path)
65 |     draw = ImageDraw.Draw(image)
66 |     (x, y) = (10, 10)
67 |     font = ImageFont.truetype('arial', size=125)
68 |     message = str(text)
69 |     color = 'rgb(255, 255, 255)' # white
70 |     draw.text((x, y), message, fill=color, font=font)
71 |     #plt.imshow(image)
72 | 
73 |     if img_template is None:
74 |         img_template = plt.imshow(image)
75 |     else:
76 |         img_template.set_data(image)
77 | 
78 |     plt.pause(0.1)
79 | 
80 |     #im = plt.imshow(image, animated=True)
81 |     plt.draw()
82 | 
83 | 
84 | 
85 | # loads face crops and the corresponding full frames of a recorded sequence (needed by predict_imgs)
86 | def load_imgs():
87 |     BASE_DIR = "F:/2-2/cv/proj_gaze/sequences/4"
88 |     face_dir = BASE_DIR + "/face/*.jpg"
89 |     img_dir = BASE_DIR + "/entire/*.jpg"
90 | 
91 |     face_list = glob(face_dir)
92 |     img_list = glob(img_dir)
93 | 
94 |     IMG_NUM = len(img_list)
95 |     test_image = np.zeros((IMG_NUM, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
96 | 
97 |     # LOOP START
98 |     bat_idx = 0
99 |     for path in face_list:
100 |         img = read_image(path)
101 |         test_image[bat_idx, :, :, :] = img
102 |         bat_idx += 1
103 | 
104 | 
105 | 
106 | 
107 |     return test_image, img_list
108 | 
109 | 
110 | def read_image(path):
111 |     image = np.array(Image.open(path).convert('L'))
112 |     image = image.astype(np.float32)
113 |     image = image / 255.0
114 |     image = np.expand_dims(image, axis=2)
115 |     return image
116 | 
117 | # main func
118 | def main(unused_argv):
119 |     #load_imgs()
120 |     predict_imgs()
121 | 
122 | 
123 | if __name__ == "__main__":
124 |     tf.app.run()
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/train.py: --------------------------------------------------------------------------------
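# Builds the .npy dataset (make_db), trains the gaze-zone classifier (train), or evaluates a
# saved checkpoint (test); switch between these by editing main() at the bottom of this file.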
1 | import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 | 
5 | def test():
6 |     tf.logging.set_verbosity(tf.logging.INFO)
7 |     # to avoid cuda memory out error
8 |     gpu_options = tf.GPUOptions(allow_growth=True)
9 |     config = tf.ConfigProto(gpu_options=gpu_options)
10 | 
11 |     # data load
12 |     eval_data, eval_label = load_img_and_label_from_npy('test_img.npy', 'test_label.npy')
13 |     eval_label = np.argmax(eval_label, axis=1)
14 |     print(eval_data[3])
15 |     print(eval_label[3])
16 |     print('npy loaded')
17 | 
18 |     # create the estimator
19 |     gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
20 |                                              config=tf.contrib.learn.RunConfig(session_config=config))
21 | 
22 |     # eval
23 |     eval_input_fn = tf.estimator.inputs.numpy_input_fn(
24 |         x={"x": eval_data},
25 |         y=eval_label,
26 | 
27 |         num_epochs=1,
28 |         shuffle=False)
29 |     eval_results = gaze_classifier.evaluate(input_fn=eval_input_fn)
30 |     print(eval_results)
31 | 
32 | def train():
33 |     # load_images()
34 | 
35 |     tf.logging.set_verbosity(tf.logging.INFO)
36 |     # to avoid cuda memory out error
37 |     gpu_options = tf.GPUOptions(allow_growth=True)
38 |     config = tf.ConfigProto(gpu_options=gpu_options)
39 | 
40 |     # == the input_fn handed to the ESTIMATOR also has a contract:
41 |     # it has to return the feature data and the label data
42 |     train_data, train_label = load_img_and_label_from_npy('train_img.npy', 'train_label.npy')
43 |     train_label = np.argmax(train_label, axis=1)
44 |     print('npy loaded')
45 | 
46 |     train_input_fn = tf.estimator.inputs.numpy_input_fn(
47 |         x={"x": train_data},
48 |         y=train_label,
49 |         batch_size=712, num_epochs=None, shuffle=True)
50 |     print('input_fn created')
51 | 
52 |     # == the model_fn used for ESTIMATOR training also has a contract on its parameters:
53 |     #
54 |     # its signature is (features, labels, mode, params, config); features and labels are required
55 |     #
56 |     # and it has to return a tf.estimator.EstimatorSpec
57 | 
58 |     # == the ESTIMATOR's model_dir is where the trained parameters (checkpoints) are stored; the run config is passed in here as well
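    # (A possible refinement, sketched only as commented-out code and assuming the test .npy files
    #  written by opt.load_images are available: alternate training and evaluation in one run with
    #  tf.estimator.train_and_evaluate instead of invoking train() and test() separately.
    #      train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=100000)
    #      eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
    #      tf.estimator.train_and_evaluate(gaze_classifier, train_spec, eval_spec)
    #  Here eval_input_fn would be built the same way as in test() above.)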
59 | gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model", 60 | config=tf.contrib.learn.RunConfig(session_config=config)) 61 | print('estimator craeated') 62 | 63 | # recording logs 64 | log_tensor = {"loss" : "loss"} 65 | #logging_hook = tf.train.LoggingTensorHook({"loss": loss, 66 | # "accuracy": accuracy}, every_n_iter=10) 67 | 68 | log_hook = tf.train.LoggingTensorHook(tensors=log_tensor, every_n_iter=50) 69 | 70 | # train 71 | print('start train') 72 | gaze_classifier.train(input_fn=train_input_fn, steps=100000) 73 | 74 | def make_db(): 75 | load_images() 76 | 77 | #main func 78 | def main(unused_argv): 79 | #make_db() 80 | #test() 81 | train() 82 | 83 | 84 | 85 | if __name__ == "__main__": 86 | tf.app.run() -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | lr = 0.001 3 | 4 | # 'LIGHT' or 'HEAVY' or 'HEAVY+ATT' or 'MORE_LIGHT' 5 | #use_model_type = 'HEAVY+ATT' 6 | use_model_type = 'MORE_LIGHT' 7 | 8 | alpha = 2 9 | batch_size = 200 10 | global_img_size = [100, 120] 11 | local_img_size = [100, 80] 12 | schedule = [150, 225] 13 | gamma = 0.1 14 | print_iter = 5 15 | save_epoch = 10 16 | 17 | data_path = 'D:/-----/cropped_fld_and_face' 18 | save_path = 'save_checks_more_light' 19 | 20 | max_epoch = 200 21 | gpus = "0" 22 | class_num = 6 23 | momentum= 0.9 24 | weight_decay = 5e-4 -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/gaze_model_heavy_ver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class Estimator(nn.Module): 7 | def __init__(self, use_attention_map=False): 8 | super(Estimator, self).__init__() 9 | 10 | self.global_estimator = Global_Estimator(use_attention_map) 11 | self.local_estimator = Local_Estimator(use_attention_map) 12 | self.use_attention_map = use_attention_map 13 | 14 | #if use_mtcnn: 15 | # self.final_fc = nn.Linear(1024 + 512 + 136, 6) 16 | #else: 17 | self.final_fc = nn.Linear(4000 + 1000, 6) 18 | 19 | 20 | 21 | def forward(self, input_x, input_local_x, flds=None): 22 | 23 | g_output = self.global_estimator(input_x) 24 | l_output = self.local_estimator(input_local_x) 25 | 26 | output = self.final_fc(torch.cat([g_output, l_output], dim=1)) 27 | 28 | return output 29 | 30 | 31 | 32 | 33 | # ------------------------------------- GLOBAL --------------------- 34 | class Global_Estimator(nn.Module): 35 | def __init__(self, use_attention=False): 36 | super(Global_Estimator, self).__init__() 37 | 38 | input_dim = 1 39 | 40 | self.use_attention = use_attention 41 | self.lrelu = nn.LeakyReLU(0.2) 42 | self.drop = nn.Dropout(0.5) 43 | self.pool = nn.MaxPool2d(2) 44 | self.pool3 = nn.MaxPool2d(3, 2) 45 | 46 | 47 | if self.use_attention: 48 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1) 49 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1) 50 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1) 51 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1) 52 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1) 53 | 54 | # 120 x 180 55 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0) 56 | self.norm_1 = nn.InstanceNorm2d(40) 57 | 58 | # 60 x 90 59 | self.conv2 = conv2d_block(40, 70, 5, 2, 1) 60 | self.norm_2 = nn.InstanceNorm2d(70) 61 | 62 | # 30 x 45 63 | self.conv3 = 
conv2d_block(70, 60, 3, 1, 0) 64 | self.norm_3 = nn.InstanceNorm2d(60) 65 | 66 | self.conv4 = conv2d_block(60, 80, 3, 1, 0) 67 | self.norm_4 = nn.InstanceNorm2d(80) 68 | 69 | self.conv5 = conv2d_block(80, 100, 3, 1, 0) 70 | self.norm_5 = nn.InstanceNorm2d(100) 71 | 72 | self.fc1 = nn.Linear((80 * 7 * 6) + (100 * 7 * 6), 4000) 73 | 74 | 75 | def forward(self, x): 76 | 77 | # input : B x C x 120 x 100 78 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot] 79 | x = self.lrelu(self.conv1(x)) 80 | if self.use_attention: 81 | x_att1 = self.conv1_att(x) 82 | x = x_att1 * x 83 | x = self.norm_1(x) 84 | x = self.pool3(x) 85 | 86 | # B x C x 59 x 49 87 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot] 88 | x = self.lrelu(self.conv2(x)) 89 | if self.use_attention: 90 | x_att2 = self.conv2_att(x) 91 | x = x_att2 * x 92 | x = self.norm_2(x) 93 | x = self.pool(x) 94 | 95 | # B x C x 29 x 24 96 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 97 | x = self.lrelu(self.conv3(x)) 98 | if self.use_attention: 99 | x_att3 = self.conv3_att(x) 100 | x = x_att3 * x 101 | x = self.norm_3(x) 102 | x = self.pool(x) 103 | 104 | # B x C x 14 x 12 105 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 106 | x = self.lrelu(self.conv4(x)) 107 | if self.use_attention: 108 | x_att4 = self.conv4_att(x) 109 | x = x_att4 * x 110 | x = self.norm_4(x) 111 | x = self.pool(x) 112 | x_41 = x.view(x.size()[0], -1) 113 | 114 | 115 | # B x C x 7 x 6 116 | x = F.pad(x, (1, 1, 1, 1)) 117 | x = self.lrelu(self.conv5(x)) 118 | if self.use_attention: 119 | x_att5 = self.conv5_att(x) 120 | x = x_att5 * x 121 | x = self.norm_5(x) 122 | x_51 = x.view(x.size()[0], -1) 123 | 124 | # concat 41 & 51 125 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 126 | #x = self.fc2(x) 127 | 128 | return x 129 | 130 | 131 | 132 | 133 | 134 | # ------------------------------------- LOCAL --------------------- 135 | class Local_Estimator(nn.Module): 136 | def __init__(self, use_attention=False): 137 | super(Local_Estimator, self).__init__() 138 | 139 | 140 | input_dim = 1 141 | self.use_attention = use_attention 142 | 143 | self.lrelu = nn.LeakyReLU(0.2) 144 | self.drop = nn.Dropout(0.5) 145 | self.pool = nn.MaxPool2d(2) 146 | self.pool3 = nn.MaxPool2d(3, 2) 147 | 148 | 149 | # att maps 150 | if self.use_attention: 151 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1) 152 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1) 153 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1) 154 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1) 155 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1) 156 | 157 | 158 | # 120 x 180 159 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0) 160 | self.norm_1 = nn.InstanceNorm2d(40) 161 | 162 | # 60 x 90 163 | self.conv2 = conv2d_block(40, 70, 5, 2, 1) 164 | self.norm_2 = nn.InstanceNorm2d(70) 165 | 166 | # 30 x 45 167 | self.conv3 = conv2d_block(70, 60, 3, 1, 0) 168 | self.norm_3 = nn.InstanceNorm2d(60) 169 | 170 | self.conv4 = conv2d_block(60, 80, 3, 1, 0) 171 | self.norm_4 = nn.InstanceNorm2d(80) 172 | 173 | self.conv5 = conv2d_block(80, 100, 3, 1, 0) 174 | self.norm_5 = nn.InstanceNorm2d(100) 175 | 176 | self.fc1 = nn.Linear((80 * 5 * 6) + (100 * 5 * 6), 1000) 177 | 178 | 179 | def forward(self, x): 180 | # input : B x C x 50 x 100 181 | x = F.pad(x, (53, 53, 28, 28)) # [left, right, top, bot] 182 | x = self.lrelu(self.conv1(x)) 183 | if self.use_attention: 184 | x_att1 = self.conv1_att(x) 185 | x = x_att1 * x 186 | x = self.norm_1(x) 187 | x = self.pool3(x) 188 | 189 | # B x C x 25 x 50 190 | x = F.pad(x, (25, 25, 
30, 30)) # [left, right, top, bot] 191 | x = self.lrelu(self.conv2(x)) 192 | if self.use_attention: 193 | x_att2 = self.conv2_att(x) 194 | x = x_att2 * x 195 | x = self.norm_2(x) 196 | x = self.pool(x) 197 | 198 | # B x C x 12 x 25 199 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 200 | x = self.lrelu(self.conv3(x)) 201 | if self.use_attention: 202 | x_att3 = self.conv3_att(x) 203 | x = x_att3 * x 204 | x = self.norm_3(x) 205 | x = self.pool(x) 206 | 207 | # B x C x 6 x 12 208 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 209 | x = self.lrelu(self.conv4(x)) 210 | if self.use_attention: 211 | x_att4 = self.conv4_att(x) 212 | x = x_att4 * x 213 | x = self.norm_4(x) 214 | x = self.pool(x) 215 | x_41 = x.view(x.size()[0], -1) 216 | 217 | 218 | # B x C x 3 x 6 219 | x = F.pad(x, (1, 1, 1, 1)) 220 | x = self.lrelu(self.conv5(x)) 221 | if self.use_attention: 222 | x_att5 = self.conv5_att(x) 223 | x = x_att5 * x 224 | x = self.norm_5(x) 225 | #print("51b" + str(x.size())) 226 | x_51 = x.view(x.size()[0], -1) 227 | 228 | # concat 41 & 51 229 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 230 | 231 | return x 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | # ------ conv blocks ----------- 241 | 242 | class conv2d_block(nn.Module): 243 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02): 244 | super(conv2d_block, self).__init__() 245 | 246 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride, 247 | padding=padding) 248 | def forward(self, x): 249 | return self.conv(x) 250 | 251 | -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/gaze_model_light_ver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class Estimator(nn.Module): 7 | def __init__(self, use_mtcnn=False): 8 | super(Estimator, self).__init__() 9 | 10 | self.global_estimator = Global_Estimator() 11 | self.use_mtcnn = use_mtcnn 12 | 13 | 14 | def forward(self, input_x, flds=None): 15 | 16 | output = self.global_estimator(input_x) 17 | return output 18 | 19 | 20 | 21 | 22 | # ------------------------------------- GLOBAL --------------------- 23 | class Global_Estimator(nn.Module): 24 | def __init__(self): 25 | super(Global_Estimator, self).__init__() 26 | 27 | input_dim = 1 28 | cnum = 16 29 | 30 | 31 | self.lrelu = nn.LeakyReLU(0.2) 32 | self.drop = nn.Dropout(0.5) 33 | self.pool = nn.MaxPool2d(2) 34 | self.pool3 = nn.MaxPool2d(3, 2) 35 | 36 | 37 | # 120 x 180 38 | self.conv1 = conv2d_block(input_dim, 20, 7, 2, 0) 39 | self.norm_1 = nn.InstanceNorm2d(20) 40 | 41 | # 60 x 90 42 | self.conv2 = conv2d_block(20, 32, 5, 2, 1) 43 | self.norm_2 = nn.InstanceNorm2d(32) 44 | 45 | # 30 x 45 46 | self.conv3 = conv2d_block(32, 30, 3, 1, 0) 47 | self.norm_3 = nn.InstanceNorm2d(30) 48 | 49 | self.conv4 = conv2d_block(30, 20, 3, 1, 0) 50 | self.norm_4 = nn.InstanceNorm2d(20) 51 | 52 | self.conv5 = conv2d_block(20, 50, 3, 1, 0) 53 | self.norm_5 = nn.InstanceNorm2d(50) 54 | 55 | self.fc1 = nn.Linear((20 * 7 * 6) + (50 * 7 * 6), 2000) 56 | self.fc2 = nn.Linear(2000, 6) 57 | 58 | 59 | def forward(self, x): 60 | #print("ORIG -" + str(x.size())) 61 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot] 62 | x = self.lrelu(self.conv1(x)) 63 | x = self.norm_1(x) 64 | x = self.pool3(x) 65 | 66 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot] 67 | x = self.lrelu(self.conv2(x)) 
68 | x = self.norm_2(x) 69 | x = self.pool(x) 70 | 71 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 72 | x = self.lrelu(self.conv3(x)) 73 | x = self.norm_3(x) 74 | x = self.pool(x) 75 | 76 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 77 | x = self.lrelu(self.conv4(x)) 78 | x = self.norm_4(x) 79 | x = self.pool(x) 80 | x_41 = x.view(x.size()[0], -1) 81 | 82 | x = F.pad(x, (1, 1, 1, 1)) 83 | x = self.lrelu(self.conv5(x)) 84 | x = self.norm_5(x) 85 | x_51 = x.view(x.size()[0], -1) 86 | 87 | # concat 41 & 51 88 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 89 | x = self.fc2(x) 90 | 91 | return x 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | # ------ conv blocks ----------- 100 | 101 | class conv2d_block(nn.Module): 102 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02): 103 | super(conv2d_block, self).__init__() 104 | 105 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride, 106 | padding=padding) 107 | def forward(self, x): 108 | return self.conv(x) 109 | 110 | -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/ir_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch.utils.data as data 3 | from os import listdir 4 | import os 5 | import random 6 | import torch 7 | import cv2 8 | from PIL import Image 9 | import numpy as np 10 | 11 | import torchvision.transforms as transforms 12 | 13 | def is_image_file(filename): 14 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] 15 | filename_lower = filename.lower() 16 | return any(filename_lower.endswith(extension) for extension in IMG_EXTENSIONS) 17 | 18 | def is_usable_gaze(filename): 19 | GAZE_ZONES = ['part_1', 'part_3', 'part_6', 'part_8', 'part_10', 'part_12'] 20 | filename_lower = filename.lower().split('.')[0] 21 | return any(filename_lower.endswith(gaze_zone) for gaze_zone in GAZE_ZONES) 22 | 23 | def img_loader(path): 24 | try: 25 | with open(path, 'rb') as f: 26 | img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) 27 | 28 | return img 29 | except IOError: 30 | print('Cannot load image ' + path) 31 | 32 | class IR_FACE_Dataset(data.Dataset): 33 | def __init__(self, data_path, img_w, img_h, img_local_h, transform, loader=img_loader,\ 34 | with_subfolder=False, random_crop=True, read_fld=True, return_name=False): 35 | super(IR_FACE_Dataset, self).__init__() 36 | if with_subfolder: 37 | self.samples = self._find_samples_in_subfolders(data_path) 38 | else: 39 | self.samples = [x for x in listdir(data_path) if is_image_file(x)] 40 | 41 | 42 | 43 | # 44 | self.samples = [x for x in self.samples if is_usable_gaze(x)] 45 | ''' 46 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1 47 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2 48 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3 49 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4 50 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5 51 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6 52 | ''' 53 | 54 | self.data_path = data_path 55 | self.img_w = img_w 56 | self.img_h = img_h 57 | self.img_local_h = img_local_h 58 | self.transform = transform 59 | self.random_crop = random_crop 60 | self.return_name = return_name 61 | self.loader = loader 62 | 63 | # if true, read facial landmarks 64 | self.read_fld = read_fld 65 | 66 | 67 | print(str(len(self.samples)) + " items found") 68 | 69 | def __len__(self): 70 | return 
len(self.samples) 71 | 72 | def __getitem__(self, index): 73 | #path = os.path.join(self.data_path, self.samples[index]) 74 | 75 | path = self.data_path + '/' + self.samples[index] 76 | 77 | img = self.loader(path) 78 | w, h = img.shape[0], img.shape[1] 79 | 80 | # use fld? 81 | if self.read_fld: 82 | fld_file = path.replace("jpg", "txt") 83 | fld_fdes = open(fld_file, "r") 84 | flds = np.array(fld_fdes.read().split(), dtype=np.float32) 85 | flds = flds.reshape(68, 2) 86 | fld_fdes.close() 87 | 88 | # need resize? 89 | if w < self.img_w or h < self.img_h or w > self.img_w or h > self.img_h: 90 | 91 | if self.read_fld: 92 | w_ratio, h_ratio = self.img_w / w, self.img_h / h 93 | flds[:, 0] = flds[:, 0] * w_ratio 94 | flds[:, 1] = flds[:, 1] * h_ratio 95 | 96 | img = cv2.resize(img, (self.img_w, self.img_h), interpolation=cv2.INTER_AREA) 97 | 98 | 99 | local_img = img[0:self.img_local_h, 0:self.img_w] 100 | 101 | 102 | 103 | 104 | # pick class 105 | gaze_part = int(path.split('_')[-1].split('.')[0]) 106 | label_tensor = np.zeros([6]) 107 | 108 | ''' 109 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1 110 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2 111 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3 112 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4 113 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5 114 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6 115 | ''' 116 | if gaze_part == 1: 117 | gaze_class = 0 118 | label_tensor[0] = 1 119 | elif gaze_part == 3: 120 | gaze_class = 1 121 | label_tensor[1] = 1 122 | elif gaze_part == 6: 123 | gaze_class = 2 124 | label_tensor[2] = 1 125 | elif gaze_part == 8: 126 | gaze_class = 3 127 | label_tensor[3] = 1 128 | elif gaze_part == 10: 129 | gaze_class = 4 130 | label_tensor[4] = 1 131 | elif gaze_part == 12: 132 | gaze_class = 5 133 | label_tensor[5] = 1 134 | 135 | label_tensor = torch.LongTensor(label_tensor) 136 | #print(path + " --- " + gaze_class) 137 | 138 | 139 | if self.transform is not None: 140 | img = self.transform(img) 141 | local_img = self.transform(local_img) 142 | else: 143 | img = torch.from_numpy(img) 144 | local_img = torch.from_numpy(local_img) 145 | 146 | return img, local_img, label_tensor -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.transforms as transforms 4 | import torch.optim as optim 5 | 6 | import os 7 | import time 8 | 9 | from ir_data import IR_FACE_Dataset 10 | from config import Config 11 | import numpy as np 12 | 13 | from torch.utils.data import Dataset, DataLoader 14 | 15 | from utils import AverageMeter 16 | # ---------------------------------- 17 | if Config.use_model_type == 'LIGHT': 18 | from gaze_model_light_ver import Estimator 19 | elif Config.use_model_type == 'HEAVY' or Config.use_model_type == 'HEAVY+ATT': 20 | from gaze_model_heavy_ver import Estimator 21 | 22 | # ---------------------------------- 23 | 24 | def train(): 25 | torch.multiprocessing.freeze_support() 26 | train_transform = transforms.Compose([ 27 | transforms.ToTensor(), # range [0, 255] -> [0.0,1.0] 28 | ]) 29 | 30 | ir_dataset = IR_FACE_Dataset(data_path=Config.data_path, \ 31 | img_w=Config.global_img_size[0] ,img_h=Config.global_img_size[1], img_local_h=Config.local_img_size[1], \ 32 | transform=train_transform) 33 | ir_dataloader 
= DataLoader(ir_dataset, batch_size=Config.batch_size, \ 34 | shuffle=True, num_workers=1) 35 | 36 | device = torch.device("cuda") 37 | 38 | # checkpt dir 39 | if os.path.exists(Config.save_path) == False: 40 | os.makedirs(Config.save_path) 41 | 42 | # model 43 | if Config.use_model_type == 'HEAVY+ATT': 44 | model = Estimator(use_attention_map=True).cuda() 45 | else: 46 | model = Estimator().cuda() 47 | model = model.to(device) 48 | 49 | # opt 50 | criterion = nn.CrossEntropyLoss().cuda() 51 | optimizer = optim.SGD(model.parameters(), lr=Config.lr, momentum=Config.momentum, \ 52 | weight_decay=Config.weight_decay) 53 | 54 | 55 | for epoch_i in range(Config.max_epoch): 56 | model.train() 57 | 58 | #Config.lr = adjust_learning_rate_v2(optimizer, epoch_i - 1, Config) 59 | #for param_group in optimizer.param_groups: 60 | # param_group["lr"] = Config.lr 61 | 62 | iter_max = ir_dataset.__len__() // Config.batch_size 63 | 64 | # for print 65 | data_time = AverageMeter() 66 | losses = AverageMeter() 67 | top1 = AverageMeter() 68 | top5 = AverageMeter() 69 | end = time.time() 70 | 71 | dataiter = iter(ir_dataloader) 72 | steps_per_epoch = iter_max + 1 73 | #for ii, data in enumerate(ir_dataloader): 74 | for ii in range(steps_per_epoch): 75 | 76 | data_time.update(time.time() - end) 77 | 78 | data_input, data_input_local, label = dataiter.next() 79 | data_input = data_input.to(device) 80 | targets = label.to(device) 81 | data_input_local = data_input_local.to(device) 82 | 83 | 84 | 85 | # optimizer step 86 | optimizer.zero_grad() 87 | outputs = model(data_input, data_input_local) 88 | loss = criterion(outputs, torch.argmax(targets, 1)) 89 | 90 | loss.backward() 91 | optimizer.step() 92 | 93 | # measure accuracy and record loss 94 | total = data_input.size(0) 95 | _, predicted = outputs.max(1) 96 | correct = predicted.eq(torch.argmax(targets,1)).sum().item() 97 | top1.update(100.*correct/total) 98 | 99 | losses.update(loss.item(), data_input.size(0)) 100 | 101 | 102 | end = time.time() 103 | 104 | if ii % Config.print_iter == 0: 105 | print('\nEpoch: [%d | %d], Iter : [%d | %d] LR: %f | Loss : %f | top1 : %.4f | batch_time : %.3f' \ 106 | % (epoch_i, Config.max_epoch, ii, iter_max + 1, Config.lr, losses.avg, top1.avg, data_time.val)) 107 | 108 | 109 | # measure elapsed time 110 | 111 | 112 | # save model 113 | if epoch_i % Config.save_epoch == 0: 114 | torch.save({'state_dict' : model.state_dict(), 'opt' : optimizer.state_dict()}, \ 115 | Config.save_path + "/check_" + str(epoch_i) + ".pth") 116 | 117 | 118 | 119 | # not using - 120 | def adjust_learning_rate(optimizer, epoch, config): 121 | global state 122 | if epoch in config.schedule: 123 | config.lr *= config.gamma 124 | for param_group in optimizer.param_groups: 125 | param_group['lr'] = config.lr 126 | 127 | def adjust_learning_rate_v2(optimizer, epoch, config): 128 | lr = config.lr * (0.1 ** (epoch // 10)) 129 | return lr 130 | 131 | if __name__ == '__main__': 132 | train() -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/utils.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | """Computes and stores the average and current value 3 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 4 | """ 5 | def __init__(self): 6 | self.reset() 7 | 8 | def reset(self): 9 | self.val = 0 10 | self.avg = 0 11 | self.sum = 0 12 | self.count = 0 13 | 14 | def update(self, val, n=1): 15 | 
self.val = val 16 | self.sum += val * n 17 | self.count += n 18 | self.avg = self.sum / self.count 19 | 20 | 21 | # accuracy of gaze 22 | def accuracy(output, target, topk=(1,)): 23 | """Computes the precision@k for the specified values of k""" 24 | maxk = max(topk) 25 | batch_size = target.size(0) 26 | 27 | _, pred = output.topk(maxk, 1, True, True) 28 | pred = pred.t() 29 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 30 | 31 | 32 | res = [] 33 | for k in topk: 34 | correct_k = correct[:k].reshape(-1).float().sum(0) 35 | res.append(correct_k.mul_(100.0 / batch_size)) 36 | return res 37 | 38 | 39 | def data_from_captue(img, use_fld=False): 40 | img -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | # Using this code to force the usage of any specific GPUs 3 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 4 | import argparse 5 | import os 6 | import random 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.parallel 10 | import torch.backends.cudnn as cudnn 11 | import torch.optim as optim 12 | import torch.utils.data 13 | import torchvision.datasets as dset 14 | import torch.utils.data as data 15 | import time 16 | import numpy as np 17 | import torchvision.utils as vutils 18 | from torch.autograd import Variable 19 | from math import log10 20 | import torchvision 21 | import cv2 22 | import skimage 23 | import scipy.io 24 | import glob 25 | import matplotlib.image as mpimg 26 | import matplotlib.pyplot as plt 27 | from model import losses 28 | from model.networks import * 29 | from util.model_storage import save_checkpoint 30 | from data.dataloader import * 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument("--pretrained", default="./pretrained/weight.pth", type=str, help="path to pretrained model (default: none)") 34 | parser.add_argument("--batch_size", default="8", type=int, help="The path to store our batch_size") 35 | parser.add_argument("--image_dir", default="./data/test_img/", type=str, help="The path to store our batch_size") 36 | parser.add_argument("--image_list", default="./data/test_fileList.txt", type=str, help="The path to store our batch_size") 37 | 38 | global opt,model 39 | opt = parser.parse_args() 40 | 41 | fsrnet = define_G(input_nc = 3, output_nc = 3, ngf=64, which_model_netG=0) 42 | 43 | if torch.cuda.is_available(): 44 | fsrnet = fsrnet.cuda() 45 | 46 | if opt.pretrained: 47 | if os.path.isfile(opt.pretrained): 48 | print("=> loading model '{}'".format(opt.pretrained)) 49 | weights = torch.load(opt.pretrained) 50 | 51 | pretrained_dict = weights['model'].state_dict() 52 | model_dict = fsrnet.state_dict() 53 | 54 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 55 | model_dict.update(pretrained_dict) 56 | 57 | fsrnet.load_state_dict(model_dict) 58 | else: 59 | print("=> no model found at '{}'".format(opt.pretrained)) 60 | 61 | demo_dataset = TestDatasetFromFile( 62 | opt.image_list, 63 | opt.image_dir) 64 | test_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=opt.batch_size, num_workers=8, drop_last=True, 65 | pin_memory=True) 66 | 67 | for iteration, batch in enumerate(test_data_loader): 68 | input = Variable(batch[0]) 69 | input = input.cuda() 70 | upscaled,boundaries,reconstructed = fsrnet(input) 71 | 72 | if not os.path.isdir('./test_result/Coarse_SR_network'): 73 | os.makedirs('./test_result/Coarse_SR_network') 74 | if not 
os.path.isdir('./test_result/Prior_Estimation'): 75 | os.makedirs('./test_result/Prior_Estimation') 76 | if not os.path.isdir('./test_result/Final_SR_reconstruction'): 77 | os.makedirs('./test_result/Final_SR_reconstruction') 78 | 79 | for index in range(opt.batch_size): 80 | final_output = reconstructed.permute(0,2,3,1).detach().cpu().numpy() 81 | final_output_0 = final_output[index,:,:,:] 82 | 83 | estimated_boundary = boundaries.permute(0,2,3,1).detach().cpu().numpy() 84 | estimated_boundary_0 = estimated_boundary[index,:,:,0] 85 | 86 | output = upscaled.permute(0,2,3,1).detach().cpu().numpy() 87 | output_0 = output[index,:,:,:] 88 | 89 | img_num = iteration*opt.batch_size + index 90 | 91 | scipy.misc.toimage(output_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 92 | './test_result/Coarse_SR_network/%4d.jpg'% (img_num)) 93 | scipy.misc.toimage(estimated_boundary_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 94 | './test_result/Prior_Estimation/%4d.jpg' % (img_num)) 95 | scipy.misc.toimage(final_output_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 96 | './test_result/Final_SR_reconstruction/%4d.jpg' % (img_num)) 97 | #code minor changeB10 98 | 99 | -------------------------------------------------------------------------------- /webcam_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | 10 | from face_detection.model.prior_box import PriorBox 11 | from face_detection.model.retinaface import RetinaFace 12 | from face_detection.utils.misc import draw_keypoint, inference 13 | 14 | parser = argparse.ArgumentParser(description='PIMNet') 15 | parser.add_argument( 16 | '--checkpoint', type=str, 17 | default='face_detection/weights/mobilenet0.25_final.pt', 18 | help='Trained state_dict file path to open' 19 | ) 20 | parser.add_argument( 21 | '--cpu', action="store_true", default=False, 22 | help='Use cpu inference' 23 | ) 24 | parser.add_argument( 25 | '--jit', action="store_true", default=False, 26 | help='Use JIT' 27 | ) 28 | parser.add_argument( 29 | '--confidence-threshold', type=float, default=0.02, 30 | help='confidence_threshold' 31 | ) 32 | parser.add_argument( 33 | '--nms-threshold', type=float, default=0.4, 34 | help='nms_threshold' 35 | ) 36 | parser.add_argument( 37 | '--vis-thres', type=float, default=0.5, 38 | help='visualization_threshold' 39 | ) 40 | parser.add_argument( 41 | '-s', '--save-image', action="store_true", default=False, 42 | help='show detection results' 43 | ) 44 | parser.add_argument( 45 | '--save-dir', type=str, default='demo', 46 | help='Dir to save results' 47 | ) 48 | 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | assert os.path.isfile(args.checkpoint) 53 | 54 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 55 | cfg = checkpoint["config"] 56 | device = torch.device("cpu" if args.cpu else "cuda") 57 | 58 | # net and model 59 | detector = RetinaFace(**cfg) 60 | detector.load_state_dict(checkpoint["net_state_dict"]) 61 | detector.eval().requires_grad_(False) 62 | detector.to(device) 63 | print('Finished loading model!') 64 | cudnn.benchmark = True 65 | 66 | # prepare testing 67 | cap = cv2.VideoCapture(0) 68 | assert cap.isOpened() 69 | ret_val, img_tmp = cap.read() 70 | im_height, im_width, _ = img_tmp.shape 71 | scale = torch.Tensor([im_width, im_height, im_width, im_height]) 72 | scale = scale.to(device) 73 | 74 | scale1 = 
torch.Tensor([im_width, im_height] * 5) 75 | scale1 = scale1.to(device) 76 | 77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 78 | priors = priorbox.forward() 79 | priors = priors.to(device) 80 | prior_data = priors.data 81 | 82 | if args.jit: 83 | img_tmp = img_tmp.transpose(2, 0, 1) 84 | img_tmp = np.float32(img_tmp) 85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0) 86 | dummy = img_tmp.to(device) 87 | detector = torch.jit.trace(detector, example_inputs=dummy) 88 | 89 | if args.save_image: 90 | nframe = 0 91 | fname = os.path.join(args.save_dir, "{:06d}.jpg") 92 | os.makedirs(args.save_dir, exist_ok=True) 93 | 94 | # testing begin 95 | ret_val, img_raw = cap.read() 96 | while ret_val: 97 | start = cv2.getTickCount() 98 | 99 | # NOTE preprocessing. 100 | dets = inference( 101 | detector, img_raw, scale, scale1, prior_data, cfg, 102 | args.confidence_threshold, args.nms_threshold, device 103 | ) 104 | 105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start)) 106 | cv2.putText( 107 | img_raw, f"FPS: {fps:.1f}", (5, 15), 108 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 109 | ) 110 | 111 | # show image 112 | draw_keypoint(img_raw, dets, args.vis_thres) 113 | 114 | if args.save_image: 115 | cv2.imwrite(fname.format(nframe), img_raw) 116 | nframe += 1 117 | 118 | cv2.imshow("Webcam Demo", img_raw) 119 | if cv2.waitKey(1) == 27: # Press ESC button to quit. 120 | break 121 | 122 | ret_val, img_raw = cap.read() 123 | 124 | cap.release() 125 | cv2.destroyAllWindows() 126 | 127 | 128 | if __name__ == "__main__": 129 | main() --------------------------------------------------------------------------------