├── LICENSE
├── README.md
├── data
│   ├── dataloader.py
│   └── test_fileList.txt
├── environment.yml
├── face_alignment
│   ├── README.md
│   ├── figure
│   │   ├── figure1.png
│   │   └── figure2.png
│   ├── models
│   │   ├── ZF_deploy.prototxt
│   │   ├── ZF_local_solver.prototxt
│   │   ├── ZF_local_train.prototxt
│   │   ├── ZF_solver.prototxt
│   │   ├── ZF_train.prototxt
│   │   ├── list_train_global_front.txt
│   │   ├── list_train_global_left.txt
│   │   ├── list_train_global_right.txt
│   │   ├── list_train_init_semifront.txt
│   │   ├── mean_shapes.txt
│   │   ├── shape_parameter_U_front.txt
│   │   ├── shape_parameter_U_left.txt
│   │   ├── shape_parameter_U_right.txt
│   │   ├── shape_parameter_U_wild.txt
│   │   ├── shape_parameter_s_front.txt
│   │   ├── shape_parameter_s_left.txt
│   │   ├── shape_parameter_s_right.txt
│   │   ├── shape_parameter_s_wild.txt
│   │   ├── warped_mean_front.bmp
│   │   ├── warped_mean_left.bmp
│   │   └── warped_mean_right.bmp
│   └── python
│       ├── fa_util.py
│       ├── fa_util_train.py
│       ├── face_alignment.py
│       ├── make_wild_input.py
│       └── test_300w_public.py
├── face_detection
│   ├── .gitignore
│   ├── LICENSE.MIT
│   ├── NOTICE
│   ├── README.md
│   ├── convert_to_onnx.py
│   ├── data
│   │   ├── FDDB
│   │   │   └── img_list.txt
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── data_augment.py
│   │   └── wider_face.py
│   ├── detect.py
│   ├── environment.yml
│   ├── model
│   │   ├── multibox_loss.py
│   │   ├── networks.py
│   │   ├── prior_box.py
│   │   └── retinaface.py
│   ├── test_fddb.py
│   ├── test_widerface.py
│   ├── train_detector.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── box_utils.py
│   │   ├── misc.py
│   │   └── timer.py
│   ├── webcam_demo.py
│   ├── weights
│   │   ├── mobilenet0.25_final.pt
│   │   └── mobilenet0.25_pretrain.pt
│   └── widerface_evaluate
│       ├── README.md
│       ├── box_overlaps.pyx
│       ├── evaluation.py
│       ├── ground_truth
│       │   ├── wider_easy_val.mat
│       │   ├── wider_face_val.mat
│       │   ├── wider_hard_val.mat
│       │   └── wider_medium_val.mat
│       ├── setup.py
│       └── widerface_txt
│           ├── 24--Soldier_Firing
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_10.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_1037.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_115.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_129.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_133.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_15.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_254.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_264.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_268.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_281.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_315.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_329.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_368.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_372.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_405.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_431.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_523.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_540.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_601.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_633.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_644.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_67.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_691.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_702.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_703.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_763.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_812.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_824.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_887.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_890.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_901.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_904.txt
│           │   ├── 24_Soldier_Firing_Soldier_Firing_24_931.txt
│           │   └── 24_Soldier_Firing_Soldier_Firing_24_95.txt
│           └── 40--Gymnastics
│               ├── 40_Gymnastics_Gymnastics_40_1022.txt
│               ├── 40_Gymnastics_Gymnastics_40_1035.txt
│               ├── 40_Gymnastics_Gymnastics_40_1043.txt
│               ├── 40_Gymnastics_Gymnastics_40_1044.txt
│               ├── 40_Gymnastics_Gymnastics_40_108.txt
│               ├── 40_Gymnastics_Gymnastics_40_115.txt
│               ├── 40_Gymnastics_Gymnastics_40_138.txt
│               ├── 40_Gymnastics_Gymnastics_40_156.txt
│               ├── 40_Gymnastics_Gymnastics_40_161.txt
│               ├── 40_Gymnastics_Gymnastics_40_171.txt
│               ├── 40_Gymnastics_Gymnastics_40_175.txt
│               ├── 40_Gymnastics_Gymnastics_40_197.txt
│               ├── 40_Gymnastics_Gymnastics_40_24.txt
│               ├── 40_Gymnastics_Gymnastics_40_255.txt
│               ├── 40_Gymnastics_Gymnastics_40_260.txt
│               ├── 40_Gymnastics_Gymnastics_40_273.txt
│               ├── 40_Gymnastics_Gymnastics_40_274.txt
│               ├── 40_Gymnastics_Gymnastics_40_285.txt
│               ├── 40_Gymnastics_Gymnastics_40_331.txt
│               ├── 40_Gymnastics_Gymnastics_40_361.txt
│               ├── 40_Gymnastics_Gymnastics_40_364.txt
│               ├── 40_Gymnastics_Gymnastics_40_389.txt
│               └── 40_Gymnastics_Gymnastics_40_401.txt
├── face_recognition
│   ├── config.py
│   ├── model_atari.py
│   ├── test.py
│   └── train.py
├── gaze_estimation
│   ├── README.md
│   ├── example_movie
│   │   └── media2_slow.avi
│   ├── v1_caffe_model
│   │   ├── ir_gaze_deploy.prototxt
│   │   ├── ir_gaze_solver.prototxt
│   │   └── ir_gaze_train_val.prototxt
│   ├── v2_tensorflow_model
│   │   ├── model.py
│   │   ├── opt.py
│   │   ├── test_sequences.py
│   │   └── train.py
│   └── v3_pytorch_model
│       ├── config.py
│       ├── gaze_model_heavy_ver.py
│       ├── gaze_model_light_ver.py
│       ├── ir_data.py
│       ├── train.py
│       └── utils.py
├── test.py
└── webcam_demo.py
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PIMNet_Internal_Environment_Recognition
2 | ## Overview
 3 | This project is open-source software for internal (in-vehicle) environment recognition for ADAS (advanced driver assistance systems).
 4 | It includes:
5 | - Face Detection
6 | - Face Landmarks Detection / Face Alignment
7 | - Gaze Estimation
8 | - Face Recognition
9 |
10 | ### Project page
11 | http://imlab.postech.ac.kr/opensw.htm
12 |
--------------------------------------------------------------------------------
/data/dataloader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import torch.utils.data as data
4 | from os import listdir
5 | import os
6 | from os.path import join
7 | from PIL import Image, ImageOps
8 | import random
9 | import torchvision.transforms as transforms
10 | import cv2
11 | import numpy as np
12 | from torch.autograd import Variable
13 | import matplotlib.pyplot as plt
14 |
15 |
16 | def loadFromFile(path, datasize):
17 | if path is None:
18 | return None, None
19 |
20 | # print("Load from file %s" % path)
21 | f = open(path)
22 | data = []
23 | for idx in range(0, datasize):
24 | line = f.readline()
 25 |         line = line.rstrip('\n')  # strip the trailing newline
26 | data.append(line)
27 | f.close()
28 | return data
29 |
30 |
31 | def load_lr_hr_prior(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False,
32 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True):
33 | if input_width is None:
34 | input_width = input_height
35 | if output_width is None:
36 | output_width = output_height
37 |
38 | img = cv2.imread(file_path)
39 | # img = Image.open(file_path)
40 |
41 | if is_gray is False:
42 | b, g, r = cv2.split(img)
43 | img = cv2.merge([r, g, b])
44 | if is_gray is True:
45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
46 |
 47 |     if is_mirror and random.randint(0, 1) == 0:
 48 |         img = cv2.flip(img, 1)  # horizontal flip; img is a numpy array, so use cv2 rather than PIL ImageOps
49 |
50 | if input_height is not None:
51 | img = cv2.resize(img, (input_width, input_height), interpolation=cv2.INTER_CUBIC)
52 |
 53 |     hms = np.zeros((64, 64, 128))  # parsing-map prior; returned (as zeros) even when is_parsing_map is False
 54 |     if is_parsing_map:
 55 |         part_names = ['skin.png','lbrow.png','rbrow.png','leye.png','reye.png','lear.png','rear.png','nose.png','mouth.png','ulip.png','llip.png']
 56 | 
57 |
 58 |         for i in range(len(part_names)):
 59 |             (onlyfilePath, img_name) = os.path.split(file_path)
 60 |             full_name = onlyfilePath + "/Parsing_Maps/" + img_name[:-4] + "_" + part_names[i]
61 | hm = cv2.imread(full_name, cv2.IMREAD_GRAYSCALE)
62 | hm_resized = cv2.resize(hm, (64, 64), interpolation=cv2.INTER_CUBIC) / 255.0
63 | hms[:, :, i] = hm_resized
64 | hms[:, :, i+11] = hm_resized
65 | hms[:, :, i+22] = hm_resized
66 | hms[:, :, i+33] = hm_resized
67 | hms[:, :, i+44] = hm_resized
68 | hms[:, :, i+55] = hm_resized
69 | hms[:, :, i+66] = hm_resized
70 | hms[:, :, i+77] = hm_resized
71 | hms[:, :, i+88] = hm_resized
72 | hms[:, :, i+99] = hm_resized
73 | hms[:, :, i+110] = hm_resized
 74 |             # the 11 part maps are tiled every 11 channels; only 128 channels exist, so the last repetition is partial
 75 |             if i + 121 < 128:
 76 |                 hms[:, :, i+121] = hm_resized
77 |
78 |
79 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC)
80 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC)
81 |
82 | if is_scale_back:
83 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC)
84 | return img_lr, img, hms
85 | else:
86 | return img_lr, img, hms
87 |
88 | def load_lr(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False,
89 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True):
90 | if input_width is None:
91 | input_width = input_height
92 | if output_width is None:
93 | output_width = output_height
94 |
95 | img = cv2.imread(file_path)
96 | # img = Image.open(file_path)
97 |
98 | if is_gray is False:
99 | b, g, r = cv2.split(img)
100 | img = cv2.merge([r, g, b])
101 | if is_gray is True:
102 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
103 |
104 |     if is_mirror and random.randint(0, 1) == 0:
105 |         img = cv2.flip(img, 1)  # horizontal flip; img is a numpy array, so use cv2 rather than PIL ImageOps
106 |
107 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC)
108 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC)
109 | hms = np.zeros((64, 64, 128))
110 |
111 | if is_scale_back:
112 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC)
113 | return img_lr, img, hms
114 | else:
115 | return img_lr, img, hms
116 |
117 |
118 | class ImageDatasetFromFile(data.Dataset):
119 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128,
120 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True):
121 | super(ImageDatasetFromFile, self).__init__()
122 |
123 | self.image_filenames = image_list
124 | self.upscale = upscale
125 | self.is_mirror = is_mirror
126 | self.img_path = img_path
127 | self.input_height = input_height
128 | self.input_width = input_width
129 | self.output_height = output_height
130 | self.output_width = output_width
131 | self.is_scale_back = is_scale_back
132 | self.is_gray = is_gray
133 | self.is_parsing_map = is_parsing_map
134 |
135 | self.input_transform = transforms.Compose([
136 | transforms.ToTensor()])
137 |
138 | def __getitem__(self, idx):
139 |
140 | if self.is_mirror:
141 |             is_mirror = random.randint(0, 1) == 0
142 | else:
143 | is_mirror = False
144 |
145 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines()))
146 | fullpath = join(self.img_path, image_filenames[idx])
147 |
148 | lr, hr, pm = load_lr_hr_prior(fullpath,
149 | self.input_height, self.input_width, self.output_height, self.output_width,
150 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back,
151 | self.is_parsing_map)
152 |
153 | input = self.input_transform(lr)
154 | target = self.input_transform(hr)
155 | parsing_map = self.input_transform(pm)
156 |
157 | return input, target, parsing_map
158 |
159 | def __len__(self):
160 |         return len(open(self.image_filenames, 'r').readlines())
161 |
162 |
163 | class TestDatasetFromFile(data.Dataset):
164 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128,
165 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True):
166 | super(TestDatasetFromFile, self).__init__()
167 |
168 | self.image_filenames = image_list
169 | self.upscale = upscale
170 | self.is_mirror = is_mirror
171 | self.img_path = img_path
172 | self.input_height = input_height
173 | self.input_width = input_width
174 | self.output_height = output_height
175 | self.output_width = output_width
176 | self.is_scale_back = is_scale_back
177 | self.is_gray = is_gray
178 | self.is_parsing_map = is_parsing_map
179 |
180 | self.input_transform = transforms.Compose([
181 | transforms.ToTensor()])
182 |
183 | def __getitem__(self, idx):
184 |
185 | if self.is_mirror:
186 |             is_mirror = random.randint(0, 1) == 0
187 | else:
188 | is_mirror = False
189 |
190 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines()))
191 | fullpath = join(self.img_path, image_filenames[idx])
192 |
193 | lr, hr, pm = load_lr(fullpath,
194 | self.input_height, self.input_width, self.output_height, self.output_width,
195 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back,
196 | self.is_parsing_map)
197 |
198 | input = self.input_transform(lr)
199 | target = self.input_transform(hr)
200 | parsing_map = self.input_transform(pm)
201 |
202 |
203 | return input, target, parsing_map
204 |
205 | def __len__(self):
206 |         return len(open(self.image_filenames, 'r').readlines())
207 |
208 |
209 | # demo_dataset = ImageDatasetFromFile("/home/cydia/文档/毕业设计/make_Face_boundary/81_landmarks/fileList.txt",
210 | # "/home/cydia/图片/sample/")
211 | #
212 | # train_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=1, num_workers=8)
213 |
214 | if __name__ == '__main__':  # requires train_data_loader, e.g. from the commented-out example above
215 | for titer, batch in enumerate(train_data_loader):
216 | input, target, heatmaps = Variable(batch[0]), Variable(batch[1]), Variable(batch[2])
217 |
218 | Input = input.permute(0, 2, 3, 1).cpu().data.numpy()
219 | Target = target.permute(0, 2, 3, 1).cpu().data.numpy()
220 | Parsing_maps = heatmaps.permute(0, 2, 3, 1).cpu().data.numpy()
221 |
222 | plt.figure("Input Image")
223 | plt.imshow(Input[0, :, :, :])
224 | plt.axis('on')
225 | plt.title('image')
226 | plt.show()
227 |
228 | plt.figure("Target Image")
229 | plt.imshow(Target[0, :, :, :])
230 | plt.axis('on')
231 | plt.title('Target')
232 | plt.show()
233 |
234 | plt.figure("HMS")
235 | plt.imshow(Parsing_maps[0, :, :, 0])
236 | plt.axis('on')
237 |         plt.title('HMS')
238 | plt.show()
239 |
--------------------------------------------------------------------------------
/data/test_fileList.txt:
--------------------------------------------------------------------------------
1 | 0.jpg
2 | 1.jpg
3 | 2.jpg
4 | 3.jpg
5 | 4.jpg
6 | 5.jpg
7 | 6.jpg
8 | 7.jpg
9 | 8.jpg
10 | 9.jpg
11 | 10.jpg
12 | 11.jpg
13 | 12.jpg
14 | 13.jpg
15 | 14.jpg
16 | 15.jpg
17 | 16.jpg
18 | 17.jpg
19 | 18.jpg
20 | 19.jpg
21 | 20.jpg
22 | 21.jpg
23 | 22.jpg
24 | 23.jpg
25 | 24.jpg
26 | 25.jpg
27 | 26.jpg
28 | 27.jpg
29 | 28.jpg
30 | 29.jpg
31 | 30.jpg
32 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pimnet
2 | channels:
3 | - pytorch
4 | dependencies:
5 | - cudatoolkit=11.3
6 | - matplotlib
7 | - pip
8 | - python=3.9
9 | - pytorch::pytorch=1.10.1
10 | - pytorch::torchvision
11 | - scikit-image
12 | - scipy
13 | - tqdm
14 | - pip:
15 | - opencv-python
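16 | # Assumed workflow: create this environment with "conda env create -f environment.yml"
17 | # and activate it with "conda activate pimnet".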
--------------------------------------------------------------------------------
/face_alignment/README.md:
--------------------------------------------------------------------------------
1 | # Stage-wise Face Alignment using Global and Local Regressors
2 |
 3 | This is a Caffe (pycaffe) implementation of face alignment on Windows 10.
4 |
5 | We implemented two-kind of methods.
6 |
7 | Method1 repeat global and local regression after initialization regression
8 |

9 |
10 | Method2 repeat local refinement regression after initialization regression
11 | 
12 |
13 | ## Evaluation on the 300W public test set
14 |
15 |
16 | | Method | Common | Challenging | Full |
17 | |:-------|:--------:|:-----:|:-------:|
18 | | Stage(Projection) | 8.24 | 12.56 | 9.07 |
19 | | Stage(Adjustment) | 6.25 | 10.16 | 7.02 |
20 | | Stage(Global1) | 4.66 | 8.20 | 5.35 |
21 | | Stage(Local1) | 3.45 | 6.49 | 4.05 |
22 | | Stage(Global2) | 3.59 | 6.62 | 4.18 |
23 | | Stage(Local2) | 3.29 | 6.14 | 3.85 |
24 | | Stage(Global3) | 3.48 | 6.37 | 4.05 |
25 | | Stage(Local3) | 3.28 | 6.09 | 3.83 |
26 | | Regression(Wild, simple net) | 4.07 | 6.90 | 4.62 |
27 | | Regression(Wild, ResNet50) | 3.72 | 6.44 | 4.25 |
28 |
29 |
30 | ## Usage
31 |
32 | ### For Training
33 | 1. Clone the repository
34 | ```
35 | git clone https://github.com/hyunsungP/facelignmentregression
36 | ```
37 |
38 | 2. Make the data files (.h5)
39 | ```
40 | make_wild_input.py
41 | ```
42 | and so on.
43 |
44 | 3. Make the data file list \
45 | Refer to models/list_train_*.txt
46 |
47 | 4. Training \
48 | On a console window with Caffe:
49 | ```
50 | caffe train --solver=models/ZF_solver.prototxt --gpu=0
51 | ```
52 |
53 | The other networks are trained in the same way.
54 |
55 | ### For Testing
56 | Change the prototxt path in the source code, then run:
57 | ```
58 | test_300w_public.py
59 | ```
60 |
61 | Other models will be uploaded.
62 |
63 |
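64 | ### Inference sketch (pycaffe)
65 | 
66 | A minimal sketch of running the deploy network directly with pycaffe, assuming a trained snapshot
67 | (the caffemodel path below is hypothetical) and a pre-cropped face image:
68 | 
69 | ```
70 | import caffe
71 | import cv2
72 | 
73 | caffe.set_mode_cpu()
74 | net = caffe.Net('models/ZF_deploy.prototxt',
75 |                 'caffemodels/FA_ZF_baseline_iter_200000.caffemodel',  # hypothetical snapshot path
76 |                 caffe.TEST)
77 | 
78 | face = cv2.imread('face_crop.jpg')                      # assumed pre-cropped face region
79 | face = cv2.resize(face, (224, 224)).astype('float32')  # deploy input "img" is 1x3x224x224
80 | net.blobs['img'].data[0] = face.transpose(2, 0, 1)     # HWC -> CHW; the net itself rescales to [0, 1]
81 | 
82 | landmarks = net.forward()['fc136'][0].reshape(68, 2)   # 136 outputs = 68 landmarks x (x, y)
83 | print(landmarks)
84 | ```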
--------------------------------------------------------------------------------
/face_alignment/figure/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure1.png
--------------------------------------------------------------------------------
/face_alignment/figure/figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure2.png
--------------------------------------------------------------------------------
/face_alignment/models/ZF_deploy.prototxt:
--------------------------------------------------------------------------------
1 | name: "FA_ZF_68"
2 |
3 | #------------------------------- input ----------------------------
4 | input: "img"
5 | input_dim: 1
6 | input_dim: 3
7 | input_dim: 224
8 | input_dim: 224
9 |
10 | layer {
11 | name: "scale_and_shift"
12 | bottom: "img"
13 | top: "scale_and_shift"
14 | type: "Scale"
15 | param{
16 | lr_mult: 0
17 | decay_mult: 0
18 | }
19 | param{
20 | lr_mult: 0
21 | decay_mult: 0
22 | }
23 | scale_param{
24 | filler{
25 | type: "constant"
 26 |       value: 0.00392156862745  # = 1/255, scales 8-bit pixel values to [0, 1]
27 | }
28 | bias_term: true
29 | bias_filler {
30 | type: "constant"
31 | value: 0
32 | }
33 | }
34 | }
35 | # ----------------------- ZF -------------------
36 | layer {
37 | name: "conv1"
38 | type: "Convolution"
39 | bottom: "scale_and_shift"
40 | top: "conv1"
41 | param {
42 | #learning rate
43 | lr_mult: 1.0
44 | }
45 | param {
46 | lr_mult: 2.0
47 | }
48 | convolution_param {
49 | num_output: 96
50 | kernel_size: 7
51 | pad: 3
52 | stride: 2
53 | weight_filler {
54 | type: "gaussian"
55 | std: 0.01
56 | }
57 | bias_filler {
58 | type: "constant"
59 | value: 0
60 | }
61 | }
62 | }
63 |
64 | layer {
65 | name: "relu1"
66 | type: "ReLU"
67 | bottom: "conv1"
68 | top: "conv1"
69 | }
70 |
71 | layer {
72 | name: "norm1"
73 | type: "LRN"
74 | bottom: "conv1"
75 | top: "norm1"
76 | lrn_param {
77 | local_size: 3
78 | alpha: 0.00005
79 | beta: 0.75
80 | norm_region: WITHIN_CHANNEL
81 | }
82 | }
83 |
84 | layer {
85 | name: "pool1"
86 | type: "Pooling"
87 | bottom: "norm1"
88 | top: "pool1"
89 | pooling_param {
90 | kernel_size: 3
91 | stride: 2
92 | pad: 1
93 | pool: MAX
94 | }
95 | }
96 |
97 | layer {
98 | name: "conv2"
99 | type: "Convolution"
100 | bottom: "pool1"
101 | top: "conv2"
102 | param {
103 | lr_mult: 1.0
104 | }
105 | param {
106 | lr_mult: 2.0
107 | }
108 | convolution_param {
109 | num_output: 256
110 | kernel_size: 5
111 | pad: 2
112 | stride: 2
113 | weight_filler {
114 | type: "gaussian"
115 | std: 0.01
116 | }
117 | bias_filler {
118 | type: "constant"
119 | value: 1
120 | }
121 | }
122 | }
123 |
124 | layer {
125 | name: "relu2"
126 | type: "ReLU"
127 | bottom: "conv2"
128 | top: "conv2"
129 | }
130 |
131 | layer {
132 | name: "norm2"
133 | type: "LRN"
134 | bottom: "conv2"
135 | top: "norm2"
136 | lrn_param {
137 | local_size: 3
138 | alpha: 0.00005
139 | beta: 0.75
140 | norm_region: WITHIN_CHANNEL
141 | }
142 | }
143 |
144 | layer {
145 | name: "pool2"
146 | type: "Pooling"
147 | bottom: "norm2"
148 | top: "pool2"
149 | pooling_param {
150 | kernel_size: 3
151 | stride: 2
152 | pad: 1
153 | pool: MAX
154 | }
155 | }
156 |
157 | layer {
158 | name: "conv3"
159 | type: "Convolution"
160 | bottom: "pool2"
161 | top: "conv3"
162 | param {
163 | lr_mult: 1.0
164 | }
165 | param {
166 | lr_mult: 2.0
167 | }
168 | convolution_param {
169 | num_output: 384
170 | kernel_size: 3
171 | pad: 1
172 | stride: 1
173 | weight_filler {
174 | type: "gaussian"
175 | std: 0.01
176 | }
177 | bias_filler {
178 | type: "constant"
179 | value: 0
180 | }
181 | }
182 | }
183 |
184 | layer {
185 | name: "relu3"
186 | type: "ReLU"
187 | bottom: "conv3"
188 | top: "conv3"
189 | }
190 |
191 | layer {
192 | name: "conv4"
193 | type: "Convolution"
194 | bottom: "conv3"
195 | top: "conv4"
196 | param {
197 | lr_mult: 1.0
198 | }
199 | param {
200 | lr_mult: 2.0
201 | }
202 | convolution_param {
203 | num_output: 384
204 | kernel_size: 3
205 | pad: 1
206 | stride: 1
207 | weight_filler {
208 | type: "gaussian"
209 | std: 0.01
210 | }
211 | bias_filler {
212 | type: "constant"
213 | value: 1
214 | }
215 | }
216 | }
217 |
218 | layer {
219 | name: "relu4"
220 | type: "ReLU"
221 | bottom: "conv4"
222 | top: "conv4"
223 | }
224 |
225 | layer {
226 | name: "conv5"
227 | type: "Convolution"
228 | bottom: "conv4"
229 | top: "conv5"
230 | param {
231 | lr_mult: 1.0
232 | }
233 | param {
234 | lr_mult: 2.0
235 | }
236 | convolution_param {
237 | num_output: 256
238 | kernel_size: 3
239 | pad: 1
240 | stride: 1
241 | weight_filler {
242 | type: "gaussian"
243 | std: 0.01
244 | }
245 | bias_filler {
246 | type: "constant"
247 | value: 1
248 | }
249 | }
250 | }
251 |
252 | layer {
253 | name: "relu5"
254 | type: "ReLU"
255 | bottom: "conv5"
256 | top: "conv5"
257 | }
258 |
259 | #-----------------------layer +-------------------------
260 |
261 | layer {
262 | name: "ip1"
263 | type: "InnerProduct"
264 | bottom: "conv5"
265 | top: "ip1"
266 | inner_product_param {
267 | num_output: 1024
268 | weight_filler {
269 | type: "xavier"
270 | }
271 | }
272 | }
273 | layer {
274 | name: "relu1"
275 | type: "ReLU"
276 | bottom: "ip1"
277 | top: "ip1"
278 | }
279 | layer {
280 | name: "ip2"
281 | type: "InnerProduct"
282 | bottom: "ip1"
283 | top: "ip2"
284 | inner_product_param {
285 | num_output: 1024
286 | weight_filler {
287 | type: "xavier"
288 | }
289 | }
290 | }
291 | layer {
292 | name: "relu2"
293 | type: "ReLU"
294 | bottom: "ip2"
295 | top: "ip2"
296 | }
297 |
298 |
299 | layer {
300 | name: "fc136"
301 | type: "InnerProduct"
302 | bottom: "ip2"
303 | top: "fc136"
304 | inner_product_param {
305 | num_output: 136
306 | weight_filler {
307 | type: "xavier"
308 | }
309 | }
310 | }
311 |
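312 | # The output blob "fc136" holds 136 values: (x, y) values for the 68 facial landmarks (2 x 68 = 136).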
--------------------------------------------------------------------------------
/face_alignment/models/ZF_local_solver.prototxt:
--------------------------------------------------------------------------------
1 | net: "models/ZF_local_train.prototxt"
2 | base_lr: 0.001
3 | lr_policy: "step"
4 | gamma: 0.1
5 | stepsize: 70000
6 | display: 20
7 | max_iter: 200000
8 | momentum: 0.9
9 | weight_decay: 0.0005
10 | # Snapshot the model every 10000 iterations
11 | #snapshot: 0
12 | snapshot: 10000
13 | snapshot_prefix: "E:/FA/FA_CNN_HS22/caffemodels/FA_ZF_local"
14 | #debug_info: true
15 |
16 |
--------------------------------------------------------------------------------
/face_alignment/models/ZF_local_train.prototxt:
--------------------------------------------------------------------------------
1 | name: "FA_ZF_local"
2 |
3 | #------------------------------- input ----------------------------
4 | layer {
5 | name: "data"
6 | type: "HDF5Data"
7 | top: "patch"
8 | top: "move"
9 | hdf5_data_param {
10 | source: "models/list_train_local.txt"
11 | batch_size: 24
12 | }
13 | }
14 |
15 | layer {
16 | name: "scale_and_shift"
17 | bottom: "patch"
18 | top: "scale_and_shift"
19 | type: "Scale"
20 | param{
21 | lr_mult: 0
22 | decay_mult: 0
23 | }
24 | param{
25 | lr_mult: 0
26 | decay_mult: 0
27 | }
28 | scale_param{
29 | filler{
30 | type: "constant"
31 | value: 1
32 | }
33 | bias_term: true
34 | bias_filler {
35 | type: "constant"
36 | value: -128
37 | }
38 | }
39 | }
40 |
41 | layer {
42 | name: "flatdata"
43 | type: "Flatten"
44 | bottom: "move"
45 | top: "flatdata"
46 | }
47 |
48 |
 49 | #------------------ split: slice the 204-channel "patch" blob into 68 three-channel patches (one per landmark) ------------------
50 | layer {
51 | name: "slicer"
52 | type: "Slice"
53 | bottom: "scale_and_shift"
54 | top: "patch_slice_1"
55 | top: "patch_slice_2"
56 | top: "patch_slice_3"
57 | top: "patch_slice_4"
58 | top: "patch_slice_5"
59 | top: "patch_slice_6"
60 | top: "patch_slice_7"
61 | top: "patch_slice_8"
62 | top: "patch_slice_9"
63 | top: "patch_slice_10"
64 | top: "patch_slice_11"
65 | top: "patch_slice_12"
66 | top: "patch_slice_13"
67 | top: "patch_slice_14"
68 | top: "patch_slice_15"
69 | top: "patch_slice_16"
70 | top: "patch_slice_17"
71 | top: "patch_slice_18"
72 | top: "patch_slice_19"
73 | top: "patch_slice_20"
74 | top: "patch_slice_21"
75 | top: "patch_slice_22"
76 | top: "patch_slice_23"
77 | top: "patch_slice_24"
78 | top: "patch_slice_25"
79 | top: "patch_slice_26"
80 | top: "patch_slice_27"
81 | top: "patch_slice_28"
82 | top: "patch_slice_29"
83 | top: "patch_slice_30"
84 | top: "patch_slice_31"
85 | top: "patch_slice_32"
86 | top: "patch_slice_33"
87 | top: "patch_slice_34"
88 | top: "patch_slice_35"
89 | top: "patch_slice_36"
90 | top: "patch_slice_37"
91 | top: "patch_slice_38"
92 | top: "patch_slice_39"
93 | top: "patch_slice_40"
94 | top: "patch_slice_41"
95 | top: "patch_slice_42"
96 | top: "patch_slice_43"
97 | top: "patch_slice_44"
98 | top: "patch_slice_45"
99 | top: "patch_slice_46"
100 | top: "patch_slice_47"
101 | top: "patch_slice_48"
102 | top: "patch_slice_49"
103 | top: "patch_slice_50"
104 | top: "patch_slice_51"
105 | top: "patch_slice_52"
106 | top: "patch_slice_53"
107 | top: "patch_slice_54"
108 | top: "patch_slice_55"
109 | top: "patch_slice_56"
110 | top: "patch_slice_57"
111 | top: "patch_slice_58"
112 | top: "patch_slice_59"
113 | top: "patch_slice_60"
114 | top: "patch_slice_61"
115 | top: "patch_slice_62"
116 | top: "patch_slice_63"
117 | top: "patch_slice_64"
118 | top: "patch_slice_65"
119 | top: "patch_slice_66"
120 | top: "patch_slice_67"
121 | top: "patch_slice_68"
122 | slice_param {
123 | axis:1
124 | slice_point: 3
125 | slice_point: 6
126 | slice_point: 9
127 | slice_point: 12
128 | slice_point: 15
129 | slice_point: 18
130 | slice_point: 21
131 | slice_point: 24
132 | slice_point: 27
133 | slice_point: 30
134 | slice_point: 33
135 | slice_point: 36
136 | slice_point: 39
137 | slice_point: 42
138 | slice_point: 45
139 | slice_point: 48
140 | slice_point: 51
141 | slice_point: 54
142 | slice_point: 57
143 | slice_point: 60
144 | slice_point: 63
145 | slice_point: 66
146 | slice_point: 69
147 | slice_point: 72
148 | slice_point: 75
149 | slice_point: 78
150 | slice_point: 81
151 | slice_point: 84
152 | slice_point: 87
153 | slice_point: 90
154 | slice_point: 93
155 | slice_point: 96
156 | slice_point: 99
157 | slice_point: 102
158 | slice_point: 105
159 | slice_point: 108
160 | slice_point: 111
161 | slice_point: 114
162 | slice_point: 117
163 | slice_point: 120
164 | slice_point: 123
165 | slice_point: 126
166 | slice_point: 129
167 | slice_point: 132
168 | slice_point: 135
169 | slice_point: 138
170 | slice_point: 141
171 | slice_point: 144
172 | slice_point: 147
173 | slice_point: 150
174 | slice_point: 153
175 | slice_point: 156
176 | slice_point: 159
177 | slice_point: 162
178 | slice_point: 165
179 | slice_point: 168
180 | slice_point: 171
181 | slice_point: 174
182 | slice_point: 177
183 | slice_point: 180
184 | slice_point: 183
185 | slice_point: 186
186 | slice_point: 189
187 | slice_point: 192
188 | slice_point: 195
189 | slice_point: 198
190 | slice_point: 201
191 | }
192 | }
193 |
194 | # ----------------------- ZF -------------------
195 | layer {
196 | name: "conv1"
197 | type: "Convolution"
198 | bottom: "scale_and_shift"
199 | top: "conv1"
200 | param {
201 | #learning rate
202 | lr_mult: 1.0
203 | }
204 | param {
205 | lr_mult: 2.0
206 | }
207 | convolution_param {
208 | num_output: 96
209 | kernel_size: 7
210 | pad: 3
211 | stride: 2
212 | weight_filler {
213 | type: "gaussian"
214 | std: 0.001
215 | }
216 | bias_filler {
217 | type: "constant"
218 | value: 0
219 | }
220 | }
221 | }
222 |
223 | layer {
224 | name: "relu1"
225 | type: "ReLU"
226 | bottom: "conv1"
227 | top: "conv1"
228 | }
229 |
230 | layer {
231 | name: "norm1"
232 | type: "LRN"
233 | bottom: "conv1"
234 | top: "norm1"
235 | lrn_param {
236 | local_size: 3
237 | alpha: 0.00005
238 | beta: 0.75
239 | norm_region: WITHIN_CHANNEL
240 | }
241 | }
242 |
243 | layer {
244 | name: "pool1"
245 | type: "Pooling"
246 | bottom: "norm1"
247 | top: "pool1"
248 | pooling_param {
249 | kernel_size: 3
250 | stride: 2
251 | pad: 1
252 | pool: MAX
253 | }
254 | }
255 |
256 | layer {
257 | name: "conv2"
258 | type: "Convolution"
259 | bottom: "pool1"
260 | top: "conv2"
261 | param {
262 | lr_mult: 1.0
263 | }
264 | param {
265 | lr_mult: 2.0
266 | }
267 | convolution_param {
268 | num_output: 256
269 | kernel_size: 5
270 | pad: 2
271 | stride: 2
272 | weight_filler {
273 | type: "gaussian"
274 | std: 0.001
275 | }
276 | bias_filler {
277 | type: "constant"
278 | value: 1
279 | }
280 | }
281 | }
282 |
283 | layer {
284 | name: "relu2"
285 | type: "ReLU"
286 | bottom: "conv2"
287 | top: "conv2"
288 | }
289 |
290 | layer {
291 | name: "norm2"
292 | type: "LRN"
293 | bottom: "conv2"
294 | top: "norm2"
295 | lrn_param {
296 | local_size: 3
297 | alpha: 0.00005
298 | beta: 0.75
299 | norm_region: WITHIN_CHANNEL
300 | }
301 | }
302 |
303 | layer {
304 | name: "pool2"
305 | type: "Pooling"
306 | bottom: "norm2"
307 | top: "pool2"
308 | pooling_param {
309 | kernel_size: 3
310 | stride: 2
311 | pad: 1
312 | pool: MAX
313 | }
314 | }
315 |
316 | layer {
317 | name: "conv3"
318 | type: "Convolution"
319 | bottom: "pool2"
320 | top: "conv3"
321 | param {
322 | lr_mult: 1.0
323 | }
324 | param {
325 | lr_mult: 2.0
326 | }
327 | convolution_param {
328 | num_output: 384
329 | kernel_size: 3
330 | pad: 1
331 | stride: 1
332 | weight_filler {
333 | type: "gaussian"
334 | std: 0.001
335 | }
336 | bias_filler {
337 | type: "constant"
338 | value: 0
339 | }
340 | }
341 | }
342 |
343 | layer {
344 | name: "relu3"
345 | type: "ReLU"
346 | bottom: "conv3"
347 | top: "conv3"
348 | }
349 |
350 | layer {
351 | name: "conv4"
352 | type: "Convolution"
353 | bottom: "conv3"
354 | top: "conv4"
355 | param {
356 | lr_mult: 1.0
357 | }
358 | param {
359 | lr_mult: 2.0
360 | }
361 | convolution_param {
362 | num_output: 384
363 | kernel_size: 3
364 | pad: 1
365 | stride: 1
366 | weight_filler {
367 | type: "gaussian"
368 | std: 0.001
369 | }
370 | bias_filler {
371 | type: "constant"
372 | value: 1
373 | }
374 | }
375 | }
376 |
377 | layer {
378 | name: "relu4"
379 | type: "ReLU"
380 | bottom: "conv4"
381 | top: "conv4"
382 | }
383 |
384 | layer {
385 | name: "conv5"
386 | type: "Convolution"
387 | bottom: "conv4"
388 | top: "conv5"
389 | param {
390 | lr_mult: 1.0
391 | }
392 | param {
393 | lr_mult: 2.0
394 | }
395 | convolution_param {
396 | num_output: 256
397 | kernel_size: 3
398 | pad: 1
399 | stride: 1
400 | weight_filler {
401 | type: "gaussian"
402 | std: 0.001
403 | }
404 | bias_filler {
405 | type: "constant"
406 | value: 1
407 | }
408 | }
409 | }
410 |
411 | layer {
412 | name: "relu5"
413 | type: "ReLU"
414 | bottom: "conv5"
415 | top: "conv5"
416 | }
417 |
418 | #-----------------------layer +-------------------------
419 |
420 | layer {
421 | name: "ip1"
422 | type: "InnerProduct"
423 | bottom: "conv5"
424 | top: "ip1"
425 | inner_product_param {
426 | num_output: 1024
427 | weight_filler {
428 | type: "xavier"
429 | }
430 | }
431 | }
432 | layer {
433 | name: "relu1"
434 | type: "ReLU"
435 | bottom: "ip1"
436 | top: "ip1"
437 | }
438 | layer {
439 | name: "ip2"
440 | type: "InnerProduct"
441 | bottom: "ip1"
442 | top: "ip2"
443 | inner_product_param {
444 | num_output: 1024
445 | weight_filler {
446 | type: "xavier"
447 | }
448 | }
449 | }
450 | layer {
451 | name: "relu2"
452 | type: "ReLU"
453 | bottom: "ip2"
454 | top: "ip2"
455 | }
456 |
457 |
458 | layer {
459 | name: "fc136"
460 | type: "InnerProduct"
461 | bottom: "ip2"
462 | top: "fc136"
463 | inner_product_param {
464 | num_output: 136
465 | weight_filler {
466 | type: "xavier"
467 | }
468 | }
469 | }
470 |
471 | #------------------------------- loss ----------------------------
472 |
473 | layer {
474 | name: "out"
475 | type: "EuclideanLoss"
476 | bottom: "fc136"
477 | bottom: "flatdata"
478 | top: "out"
479 | loss_weight: 1
480 | }
481 |
--------------------------------------------------------------------------------
/face_alignment/models/ZF_solver.prototxt:
--------------------------------------------------------------------------------
1 | net: "models/ZF_train.prototxt"
2 | base_lr: 0.001
3 | lr_policy: "step"
4 | gamma: 0.1
5 | stepsize: 70000
6 | display: 20
7 | max_iter: 200000
8 | momentum: 0.9
9 | weight_decay: 0.0005
10 | # Snapshot the model every 10000 iterations
11 | #snapshot: 0
12 | snapshot: 10000
13 | snapshot_prefix: "E:/FA/FA_CNN_HS20/caffemodels/FA_ZF_baseline"
14 | #debug_info: true
15 |
16 |
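17 | # NOTE: snapshot_prefix above is a machine-specific Windows path; change it to a valid local directory before training.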
--------------------------------------------------------------------------------
/face_alignment/models/ZF_train.prototxt:
--------------------------------------------------------------------------------
1 | name: "FA_ZF_68"
2 |
3 | #------------------------------- input ----------------------------
4 | layer {
5 | name: "data"
6 | type: "HDF5Data"
7 | top: "img"
8 | top: "pts"
9 | hdf5_data_param {
10 | source: "models/list_train_wild.txt"
11 | batch_size: 32
12 | }
13 | }
14 |
15 |
16 | layer {
17 | name: "scale_and_shift"
18 | bottom: "img"
19 | top: "scale_and_shift"
20 | type: "Scale"
21 | param{
22 | lr_mult: 0
23 | decay_mult: 0
24 | }
25 | param{
26 | lr_mult: 0
27 | decay_mult: 0
28 | }
29 | scale_param{
30 | filler{
31 | type: "constant"
32 | value: 0.00392156862745
33 | }
34 | bias_term: true
35 | bias_filler {
36 | type: "constant"
37 | value: 0
38 | }
39 | }
40 | }
41 |
42 | layer {
43 | name: "flatdata"
44 | type: "Flatten"
45 | bottom: "pts"
46 | top: "flatdata"
47 | }
48 | # ----------------------- ZF -------------------
49 | layer {
50 | name: "conv1"
51 | type: "Convolution"
52 | bottom: "scale_and_shift"
53 | top: "conv1"
54 | param {
55 | #learning rate
56 | lr_mult: 1.0
57 | }
58 | param {
59 | lr_mult: 2.0
60 | }
61 | convolution_param {
62 | num_output: 96
63 | kernel_size: 7
64 | pad: 3
65 | stride: 2
66 | weight_filler {
67 | type: "gaussian"
68 | std: 0.001
69 | }
70 | bias_filler {
71 | type: "constant"
72 | value: 0
73 | }
74 | }
75 | }
76 |
77 | layer {
78 | name: "relu1"
79 | type: "ReLU"
80 | bottom: "conv1"
81 | top: "conv1"
82 | }
83 |
84 | layer {
85 | name: "norm1"
86 | type: "LRN"
87 | bottom: "conv1"
88 | top: "norm1"
89 | lrn_param {
90 | local_size: 3
91 | alpha: 0.00005
92 | beta: 0.75
93 | norm_region: WITHIN_CHANNEL
94 | }
95 | }
96 |
97 | layer {
98 | name: "pool1"
99 | type: "Pooling"
100 | bottom: "norm1"
101 | top: "pool1"
102 | pooling_param {
103 | kernel_size: 3
104 | stride: 2
105 | pad: 1
106 | pool: MAX
107 | }
108 | }
109 |
110 | layer {
111 | name: "conv2"
112 | type: "Convolution"
113 | bottom: "pool1"
114 | top: "conv2"
115 | param {
116 | lr_mult: 1.0
117 | }
118 | param {
119 | lr_mult: 2.0
120 | }
121 | convolution_param {
122 | num_output: 256
123 | kernel_size: 5
124 | pad: 2
125 | stride: 2
126 | weight_filler {
127 | type: "gaussian"
128 | std: 0.001
129 | }
130 | bias_filler {
131 | type: "constant"
132 | value: 1
133 | }
134 | }
135 | }
136 |
137 | layer {
138 | name: "relu2"
139 | type: "ReLU"
140 | bottom: "conv2"
141 | top: "conv2"
142 | }
143 |
144 | layer {
145 | name: "norm2"
146 | type: "LRN"
147 | bottom: "conv2"
148 | top: "norm2"
149 | lrn_param {
150 | local_size: 3
151 | alpha: 0.00005
152 | beta: 0.75
153 | norm_region: WITHIN_CHANNEL
154 | }
155 | }
156 |
157 | layer {
158 | name: "pool2"
159 | type: "Pooling"
160 | bottom: "norm2"
161 | top: "pool2"
162 | pooling_param {
163 | kernel_size: 3
164 | stride: 2
165 | pad: 1
166 | pool: MAX
167 | }
168 | }
169 |
170 | layer {
171 | name: "conv3"
172 | type: "Convolution"
173 | bottom: "pool2"
174 | top: "conv3"
175 | param {
176 | lr_mult: 1.0
177 | }
178 | param {
179 | lr_mult: 2.0
180 | }
181 | convolution_param {
182 | num_output: 384
183 | kernel_size: 3
184 | pad: 1
185 | stride: 1
186 | weight_filler {
187 | type: "gaussian"
188 | std: 0.001
189 | }
190 | bias_filler {
191 | type: "constant"
192 | value: 0
193 | }
194 | }
195 | }
196 |
197 | layer {
198 | name: "relu3"
199 | type: "ReLU"
200 | bottom: "conv3"
201 | top: "conv3"
202 | }
203 |
204 | layer {
205 | name: "conv4"
206 | type: "Convolution"
207 | bottom: "conv3"
208 | top: "conv4"
209 | param {
210 | lr_mult: 1.0
211 | }
212 | param {
213 | lr_mult: 2.0
214 | }
215 | convolution_param {
216 | num_output: 384
217 | kernel_size: 3
218 | pad: 1
219 | stride: 1
220 | weight_filler {
221 | type: "gaussian"
222 | std: 0.001
223 | }
224 | bias_filler {
225 | type: "constant"
226 | value: 1
227 | }
228 | }
229 | }
230 |
231 | layer {
232 | name: "relu4"
233 | type: "ReLU"
234 | bottom: "conv4"
235 | top: "conv4"
236 | }
237 |
238 | layer {
239 | name: "conv5"
240 | type: "Convolution"
241 | bottom: "conv4"
242 | top: "conv5"
243 | param {
244 | lr_mult: 1.0
245 | }
246 | param {
247 | lr_mult: 2.0
248 | }
249 | convolution_param {
250 | num_output: 256
251 | kernel_size: 3
252 | pad: 1
253 | stride: 1
254 | weight_filler {
255 | type: "gaussian"
256 | std: 0.001
257 | }
258 | bias_filler {
259 | type: "constant"
260 | value: 1
261 | }
262 | }
263 | }
264 |
265 | layer {
266 | name: "relu5"
267 | type: "ReLU"
268 | bottom: "conv5"
269 | top: "conv5"
270 | }
271 |
272 | #-----------------------layer +-------------------------
273 |
274 | layer {
275 | name: "ip1"
276 | type: "InnerProduct"
277 | bottom: "conv5"
278 | top: "ip1"
279 | inner_product_param {
280 | num_output: 1024
281 | weight_filler {
282 | type: "xavier"
283 | }
284 | }
285 | }
286 | layer {
287 | name: "relu1"
288 | type: "ReLU"
289 | bottom: "ip1"
290 | top: "ip1"
291 | }
292 | layer {
293 | name: "ip2"
294 | type: "InnerProduct"
295 | bottom: "ip1"
296 | top: "ip2"
297 | inner_product_param {
298 | num_output: 1024
299 | weight_filler {
300 | type: "xavier"
301 | }
302 | }
303 | }
304 | layer {
305 | name: "relu2"
306 | type: "ReLU"
307 | bottom: "ip2"
308 | top: "ip2"
309 | }
310 |
311 |
312 | layer {
313 | name: "fc136"
314 | type: "InnerProduct"
315 | bottom: "ip2"
316 | top: "fc136"
317 | inner_product_param {
318 | num_output: 136
319 | weight_filler {
320 | type: "xavier"
321 | }
322 | }
323 | }
324 |
325 | #------------------------------- loss ----------------------------
326 |
327 | layer {
328 | name: "out"
329 | type: "EuclideanLoss"
330 | bottom: "fc136"
331 | bottom: "flatdata"
332 | top: "out"
333 | loss_weight: 1
334 | }
335 |
--------------------------------------------------------------------------------
/face_alignment/models/mean_shapes.txt:
--------------------------------------------------------------------------------
1 | -0.775420 -0.352592 -0.005021 -0.766519 -0.157355 -0.037416 -0.743629 0.036671 -0.055012 -0.701062 0.227707 -0.087501 -0.624895 0.409159 -0.181433 -0.507021 0.568253 -0.314666 -0.358238 0.700246 -0.461675 -0.188625 0.803156 -0.616360 0.000000 0.841022 -0.715031 0.188625 0.803156 -0.616360 0.358238 0.700246 -0.461675 0.507021 0.568253 -0.314666 0.624895 0.409159 -0.181433 0.701062 0.227707 -0.087501 0.743629 0.036671 -0.055012 0.766519 -0.157355 -0.037416 0.775420 -0.352592 -0.005021 -0.582617 -0.575646 -0.655455 -0.492129 -0.636310 -0.737820 -0.379689 -0.656483 -0.823215 -0.263923 -0.643754 -0.906170 -0.153347 -0.607971 -0.982801 0.153347 -0.607971 -0.982801 0.263923 -0.643754 -0.906170 0.379689 -0.656483 -0.823215 0.492129 -0.636310 -0.737820 0.582617 -0.575646 -0.655455 0.000000 -0.412566 -0.988062 0.000000 -0.291699 -1.070349 0.000000 -0.171456 -1.154743 0.000000 -0.051514 -1.235696 -0.122787 0.047974 -0.991719 -0.059097 0.070544 -1.029210 0.000000 0.080821 -1.063730 0.059097 0.070544 -1.029210 0.122787 0.047974 -0.991719 -0.430035 -0.396099 -0.732915 -0.358837 -0.444566 -0.761839 -0.276830 -0.446105 -0.775307 -0.204942 -0.396248 -0.783813 -0.279006 -0.333817 -0.768284 -0.357969 -0.331697 -0.754261 0.204942 -0.396248 -0.783813 0.276830 -0.446105 -0.775307 0.358837 -0.444566 -0.761839 0.430035 -0.396099 -0.732915 0.357969 -0.331697 -0.754261 0.279006 -0.333817 -0.768284 -0.262923 0.307414 -0.744026 -0.185044 0.253144 -0.863426 -0.091584 0.216906 -0.945248 0.000000 0.228164 -0.997522 0.091584 0.216906 -0.945248 0.185044 0.253144 -0.863426 0.262923 0.307414 -0.744026 0.190844 0.384972 -0.812714 0.097535 0.434418 -0.869535 0.000000 0.449659 -0.913157 -0.097535 0.434418 -0.869535 -0.190844 0.384972 -0.812714 -0.198999 0.307340 -0.799310 -0.095989 0.288185 -0.897865 0.000000 0.291515 -0.964643 0.095989 0.288185 -0.897865 0.198999 0.307340 -0.799310 0.097949 0.335492 -0.870915 0.000000 0.350547 -0.928230 -0.097949 0.335492 -0.870915
2 | -0.775420 -0.352592 -0.766519 -0.157355 -0.743629 0.036671 -0.701062 0.227707 -0.624895 0.409159 -0.507021 0.568253 -0.358238 0.700246 -0.188625 0.803156 0.000000 0.841022 0.188625 0.803156 0.358238 0.700246 0.507021 0.568253 0.624895 0.409159 0.701062 0.227707 0.743629 0.036671 0.766519 -0.157355 0.775420 -0.352592 -0.582617 -0.575646 -0.492129 -0.636310 -0.379689 -0.656483 -0.263923 -0.643754 -0.153347 -0.607971 0.153347 -0.607971 0.263923 -0.643754 0.379689 -0.656483 0.492129 -0.636310 0.582617 -0.575646 0.000000 -0.412566 0.000000 -0.291699 0.000000 -0.171456 0.000000 -0.051514 -0.122787 0.047974 -0.059097 0.070544 0.000000 0.080821 0.059097 0.070544 0.122787 0.047974 -0.430035 -0.396099 -0.358837 -0.444566 -0.276830 -0.446105 -0.204942 -0.396248 -0.279006 -0.333817 -0.357969 -0.331697 0.204942 -0.396248 0.276830 -0.446105 0.358837 -0.444566 0.430035 -0.396099 0.357969 -0.331697 0.279006 -0.333817 -0.262923 0.307414 -0.185044 0.253144 -0.091584 0.216906 0.000000 0.228164 0.091584 0.216906 0.185044 0.253144 0.262923 0.307414 0.190844 0.384972 0.097535 0.434418 0.000000 0.449659 -0.097535 0.434418 -0.190844 0.384972 -0.198999 0.307340 -0.095989 0.288185 0.000000 0.291515 0.095989 0.288185 0.198999 0.307340 0.097949 0.335492 0.000000 0.350547 -0.097949 0.335492
3 | -0.750298 -0.352592 -0.750084 -0.157355 -0.732529 0.036671 -0.699820 0.227707 -0.650560 0.409159 -0.571186 0.568253 -0.465522 0.700246 -0.341724 0.803156 -0.185064 0.841022 0.022672 0.803156 0.226541 0.700246 0.408303 0.568253 0.556644 0.409159 0.654527 0.227707 0.704052 0.036671 0.730716 -0.157355 0.747699 -0.352592 -0.832288 -0.575646 -0.795106 -0.636310 -0.740428 -0.656483 -0.681649 -0.643754 -0.624203 -0.607971 -0.358599 -0.607971 -0.224521 -0.643754 -0.082787 -0.656483 0.057286 -0.636310 0.176834 -0.575646 -0.494031 -0.412566 -0.535174 -0.291699 -0.577372 -0.171456 -0.617848 -0.051514 -0.602196 0.047974 -0.565784 0.070544 -0.531865 0.080821 -0.463426 0.070544 -0.389523 0.047974 -0.738879 -0.396099 -0.691681 -0.444566 -0.627395 -0.446105 -0.569392 -0.396248 -0.625768 -0.333817 -0.687140 -0.331697 -0.214421 -0.396248 -0.147911 -0.446105 -0.070158 -0.444566 0.005964 -0.396099 -0.067120 -0.331697 -0.142516 -0.333817 -0.599711 0.307414 -0.591965 0.253144 -0.551938 0.216906 -0.498761 0.228164 -0.393310 0.216906 -0.271460 0.253144 -0.144315 0.307414 -0.241081 0.384972 -0.350300 0.434418 -0.456579 0.449659 -0.519235 0.434418 -0.571632 0.384972 -0.571993 0.307340 -0.532062 0.288185 -0.482321 0.291515 -0.365803 0.288185 -0.227317 0.307340 -0.350631 0.335492 -0.464115 0.350547 -0.520284 0.335492
4 | -0.747699 -0.352592 -0.730716 -0.157355 -0.704052 0.036671 -0.654527 0.227707 -0.556644 0.409159 -0.408303 0.568253 -0.226541 0.700246 -0.022672 0.803156 0.185064 0.841022 0.341724 0.803156 0.465522 0.700246 0.571186 0.568253 0.650560 0.409159 0.699820 0.227707 0.732529 0.036671 0.750084 -0.157355 0.750298 -0.352592 -0.176834 -0.575646 -0.057286 -0.636310 0.082787 -0.656483 0.224521 -0.643754 0.358599 -0.607971 0.624203 -0.607971 0.681649 -0.643754 0.740428 -0.656483 0.795106 -0.636310 0.832288 -0.575646 0.494031 -0.412566 0.535174 -0.291699 0.577372 -0.171456 0.617848 -0.051514 0.389523 0.047974 0.463426 0.070544 0.531865 0.080821 0.565784 0.070544 0.602196 0.047974 -0.005964 -0.396099 0.070158 -0.444566 0.147911 -0.446105 0.214421 -0.396248 0.142516 -0.333817 0.067120 -0.331697 0.569392 -0.396248 0.627395 -0.446105 0.691681 -0.444566 0.738879 -0.396099 0.687140 -0.331697 0.625768 -0.333817 0.144315 0.307414 0.271460 0.253144 0.393310 0.216906 0.498761 0.228164 0.551938 0.216906 0.591965 0.253144 0.599711 0.307414 0.571632 0.384972 0.519235 0.434418 0.456579 0.449659 0.350300 0.434418 0.241081 0.384972 0.227317 0.307340 0.365803 0.288185 0.482321 0.291515 0.532062 0.288185 0.571993 0.307340 0.520284 0.335492 0.464115 0.350547 0.350631 0.335492
5 |
--------------------------------------------------------------------------------
/face_alignment/models/shape_parameter_s_front.txt:
--------------------------------------------------------------------------------
1 | 1.031828079223632812e+02
2 | 9.566854095458984375e+01
3 | 6.069260025024414062e+01
4 | 4.313542938232421875e+01
5 | 3.479409408569335938e+01
6 | 2.776439666748046875e+01
7 | 2.766326141357421875e+01
8 | 2.420671272277832031e+01
9 | 2.100972938537597656e+01
10 | 1.877184486389160156e+01
11 | 1.825231742858886719e+01
12 | 1.696853065490722656e+01
13 | 1.407497215270996094e+01
14 | 1.345866584777832031e+01
15 | 1.124495315551757812e+01
16 | 1.050634860992431641e+01
17 | 1.045322513580322266e+01
18 | 1.016666221618652344e+01
19 | 9.403193473815917969e+00
20 | 8.697093963623046875e+00
21 | 8.366784095764160156e+00
22 | 7.573175430297851562e+00
23 | 7.421993732452392578e+00
24 | 7.175876617431640625e+00
25 | 6.694856166839599609e+00
26 | 6.609914302825927734e+00
27 | 6.394573211669921875e+00
28 | 6.032481670379638672e+00
29 | 6.031355381011962891e+00
30 | 5.338684558868408203e+00
31 | 5.145238876342773438e+00
32 | 5.112681388854980469e+00
33 | 5.004620075225830078e+00
34 | 4.974018573760986328e+00
35 | 4.814919948577880859e+00
36 | 4.734435558319091797e+00
37 | 4.285939216613769531e+00
38 | 3.971984148025512695e+00
39 | 3.971856117248535156e+00
40 | 3.753386497497558594e+00
41 | 3.669133424758911133e+00
42 | 3.578326463699340820e+00
43 | 3.483742237091064453e+00
44 | 3.311911344528198242e+00
45 | 3.139082670211791992e+00
46 | 3.108502388000488281e+00
47 | 3.022727251052856445e+00
48 | 2.984299659729003906e+00
49 | 2.859831809997558594e+00
50 | 2.833900928497314453e+00
51 | 2.755693197250366211e+00
52 | 2.742290258407592773e+00
53 | 2.523193836212158203e+00
54 | 2.451685428619384766e+00
55 | 2.441256761550903320e+00
56 | 2.379939079284667969e+00
57 | 2.357637166976928711e+00
58 | 2.252062797546386719e+00
59 | 2.231155633926391602e+00
60 | 2.168044567108154297e+00
61 | 2.127068758010864258e+00
62 | 2.045018196105957031e+00
63 | 2.041277647018432617e+00
64 | 2.012953281402587891e+00
65 | 2.005952835083007812e+00
66 | 1.955849528312683105e+00
67 | 1.935137510299682617e+00
68 | 1.874186635017395020e+00
69 | 1.829447269439697266e+00
70 | 1.807976007461547852e+00
71 | 1.798697710037231445e+00
72 | 1.765719175338745117e+00
73 | 1.662169933319091797e+00
74 | 1.660003185272216797e+00
75 | 1.633037924766540527e+00
76 | 1.625466108322143555e+00
77 | 1.608945488929748535e+00
78 | 1.607636570930480957e+00
79 | 1.600903630256652832e+00
80 | 1.565548300743103027e+00
81 | 1.555794477462768555e+00
82 | 1.520662426948547363e+00
83 | 1.516777276992797852e+00
84 | 1.480778694152832031e+00
85 | 1.462243556976318359e+00
86 | 1.427065491676330566e+00
87 | 1.411217451095581055e+00
88 | 1.398631095886230469e+00
89 | 1.364845037460327148e+00
90 | 1.355186700820922852e+00
91 | 1.346644043922424316e+00
92 | 1.338635683059692383e+00
93 | 1.327934265136718750e+00
94 | 1.310287356376647949e+00
95 | 1.287073850631713867e+00
96 | 1.259063243865966797e+00
97 | 1.218294143676757812e+00
98 | 1.190768599510192871e+00
99 | 1.139584541320800781e+00
100 | 1.127703666687011719e+00
101 | 1.127283215522766113e+00
102 | 1.092749476432800293e+00
103 | 1.061315417289733887e+00
104 | 1.040784716606140137e+00
105 | 1.030719995498657227e+00
106 | 1.003454208374023438e+00
107 | 1.000035881996154785e+00
108 | 9.665775299072265625e-01
109 | 9.630764126777648926e-01
110 | 9.550484418869018555e-01
111 | 9.314393401145935059e-01
112 | 9.235842823982238770e-01
113 | 9.105998873710632324e-01
114 | 8.669779896736145020e-01
115 | 8.544918298721313477e-01
116 | 8.450148105621337891e-01
117 | 8.216010928153991699e-01
118 | 8.042898178100585938e-01
119 | 7.873371839523315430e-01
120 | 7.616593241691589355e-01
121 | 7.413730621337890625e-01
122 | 7.263383865356445312e-01
123 | 7.158536911010742188e-01
124 | 7.149648666381835938e-01
125 | 6.883103251457214355e-01
126 | 6.826061010360717773e-01
127 | 6.503386497497558594e-01
128 | 5.366221070289611816e-01
129 | 5.077308416366577148e-01
130 | 2.871714234352111816e-01
131 | 2.232837432529777288e-04
132 | 1.937306515173986554e-04
133 | 4.919727507513016462e-05
134 | 4.453564542927779257e-05
135 | 3.619944982347078621e-05
136 | 2.667792978172656149e-05
137 |
--------------------------------------------------------------------------------
/face_alignment/models/shape_parameter_s_left.txt:
--------------------------------------------------------------------------------
1 | 7.763617706298828125e+01
2 | 5.448361206054687500e+01
3 | 3.448307800292968750e+01
4 | 1.991018867492675781e+01
5 | 1.596661090850830078e+01
6 | 1.414914703369140625e+01
7 | 1.229103565216064453e+01
8 | 1.144200325012207031e+01
9 | 1.003643321990966797e+01
10 | 9.386501312255859375e+00
11 | 8.581890106201171875e+00
12 | 8.169677734375000000e+00
13 | 7.906897544860839844e+00
14 | 6.675380229949951172e+00
15 | 6.112782001495361328e+00
16 | 5.862775802612304688e+00
17 | 5.286133289337158203e+00
18 | 4.913509845733642578e+00
19 | 4.832731246948242188e+00
20 | 4.741940498352050781e+00
21 | 4.589621067047119141e+00
22 | 4.137164592742919922e+00
23 | 4.037960052490234375e+00
24 | 3.861081600189208984e+00
25 | 3.779168367385864258e+00
26 | 3.620183229446411133e+00
27 | 3.475615978240966797e+00
28 | 3.316045284271240234e+00
29 | 3.153186798095703125e+00
30 | 3.043802976608276367e+00
31 | 2.927801609039306641e+00
32 | 2.870085954666137695e+00
33 | 2.832670450210571289e+00
34 | 2.724978208541870117e+00
35 | 2.613666296005249023e+00
36 | 2.461195468902587891e+00
37 | 2.366128683090209961e+00
38 | 2.293519973754882812e+00
39 | 2.214362859725952148e+00
40 | 2.146535158157348633e+00
41 | 1.907979846000671387e+00
42 | 1.876132249832153320e+00
43 | 1.859354138374328613e+00
44 | 1.775403857231140137e+00
45 | 1.764379143714904785e+00
46 | 1.694374799728393555e+00
47 | 1.665422201156616211e+00
48 | 1.622999191284179688e+00
49 | 1.610870122909545898e+00
50 | 1.546877861022949219e+00
51 | 1.523749709129333496e+00
52 | 1.483136296272277832e+00
53 | 1.481248021125793457e+00
54 | 1.423740625381469727e+00
55 | 1.406941294670104980e+00
56 | 1.378324389457702637e+00
57 | 1.357655882835388184e+00
58 | 1.335111260414123535e+00
59 | 1.306033492088317871e+00
60 | 1.282203078269958496e+00
61 | 1.257453680038452148e+00
62 | 1.242352485656738281e+00
63 | 1.201884031295776367e+00
64 | 1.184469342231750488e+00
65 | 1.166077256202697754e+00
66 | 1.114611506462097168e+00
67 | 1.102498888969421387e+00
68 | 1.085692048072814941e+00
69 | 1.060934782028198242e+00
70 | 1.029542326927185059e+00
71 | 1.017418980598449707e+00
72 | 1.005733728408813477e+00
73 | 9.654799103736877441e-01
74 | 9.343039393424987793e-01
75 | 9.260154366493225098e-01
76 | 9.126370549201965332e-01
77 | 8.995376825332641602e-01
78 | 8.933218717575073242e-01
79 | 8.765093088150024414e-01
80 | 8.631937503814697266e-01
81 | 8.594997525215148926e-01
82 | 8.442590236663818359e-01
83 | 8.324881792068481445e-01
84 | 8.141325116157531738e-01
85 | 8.030978441238403320e-01
86 | 7.934148907661437988e-01
87 | 7.738255858421325684e-01
88 | 7.693558931350708008e-01
89 | 7.474056482315063477e-01
90 | 7.435721158981323242e-01
91 | 7.271158099174499512e-01
92 | 7.163758873939514160e-01
93 | 7.030839323997497559e-01
94 | 6.789638996124267578e-01
95 | 6.737074851989746094e-01
96 | 6.597926020622253418e-01
97 | 6.343482136726379395e-01
98 | 6.245849728584289551e-01
99 | 6.192614436149597168e-01
100 | 6.046380400657653809e-01
101 | 5.935505032539367676e-01
102 | 5.786783099174499512e-01
103 | 5.713734626770019531e-01
104 | 5.640091300010681152e-01
105 | 5.604548454284667969e-01
106 | 5.492605566978454590e-01
107 | 5.269949436187744141e-01
108 | 5.183300971984863281e-01
109 | 5.088832378387451172e-01
110 | 4.984530508518218994e-01
111 | 4.879687726497650146e-01
112 | 4.835968017578125000e-01
113 | 4.738907814025878906e-01
114 | 4.641085565090179443e-01
115 | 4.461972415447235107e-01
116 | 4.422465264797210693e-01
117 | 4.404929280281066895e-01
118 | 4.266946017742156982e-01
119 | 4.236666858196258545e-01
120 | 4.139477312564849854e-01
121 | 4.108542203903198242e-01
122 | 4.007500708103179932e-01
123 | 3.902464807033538818e-01
124 | 3.863844573497772217e-01
125 | 3.740646839141845703e-01
126 | 3.641172349452972412e-01
127 | 3.496397733688354492e-01
128 | 3.446161448955535889e-01
129 | 3.107274472713470459e-01
130 | 2.185715436935424805e-01
131 | 1.521436497569084167e-04
132 | 1.100038352888077497e-04
133 | 2.406101702945306897e-05
134 | 2.068835783575195819e-05
135 | 1.945491385413333774e-05
136 | 1.268230789719382301e-05
137 |
--------------------------------------------------------------------------------
/face_alignment/models/shape_parameter_s_right.txt:
--------------------------------------------------------------------------------
1 | 7.763619232177734375e+01
2 | 5.448361206054687500e+01
3 | 3.448307800292968750e+01
4 | 1.991018676757812500e+01
5 | 1.596661090850830078e+01
6 | 1.414914703369140625e+01
7 | 1.229103469848632812e+01
8 | 1.144200229644775391e+01
9 | 1.003643226623535156e+01
10 | 9.386501312255859375e+00
11 | 8.581891059875488281e+00
12 | 8.169676780700683594e+00
13 | 7.906896114349365234e+00
14 | 6.675380229949951172e+00
15 | 6.112782478332519531e+00
16 | 5.862775802612304688e+00
17 | 5.286133289337158203e+00
18 | 4.913509845733642578e+00
19 | 4.832731246948242188e+00
20 | 4.741940498352050781e+00
21 | 4.589621067047119141e+00
22 | 4.137164592742919922e+00
23 | 4.037960052490234375e+00
24 | 3.861081600189208984e+00
25 | 3.779168128967285156e+00
26 | 3.620183229446411133e+00
27 | 3.475615978240966797e+00
28 | 3.316045522689819336e+00
29 | 3.153186798095703125e+00
30 | 3.043802738189697266e+00
31 | 2.927801609039306641e+00
32 | 2.870085716247558594e+00
33 | 2.832670450210571289e+00
34 | 2.724978208541870117e+00
35 | 2.613666296005249023e+00
36 | 2.461195707321166992e+00
37 | 2.366128444671630859e+00
38 | 2.293519973754882812e+00
39 | 2.214362859725952148e+00
40 | 2.146535158157348633e+00
41 | 1.907979846000671387e+00
42 | 1.876132249832153320e+00
43 | 1.859354138374328613e+00
44 | 1.775403857231140137e+00
45 | 1.764379262924194336e+00
46 | 1.694374799728393555e+00
47 | 1.665422201156616211e+00
48 | 1.622999191284179688e+00
49 | 1.610870122909545898e+00
50 | 1.546877861022949219e+00
51 | 1.523749589920043945e+00
52 | 1.483136296272277832e+00
53 | 1.481247901916503906e+00
54 | 1.423740625381469727e+00
55 | 1.406941294670104980e+00
56 | 1.378324389457702637e+00
57 | 1.357655882835388184e+00
58 | 1.335111260414123535e+00
59 | 1.306033611297607422e+00
60 | 1.282203078269958496e+00
61 | 1.257453799247741699e+00
62 | 1.242352604866027832e+00
63 | 1.201884031295776367e+00
64 | 1.184469461441040039e+00
65 | 1.166077256202697754e+00
66 | 1.114611506462097168e+00
67 | 1.102498888969421387e+00
68 | 1.085692048072814941e+00
69 | 1.060934782028198242e+00
70 | 1.029542207717895508e+00
71 | 1.017418980598449707e+00
72 | 1.005733728408813477e+00
73 | 9.654797911643981934e-01
74 | 9.343039393424987793e-01
75 | 9.260153770446777344e-01
76 | 9.126370549201965332e-01
77 | 8.995376825332641602e-01
78 | 8.933218717575073242e-01
79 | 8.765093088150024414e-01
80 | 8.631937503814697266e-01
81 | 8.594997525215148926e-01
82 | 8.442590236663818359e-01
83 | 8.324881196022033691e-01
84 | 8.141325116157531738e-01
85 | 8.030978441238403320e-01
86 | 7.934149503707885742e-01
87 | 7.738255858421325684e-01
88 | 7.693558931350708008e-01
89 | 7.474056482315063477e-01
90 | 7.435721158981323242e-01
91 | 7.271158099174499512e-01
92 | 7.163758873939514160e-01
93 | 7.030839323997497559e-01
94 | 6.789638996124267578e-01
95 | 6.737074851989746094e-01
96 | 6.597926020622253418e-01
97 | 6.343482136726379395e-01
98 | 6.245849728584289551e-01
99 | 6.192614436149597168e-01
100 | 6.046380400657653809e-01
101 | 5.935505628585815430e-01
102 | 5.786783099174499512e-01
103 | 5.713734626770019531e-01
104 | 5.640091300010681152e-01
105 | 5.604548454284667969e-01
106 | 5.492605566978454590e-01
107 | 5.269949436187744141e-01
108 | 5.183300971984863281e-01
109 | 5.088832378387451172e-01
110 | 4.984530508518218994e-01
111 | 4.879687726497650146e-01
112 | 4.835968017578125000e-01
113 | 4.738907516002655029e-01
114 | 4.641085267066955566e-01
115 | 4.461972415447235107e-01
116 | 4.422465264797210693e-01
117 | 4.404929280281066895e-01
118 | 4.266946017742156982e-01
119 | 4.236666858196258545e-01
120 | 4.139477312564849854e-01
121 | 4.108542203903198242e-01
122 | 4.007500708103179932e-01
123 | 3.902464807033538818e-01
124 | 3.863844573497772217e-01
125 | 3.740646839141845703e-01
126 | 3.641172349452972412e-01
127 | 3.496397733688354492e-01
128 | 3.446161448955535889e-01
129 | 3.107274472713470459e-01
130 | 2.185715138912200928e-01
131 | 1.266324252355843782e-04
132 | 1.034445594996213913e-04
133 | 2.327194488316308707e-05
134 | 2.060086262645199895e-05
135 | 1.640349546505603939e-05
136 | 1.241218888026196510e-05
137 |
--------------------------------------------------------------------------------
/face_alignment/models/shape_parameter_s_wild.txt:
--------------------------------------------------------------------------------
1 | 1.970495758056640625e+02
2 | 1.147242965698242188e+02
3 | 7.450263214111328125e+01
4 | 4.619513702392578125e+01
5 | 3.563884353637695312e+01
6 | 2.953556442260742188e+01
7 | 2.518301963806152344e+01
8 | 2.209227180480957031e+01
9 | 1.681234169006347656e+01
10 | 1.483633422851562500e+01
11 | 1.392493820190429688e+01
12 | 1.294036865234375000e+01
13 | 1.165308284759521484e+01
14 | 1.114840412139892578e+01
15 | 9.776822090148925781e+00
16 | 8.556041717529296875e+00
17 | 8.135818481445312500e+00
18 | 8.057154655456542969e+00
19 | 7.755284786224365234e+00
20 | 7.051324844360351562e+00
21 | 6.971053600311279297e+00
22 | 6.026376247406005859e+00
23 | 5.712540626525878906e+00
24 | 5.444037437438964844e+00
25 | 5.380769252777099609e+00
26 | 5.245779514312744141e+00
27 | 5.084469318389892578e+00
28 | 4.777353763580322266e+00
29 | 4.659717559814453125e+00
30 | 4.281981468200683594e+00
31 | 4.220893383026123047e+00
32 | 4.155749320983886719e+00
33 | 4.146553993225097656e+00
34 | 3.788559436798095703e+00
35 | 3.753429412841796875e+00
36 | 3.653527259826660156e+00
37 | 3.541052818298339844e+00
38 | 3.238025188446044922e+00
39 | 3.161108016967773438e+00
40 | 2.992911100387573242e+00
41 | 2.964578628540039062e+00
42 | 2.928684711456298828e+00
43 | 2.812888145446777344e+00
44 | 2.672780752182006836e+00
45 | 2.297840595245361328e+00
46 | 2.162400960922241211e+00
47 | 2.125375986099243164e+00
48 | 2.069871425628662109e+00
49 | 2.011986732482910156e+00
50 | 1.985482931137084961e+00
51 | 1.916026353836059570e+00
52 | 1.875906825065612793e+00
53 | 1.837199926376342773e+00
54 | 1.768927335739135742e+00
55 | 1.732427358627319336e+00
56 | 1.714664340019226074e+00
57 | 1.667707800865173340e+00
58 | 1.633123993873596191e+00
59 | 1.602980017662048340e+00
60 | 1.585176110267639160e+00
61 | 1.543024063110351562e+00
62 | 1.507443666458129883e+00
63 | 1.486185431480407715e+00
64 | 1.423241972923278809e+00
65 | 1.388038516044616699e+00
66 | 1.372025370597839355e+00
67 | 1.328630685806274414e+00
68 | 1.319206714630126953e+00
69 | 1.295352339744567871e+00
70 | 1.281113266944885254e+00
71 | 1.258739233016967773e+00
72 | 1.223977208137512207e+00
73 | 1.198363065719604492e+00
74 | 1.166336297988891602e+00
75 | 1.107284784317016602e+00
76 | 1.077126860618591309e+00
77 | 1.055412054061889648e+00
78 | 1.015959739685058594e+00
79 | 9.891035556793212891e-01
80 | 9.715236425399780273e-01
81 | 9.593613147735595703e-01
82 | 9.326089024543762207e-01
83 | 9.173798561096191406e-01
84 | 9.023692011833190918e-01
85 | 8.879134654998779297e-01
86 | 8.786404728889465332e-01
87 | 8.638746142387390137e-01
88 | 8.559817671775817871e-01
89 | 8.491606116294860840e-01
90 | 8.322493433952331543e-01
91 | 8.189594745635986328e-01
92 | 8.083713650703430176e-01
93 | 7.968765497207641602e-01
94 | 7.838517427444458008e-01
95 | 7.731590270996093750e-01
96 | 7.560074925422668457e-01
97 | 7.524335980415344238e-01
98 | 7.510975599288940430e-01
99 | 7.457538247108459473e-01
100 | 7.109788060188293457e-01
101 | 6.914134025573730469e-01
102 | 6.808288097381591797e-01
103 | 6.761116385459899902e-01
104 | 6.582429409027099609e-01
105 | 6.515301465988159180e-01
106 | 6.346591114997863770e-01
107 | 6.292785406112670898e-01
108 | 6.262359619140625000e-01
109 | 6.150320768356323242e-01
110 | 6.042692661285400391e-01
111 | 5.963109135627746582e-01
112 | 5.879486799240112305e-01
113 | 5.791180729866027832e-01
114 | 5.688433647155761719e-01
115 | 5.627683997154235840e-01
116 | 5.566114783287048340e-01
117 | 5.424736738204956055e-01
118 | 5.290962457656860352e-01
119 | 5.219849944114685059e-01
120 | 5.168567895889282227e-01
121 | 5.056280493736267090e-01
122 | 4.916095137596130371e-01
123 | 4.908132255077362061e-01
124 | 4.677435755729675293e-01
125 | 4.633058011531829834e-01
126 | 4.573886096477508545e-01
127 | 4.397208690643310547e-01
128 | 4.285275936126708984e-01
129 | 4.175726473331451416e-01
130 | 4.158701598644256592e-01
131 | 3.877090513706207275e-01
132 | 3.767875134944915771e-01
133 | 3.591192364692687988e-01
134 | 3.540259003639221191e-01
135 | 3.483022153377532959e-01
136 | 2.917855679988861084e-01
137 |
--------------------------------------------------------------------------------
/face_alignment/models/warped_mean_front.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_front.bmp
--------------------------------------------------------------------------------
/face_alignment/models/warped_mean_left.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_left.bmp
--------------------------------------------------------------------------------
/face_alignment/models/warped_mean_right.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_right.bmp
--------------------------------------------------------------------------------
/face_alignment/python/make_wild_input.py:
--------------------------------------------------------------------------------
1 | # make input of initialization network
2 | import glob
3 | import numpy as np
4 | import random
5 | from datetime import datetime
6 | import matplotlib.pyplot as plt
7 | import fa_util as fu
8 | import fa_util_train as fut
9 | import h5py
10 | import cv2
11 |
12 |
13 | list_file_name = 'K:/VGG_list/vgg_list_all_000.txt'
14 | output_prefix = 'K:/VGG_hdf5/init/VGG_wild_000'
15 | # img_folders = ['../sample_data']
16 | img_folders = ['D:/DB/FaceAlignment/HS_distribution/front', 'D:/DB/FaceAlignment/HS_distribution/left', 'D:/DB/FaceAlignment/HS_distribution/right']
17 | # output_prefix = 'M:/HS_hdf5/wild/HS_wild'
18 | jittering_size = 1 # should be changed 32*128
19 | chunk_size = 1536 # should be changed
20 |
21 |
22 | def get_part_pts(gt_pts, warp_mat_inv):
23 | part_centers = fu.get_part_centers(gt_pts)
24 | part_gt_pts = np.hstack((part_centers, np.ones((len(part_centers), 1), np.float32)))
25 | part_gt_pts_t = np.transpose(part_gt_pts)
26 | part_pts3 = np.dot(warp_mat_inv, part_gt_pts_t)
27 | return np.transpose(part_pts3)[:, 0:2]
28 |
29 |
30 | def main():
31 | # files = fut.make_file_list_by_folder(img_folders, ['png', 'jpg']) # get image file list by folder
32 | files = fut.make_file_list_by_text(list_file_name) # get image file list by text file
33 | n_samples = len(files)
34 | random.seed(1234) # set random seed
35 | random.shuffle(files) # random shuffle
36 | image_data_sets = fut.make_chunk_set(files, chunk_size) # get image file chunk set
37 | print('Total number of samples: ' + str(n_samples))
38 |
39 | cnt_all = 0 # cnt for sample images
40 | for i in range(len(image_data_sets)):
41 | current_num_img_files = len(image_data_sets[i])
42 | img_all = np.zeros((current_num_img_files * jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8)
43 | pts_all = np.zeros((current_num_img_files * jittering_size, fu.n_points, 2), np.float32)
44 | # part_all = np.zeros((current_num_img_files * jittering_size, fu.n_parts, 2), np.float32)
45 |
46 | # generate data
47 | cnt = 0 # cnt for total samples with jittering
48 | for x in image_data_sets[i]:
49 | current_img_set = np.zeros((jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8)
50 | current_pts_set = np.zeros((jittering_size, fu.n_points, 2), np.float32)
51 | # current_part_set = np.zeros((jittering_size, fu.n_parts, 2), np.float32)
52 | cnt_all = cnt_all + 1
53 | print(str(datetime.now()) + ' (' + str(cnt_all) + '/' + str(n_samples) + ') ' + x)
54 | img, gt_pts = fut.load_img_pts(x)
55 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts)
56 | for k in range(jittering_size):
57 | if k == 0:
58 | face_box3_jittered = face_box3
59 | else:
60 | face_box3_jittered = fut.get_jittered_bounding_box3(face_box3)
61 |
62 |
63 | img_face, M_inv, M = fu.get_cropped_face_cv(img, face_box3_jittered)
64 |
65 |
66 |
67 | pts = cv2.transform(gt_pts.reshape((fu.n_points, 1, 2)), M)
68 |
69 | pts = pts.reshape((fu.n_points, 2))
70 |
71 | # warp_mat_inv = np.linalg.inv(warp_mat)
72 | # pts = fu.get_warped_pts(gt_pts, warp_mat_inv.transpose())
73 | # part_pts = get_part_pts(gt_pts, warp_mat_inv)
74 |
75 | current_img_set[k, :, :, :] = img_face
76 | current_pts_set[k, :, :] = pts
77 | # current_part_set[k, :, :] = part_pts
78 |
79 | # # draw
80 | # plt.figure(1)
81 | # plt.gcf().clear()
82 | # plt.imshow(img_face)
83 | # plt.scatter(pts[:, 0], pts[:, 1], c='b')
84 | # # plt.scatter(part_pts[:, 0], part_pts[:, 1], c='r')
85 | # plt.draw()
86 | # plt.pause(0.001)
87 | # z = 0
88 | img_all[cnt:cnt + jittering_size, :, :, :] = current_img_set
89 | pts_all[cnt:cnt + jittering_size, :, :] = current_pts_set
90 | # part_all[cnt:cnt + jittering_size, :, :] = current_part_set
91 | cnt = cnt + jittering_size
92 | img_all = img_all.transpose((0, 3, 1, 2)) # order: sample, c, m, n
93 | pts_all[:, :, 0] = pts_all[:, :, 0] / fu.init_w # normalize to 0~1
94 | pts_all[:, :, 1] = pts_all[:, :, 1] / fu.init_h # normalize to 0~1
95 | # part_all[:, :, 0] = part_all[:, :, 0] / fu.init_w # normalize to 0~1
96 | # part_all[:, :, 1] = part_all[:, :, 1] / fu.init_h # normalize to 0~1
97 |
98 | shuffle_idx = np.random.permutation(current_num_img_files * jittering_size) # shuffle
99 | img_all = img_all[shuffle_idx, :, :, :]
100 | pts_all = pts_all[shuffle_idx, :, :]
101 | # part_all = part_all[shuffle_idx, :, :]
102 |
103 | current_output_path = "%s_%03d.h5" % (output_prefix, i)
104 | hf = h5py.File(current_output_path, 'w')
105 | input_face_img_name = "img"
106 | warped_img_set = hf.create_dataset(input_face_img_name, data=img_all)
107 | input_pts_name = "pts"
108 | pts_set = hf.create_dataset(input_pts_name, data=pts_all)
109 | # input_part_name = "part"
110 | # part_set = hf.create_dataset(input_part_name, data=part_all)
111 | hf.close()
112 |
113 |
114 | if __name__ == "__main__":
115 | main()
116 |
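Note: each chunk written above is an HDF5 file with an `img` dataset (uint8, sample x channel x init_h x init_w) and a `pts` dataset (float32, sample x n_points x 2, coordinates normalized to 0~1). A minimal read-back sketch, assuming a produced chunk such as `K:/VGG_hdf5/init/VGG_wild_000_000.h5` exists (the path is only illustrative):

```python
import h5py
import numpy as np

# Illustrative chunk path; substitute one actually written by make_wild_input.py.
with h5py.File("K:/VGG_hdf5/init/VGG_wild_000_000.h5", "r") as hf:
    imgs = np.asarray(hf["img"])  # (N, channel, init_h, init_w), uint8
    pts = np.asarray(hf["pts"])   # (N, n_points, 2), normalized to 0~1
print(imgs.shape, pts.shape)
```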
--------------------------------------------------------------------------------
/face_alignment/python/test_300w_public.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from datetime import datetime
5 | import face_alignment as fa
6 | import fa_util_train as fut
7 |
8 | img_folders = [r'N:\DB\FaceAlignment\300W_public_test\lfpw', r'N:\DB\FaceAlignment\300W_public_test\helen', r'N:\DB\FaceAlignment\300W_public_test\ibug']
9 | # img_folders = [r'D:\DB\FaceAlignment\300W_public_test\ibug']
10 | # img_folders = ['../sample_data2']
11 | img_extension = ['png', 'jpg']
12 | output_folder = '../result'
13 | max_iter = 21
14 | # max_iter = 1
15 |
16 | def main():
17 | # fa.fa_init([[1], [0, 0], [0, 0]])
18 | fa.fa_init([[1], [1], [0, 0], [1, 0]])
19 | files = []
20 | current_pts = np.zeros((max_iter+1, 68, 2), np.float32)
21 | error_IOD = np.zeros((max_iter+1, 1), np.float32)
22 | error_BOX = np.zeros((max_iter+1, 1), np.float32)
23 | for folder in img_folders:
24 | for ext in img_extension:
25 | current_files = glob.glob(folder + '/*.' + ext)
26 | files.extend(current_files)
27 |
28 | cnt = 0
29 | n_samples = len(files)
30 | for x in files:
31 | cnt += 1
32 |
33 | img, gt_pts = fut.load_img_pts(x)
34 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts)
35 | # current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1)
36 | # current_pts[0, :, :], _, current_pts[1, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1)
37 | current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1)
38 |
39 | for i in range(2, max_iter):
40 | current_pts[i+1, :, :], pose_idx = fa.face_alignment_detection_step(img, current_pts[i, :, :], pose_idx)
41 |
42 | print(str(datetime.now()) + ' ' + str(cnt) + '/' + str(n_samples) + ' ' + x)
43 | for i in range(0, max_iter):
44 | output_path = fut.get_output_path(x, output_folder, 'pt%d' % i)
45 | fut.save_pts(output_path, current_pts[i, :, :])
46 | error_IOD[i, 0], error_BOX[i, 0] = fut.measurement(gt_pts, current_pts[i, :, :])
47 | print('Error%d :' % i + str(error_IOD[i, 0]))
48 |
49 | # draw
50 | draw_idx = [0, 1, 2, 3, 4]
51 | # draw_idx = [0]
52 | plt.figure(1)
53 | plt.gcf().clear()
54 | draw_cnt = 1
55 | for i in draw_idx:
56 | plt.subplot(1, len(draw_idx), draw_cnt)
57 | plt.imshow(img)
58 | plt.scatter(current_pts[i, :, 0], current_pts[i, :, 1], s=3, c='r')
59 | draw_cnt += 1
60 | plt.draw()
61 | plt.pause(0.001)
62 | z = 1
63 |
64 |
65 | if __name__ == "__main__":
66 | main()
67 |
--------------------------------------------------------------------------------
/face_detection/.gitignore:
--------------------------------------------------------------------------------
1 | data/FDDB/images/
2 | data/widerface/
3 | eval/
4 | results/
5 |
--------------------------------------------------------------------------------
/face_detection/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/face_detection/NOTICE:
--------------------------------------------------------------------------------
1 | This project contains subcomponents with separate copyright notices and license terms.
2 | Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses.
3 |
4 | =====
5 |
6 | biubug6/Pytorch_Retinaface
7 | https://github.com/biubug6/Pytorch_Retinaface
8 |
9 |
10 | MIT License
11 |
12 | Copyright (c) 2019
13 |
14 | Permission is hereby granted, free of charge, to any person obtaining a copy
15 | of this software and associated documentation files (the "Software"), to deal
16 | in the Software without restriction, including without limitation the rights
17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 | copies of the Software, and to permit persons to whom the Software is
19 | furnished to do so, subject to the following conditions:
20 |
21 | The above copyright notice and this permission notice shall be included in all
22 | copies or substantial portions of the Software.
23 |
24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 | SOFTWARE.
31 |
--------------------------------------------------------------------------------
/face_detection/README.md:
--------------------------------------------------------------------------------
1 | # Face Detection (work in progress)
2 | The code and checkpoints contained in this repository were adopted from the [biubug6/Pytorch_Retinaface](https://github.com/biubug6/Pytorch_Retinaface) repository.
3 |
4 |
5 | ## Getting Started
6 |
7 | ### Requirements / Installation
8 | - [Anaconda](https://www.anaconda.com/)
9 | - Nvidia GPU (for GPU utilization)
10 |
11 | Use the following commands to install the necessary packages and activate the environment:
12 | ```sh
13 | conda env create -f environment.yml
14 | conda activate retinaface
15 | ```
16 |
17 | ### Data
18 | 1. Download the [WiderFace](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) dataset.
19 |
20 | 2. Download annotations (face bounding boxes & five facial landmarks) from [baidu cloud](https://pan.baidu.com/s/1Laby0EctfuJGgGMgRRgykA).
21 |
22 | 3. Organise the dataset directory as follows:
23 |
24 | ```
25 | ./data/widerface/
26 | ├─train/
27 | │ ├─images/
28 | │ └─label.txt
29 | └─val/
30 | ├─images/
31 | └─wider_val.txt
32 | ```
33 |
34 | Note: wider_val.txt only includes the validation file names; it does not contain label information.
35 |
36 |
37 | ### Test
38 | You can use the following command to detect faces in a photo and save the result as an image:
39 | ```sh
40 | python detect.py --image <path-to-image> -s
41 | ```
42 | See [detect.py](detect.py#L16) for available arguments.
43 |
44 |
45 | ## Training
46 | We provide resnet50 and mobilenet0.25 as backbone networks for training.
47 | We trained MobileNet0.25 on the ImageNet dataset and obtained 46.58% top-1 accuracy. If you do not wish to train the model yourself, we also provide trained models. The pretrained and trained models are available on [google cloud](https://drive.google.com/open?id=1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1) and [baidu cloud](https://pan.baidu.com/s/12h97Fy1RYuqMMIV-RpzdPg) (password: fstq). Place the models as follows:
48 | ```bash
49 | ./weights/
50 | ├─mobilenet0.25_final.pt
51 | └─mobilenet0.25_pretrain.pt
52 | ```
53 | 1. Before training, check the network configuration (e.g. batch_size, min_sizes, steps) in ``data/config.py`` and ``train_detector.py``.
54 |
55 | 2. Train the model on WiderFace with either backbone:
56 | ```Shell
57 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_detector.py --network resnet50
58 | CUDA_VISIBLE_DEVICES=0 python train_detector.py --network mobilenet0.25
59 | ```
60 |
61 |
62 | ## Evaluation
63 |
64 | ### Evaluation WiderFace val
65 | 1. Generate the txt results:
66 | ```Shell
67 | python test_widerface.py --trained-model <weights-path> --network mobilenet0.25  # or resnet50
68 | ```
69 | 2. Evaluate the txt results. The evaluation code comes from [WiderFace-Evaluation](https://github.com/wondervictor/WiderFace-Evaluation):
70 | ```Shell
71 | cd ./widerface_evaluate
72 | python setup.py build_ext --inplace
73 | python evaluation.py
74 | ```
75 | 3. You can also use the official WiderFace Matlab evaluation demo, available [here](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html).
76 |
77 | ### Evaluation FDDB
78 |
79 | 1. Download the [FDDB](https://drive.google.com/open?id=17t4WULUDgZgiSy5kpCax4aooyPaz3GQH) images to:
80 | ```Shell
81 | ./data/FDDB/images/
82 | ```
83 |
84 | 2. Evaluate the trained model using:
85 | ```Shell
86 | python test_fddb.py --trained_model <weights-path> --network mobilenet0.25  # or resnet50
87 | ```
88 |
89 | 3. ~~Download [eval_tool](https://bitbucket.org/marcopede/face-eval) to evaluate the performance.~~ This link doesn't seem to work anymore. We found [this](https://github.com/RuisongZhou/FDDB_Evaluation) repository, but haven't tested it yet.
90 |
91 |
92 | ## References and Citation
93 | - [RetinaFace in PyTorch](https://github.com/biubug6/Pytorch_Retinaface)
94 | - [FaceBoxes](https://github.com/zisianw/FaceBoxes.PyTorch)
95 | - [Retinaface (mxnet)](https://github.com/deepinsight/insightface/tree/master/RetinaFace)
96 |
97 | ```
98 | @inproceedings{deng2020retinaface,
99 | title={RetinaFace: Single-Shot Multi-Level Face Localisation in the wild},
100 | author={Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos},
101 | booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
102 | pages={5203--5212},
103 | year={2020}
104 | }
105 | ```
--------------------------------------------------------------------------------
/face_detection/convert_to_onnx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 |
5 | import torch
6 | from model.retinaface import RetinaFace
7 |
8 | parser = argparse.ArgumentParser(description='Convert to ONNX')
9 | parser.add_argument(
10 | '--checkpoint', type=str,
11 | default='./weights/mobilenet0.25_final.pt',
12 | help='Trained state_dict file path to open'
13 | )
14 | parser.add_argument(
15 | '--long-side', type=int, default=640,
16 | help='when origin_size is false, long_side is scaled size(320 or 640 for long side)'
17 | )
18 | parser.add_argument(
19 | '--cpu', action="store_true",
20 | help='Use cpu inference'
21 | )
22 |
23 |
24 | def main():
25 | args = parser.parse_args()
26 | assert os.path.isfile(args.checkpoint)
27 |
28 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
29 | cfg = checkpoint["config"]
30 | device = torch.device("cpu" if args.cpu else "cuda")
31 |
32 | # net and model
33 | net = RetinaFace(**cfg)
34 | net.load_state_dict(checkpoint["net_state_dict"], strict=False)
35 | net.eval().requires_grad_(False)
36 | net.to(device)
37 | print('Finished loading model!')
38 |
39 | # ------------------------ export -----------------------------
40 | output_onnx = 'face_detector.onnx'
41 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx))
42 | input_names = ["input0"]
43 | output_names = ["output0"]
44 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device)
45 |
46 | torch.onnx.export(net, inputs, output_onnx, export_params=True, verbose=False,
47 | input_names=input_names, output_names=output_names)
48 |
49 |
50 | if __name__ == "__main__":
51 | main()
52 |
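A minimal sketch of running the exported `face_detector.onnx` with ONNX Runtime (assuming the `onnxruntime` package is installed; the random input only mirrors the export shape above and is meant as a smoke test):

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("face_detector.onnx")
# Dummy input matching the export: (1, 3, long_side, long_side) with the default long_side=640.
dummy = np.random.randn(1, 3, 640, 640).astype(np.float32)
outputs = sess.run(None, {"input0": dummy})  # "input0" is the input name set during export
print([o.shape for o in outputs])
```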
--------------------------------------------------------------------------------
/face_detection/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import *
2 | from .data_augment import *
3 | from .wider_face import WiderFaceDetection
4 |
--------------------------------------------------------------------------------
/face_detection/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 |
3 | cfg_mnet = {
4 | 'backbone': 'mobilenet0.25',
5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]],
6 | 'steps': [8, 16, 32],
7 | 'variance': [0.1, 0.2],
8 | 'clip': False,
9 | 'loc_weight': 2.0,
10 | 'batch_size': 32,
11 | 'epoch': 250,
12 | 'decay1': 190,
13 | 'decay2': 220,
14 | 'image_size': 640,
15 | 'pretrain': True,
16 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
17 | 'in_channel': 32,
18 | 'out_channel': 64
19 | }
20 |
21 | cfg_re50 = {
22 | 'backbone': 'Resnet50',
23 | 'min_sizes': [[16, 32], [64, 128], [256, 512]],
24 | 'steps': [8, 16, 32],
25 | 'variance': [0.1, 0.2],
26 | 'clip': False,
27 | 'loc_weight': 2.0,
28 | 'batch_size': 24,
29 | 'epoch': 100,
30 | 'decay1': 70,
31 | 'decay2': 90,
32 | 'image_size': 840,
33 | 'pretrain': True,
34 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
35 | 'in_channel': 256,
36 | 'out_channel': 256
37 | }
38 |
39 |
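A small helper sketch (an assumption for illustration, not code from this repository) for mapping the `--network` argument used in the README commands to one of the config dicts above; the actual training/testing scripts may resolve configs differently:

```python
from data.config import cfg_mnet, cfg_re50

def get_config(network: str) -> dict:
    # Hypothetical mapping from backbone name to config dict.
    if network == "mobilenet0.25":
        return cfg_mnet
    if network == "resnet50":
        return cfg_re50
    raise ValueError(f"unknown backbone: {network}")
```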
--------------------------------------------------------------------------------
/face_detection/data/data_augment.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import cv2
4 | import numpy as np
5 | from utils.box_utils import matrix_iof
6 |
7 |
8 | def _crop(image, boxes, labels, landm, img_dim):
9 | height, width, _ = image.shape
10 | pad_image_flag = True
11 |
12 | for _ in range(250):
13 | """
14 | if random.uniform(0, 1) <= 0.2:
15 | scale = 1.0
16 | else:
17 | scale = random.uniform(0.3, 1.0)
18 | """
19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
20 | scale = random.choice(PRE_SCALES)
21 | short_side = min(width, height)
22 | w = int(scale * short_side)
23 | h = w
24 |
25 | if width == w:
26 | l = 0
27 | else:
28 | l = random.randrange(width - w)
29 | if height == h:
30 | t = 0
31 | else:
32 | t = random.randrange(height - h)
33 | roi = np.array((l, t, l + w, t + h))
34 |
35 | value = matrix_iof(boxes, roi[np.newaxis])
36 | flag = (value >= 1)
37 | if not flag.any():
38 | continue
39 |
40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2
41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
42 | boxes_t = boxes[mask_a].copy()
43 | labels_t = labels[mask_a].copy()
44 | landms_t = landm[mask_a].copy()
45 | landms_t = landms_t.reshape([-1, 5, 2])
46 |
47 | if boxes_t.shape[0] == 0:
48 | continue
49 |
50 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
51 |
52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
53 | boxes_t[:, :2] -= roi[:2]
54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
55 | boxes_t[:, 2:] -= roi[:2]
56 |
57 | # landm
58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
61 | landms_t = landms_t.reshape([-1, 10])
62 |
63 |
64 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale
65 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
66 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
67 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0
68 | boxes_t = boxes_t[mask_b]
69 | labels_t = labels_t[mask_b]
70 | landms_t = landms_t[mask_b]
71 |
72 | if boxes_t.shape[0] == 0:
73 | continue
74 |
75 | pad_image_flag = False
76 |
77 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag
78 | return image, boxes, labels, landm, pad_image_flag
79 |
80 |
81 | def _distort(image):
82 |
83 | def _convert(image, alpha=1, beta=0):
84 | tmp = image.astype(float) * alpha + beta
85 | tmp[tmp < 0] = 0
86 | tmp[tmp > 255] = 255
87 | image[:] = tmp
88 |
89 | image = image.copy()
90 |
91 | if random.randrange(2):
92 |
93 | #brightness distortion
94 | if random.randrange(2):
95 | _convert(image, beta=random.uniform(-32, 32))
96 |
97 | #contrast distortion
98 | if random.randrange(2):
99 | _convert(image, alpha=random.uniform(0.5, 1.5))
100 |
101 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
102 |
103 | #saturation distortion
104 | if random.randrange(2):
105 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
106 |
107 | #hue distortion
108 | if random.randrange(2):
109 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
110 | tmp %= 180
111 | image[:, :, 0] = tmp
112 |
113 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
114 |
115 | else:
116 |
117 | #brightness distortion
118 | if random.randrange(2):
119 | _convert(image, beta=random.uniform(-32, 32))
120 |
121 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
122 |
123 | #saturation distortion
124 | if random.randrange(2):
125 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
126 |
127 | #hue distortion
128 | if random.randrange(2):
129 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
130 | tmp %= 180
131 | image[:, :, 0] = tmp
132 |
133 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
134 |
135 | #contrast distortion
136 | if random.randrange(2):
137 | _convert(image, alpha=random.uniform(0.5, 1.5))
138 |
139 | return image
140 |
141 |
142 | def _expand(image, boxes, fill, p):
143 | if random.randrange(2):
144 | return image, boxes
145 |
146 | height, width, depth = image.shape
147 |
148 | scale = random.uniform(1, p)
149 | w = int(scale * width)
150 | h = int(scale * height)
151 |
152 | left = random.randint(0, w - width)
153 | top = random.randint(0, h - height)
154 |
155 | boxes_t = boxes.copy()
156 | boxes_t[:, :2] += (left, top)
157 | boxes_t[:, 2:] += (left, top)
158 | expand_image = np.empty(
159 | (h, w, depth),
160 | dtype=image.dtype)
161 | expand_image[:, :] = fill
162 | expand_image[top:top + height, left:left + width] = image
163 | image = expand_image
164 |
165 | return image, boxes_t
166 |
167 |
168 | def _mirror(image, boxes, landms):
169 | _, width, _ = image.shape
170 | if random.randrange(2):
171 | image = image[:, ::-1]
172 | boxes = boxes.copy()
173 | boxes[:, 0::2] = width - boxes[:, 2::-2]
174 |
175 | # landm
176 | landms = landms.copy()
177 | landms = landms.reshape([-1, 5, 2])
178 | landms[:, :, 0] = width - landms[:, :, 0]
179 | tmp = landms[:, 1, :].copy()
180 | landms[:, 1, :] = landms[:, 0, :]
181 | landms[:, 0, :] = tmp
182 | tmp1 = landms[:, 4, :].copy()
183 | landms[:, 4, :] = landms[:, 3, :]
184 | landms[:, 3, :] = tmp1
185 | landms = landms.reshape([-1, 10])
186 |
187 | return image, boxes, landms
188 |
189 |
190 | def _pad_to_square(image, rgb_mean, pad_image_flag):
191 | if not pad_image_flag:
192 | return image
193 | height, width, _ = image.shape
194 | long_side = max(width, height)
195 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
196 | image_t[:, :] = rgb_mean
197 | image_t[0:0 + height, 0:0 + width] = image
198 | return image_t
199 |
200 |
201 | def _resize_subtract_mean(image, insize, rgb_mean):
202 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
203 | interp_method = interp_methods[random.randrange(5)]
204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method)
205 | image = image.astype(np.float32)
206 | image -= rgb_mean
207 | return image.transpose(2, 0, 1)
208 |
209 |
210 | class preproc(object):
211 |
212 | def __init__(self, img_dim, rgb_means):
213 | self.img_dim = img_dim
214 | self.rgb_means = rgb_means
215 |
216 | def __call__(self, image, targets):
217 | assert targets.shape[0] > 0, "this image does not have gt"
218 |
219 | boxes = targets[:, :4].copy()
220 | labels = targets[:, -1].copy()
221 | landm = targets[:, 4:-1].copy()
222 |
223 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
224 | image_t = _distort(image_t)
225 | image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
226 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
227 | height, width, _ = image_t.shape
228 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
229 | boxes_t[:, 0::2] /= width
230 | boxes_t[:, 1::2] /= height
231 |
232 | landm_t[:, 0::2] /= width
233 | landm_t[:, 1::2] /= height
234 |
235 | labels_t = np.expand_dims(labels_t, 1)
236 | targets_t = np.hstack((boxes_t, landm_t, labels_t))
237 |
238 | return image_t, targets_t
239 |
--------------------------------------------------------------------------------
/face_detection/data/wider_face.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | import torch.utils.data as data
5 |
6 |
7 | class WiderFaceDetection(data.Dataset):
8 | def __init__(self, txt_path, preproc=None):
9 | self.preproc = preproc
10 | self.imgs_path = []
11 | self.words = []
12 | f = open(txt_path,'r')
13 | lines = f.readlines()
14 | isFirst = True
15 | labels = []
16 | for line in lines:
17 | line = line.rstrip()
18 | if line.startswith('#'):
19 | if isFirst is True:
20 | isFirst = False
21 | else:
22 | labels_copy = labels.copy()
23 | self.words.append(labels_copy)
24 | labels.clear()
25 | path = line[2:]
26 | path = txt_path.replace('label.txt','images/') + path
27 | self.imgs_path.append(path)
28 | else:
29 | line = line.split(' ')
30 | label = [float(x) for x in line]
31 | labels.append(label)
32 |
33 | self.words.append(labels)
34 |
35 | def __len__(self):
36 | return len(self.imgs_path)
37 |
38 | def __getitem__(self, index):
39 | img = cv2.imread(self.imgs_path[index])
40 | height, width, _ = img.shape
41 |
42 | labels = self.words[index]
43 | annotations = np.zeros((0, 15))
44 | if len(labels) == 0:
45 | return annotations
46 | for idx, label in enumerate(labels):
47 | annotation = np.zeros((1, 15))
48 | # bbox
49 | annotation[0, 0] = label[0] # x1
50 | annotation[0, 1] = label[1] # y1
51 | annotation[0, 2] = label[0] + label[2] # x2
52 | annotation[0, 3] = label[1] + label[3] # y2
53 |
54 | # landmarks
55 | annotation[0, 4] = label[4] # l0_x
56 | annotation[0, 5] = label[5] # l0_y
57 | annotation[0, 6] = label[7] # l1_x
58 | annotation[0, 7] = label[8] # l1_y
59 | annotation[0, 8] = label[10] # l2_x
60 | annotation[0, 9] = label[11] # l2_y
61 | annotation[0, 10] = label[13] # l3_x
62 | annotation[0, 11] = label[14] # l3_y
63 | annotation[0, 12] = label[16] # l4_x
64 | annotation[0, 13] = label[17] # l4_y
65 | if (annotation[0, 4]<0):
66 | annotation[0, 14] = -1
67 | else:
68 | annotation[0, 14] = 1
69 |
70 | annotations = np.append(annotations, annotation, axis=0)
71 | target = np.array(annotations)
72 | if self.preproc is not None:
73 | img, target = self.preproc(img, target)
74 |
75 | return torch.from_numpy(img), target
76 |
77 | @staticmethod
78 | def collate(batch):
79 | """Custom collate fn for dealing with batches of images that have a different
80 | number of associated object annotations (bounding boxes).
81 |
82 | Arguments:
83 | batch: (tuple) A tuple of tensor images and lists of annotations
84 |
85 | Return:
86 | A tuple containing:
87 | 1) (tensor) batch of images stacked on their 0 dim
88 | 2) (list of tensors) annotations for a given image are stacked on 0 dim
89 | """
90 | targets = []
91 | imgs = []
92 | for _, sample in enumerate(batch):
93 | for _, tup in enumerate(sample):
94 | if torch.is_tensor(tup):
95 | imgs.append(tup)
96 | elif isinstance(tup, type(np.empty(0))):
97 | annos = torch.from_numpy(tup).float()
98 | targets.append(annos)
99 |
100 | return torch.stack(imgs, 0), targets
101 |
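A minimal usage sketch wiring `WiderFaceDetection` to a `DataLoader` with the custom `collate` above (it assumes the WiderFace layout described in the README; `img_dim` and the BGR means follow `data/config.py` and `detect.py`):

```python
import torch.utils.data as data
from data import WiderFaceDetection, preproc

dataset = WiderFaceDetection("./data/widerface/train/label.txt",
                             preproc(img_dim=640, rgb_means=(104, 117, 123)))
loader = data.DataLoader(dataset, batch_size=32, shuffle=True,
                         collate_fn=WiderFaceDetection.collate)
images, targets = next(iter(loader))
# images: (32, 3, 640, 640) float tensor; targets: list of (num_faces, 15) tensors.
```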
--------------------------------------------------------------------------------
/face_detection/detect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 | import time
5 |
6 | import cv2
7 | import numpy as np
8 | import torch
9 | from torchvision.ops import nms
10 |
11 | from model.prior_box import PriorBox
12 | from model.retinaface import RetinaFace
13 | from utils.box_utils import decode, decode_landm
14 | from utils.misc import draw_keypoint
15 |
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument(
18 | '--checkpoint',
19 | default='./weights/mobilenet0.25_final.pt',
20 | help='Trained state_dict file path to open'
21 | )
22 | parser.add_argument(
23 | '--image',
24 | help='Input image file to detect'
25 | )
26 | parser.add_argument(
27 | '--cpu', action="store_true", default=False,
28 | help='Use cpu inference'
29 | )
30 | parser.add_argument(
31 | '--confidence-threshold', type=float, default=0.02,
32 | help='confidence_threshold'
33 | )
34 | parser.add_argument(
35 | '--top-k', type=int, default=5000,
36 | help='top_k'
37 | )
38 | parser.add_argument(
39 | '--nms-threshold', type=float, default=0.4,
40 | help='NMS threshold'
41 | )
42 | parser.add_argument(
43 | '--keep-top-k', type=int, default=750,
44 | help='keep top k'
45 | )
46 | parser.add_argument(
47 | '-s', '--save-image', action="store_true", default=False,
48 | help='show detection results'
49 | )
50 | parser.add_argument(
51 | '--vis-thres', type=float, default=0.6,
52 | help='visualization_threshold'
53 | )
54 |
55 |
56 | @torch.no_grad()
57 | def main():
58 | args = parser.parse_args()
59 | assert os.path.isfile(args.checkpoint)
60 |
61 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
62 | cfg = checkpoint["config"]
63 | device = torch.device("cpu" if args.cpu else "cuda")
64 |
65 | # net and model
66 | net = RetinaFace(**cfg)
67 | net.load_state_dict(checkpoint["net_state_dict"], strict=False)
68 | net.eval().requires_grad_(False)
69 | net.to(device)
70 | print('Finished loading model!')
71 |
72 | resize = 1
73 |
74 | # testing begin
75 | img_raw = cv2.imread(args.image, cv2.IMREAD_COLOR)
76 |
77 | img = np.float32(img_raw)
78 |
79 | im_height, im_width, _ = img.shape
80 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
81 | img -= (104, 117, 123)
82 | img = img.transpose(2, 0, 1)
83 | img = torch.from_numpy(img).unsqueeze(0)
84 | img = img.to(device)
85 | scale = scale.to(device)
86 |
87 | tic = time.time()
88 | loc, conf, landms = net(img) # forward pass
89 | print('net forward time: {:.4f}'.format(time.time() - tic))
90 |
91 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
92 | priors = priorbox.forward()
93 | priors = priors.to(device)
94 | prior_data = priors.data
95 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
96 | boxes = boxes * scale / resize
97 | scores = conf.squeeze(0)[:, 1]
98 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
99 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
100 | img.shape[3], img.shape[2], img.shape[3], img.shape[2],
101 | img.shape[3], img.shape[2]])
102 | scale1 = scale1.to(device)
103 | landms = landms * scale1 / resize
104 |
105 | # ignore low scores
106 | inds = torch.where(scores > args.confidence_threshold)[0]
107 | boxes = boxes[inds]
108 | landms = landms[inds]
109 | scores = scores[inds]
110 |
111 | # keep top-K before NMS
112 | order = scores.argsort(descending=True)
113 | boxes = boxes[order][:args.top_k]
114 | landms = landms[order][:args.top_k]
115 | scores = scores[order][:args.top_k]
116 |
117 | # do NMS
118 | keep = nms(boxes, scores, args.nms_threshold)
119 |
120 | boxes = boxes[keep]
121 | scores = scores[keep]
122 | landms = landms[keep]
123 |
124 | boxes = boxes.cpu().numpy()
125 | scores = scores.cpu().numpy()
126 | landms = landms.cpu().numpy()
127 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
128 | dets = np.concatenate((dets, landms), axis=1)
129 |
130 | # save image
131 | if args.save_image:
132 | draw_keypoint(img_raw, dets, args.vis_thres)
133 |
134 | splits = args.image.split(".")
135 | name = ".".join(splits[:-1])
136 | ext = splits[-1]
137 | output = f"{name}_results.{ext}"
138 | cv2.imwrite(output, img_raw)
139 |
140 |
141 | if __name__ == "__main__":
142 | main()
143 |
--------------------------------------------------------------------------------
/face_detection/environment.yml:
--------------------------------------------------------------------------------
1 | name: retinaface
2 | channels:
3 | - pytorch
4 | dependencies:
5 | - cudatoolkit=11.3
6 | - matplotlib
7 | - pip
8 | - python=3.9
9 | - pytorch::pytorch=1.10.1
10 | - pytorch::torchvision
11 | - scipy
12 | - tqdm
13 | - pip:
14 | - opencv-python-headless
15 |
--------------------------------------------------------------------------------
/face_detection/model/multibox_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from utils.box_utils import match, log_sum_exp
5 |
6 |
7 | class MultiBoxLoss(nn.Module):
8 | """SSD Weighted Loss Function
9 | Compute Targets:
10 | 1) Produce Confidence Target Indices by matching ground truth boxes
11 | with (default) 'priorboxes' that have jaccard index > threshold parameter
12 | (default threshold: 0.5).
13 | 2) Produce localization target by 'encoding' variance into offsets of ground
14 | truth boxes and their matched 'priorboxes'.
15 | 3) Hard negative mining to filter the excessive number of negative examples
16 | that comes with using a large number of default bounding boxes.
17 | (default negative:positive ratio 3:1)
18 | Objective Loss:
19 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
20 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
21 | weighted by α which is set to 1 by cross val.
22 | Args:
23 | c: class confidences,
24 | l: predicted boxes,
25 | g: ground truth boxes
26 | N: number of matched default boxes
27 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
28 | """
29 |
30 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
31 | super().__init__()
32 | self.num_classes = num_classes
33 | self.threshold = overlap_thresh
34 | self.background_label = bkg_label
35 | self.encode_target = encode_target
36 | self.use_prior_for_matching = prior_for_matching
37 | self.do_neg_mining = neg_mining
38 | self.negpos_ratio = neg_pos
39 | self.neg_overlap = neg_overlap
40 | self.variance = [0.1, 0.2]
41 |
42 | def forward(self, predictions, priors, targets):
43 | """Multibox Loss
44 | Args:
45 | predictions (tuple): A tuple containing loc preds, conf preds,
46 | and prior boxes from SSD net.
47 | loc shape: torch.size(batch_size,num_priors,4)
48 | conf shape: torch.size(batch_size,num_priors,num_classes)
49 | landm shape: torch.size(batch_size,num_priors,10)
50 | priors shape: torch.size(num_priors,4)
51 |
52 | ground_truth (tensor): Ground truth boxes and labels for a batch,
53 | shape: [batch_size,num_objs,5] (last idx is the label).
54 | """
55 |
56 | loc_data, conf_data, landm_data = predictions
57 | priors = priors
58 | num = loc_data.size(0)
59 | num_priors = (priors.size(0))
60 | device = loc_data.device
61 |
62 | # match priors (default boxes) and ground truth boxes
63 | loc_t = torch.Tensor(num, num_priors, 4)
64 | landm_t = torch.Tensor(num, num_priors, 10)
65 | conf_t = torch.LongTensor(num, num_priors)
66 | for idx in range(num):
67 | truths = targets[idx][:, :4].data
68 | labels = targets[idx][:, -1].data
69 | landms = targets[idx][:, 4:14].data
70 | defaults = priors.data
71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
72 |
73 | loc_t = loc_t.to(device)
74 | conf_t = conf_t.to(device)
75 | landm_t = landm_t.to(device)
76 | zeros = torch.tensor(0, device=device)
77 |
78 | # NOTE: landm Loss (Smooth L1)
79 | # Shape: [batch,num_priors,10]
80 | pos1 = conf_t > zeros
81 | num_pos_landm = pos1.long().sum(1, keepdim=True)
82 | N1 = max(num_pos_landm.data.sum().float(), 1)
83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
84 | landm_p = landm_data[pos_idx1].view(-1, 10)
85 | landm_t = landm_t[pos_idx1].view(-1, 10)
86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
87 |
88 |
89 | pos = conf_t != zeros
90 | conf_t[pos] = 1
91 |
92 | # NOTE: Localization Loss (Smooth L1)
93 | # Shape: [batch,num_priors,4]
94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
95 | loc_p = loc_data[pos_idx].view(-1, 4)
96 | loc_t = loc_t[pos_idx].view(-1, 4)
97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
98 |
99 | # Compute max conf across batch for hard negative mining
100 | batch_conf = conf_data.view(-1, self.num_classes)
101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
102 |
103 | # NOTE: Hard Negative Mining
104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
105 | loss_c = loss_c.view(num, -1)
106 | _, loss_idx = loss_c.sort(1, descending=True)
107 | _, idx_rank = loss_idx.sort(1)
108 | num_pos = pos.long().sum(1, keepdim=True)
109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
110 | neg = idx_rank < num_neg.expand_as(idx_rank)
111 |
112 | # Confidence Loss Including Positive and Negative Examples
113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
116 | targets_weighted = conf_t[(pos+neg).gt(0)]
117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
118 |
119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
120 | N = max(num_pos.data.sum().float(), 1)
121 | loss_l /= N
122 | loss_c /= N
123 | loss_landm /= N1
124 |
125 | return loss_l, loss_c, loss_landm
126 |
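A construction sketch for this loss; the hyper-parameter values are assumptions in line with typical RetinaFace training, not values read from this repository's training script:

```python
from model.multibox_loss import MultiBoxLoss

# Assumed values: 2 classes (face / background), 0.35 IoU threshold, 7:1 negative:positive mining ratio.
criterion = MultiBoxLoss(num_classes=2, overlap_thresh=0.35, prior_for_matching=True,
                         bkg_label=0, neg_mining=True, neg_pos=7, neg_overlap=0.35,
                         encode_target=False)
# During training one would call:
#   loss_l, loss_c, loss_landm = criterion((loc, conf, landm), priors, targets)
# and weight loss_l by cfg['loc_weight'] in the total loss.
```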
--------------------------------------------------------------------------------
/face_detection/model/networks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | def conv3_bn(inp, oup, stride):
7 | return nn.Sequential(
8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
9 | nn.BatchNorm2d(oup),
10 | )
11 |
12 |
13 | def conv3_bn_lrelu(inp, oup, stride=1, leaky=0):
14 | return nn.Sequential(
15 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
16 | nn.BatchNorm2d(oup),
17 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
18 | )
19 |
20 |
21 | def conv1_bn(inp, oup, stride, leaky=0):
22 | return nn.Sequential(
23 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
24 | nn.BatchNorm2d(oup),
25 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
26 | )
27 |
28 |
29 | def conv_dw(inp, oup, stride, leaky=0.1):
30 | return nn.Sequential(
31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
32 | nn.BatchNorm2d(inp),
33 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
34 |
35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
36 | nn.BatchNorm2d(oup),
37 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
38 | )
39 |
40 |
41 | class SSH(nn.Module):
42 | # SSH: Single Stage Headless Face Detector
43 | # https://arxiv.org/abs/1708.03979
44 | def __init__(self, in_channels, out_channels):
45 | super().__init__()
46 | assert out_channels % 4 == 0
47 | leaky = 0
48 | if (out_channels <= 64):
49 | leaky = 0.1
50 | self.conv3X3 = conv3_bn(in_channels, out_channels//2, stride=1)
51 |
52 | self.conv5X5_1 = conv3_bn_lrelu(in_channels, out_channels//4, stride=1, leaky=leaky)
53 | self.conv5X5_2 = conv3_bn(out_channels//4, out_channels//4, stride=1)
54 |
55 | self.conv7X7_2 = conv3_bn_lrelu(out_channels//4, out_channels//4, stride=1, leaky=leaky)
56 | self.conv7x7_3 = conv3_bn(out_channels//4, out_channels//4, stride=1)
57 |
58 | def forward(self, input):
59 | conv3X3 = self.conv3X3(input)
60 |
61 | conv5X5_1 = self.conv5X5_1(input)
62 | conv5X5 = self.conv5X5_2(conv5X5_1)
63 |
64 | conv7X7_2 = self.conv7X7_2(conv5X5_1)
65 | conv7X7 = self.conv7x7_3(conv7X7_2)
66 |
67 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
68 | out = F.relu(out)
69 | return out
70 |
71 |
72 | class MobileNetV1(nn.Module):
73 | def __init__(self, num_classes=1000, width=0.25):
74 | super().__init__()
75 | self.stage1 = nn.Sequential(
76 | conv3_bn_lrelu(3, round(width*32), 2, leaky=0.1), # 3
77 | conv_dw(round(width*32), round(width*64), 1), # 7
78 | conv_dw(round(width*64), round(width*128), 2), # 11
79 | conv_dw(round(width*128), round(width*128), 1), # 19
80 | conv_dw(round(width*128), round(width*256), 2), # 27
81 | conv_dw(round(width*256), round(width*256), 1), # 43
82 | )
83 | self.stage2 = nn.Sequential(
84 | conv_dw(round(width*256), round(width*512), 2), # 43 + 16 = 59
85 | conv_dw(round(width*512), round(width*512), 1), # 59 + 32 = 91
86 | conv_dw(round(width*512), round(width*512), 1), # 91 + 32 = 123
87 | conv_dw(round(width*512), round(width*512), 1), # 123 + 32 = 155
88 | conv_dw(round(width*512), round(width*512), 1), # 155 + 32 = 187
89 | conv_dw(round(width*512), round(width*512), 1), # 187 + 32 = 219
90 | )
91 | self.stage3 = nn.Sequential(
92 |             conv_dw(round(width*512), round(width*1024), 2), # 219 + 32 = 241
93 | conv_dw(round(width*1024), round(width*1024), 1), # 241 + 64 = 301
94 | )
95 | self.avg = nn.AdaptiveAvgPool2d((1,1))
96 | self.fc = nn.Linear(256, num_classes)
97 |
98 | def forward(self, x):
99 | x = self.stage1(x)
100 | x = self.stage2(x)
101 | x = self.stage3(x)
102 | x = self.avg(x).view(-1, 256)
103 | x = self.fc(x)
104 | return x
105 |
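For reference, the three stages of this backbone downsample the input by 8, 16, and 32; these are the feature maps that retinaface.py taps into. A quick shape check, run from the face_detection directory (the 640x640 input resolution is just an example):

```python
import torch

from model.networks import MobileNetV1

net = MobileNetV1(width=0.25).eval()
x = torch.randn(1, 3, 640, 640)
f0 = net.stage1(x)   # stride 8  -> (1, 64, 80, 80)
f1 = net.stage2(f0)  # stride 16 -> (1, 128, 40, 40)
f2 = net.stage3(f1)  # stride 32 -> (1, 256, 20, 20)
print(f0.shape, f1.shape, f2.shape)
```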
--------------------------------------------------------------------------------
/face_detection/model/prior_box.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | from math import ceil
3 |
4 | import torch
5 |
6 |
7 | class PriorBox:
8 | def __init__(self, cfg, image_size=None):
9 | self.min_sizes = cfg['min_sizes']
10 | self.steps = cfg['steps']
11 | self.clip = cfg['clip']
12 | self.image_size = image_size
13 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
14 | self.name = "s"
15 |
16 | def forward(self):
17 | anchors = []
18 | for k, f in enumerate(self.feature_maps):
19 | min_sizes = self.min_sizes[k]
20 | for i, j in product(range(f[0]), range(f[1])):
21 | for min_size in min_sizes:
22 | s_kx = min_size / self.image_size[1]
23 | s_ky = min_size / self.image_size[0]
24 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
25 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
26 | for cy, cx in product(dense_cy, dense_cx):
27 | anchors += [cx, cy, s_kx, s_ky]
28 |
29 | # back to torch land
30 | output = torch.Tensor(anchors).view(-1, 4)
31 | if self.clip:
32 | output.clamp_(min=0, max=1)
33 | return output
34 |
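Each feature-map cell contributes one anchor per entry in `min_sizes[k]`, encoded as normalized `(cx, cy, w, h)`. A short sketch of how the detection scripts use this class; the `min_sizes`/`steps` values below are only illustrative, the real ones come from the training config in data/config.py:

```python
from model.prior_box import PriorBox

cfg = {
    "min_sizes": [[16, 32], [64, 128], [256, 512]],  # illustrative values
    "steps": [8, 16, 32],
    "clip": False,
}
priors = PriorBox(cfg, image_size=(640, 640)).forward()
print(priors.shape)  # (16800, 4): (80*80 + 40*40 + 20*20) cells * 2 sizes each
```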
--------------------------------------------------------------------------------
/face_detection/model/retinaface.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torchvision.models.feature_extraction import create_feature_extractor
5 | from torchvision.models import quantization
6 | from torchvision.ops import FeaturePyramidNetwork
7 |
8 | from .networks import SSH, MobileNetV1
9 |
10 |
11 | class ClassHead(nn.Conv2d):
12 | def __init__(self, in_channels=512, num_anchors=3):
13 | super().__init__(in_channels, num_anchors*2, kernel_size=1)
14 | self.num_anchors = num_anchors
15 |
16 | def forward(self, input):
17 | out = self._conv_forward(input, self.weight, self.bias)
18 | out = out.permute(0, 2, 3, 1).contiguous()
19 | return out.view(out.size(0), -1, 2)
20 |
21 |
22 | class BboxHead(nn.Conv2d):
23 | def __init__(self, in_channels=512, num_anchors=3):
24 | super().__init__(in_channels, num_anchors*4, kernel_size=1)
25 |
26 | def forward(self, input):
27 | out = self._conv_forward(input, self.weight, self.bias)
28 | out = out.permute(0, 2, 3, 1).contiguous()
29 | return out.view(out.size(0), -1, 4)
30 |
31 |
32 | class LandmarkHead(nn.Conv2d):
33 | def __init__(self, in_channels=512, num_anchors=3):
34 | super().__init__(in_channels, num_anchors*10, kernel_size=1)
35 |
36 | def forward(self, input):
37 | out = self._conv_forward(input, self.weight, self.bias)
38 | out = out.permute(0, 2, 3, 1).contiguous()
39 | return out.view(out.size(0), -1, 10)
40 |
41 |
42 | class RetinaFace(nn.Module):
43 | def __init__(self, backbone, in_channel, out_channel, **kwargs):
44 | super().__init__()
45 | assert backbone in ("mobilenet0.25", "resnet50")
46 | if backbone == "mobilenet0.25":
47 | model = MobileNetV1()
48 | ckpt_file = "./weights/mobilenet0.25_pretrain.pt"
49 | try:
50 | checkpoint = torch.load(ckpt_file, map_location="cpu")
51 | from collections import OrderedDict
52 | new_state_dict = OrderedDict()
53 | for k, v in checkpoint['state_dict'].items():
54 | name = k[7:] # remove module.
55 | new_state_dict[name] = v
56 | # load params
57 | model.load_state_dict(new_state_dict)
58 |             except FileNotFoundError:
59 | print(f"{ckpt_file} not found!")
60 | return_nodes={
61 | "stage1": "feat0",
62 | "stage2": "feat1",
63 | "stage3": "feat2",
64 | }
65 | else:
66 | import torchvision.models as models
67 | model = models.resnet50(pretrained=True)
68 | return_nodes={
69 | "layer2": "feat0",
70 | "layer3": "feat1",
71 | "layer4": "feat2",
72 | }
73 |
74 | self.body = create_feature_extractor(model, return_nodes=return_nodes)
75 | in_channels_stage2 = in_channel
76 | in_channels_list = [
77 | in_channels_stage2 * 2,
78 | in_channels_stage2 * 4,
79 | in_channels_stage2 * 8,
80 | ]
81 | out_channels = out_channel
82 | self.fpn = FeaturePyramidNetwork(in_channels_list, out_channels)
83 | self.ssh1 = SSH(out_channels, out_channels)
84 | self.ssh2 = SSH(out_channels, out_channels)
85 | self.ssh3 = SSH(out_channels, out_channels)
86 |
87 | fpn_num = len(in_channels_list)
88 | self.class_head = self._make_class_head(fpn_num=fpn_num, in_channels=out_channels)
89 | self.bbox_head = self._make_bbox_head(fpn_num=fpn_num, in_channels=out_channels)
90 | self.landmark_head = self._make_landmark_head(fpn_num=fpn_num, in_channels=out_channels)
91 |
92 | def _make_class_head(self, fpn_num=3, in_channels=64, anchor_num=2):
93 | classhead = nn.ModuleList()
94 | for i in range(fpn_num):
95 | classhead.append(ClassHead(in_channels, anchor_num))
96 | return classhead
97 |
98 | def _make_bbox_head(self, fpn_num=3, in_channels=64, anchor_num=2):
99 | bboxhead = nn.ModuleList()
100 | for i in range(fpn_num):
101 | bboxhead.append(BboxHead(in_channels, anchor_num))
102 | return bboxhead
103 |
104 | def _make_landmark_head(self, fpn_num=3, in_channels=64, anchor_num=2):
105 | landmarkhead = nn.ModuleList()
106 | for i in range(fpn_num):
107 | landmarkhead.append(LandmarkHead(in_channels, anchor_num))
108 | return landmarkhead
109 |
110 | def forward(self, inputs):
111 | out = self.body(inputs)
112 |
113 | # FPN
114 | out = self.fpn(out)
115 |
116 | # SSH
117 | feature0 = self.ssh1(out["feat0"])
118 | feature1 = self.ssh2(out["feat1"])
119 | feature2 = self.ssh3(out["feat2"])
120 |
121 | bbox_regressions = torch.cat([
122 | self.bbox_head[0](feature0),
123 | self.bbox_head[1](feature1),
124 | self.bbox_head[2](feature2),
125 | ], dim=1)
126 |
127 | classifications = torch.cat([
128 | self.class_head[0](feature0),
129 | self.class_head[1](feature1),
130 | self.class_head[2](feature2),
131 | ], dim=1)
132 |
133 | lm_regressions = torch.cat([
134 | self.landmark_head[0](feature0),
135 | self.landmark_head[1](feature1),
136 | self.landmark_head[2](feature2),
137 | ], dim=1)
138 |
139 | if not self.training:
140 | classifications = F.softmax(classifications, dim=-1)
141 | return bbox_regressions, classifications, lm_regressions
142 |
143 | def fuse_model(self) -> None:
144 | for m in self.modules():
145 | if type(m) == quantization.mobilenetv2.QuantizableMobileNetV2:
146 | m.fuse_model()
147 | elif type(m) == quantization.mobilenetv3.QuantizableMobileNetV3:
148 | m.fuse_model()
149 |
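A minimal end-to-end sketch of constructing the network and checking the output shapes. The `in_channel=32` / `out_channel=64` values are assumptions chosen so that `in_channels_list` becomes `[64, 128, 256]`, matching the MobileNetV1 stage outputs above; at inference time the actual values come from the checkpoint config:

```python
import torch

from model.retinaface import RetinaFace

# assumed channel settings for the mobilenet0.25 backbone
net = RetinaFace(backbone="mobilenet0.25", in_channel=32, out_channel=64).eval()
with torch.no_grad():
    loc, conf, landms = net(torch.randn(1, 3, 640, 640))
# one row per anchor: 16800 anchors for a 640x640 input with 2 anchors per cell
print(loc.shape, conf.shape, landms.shape)  # (1, 16800, 4) (1, 16800, 2) (1, 16800, 10)
```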
--------------------------------------------------------------------------------
/face_detection/test_fddb.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from torchvision.ops import nms
9 |
10 | from model.prior_box import PriorBox
11 | from model.retinaface import RetinaFace
12 | from utils.box_utils import decode, decode_landm
13 | from utils.misc import draw_keypoint
14 | from utils.timer import Timer
15 |
16 | parser = argparse.ArgumentParser(description='Retinaface')
17 | parser.add_argument(
18 | '--checkpoint', type=str,
19 | default='./weights/mobilenet0.25_final.pt',
20 | help='Trained state_dict file path to open'
21 | )
22 | parser.add_argument('--save-folder', default='eval/', type=str, help='Dir to save results')
23 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
24 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT')
25 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold')
26 | parser.add_argument('--top-k', default=5000, type=int, help='top_k')
27 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold')
28 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k')
29 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results')
30 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold')
31 |
32 |
33 | def main():
34 | args = parser.parse_args()
35 | assert os.path.isfile(args.checkpoint)
36 |
37 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
38 | cfg = checkpoint["config"]
39 | device = torch.device("cpu" if args.cpu else "cuda")
40 |
41 | # net and model
42 | net = RetinaFace(**cfg)
43 | net.load_state_dict(checkpoint["net_state_dict"])
44 | net.eval().requires_grad_(False)
45 | net.to(device)
46 | if args.jit:
47 | net = torch.jit.script(net)
48 | print('Finished loading model!')
49 | torch.backends.cudnn.benchmark = True
50 |
51 | # save file
52 | os.makedirs(args.save_folder, exist_ok=True)
53 | fw = open(os.path.join(args.save_folder, 'FDDB_dets.txt'), 'w')
54 |
55 | # testing dataset
56 | testset_folder = 'data/FDDB/images/'
57 | testset_list = 'data/FDDB/img_list.txt'
58 | with open(testset_list, 'r') as fr:
59 | test_dataset = fr.read().split()
60 | num_images = len(test_dataset)
61 |
62 | # testing scale
63 | resize = 1
64 |
65 | _t = {
66 | "preprocess": Timer(),
67 | "forward": Timer(),
68 | "postprocess": Timer(),
69 | "misc": Timer(),
70 | }
71 |
72 | # testing begin
73 | for i, img_name in enumerate(test_dataset):
74 | image_path = testset_folder + img_name + '.jpg'
75 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
76 |
77 | # NOTE preprocessing.
78 | _t["preprocess"].tic()
79 | img = img_raw - (104, 117, 123)
80 | if resize != 1:
81 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
82 | im_height, im_width, _ = img.shape
83 | scale = torch.as_tensor(
84 | [im_width, im_height, im_width, im_height],
85 | dtype=torch.float, device=device
86 | )
87 | img = img.transpose(2, 0, 1)
88 | img = np.float32(img)
89 | img = torch.from_numpy(img).unsqueeze(0)
90 | img = img.to(device)
91 | _t["preprocess"].toc()
92 |
93 | # NOTE forward.
94 | _t["forward"].tic()
95 | loc, conf, landms = net(img) # forward pass
96 | _t["forward"].toc()
97 |
98 | # NOTE misc.
99 | _t["postprocess"].tic()
100 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
101 | priors = priorbox.forward()
102 | priors = priors.to(device)
103 | prior_data = priors.data
104 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
105 | boxes = boxes * scale / resize
106 | scores = conf.squeeze(0)[:, 1]
107 |
108 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
109 | scale1 = torch.as_tensor(
110 | [im_width, im_height] * 5, dtype=torch.float, device=device
111 | )
112 | scale1 = scale1.to(device)
113 | landms = landms * scale1 / resize
114 |
115 | # ignore low scores
116 | inds = torch.where(scores > args.confidence_threshold)[0]
117 | boxes = boxes[inds]
118 | landms = landms[inds]
119 | scores = scores[inds]
120 |
121 | # keep top-K before NMS
122 | order = scores.argsort()
123 | boxes = boxes[order][:args.top_k]
124 | landms = landms[order][:args.top_k]
125 | scores = scores[order][:args.top_k]
126 | _t["postprocess"].toc()
127 |
128 | # do NMS
129 | _t["misc"].tic()
130 | keep = nms(boxes, scores, args.nms_threshold)
131 | boxes = boxes[keep]
132 | scores = scores[keep]
133 | landms = landms[keep]
134 |
135 | boxes = boxes.cpu().numpy()
136 | scores = scores.cpu().numpy()
137 | landms = landms.cpu().numpy()
138 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
139 | dets = np.concatenate((dets, landms), axis=1)
140 | _t["misc"].toc()
141 |
142 | # save dets
143 | fw.write(f'{img_name:s}\n')
144 |         fw.write(f'{dets.shape[0]:d}\n')
145 | for k in range(dets.shape[0]):
146 | xmin, ymin, xmax, ymax = dets[k, :4]
147 | score = dets[k, 4]
148 | w = xmax - xmin + 1
149 | h = ymax - ymin + 1
150 | # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score))
151 | fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score))
152 |
153 | print(
154 | f"im_detect: {i+1:d}/{num_images:d}\t"
155 | f"preprocess_time: {_t['preprocess'].average_time:.4f}s\t"
156 | f"forward_time: {_t['forward'].average_time:.4f}s\t"
157 | f"postprocess_time: {_t['postprocess'].average_time:.4f}s\t"
158 | f"misc_time: {_t['misc'].average_time:.4f}s"
159 | )
160 |
161 | # show image
162 | if args.save_image:
163 | draw_keypoint(img_raw, dets, args.vis_thres)
164 | # save image
165 | if not os.path.exists("./results/"):
166 | os.makedirs("./results/")
167 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw)
168 |
169 | fw.close()
170 |
171 |
172 | if __name__ == "__main__":
173 | main()
174 |
--------------------------------------------------------------------------------
/face_detection/test_widerface.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from torchvision.ops import nms
9 |
10 | from model.prior_box import PriorBox
11 | from model.retinaface import RetinaFace
12 | from utils.box_utils import decode, decode_landm
13 | from utils.misc import draw_keypoint
14 | from utils.timer import Timer
15 |
16 | parser = argparse.ArgumentParser(description='Retinaface')
17 | parser.add_argument(
18 | '--checkpoint', type=str,
19 | default='./weights/mobilenet0.25_final.pt',
20 | help='Trained state_dict file path to open'
21 | )
22 | parser.add_argument('--origin-size', default=True, type=lambda s: str(s).lower() not in ('false', '0', 'no'), help='Whether to use the original image size for evaluation')
23 | parser.add_argument('--save-folder', default='./widerface_evaluate/widerface_txt/', type=str, help='Dir to save txt results')
24 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
25 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT')
26 | parser.add_argument('--dataset-folder', default='./data/widerface/val/images/', type=str, help='dataset path')
27 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold')
28 | parser.add_argument('--top-k', default=5000, type=int, help='top_k')
29 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold')
30 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k')
31 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results')
32 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold')
33 |
34 |
35 | def main():
36 | args = parser.parse_args()
37 | assert os.path.isfile(args.checkpoint)
38 |
39 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
40 | cfg = checkpoint["config"]
41 | device = torch.device("cpu" if args.cpu else "cuda")
42 |
43 | # net and model
44 | net = RetinaFace(**cfg)
45 | net.load_state_dict(checkpoint["net_state_dict"])
46 | net.eval().requires_grad_(False)
47 | net.to(device)
48 | if args.jit:
49 | net = torch.jit.script(net)
50 | print('Finished loading model!')
51 | torch.backends.cudnn.benchmark = True
52 |
53 | # testing dataset
54 | testset_folder = args.dataset_folder
55 | testset_list = args.dataset_folder[:-7] + "wider_val.txt"
56 |
57 | with open(testset_list, 'r') as fr:
58 | test_dataset = fr.read().split()
59 | num_images = len(test_dataset)
60 | os.makedirs("./results/", exist_ok=True)
61 |
62 | target_size = 1600.0
63 | max_size = 2150.0
64 | _t = {'forward_pass': Timer(), 'misc': Timer()}
65 | # testing begin
66 | for i, img_name in enumerate(test_dataset):
67 | image_path = testset_folder + img_name
68 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
69 | img = np.float32(img_raw)
70 |
71 | # testing scale
72 | im_shape = img.shape
73 | im_size_min = np.min(im_shape[0:2])
74 | im_size_max = np.max(im_shape[0:2])
75 | resize = target_size / im_size_min
76 | # prevent bigger axis from being more than max_size:
77 | if np.round(resize * im_size_max) > max_size:
78 | resize = float(max_size) / float(im_size_max)
79 | if args.origin_size:
80 | resize = 1
81 |
82 | if resize != 1:
83 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
84 | im_height, im_width, _ = img.shape
85 | scale = torch.as_tensor(
86 | [im_width, im_height, im_width, im_height],
87 | dtype=torch.float, device=device
88 | )
89 | img -= (104, 117, 123)
90 | img = img.transpose(2, 0, 1)
91 | img = torch.from_numpy(img).unsqueeze(0)
92 | img = img.to(device)
93 |
94 | _t['forward_pass'].tic()
95 | loc, conf, landms = net(img) # forward pass
96 | _t['forward_pass'].toc()
97 | _t['misc'].tic()
98 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
99 | priors = priorbox.forward()
100 | priors = priors.to(device)
101 | prior_data = priors.data
102 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
103 | boxes = boxes * scale / resize
104 | scores = conf.squeeze(0)[:, 1]
105 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
106 | scale1 = torch.as_tensor(
107 | [im_width, im_height] * 5, dtype=torch.float, device=device
108 | )
109 | landms = landms * scale1 / resize
110 |
111 | # ignore low scores
112 | inds = torch.where(scores > args.confidence_threshold)[0]
113 | boxes = boxes[inds]
114 | landms = landms[inds]
115 | scores = scores[inds]
116 |
117 | # keep top-K before NMS
118 | order = scores.argsort()
119 | boxes = boxes[order][:args.top_k]
120 | landms = landms[order][:args.top_k]
121 | scores = scores[order][:args.top_k]
122 |
123 | # do NMS
124 | keep = nms(boxes, scores, args.nms_threshold)
125 | boxes = boxes[keep]
126 | scores = scores[keep]
127 | landms = landms[keep]
128 |
129 | boxes = boxes.cpu().numpy()
130 | scores = scores.cpu().numpy()
131 | landms = landms.cpu().numpy()
132 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
133 | dets = np.concatenate((dets, landms), axis=1)
134 | _t['misc'].toc()
135 |
136 | # --------------------------------------------------------------------
137 | save_name = args.save_folder + img_name[:-4] + ".txt"
138 | dirname = os.path.dirname(save_name)
139 | if not os.path.isdir(dirname):
140 | os.makedirs(dirname)
141 | with open(save_name, "w") as fd:
142 | bboxs = dets
143 | file_name = os.path.basename(save_name)[:-4] + "\n"
144 | bboxs_num = str(len(bboxs)) + "\n"
145 | fd.write(file_name)
146 | fd.write(bboxs_num)
147 | for box in bboxs:
148 | x = int(box[0])
149 | y = int(box[1])
150 | w = int(box[2]) - int(box[0])
151 | h = int(box[3]) - int(box[1])
152 | confidence = str(box[4])
153 | line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
154 | fd.write(line)
155 |
156 |         print(f"im_detect: {i+1:d}/{num_images:d} "
157 | f"forward_pass_time: {_t['forward_pass'].average_time:.4f}s misc: {_t['misc'].average_time:.4f}s")
158 |
159 | # save image
160 | if args.save_image:
161 | draw_keypoint(img_raw, dets, args.vis_thres)
162 |
163 | # save image
164 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw)
165 |
166 |
167 | if __name__ == "__main__":
168 | main()
169 |
--------------------------------------------------------------------------------
/face_detection/train_detector.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import datetime
4 | import math
5 | import os
6 | import time
7 |
8 | import torch
9 |
10 | from data import WiderFaceDetection, cfg_mnet, cfg_re50, preproc
11 | from model.multibox_loss import MultiBoxLoss
12 | from model.prior_box import PriorBox
13 | from model.retinaface import RetinaFace
14 |
15 | parser = argparse.ArgumentParser(description='Retinaface Training')
16 | parser.add_argument('--dataset', default='./data/widerface/train/label.txt', help='Training dataset directory')
17 | parser.add_argument('--network', default='mobilenet0.25', choices={"mobilenet0.25", "resnet50"})
18 | parser.add_argument('--batch-size', default=32, type=int, help='Batch size')
19 | parser.add_argument('--num-workers', default=4, type=int, help='Number of workers used in dataloading')
20 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate')
21 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
22 | parser.add_argument('--resume-net', default=None, help='resume net for retraining')
23 | parser.add_argument('--resume-epoch', default=0, type=int, help='resume iter for retraining')
24 | parser.add_argument('--weight-decay', default=5e-4, type=float, help='Weight decay for SGD')
25 | parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD')
26 | parser.add_argument('--save-folder', default='./weights/', help='Location to save checkpoint models')
27 | args = parser.parse_args()
28 |
29 |
30 | os.makedirs(args.save_folder, exist_ok=True)
31 | if args.network == "mobilenet0.25":
32 | cfg = cfg_mnet
33 | elif args.network == "resnet50":
34 | cfg = cfg_re50
35 |
36 | RGB_MEAN = (104, 117, 123) # bgr order
37 | img_dim = cfg['image_size']
38 | batch_size = cfg['batch_size']
39 | max_epoch = cfg['epoch']
40 |
41 | initial_lr = args.lr
42 | gamma = args.gamma
43 | training_dataset = args.dataset
44 | save_folder = args.save_folder
45 |
46 |
47 | def initialize_network(cfg, checkpoint=None, print_net=False):
48 | net = RetinaFace(**cfg)
49 | if print_net:
50 | print("Printing net...")
51 | print(net)
52 | if checkpoint is not None:
53 | print('Loading resume network...')
54 | net.load_state_dict(checkpoint["net_state_dict"])
55 |
56 | if torch.cuda.is_available():
57 | net.cuda()
58 | num_gpu = torch.cuda.device_count()
59 | if num_gpu > 1:
60 | net = torch.nn.DataParallel(net)
61 | return cfg, net
62 |
63 |
64 | def training_loop(net, optimizer, criterion, dataloader, cfg):
65 | assert isinstance(net, torch.nn.Module)
66 | assert isinstance(optimizer, torch.optim.Optimizer)
67 | assert isinstance(dataloader, torch.utils.data.DataLoader)
68 | assert isinstance(cfg, dict)
69 |
70 | priorbox = PriorBox(cfg, image_size=(cfg['image_size'],)*2)
71 | with torch.no_grad():
72 | priors = priorbox.forward()
73 | priors = priors.cuda()
74 |
75 | net.train()
76 | epoch = 0 + args.resume_epoch
77 | print('Loading Dataset...')
78 |
79 | epoch_size = math.ceil(len(dataloader))
80 | max_iter = max_epoch * epoch_size
81 |
82 | stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
83 | step_index = 0
84 |
85 | start_iter = 0
86 | if args.resume_epoch > 0:
87 | start_iter += args.resume_epoch * epoch_size
88 |
89 | for iteration in range(start_iter, max_iter):
90 | load_t0 = time.perf_counter()
91 | if iteration in stepvalues:
92 | step_index += 1
93 | lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)
94 |
95 | # load train data
96 | try:
97 | images, targets = next(batch_iterator)
98 |         except (NameError, StopIteration):  # first iteration or end of an epoch
99 | batch_iterator = iter(dataloader)
100 | if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
101 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict()
102 | torch.save(
103 | {
104 | "net_state_dict": net_state_dict,
105 | "epoch": epoch,
106 | "config": cfg,
107 | }, save_folder + f"{cfg['backbone']}_epoch{epoch:03d}.pt"
108 | )
109 | epoch += 1
110 | images, targets = next(batch_iterator)
111 |
112 | images = images.cuda()
113 | targets = [anno.cuda() for anno in targets]
114 |
115 | # forward
116 | out = net(images)
117 |
118 | # backprop
119 | optimizer.zero_grad(set_to_none=True)
120 | loss_l, loss_c, loss_landm = criterion(out, priors, targets)
121 | loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
122 | loss.backward()
123 | optimizer.step()
124 |
125 | load_t1 = time.perf_counter()
126 | if (iteration + 1) % 10 == 0:
127 | batch_time = load_t1 - load_t0
128 | eta = int(batch_time * (max_iter - iteration))
129 | print(
130 | f"Epoch:{epoch:03d}/{max_epoch:03d} "
131 | f'|| Epochiter: {(iteration % epoch_size)+1}/{epoch_size} '
132 | f'|| Iter: {iteration+1}/{max_iter} '
133 | f'|| Loc: {loss_l.item():.3f} Cla: {loss_c.item():.3f} Landm: {loss_landm.item():.3f} '
134 | f'|| LR: {lr:.8f} || Batchtime: {batch_time:.4f} s '
135 | f'|| ETA: {str(datetime.timedelta(seconds=eta))}'
136 | )
137 |
138 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict()
139 | torch.save(
140 | {
141 | "net_state_dict": net_state_dict,
142 | "epoch": epoch,
143 | "config": cfg,
144 | }, save_folder + f"{cfg['backbone']}_final.pt"
145 | )
146 |
147 |
148 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size):
149 | """Sets the learning rate
150 | # Adapted from PyTorch Imagenet example:
151 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py
152 | """
153 | warmup_epoch = -1
154 | if epoch <= warmup_epoch:
155 | lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch)
156 | else:
157 | lr = initial_lr * (gamma ** (step_index))
158 | for param_group in optimizer.param_groups:
159 | param_group['lr'] = lr
160 | return lr
161 |
162 |
163 | def main():
164 | if args.resume_net is not None and os.path.isfile(args.resume_net):
165 | checkpoint = torch.load(args.resume_net, map_location="cpu")
166 | cfg = checkpoint["config"]
167 | else:
168 | checkpoint = None
169 | if args.network == "mobilenet0.25":
170 | cfg = cfg_mnet
171 | elif args.network == "resnet50":
172 | cfg = cfg_re50
173 |
174 | cfg, net = initialize_network(cfg, checkpoint)
175 | torch.backends.cudnn.benchmark = True
176 |
177 | optimizer = torch.optim.SGD(
178 | net.parameters(), lr=initial_lr,
179 | momentum=args.momentum, weight_decay=args.weight_decay,
180 | )
181 | criterion = MultiBoxLoss(2, 0.35, True, 0, True, 7, 0.35, False)
182 |
183 | dataset = WiderFaceDetection(training_dataset, preproc(img_dim, RGB_MEAN))
184 | dataloader = torch.utils.data.DataLoader(
185 | dataset, batch_size, shuffle=True,
186 | num_workers=args.num_workers, collate_fn=dataset.collate,
187 | )
188 |
189 | training_loop(net, optimizer, criterion, dataloader, cfg)
190 |
191 |
192 | if __name__ == '__main__':
193 | main()
194 |
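adjust_learning_rate implements a plain step schedule: the learning rate is multiplied by `gamma` each time the iteration count passes `cfg['decay1'] * epoch_size` and `cfg['decay2'] * epoch_size` (warmup is effectively disabled since `warmup_epoch = -1`). A tiny illustration with the default `--lr` and `--gamma` values:

```python
initial_lr, gamma = 1e-3, 0.1  # the argparse defaults above

for step_index in range(3):
    print(step_index, initial_lr * gamma ** step_index)
# 0 -> 0.001 (before decay1), 1 -> 0.0001 (after decay1), 2 -> 1e-05 (after decay2)
```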
--------------------------------------------------------------------------------
/face_detection/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/utils/__init__.py
--------------------------------------------------------------------------------
/face_detection/utils/misc.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | from torchvision.ops import nms
5 |
6 | from .box_utils import decode, decode_landm
7 |
8 |
9 | def draw_keypoint(image, dets, threshold):
10 | for b in dets:
11 | if b[4] < threshold:
12 | continue
13 | text = f"{b[4]:.4f}"
14 | b = list(map(round, b))
15 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
16 | cx = b[0]
17 | cy = b[1] + 12
18 | cv2.putText(
19 | image, text, (cx, cy),
20 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)
21 | )
22 |
23 | # landms
24 | cv2.circle(image, (b[5], b[6]), 1, (0, 0, 255), 4)
25 | cv2.circle(image, (b[7], b[8]), 1, (0, 255, 255), 4)
26 | cv2.circle(image, (b[9], b[10]), 1, (255, 0, 255), 4)
27 | cv2.circle(image, (b[11], b[12]), 1, (0, 255, 0), 4)
28 | cv2.circle(image, (b[13], b[14]), 1, (255, 0, 0), 4)
29 |
30 |
31 | def inference(
32 | network, image, scale, scale1, prior_data,
33 | cfg, confidence_threshold, nms_threshold, device
34 | ):
35 | img = image - (104, 117, 123)
36 | img = img.transpose(2, 0, 1)
37 | img = np.float32(img)
38 | img = torch.from_numpy(img).unsqueeze(0)
39 | img = img.to(device)
40 |
41 | loc, conf, landms = network(img) # forward pass
42 |
43 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
44 | boxes *= scale
45 | scores = conf.squeeze(0)[:, 1]
46 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
47 | landms *= scale1
48 |
49 | # ignore low scores
50 | inds = torch.where(scores > confidence_threshold)[0]
51 | boxes = boxes[inds]
52 | landms = landms[inds]
53 | scores = scores[inds]
54 |
55 | # do NMS
56 | keep = nms(boxes, scores, nms_threshold)
57 | boxes = boxes[keep]
58 | scores = scores[keep]
59 | landms = landms[keep]
60 |
61 | boxes = boxes.cpu().numpy()
62 | scores = scores.cpu().numpy()
63 | landms = landms.cpu().numpy()
64 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
65 | dets = np.concatenate((dets, landms), axis=1)
66 | return dets
--------------------------------------------------------------------------------
/face_detection/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer:
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 |
20 | def tic(self):
21 |         # use time.perf_counter, a monotonic high-resolution clock,
22 |         # instead of time.time/time.clock
23 | self.start_time = time.perf_counter()
24 |
25 | def toc(self, average=True):
26 | self.diff = time.perf_counter() - self.start_time
27 | self.total_time += self.diff
28 | self.calls += 1
29 | self.average_time = self.total_time / self.calls
30 | if average:
31 | return self.average_time
32 | else:
33 | return self.diff
34 |
35 | def clear(self):
36 | self.total_time = 0.
37 | self.calls = 0
38 | self.start_time = 0.
39 | self.diff = 0.
40 | self.average_time = 0.
41 |
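The timer is used the same way throughout the detection scripts: call `tic()` before the measured block and `toc()` after it, then read `average_time` for the running mean. A minimal sketch with a stand-in workload:

```python
import time

from utils.timer import Timer

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)  # stand-in workload
    t.toc()
print(f"{t.calls} calls, {t.average_time:.4f}s on average")
```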
--------------------------------------------------------------------------------
/face_detection/webcam_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import torch.backends.cudnn as cudnn
9 |
10 | from model.prior_box import PriorBox
11 | from model.retinaface import RetinaFace
12 | from utils.misc import draw_keypoint, inference
13 |
14 | parser = argparse.ArgumentParser(description='Retinaface')
15 | parser.add_argument(
16 | '--checkpoint', type=str,
17 | default='./weights/mobilenet0.25_final.pt',
18 | help='Trained state_dict file path to open'
19 | )
20 | parser.add_argument(
21 | '--cpu', action="store_true", default=False,
22 | help='Use cpu inference'
23 | )
24 | parser.add_argument(
25 | '--jit', action="store_true", default=False,
26 | help='Use JIT'
27 | )
28 | parser.add_argument(
29 | '--confidence-threshold', type=float, default=0.02,
30 | help='confidence_threshold'
31 | )
32 | parser.add_argument(
33 | '--nms-threshold', type=float, default=0.4,
34 | help='nms_threshold'
35 | )
36 | parser.add_argument(
37 | '--vis-thres', type=float, default=0.5,
38 | help='visualization_threshold'
39 | )
40 | parser.add_argument(
41 | '-s', '--save-image', action="store_true", default=False,
42 | help='show detection results'
43 | )
44 | parser.add_argument(
45 | '--save-dir', type=str, default='demo',
46 | help='Dir to save results'
47 | )
48 |
49 |
50 | def main():
51 | args = parser.parse_args()
52 | assert os.path.isfile(args.checkpoint)
53 |
54 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
55 | cfg = checkpoint["config"]
56 | device = torch.device("cpu" if args.cpu else "cuda")
57 |
58 | # net and model
59 | net = RetinaFace(**cfg)
60 | net.load_state_dict(checkpoint["net_state_dict"])
61 | net.eval().requires_grad_(False)
62 | net.to(device)
63 | print('Finished loading model!')
64 | cudnn.benchmark = True
65 |
66 | # prepare testing
67 | cap = cv2.VideoCapture(0)
68 | assert cap.isOpened()
69 | ret_val, img_tmp = cap.read()
70 | im_height, im_width, _ = img_tmp.shape
71 | scale = torch.Tensor([im_width, im_height, im_width, im_height])
72 | scale = scale.to(device)
73 |
74 | scale1 = torch.Tensor([im_width, im_height] * 5)
75 | scale1 = scale1.to(device)
76 |
77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
78 | priors = priorbox.forward()
79 | priors = priors.to(device)
80 | prior_data = priors.data
81 |
82 | if args.jit:
83 | img_tmp = img_tmp.transpose(2, 0, 1)
84 | img_tmp = np.float32(img_tmp)
85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0)
86 | dummy = img_tmp.to(device)
87 | net = torch.jit.trace(net, example_inputs=dummy)
88 |
89 | if args.save_image:
90 | nframe = 0
91 | fname = os.path.join(args.save_dir, "{:06d}.jpg")
92 | os.makedirs(args.save_dir, exist_ok=True)
93 |
94 | # testing begin
95 | ret_val, img_raw = cap.read()
96 | while ret_val:
97 | start = cv2.getTickCount()
98 |
99 |         # NOTE full inference: preprocessing, forward pass, and NMS.
100 | dets = inference(
101 | net, img_raw, scale, scale1, prior_data, cfg,
102 | args.confidence_threshold, args.nms_threshold, device
103 | )
104 |
105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start))
106 |         print(f"runtime: {fps:.1f} FPS")
107 | cv2.putText(
108 | img_raw, f"FPS: {fps:.1f}", (5, 15),
109 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)
110 | )
111 |
112 | # show image
113 | draw_keypoint(img_raw, dets, args.vis_thres)
114 |
115 | if args.save_image:
116 | cv2.imwrite(fname.format(nframe), img_raw)
117 | nframe += 1
118 |
119 | cv2.imshow("Face Detection Demo", img_raw)
120 | if cv2.waitKey(1) == 27: # Press ESC button to quit.
121 | break
122 |
123 | ret_val, img_raw = cap.read()
124 |
125 | cap.release()
126 | cv2.destroyAllWindows()
127 |
128 |
129 | if __name__ == "__main__":
130 | main()
--------------------------------------------------------------------------------
/face_detection/weights/mobilenet0.25_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_final.pt
--------------------------------------------------------------------------------
/face_detection/weights/mobilenet0.25_pretrain.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_pretrain.pt
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/README.md:
--------------------------------------------------------------------------------
1 | # WiderFace-Evaluation
2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)
3 |
4 |
5 | ## Usage
6 |
7 |
8 | ##### Before evaluating
9 |
10 | ````
11 | python3 setup.py build_ext --inplace
12 | ````
13 |
14 | ##### evaluating
15 |
16 | **Ground truth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`, `wider_hard_val.mat`
17 |
18 | ````
19 | python3 evaluation.py -p <prediction_dir> -g <ground_truth_dir>
20 | ````
21 |
22 | ## Bugs & Problems
23 | Please open an issue.
24 |
25 | ## Acknowledgements
26 |
27 | Some code is borrowed from Sergey Karayev.
28 |
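The same evaluation can also be driven from Python once the Cython extension has been built; a short sketch, run from this directory with predictions in the default `./widerface_txt/` layout produced by test_widerface.py:

```python
from evaluation import evaluation

# prints Easy / Medium / Hard validation AP
evaluation("./widerface_txt/", "./ground_truth/")
```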
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/box_overlaps.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float64
13 | ctypedef np.float64_t DTYPE_t
14 |
15 | def bbox_overlaps(
16 | np.ndarray[DTYPE_t, ndim=2] boxes,
17 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 | """
19 | Parameters
20 | ----------
21 | boxes: (N, 4) ndarray of float
22 | query_boxes: (K, 4) ndarray of float
23 | Returns
24 | -------
25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 | """
27 | cdef unsigned int N = boxes.shape[0]
28 | cdef unsigned int K = query_boxes.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 | cdef DTYPE_t iw, ih, box_area
31 | cdef DTYPE_t ua
32 | cdef unsigned int k, n
33 | for k in range(K):
34 | box_area = (
35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 | )
38 | for n in range(N):
39 | iw = (
40 | min(boxes[n, 2], query_boxes[k, 2]) -
41 | max(boxes[n, 0], query_boxes[k, 0]) + 1
42 | )
43 | if iw > 0:
44 | ih = (
45 | min(boxes[n, 3], query_boxes[k, 3]) -
46 | max(boxes[n, 1], query_boxes[k, 1]) + 1
47 | )
48 | if ih > 0:
49 | ua = float(
50 | (boxes[n, 2] - boxes[n, 0] + 1) *
51 | (boxes[n, 3] - boxes[n, 1] + 1) +
52 | box_area - iw * ih
53 | )
54 | overlaps[n, k] = iw * ih / ua
55 | return overlaps
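A small usage check after building the extension with `python3 setup.py build_ext --inplace`; note the `+ 1` pixel convention in the widths and heights:

```python
import numpy as np

from bbox import bbox_overlaps

boxes = np.array([[0., 0., 9., 9.]])                      # one 10x10 box
query = np.array([[0., 0., 9., 9.], [5., 5., 14., 14.]])  # identical box and a shifted one
print(bbox_overlaps(boxes, query))  # approx. [[1.0, 0.1429]] (25 / 175 with the +1 convention)
```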
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/evaluation.py:
--------------------------------------------------------------------------------
1 | """
2 | WiderFace evaluation code
3 | author: wondervictor
4 | mail: tianhengcheng@gmail.com
5 | copyright@wondervictor
6 | """
7 |
8 | import os
9 | import tqdm
10 | import pickle
11 | import argparse
12 | import numpy as np
13 | from scipy.io import loadmat
14 | from bbox import bbox_overlaps
15 | from IPython import embed
16 |
17 |
18 | def get_gt_boxes(gt_dir):
19 | """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)"""
20 |
21 | gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat'))
22 | hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat'))
23 | medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat'))
24 | easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat'))
25 |
26 | facebox_list = gt_mat['face_bbx_list']
27 | event_list = gt_mat['event_list']
28 | file_list = gt_mat['file_list']
29 |
30 | hard_gt_list = hard_mat['gt_list']
31 | medium_gt_list = medium_mat['gt_list']
32 | easy_gt_list = easy_mat['gt_list']
33 |
34 | return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list
35 |
36 |
37 | def get_gt_boxes_from_txt(gt_path, cache_dir):
38 |
39 | cache_file = os.path.join(cache_dir, 'gt_cache.pkl')
40 | if os.path.exists(cache_file):
41 | f = open(cache_file, 'rb')
42 | boxes = pickle.load(f)
43 | f.close()
44 | return boxes
45 |
46 | f = open(gt_path, 'r')
47 | state = 0
48 | lines = f.readlines()
49 | lines = list(map(lambda x: x.rstrip('\r\n'), lines))
50 | boxes = {}
51 | print(len(lines))
52 | f.close()
53 | current_boxes = []
54 | current_name = None
55 | for line in lines:
56 | if state == 0 and '--' in line:
57 | state = 1
58 | current_name = line
59 | continue
60 | if state == 1:
61 | state = 2
62 | continue
63 |
64 | if state == 2 and '--' in line:
65 | state = 1
66 | boxes[current_name] = np.array(current_boxes).astype('float32')
67 | current_name = line
68 | current_boxes = []
69 | continue
70 |
71 | if state == 2:
72 | box = [float(x) for x in line.split(' ')[:4]]
73 | current_boxes.append(box)
74 | continue
75 |
76 | f = open(cache_file, 'wb')
77 | pickle.dump(boxes, f)
78 | f.close()
79 | return boxes
80 |
81 |
82 | def read_pred_file(filepath):
83 |
84 | with open(filepath, 'r') as f:
85 | lines = f.readlines()
86 | img_file = lines[0].rstrip('\n\r')
87 | lines = lines[2:]
88 |
89 | # b = lines[0].rstrip('\r\n').split(' ')[:-1]
90 | # c = float(b)
91 | # a = map(lambda x: [[float(a[0]), float(a[1]), float(a[2]), float(a[3]), float(a[4])] for a in x.rstrip('\r\n').split(' ')], lines)
92 | boxes = []
93 | for line in lines:
94 | line = line.rstrip('\r\n').split(' ')
95 |         if line[0] == '':
96 | continue
97 | # a = float(line[4])
98 | boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])])
99 | boxes = np.array(boxes)
100 | # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float')
101 | return img_file.split('/')[-1], boxes
102 |
103 |
104 | def get_preds(pred_dir):
105 | events = os.listdir(pred_dir)
106 | boxes = dict()
107 | pbar = tqdm.tqdm(events)
108 |
109 | for event in pbar:
110 | pbar.set_description('Reading Predictions ')
111 | event_dir = os.path.join(pred_dir, event)
112 | event_images = os.listdir(event_dir)
113 | current_event = dict()
114 | for imgtxt in event_images:
115 | imgname, _boxes = read_pred_file(os.path.join(event_dir, imgtxt))
116 | current_event[imgname.rstrip('.jpg')] = _boxes
117 | boxes[event] = current_event
118 | return boxes
119 |
120 |
121 | def norm_score(pred):
122 | """ norm score
123 | pred {key: [[x1,y1,x2,y2,s]]}
124 | """
125 |
126 | max_score = 0
127 | min_score = 1
128 |
129 | for _, k in pred.items():
130 | for _, v in k.items():
131 | if len(v) == 0:
132 | continue
133 | _min = np.min(v[:, -1])
134 | _max = np.max(v[:, -1])
135 | max_score = max(_max, max_score)
136 | min_score = min(_min, min_score)
137 |
138 | diff = max_score - min_score
139 | for _, k in pred.items():
140 | for _, v in k.items():
141 | if len(v) == 0:
142 | continue
143 | v[:, -1] = (v[:, -1] - min_score)/diff
144 |
145 |
146 | def image_eval(pred, gt, ignore, iou_thresh):
147 | """ single image evaluation
148 | pred: Nx5
149 | gt: Nx4
150 | ignore:
151 | """
152 |
153 | _pred = pred.copy()
154 | _gt = gt.copy()
155 | pred_recall = np.zeros(_pred.shape[0])
156 | recall_list = np.zeros(_gt.shape[0])
157 | proposal_list = np.ones(_pred.shape[0])
158 |
159 | _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
160 | _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
161 | _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
162 | _gt[:, 3] = _gt[:, 3] + _gt[:, 1]
163 |
164 | overlaps = bbox_overlaps(_pred[:, :4], _gt)
165 |
166 | for h in range(_pred.shape[0]):
167 |
168 | gt_overlap = overlaps[h]
169 | max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
170 | if max_overlap >= iou_thresh:
171 | if ignore[max_idx] == 0:
172 | recall_list[max_idx] = -1
173 | proposal_list[h] = -1
174 | elif recall_list[max_idx] == 0:
175 | recall_list[max_idx] = 1
176 |
177 | r_keep_index = np.where(recall_list == 1)[0]
178 | pred_recall[h] = len(r_keep_index)
179 | return pred_recall, proposal_list
180 |
181 |
182 | def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall):
183 | pr_info = np.zeros((thresh_num, 2)).astype('float')
184 | for t in range(thresh_num):
185 |
186 | thresh = 1 - (t+1)/thresh_num
187 | r_index = np.where(pred_info[:, 4] >= thresh)[0]
188 | if len(r_index) == 0:
189 | pr_info[t, 0] = 0
190 | pr_info[t, 1] = 0
191 | else:
192 | r_index = r_index[-1]
193 | p_index = np.where(proposal_list[:r_index+1] == 1)[0]
194 | pr_info[t, 0] = len(p_index)
195 | pr_info[t, 1] = pred_recall[r_index]
196 | return pr_info
197 |
198 |
199 | def dataset_pr_info(thresh_num, pr_curve, count_face):
200 | _pr_curve = np.zeros((thresh_num, 2))
201 | for i in range(thresh_num):
202 | _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0]
203 | _pr_curve[i, 1] = pr_curve[i, 1] / count_face
204 | return _pr_curve
205 |
206 |
207 | def voc_ap(rec, prec):
208 |
209 | # correct AP calculation
210 | # first append sentinel values at the end
211 | mrec = np.concatenate(([0.], rec, [1.]))
212 | mpre = np.concatenate(([0.], prec, [0.]))
213 |
214 | # compute the precision envelope
215 | for i in range(mpre.size - 1, 0, -1):
216 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
217 |
218 | # to calculate area under PR curve, look for points
219 | # where X axis (recall) changes value
220 | i = np.where(mrec[1:] != mrec[:-1])[0]
221 |
222 | # and sum (\Delta recall) * prec
223 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
224 | return ap
225 |
226 |
227 | def evaluation(pred, gt_path, iou_thresh=0.5):
228 | pred = get_preds(pred)
229 | norm_score(pred)
230 | facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path)
231 | event_num = len(event_list)
232 | thresh_num = 1000
233 | settings = ['easy', 'medium', 'hard']
234 | setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list]
235 | aps = []
236 | for setting_id in range(3):
237 | # different setting
238 | gt_list = setting_gts[setting_id]
239 | count_face = 0
240 | pr_curve = np.zeros((thresh_num, 2)).astype('float')
241 | # [hard, medium, easy]
242 | pbar = tqdm.tqdm(range(event_num))
243 | for i in pbar:
244 | pbar.set_description('Processing {}'.format(settings[setting_id]))
245 | event_name = str(event_list[i][0][0])
246 | img_list = file_list[i][0]
247 | pred_list = pred[event_name]
248 | sub_gt_list = gt_list[i][0]
249 | # img_pr_info_list = np.zeros((len(img_list), thresh_num, 2))
250 | gt_bbx_list = facebox_list[i][0]
251 |
252 | for j in range(len(img_list)):
253 | pred_info = pred_list[str(img_list[j][0][0])]
254 |
255 | gt_boxes = gt_bbx_list[j][0].astype('float')
256 | keep_index = sub_gt_list[j][0]
257 | count_face += len(keep_index)
258 |
259 | if len(gt_boxes) == 0 or len(pred_info) == 0:
260 | continue
261 | ignore = np.zeros(gt_boxes.shape[0])
262 | if len(keep_index) != 0:
263 | ignore[keep_index-1] = 1
264 | pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh)
265 |
266 | _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, pred_recall)
267 |
268 | pr_curve += _img_pr_info
269 | pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face)
270 |
271 | propose = pr_curve[:, 0]
272 | recall = pr_curve[:, 1]
273 |
274 | ap = voc_ap(recall, propose)
275 | aps.append(ap)
276 |
277 | print("==================== Results ====================")
278 | print("Easy Val AP: {}".format(aps[0]))
279 | print("Medium Val AP: {}".format(aps[1]))
280 | print("Hard Val AP: {}".format(aps[2]))
281 | print("=================================================")
282 |
283 |
284 | if __name__ == '__main__':
285 |
286 | parser = argparse.ArgumentParser()
287 | parser.add_argument('-p', '--pred', default="./widerface_txt/")
288 | parser.add_argument('-g', '--gt', default='./ground_truth/')
289 |
290 | args = parser.parse_args()
291 | evaluation(args.pred, args.gt)
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/ground_truth/wider_face_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_face_val.mat
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat
--------------------------------------------------------------------------------
/face_detection/widerface_evaluate/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | WiderFace evaluation code
3 | author: wondervictor
4 | mail: tianhengcheng@gmail.com
5 | copyright@wondervictor
6 | """
7 |
8 | from distutils.core import setup, Extension
9 | from Cython.Build import cythonize
10 | import numpy
11 |
12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()])
13 | setup(ext_modules=cythonize([package]))
14 |
--------------------------------------------------------------------------------
/face_recognition/config.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | configurations = {
4 | 1: dict(
5 | SEED = 1993, # random seed for reproduce results
6 |
7 | DATA_ROOT = '../DATA', # the parent root where your train/val/test data are stored
8 | MODEL_ROOT = '../CHECKPOINT', # the root to buffer your checkpoints
9 | LOG_ROOT = '../LOG', # the root to log your train/val status
10 | BACKBONE_RESUME_ROOT = '../CHECKPOINT/Backbone_IR_152_Epoch_112.pth', # the root to resume training from a saved checkpoint
11 | HEAD_RESUME_ROOT = '../CHECKPOINT/Head_ArcFace_Epoch_112.pth', # the root to resume training from a saved checkpoint
12 |
13 | BACKBONE_NAME = 'IR_50', # support: ['ResNet_50', 'ResNet_101', 'ResNet_152', 'IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152']
14 | HEAD_NAME = 'ArcFace', # support: ['Softmax', 'ArcFace', 'CosFace', 'SphereFace', 'Am_softmax']
15 | LOSS_NAME = 'Focal', # support: ['Focal', 'Softmax']
16 |
17 | INPUT_SIZE = [112, 112], # support: [112, 112] and [224, 224]
18 | RGB_MEAN = [0.5, 0.5, 0.5], # for normalize inputs to [-1, 1]
19 | RGB_STD = [0.5, 0.5, 0.5],
20 | EMBEDDING_SIZE = 1024, # feature dimension
21 | BATCH_SIZE = 256*8,
22 |         DROP_LAST = True, # whether to drop the last batch to ensure consistent batch_norm statistics
23 |         LR = 0.1, # initial LR
24 |         NUM_EPOCH = 125, # total epoch number (use the first 1/25 epochs to warm up)
25 | WEIGHT_DECAY = 5e-4, # do not apply to batch_norm parameters
26 | MOMENTUM = 0.9,
27 | STAGES = [35, 65, 95], # epoch stages to decay learning rate
28 |
29 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
30 |         MULTI_GPU = True, # flag to use multiple GPUs; to train with a single GPU, first run "export CUDA_VISIBLE_DEVICES=device_id" to specify the GPU card to use
31 | GPU_ID = [0, 1, 2, 3, 4, 5, 6, 7], # specify your GPU ids
32 | #GPU_ID = [0], # specify your GPU ids
33 | PIN_MEMORY = True,
34 | NUM_WORKERS = 0
35 | )
36 | }
37 |
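A training or evaluation script would typically select one of these configurations by key and unpack the fields it needs; a minimal sketch of that assumed usage:

```python
from config import configurations

cfg = configurations[1]
SEED, INPUT_SIZE, BATCH_SIZE = cfg['SEED'], cfg['INPUT_SIZE'], cfg['BATCH_SIZE']
DEVICE = cfg['DEVICE']
print(cfg['BACKBONE_NAME'], cfg['HEAD_NAME'], cfg['LOSS_NAME'], DEVICE)
```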
--------------------------------------------------------------------------------
/gaze_estimation/README.md:
--------------------------------------------------------------------------------
1 | # IR_Driver_Gaze_Estimation
2 |
3 | Implementation of gaze estimation using IR camera images with CNN.
4 |
5 | This repository provides a light model version of gaze estimation (Caffe, TensorFlow, and PyTorch implementations) and a heavy model version.
6 |
7 | * Input: 120 x 100 grayscale face image
8 | * Light version: uses the 120 x 100 grayscale image for the global estimator
9 | * Heavy version: uses the 120 x 100 grayscale image for the global estimator and an 80 x 100 crop of it for the local estimator
10 | * Heavy+Att version: adds an attention mask to the heavy version
11 |
12 |
13 | ## CAFFE version
14 | Light model version is supported
15 |
16 | -TRAINING from Scratch-
17 | > bin\caffe train --solver=ir_gaze_solver.prototxt --gpu=0
18 |
19 | -TRAINING from Weights-
20 | > bin\caffe train --solver=ir_gaze_solver.prototxt --weights=caffemodels/***.caffemodel --gpu=0
21 |
22 |
23 |
24 | ## TENSORFLOW version
25 | Light model version is supported
26 |
27 | -TRAINING/EVALUATION from Scratch-
28 | > python train.py
29 |
30 | -PREDICT-
31 | >python test_sequences.py
32 |
33 |
34 |
35 | ## PYTORCH version
36 | Modify config.py for various options (such as batch size, GPU index, etc.).
37 |
38 | -TRAINING-
39 | > python train.py
--------------------------------------------------------------------------------
/gaze_estimation/example_movie/media2_slow.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/gaze_estimation/example_movie/media2_slow.avi
--------------------------------------------------------------------------------
/gaze_estimation/v1_caffe_model/ir_gaze_deploy.prototxt:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ## 20172258 Cha Dongmin
3 | ################################################################################
4 |
5 | name: "IR_GAZE_ESTIMATION"
6 | input: "data"
7 | input_dim: 1 # batch size
8 | input_dim: 1
9 | input_dim: 100
10 | input_dim: 120
11 |
12 | layer {
13 | name: "data"
14 | type: "HDF5Data"
15 | top: "data"
16 | top: "label"
17 | hdf5_data_param {
18 | source: "list_train.txt"
19 | batch_size: 32
20 | }
21 | }
22 |
23 |
24 |
25 | layer {
26 | name: "conv1"
27 | type: "Convolution"
28 | bottom: "data"
29 | top: "conv1"
30 | param {
31 | lr_mult: 1.0
32 | }
33 | param {
34 | lr_mult: 2.0
35 | }
36 | convolution_param {
37 | num_output: 40
38 | kernel_size: 7
39 | stride: 2
40 | }
41 | }
42 |
43 |
44 |
45 | layer {
46 | name: "relu1"
47 | type: "ReLU"
48 | bottom: "conv1"
49 | top: "conv1"
50 | }
51 |
52 |
53 | layer {
54 | name: "pool1"
55 | type: "Pooling"
56 | bottom: "conv1"
57 | top: "pool1"
58 | pooling_param {
59 | kernel_size: 3
60 | stride: 2
61 | pool: MAX
62 | }
63 | }
64 |
65 |
66 | layer {
67 | name: "conv2"
68 | type: "Convolution"
69 | bottom: "pool1"
70 | top: "conv2"
71 | param {
72 | lr_mult: 1.0
73 | }
74 | param {
75 | lr_mult: 2.0
76 | }
77 | convolution_param {
78 | num_output: 70
79 | kernel_size: 5
80 | pad: 1
81 | stride: 2
82 | }
83 | }
84 |
85 |
86 | layer {
87 | name: "relu2"
88 | type: "ReLU"
89 | bottom: "conv2"
90 | top: "conv2"
91 | }
92 |
93 |
94 |
95 |
96 | layer {
97 | name: "pool2"
98 | type: "Pooling"
99 | bottom: "conv2"
100 | top: "pool2"
101 | pooling_param {
102 | kernel_size: 2
103 | stride: 2
104 | pool: MAX
105 | }
106 | }
107 |
108 |
109 | layer {
110 | name: "conv3"
111 | type: "Convolution"
112 | bottom: "pool2"
113 | top: "conv3"
114 | param {
115 | lr_mult: 1.0
116 | }
117 | param {
118 | lr_mult: 2.0
119 | }
120 | convolution_param {
121 | num_output: 60
122 | kernel_size: 3
123 | pad: 1
124 | }
125 | }
126 |
127 |
128 |
129 | layer {
130 | name: "relu3"
131 | type: "ReLU"
132 | bottom: "conv3"
133 | top: "conv3"
134 | }
135 |
136 | layer {
137 | name: "pool3"
138 | type: "Pooling"
139 | bottom: "conv3"
140 | top: "pool3"
141 | pooling_param {
142 | kernel_size: 2
143 | stride: 2
144 | pool: MAX
145 | }
146 | }
147 |
148 |
149 |
150 | layer {
151 | name: "conv4"
152 | type: "Convolution"
153 | bottom: "pool3"
154 | top: "conv4"
155 | param {
156 | lr_mult: 1.0
157 | }
158 | param {
159 | lr_mult: 2.0
160 | }
161 | convolution_param {
162 | num_output: 80
163 | kernel_size: 3
164 | pad: 1
165 | }
166 | }
167 |
168 |
169 | layer {
170 | name: "relu4"
171 | type: "ReLU"
172 | bottom: "conv4"
173 | top: "conv4"
174 | }
175 |
176 | layer {
177 | name: "pool4"
178 | type: "Pooling"
179 | bottom: "conv4"
180 | top: "pool4"
181 | pooling_param {
182 | kernel_size: 2
183 | stride: 2
184 | pool: MAX
185 | }
186 | }
187 |
188 |
189 |
190 | layer {
191 | name: "conv5"
192 | type: "Convolution"
193 | bottom: "pool4"
194 | top: "conv5"
195 | param {
196 | lr_mult: 1.0
197 | }
198 | param {
199 | lr_mult: 2.0
200 | }
201 | convolution_param {
202 | num_output: 100
203 | kernel_size: 3
204 | pad: 1
205 | }
206 | }
207 |
208 |
209 | layer {
210 | name: "relu5"
211 | type: "ReLU"
212 | bottom: "conv5"
213 | top: "conv5"
214 | }
215 |
216 |
217 | layer {
218 | name: "pool5"
219 | type: "Pooling"
220 | bottom: "conv5"
221 | top: "pool5"
222 | pooling_param {
223 | kernel_size: 2
224 | stride: 2
225 | pool: MAX
226 | }
227 | }
228 |
229 |
230 | layer {
231 | name: "concat1"
232 | bottom: "conv5"
233 | bottom: "pool4"
234 | top: "concat1"
235 | type: "Concat"
236 | concat_param {
237 | axis: 1
238 | }
239 | }
240 |
241 |
242 |
243 | layer {
244 | name: "fc1"
245 | type: "InnerProduct"
246 | bottom: "concat1"
247 | top: "fc1"
248 | inner_product_param {
249 | num_output: 4000
250 | }
251 | }
252 |
253 |
254 | layer {
255 | name: "relu6"
256 | type: "ReLU"
257 | bottom: "fc1"
258 | top: "fc1"
259 | }
260 |
261 |
262 | layer {
263 | name: "drop1"
264 | type: "Dropout"
265 | bottom: "fc1"
266 | top: "fc1"
267 | dropout_param {
268 | dropout_ratio: 0.5
269 | }
270 |
271 |
272 | }
273 |
274 |
275 |
276 |
277 | layer {
278 | name: "fc2"
279 | type: "InnerProduct"
280 | bottom: "fc1"
281 | top: "fc2"
282 |
283 | param{
284 | lr_mult: 10
285 | decay_mult: 1
286 | }
287 | param{
288 | lr_mult: 20
289 | decay_mult: 0
290 | }
291 | inner_product_param {
292 | num_output: 6
293 | weight_filler {
294 | type: "xavier"
295 | }
296 | bias_filler {
297 | type: "constant"
298 | value: 0.0
299 | }
300 | }
301 | }
302 |
303 | layer {
304 | name: "prob"
305 | type: "Softmax"
306 | bottom: "fc2"
307 | top: "prob"
308 |
309 | }
310 |
--------------------------------------------------------------------------------
/gaze_estimation/v1_caffe_model/ir_gaze_solver.prototxt:
--------------------------------------------------------------------------------
1 | net: "ir_gaze_train_val.prototxt"
2 | base_lr: 0.001
3 | lr_policy: "step"
4 | gamma: 0.1
5 | stepsize: 30000
6 | display: 20
7 | max_iter: 40000
8 | momentum: 0.9
9 | weight_decay: 0.0005
10 | ## snapshot a checkpoint every 5000 iterations (prefix "GAZE")
11 | #snapshot: 0
12 | snapshot: 5000
13 | snapshot_prefix: "GAZE"
14 | #debug_info: true
15 |
16 |
--------------------------------------------------------------------------------
/gaze_estimation/v1_caffe_model/ir_gaze_train_val.prototxt:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | ## 20172258 Cha Dongmin
3 | ################################################################################
4 |
5 | name: "IR_GAZE_ESTIMATION"
6 |
7 | #input: "data"
8 | #input_dim: BATCH
9 | #input_dim: 1
10 | #input_dim: 100
11 | #input_dim: 120
12 |
13 | layer {
14 | name: "data"
15 | type: "HDF5Data"
16 | top: "data"
17 | top: "label"
18 | hdf5_data_param {
19 | source: "list_train.txt"
20 | batch_size: 32
21 | }
22 | }
23 |
24 |
25 |
26 | layer {
27 | name: "conv1"
28 | type: "Convolution"
29 | bottom: "data"
30 | top: "conv1"
31 | param {
32 | lr_mult: 1.0
33 | }
34 | param {
35 | lr_mult: 2.0
36 | }
37 | convolution_param {
38 | num_output: 40
39 | kernel_size: 7
40 | stride: 2
41 | }
42 | }
43 |
44 |
45 |
46 | layer {
47 | name: "relu1"
48 | type: "ReLU"
49 | bottom: "conv1"
50 | top: "conv1"
51 | }
52 |
53 |
54 | layer {
55 | name: "pool1"
56 | type: "Pooling"
57 | bottom: "conv1"
58 | top: "pool1"
59 | pooling_param {
60 | kernel_size: 3
61 | stride: 2
62 | pool: MAX
63 | }
64 | }
65 |
66 |
67 | layer {
68 | name: "conv2"
69 | type: "Convolution"
70 | bottom: "pool1"
71 | top: "conv2"
72 | param {
73 | lr_mult: 1.0
74 | }
75 | param {
76 | lr_mult: 2.0
77 | }
78 | convolution_param {
79 | num_output: 70
80 | kernel_size: 5
81 | pad: 1
82 | stride: 2
83 | }
84 | }
85 |
86 |
87 | layer {
88 | name: "relu2"
89 | type: "ReLU"
90 | bottom: "conv2"
91 | top: "conv2"
92 | }
93 |
94 |
95 |
96 |
97 | layer {
98 | name: "pool2"
99 | type: "Pooling"
100 | bottom: "conv2"
101 | top: "pool2"
102 | pooling_param {
103 | kernel_size: 2
104 | stride: 2
105 | pool: MAX
106 | }
107 | }
108 |
109 |
110 | layer {
111 | name: "conv3"
112 | type: "Convolution"
113 | bottom: "pool2"
114 | top: "conv3"
115 | param {
116 | lr_mult: 1.0
117 | }
118 | param {
119 | lr_mult: 2.0
120 | }
121 | convolution_param {
122 | num_output: 60
123 | kernel_size: 3
124 | pad: 1
125 | }
126 | }
127 |
128 |
129 |
130 | layer {
131 | name: "relu3"
132 | type: "ReLU"
133 | bottom: "conv3"
134 | top: "conv3"
135 | }
136 |
137 | layer {
138 | name: "pool3"
139 | type: "Pooling"
140 | bottom: "conv3"
141 | top: "pool3"
142 | pooling_param {
143 | kernel_size: 2
144 | stride: 2
145 | pool: MAX
146 | }
147 | }
148 |
149 |
150 |
151 | layer {
152 | name: "conv4"
153 | type: "Convolution"
154 | bottom: "pool3"
155 | top: "conv4"
156 | param {
157 | lr_mult: 1.0
158 | }
159 | param {
160 | lr_mult: 2.0
161 | }
162 | convolution_param {
163 | num_output: 80
164 | kernel_size: 3
165 | pad: 1
166 | }
167 | }
168 |
169 |
170 | layer {
171 | name: "relu4"
172 | type: "ReLU"
173 | bottom: "conv4"
174 | top: "conv4"
175 | }
176 |
177 | layer {
178 | name: "pool4"
179 | type: "Pooling"
180 | bottom: "conv4"
181 | top: "pool4"
182 | pooling_param {
183 | kernel_size: 2
184 | stride: 2
185 | pool: MAX
186 | }
187 | }
188 |
189 |
190 |
191 | layer {
192 | name: "conv5"
193 | type: "Convolution"
194 | bottom: "pool4"
195 | top: "conv5"
196 | param {
197 | lr_mult: 1.0
198 | }
199 | param {
200 | lr_mult: 2.0
201 | }
202 | convolution_param {
203 | num_output: 100
204 | kernel_size: 3
205 | pad: 1
206 | }
207 | }
208 |
209 |
210 | layer {
211 | name: "relu5"
212 | type: "ReLU"
213 | bottom: "conv5"
214 | top: "conv5"
215 | }
216 |
217 |
218 | layer {
219 | name: "pool5"
220 | type: "Pooling"
221 | bottom: "conv5"
222 | top: "pool5"
223 | pooling_param {
224 | kernel_size: 2
225 | stride: 2
226 | pool: MAX
227 | }
228 | }
229 |
230 |
231 | layer {
232 | name: "concat1"
233 | bottom: "conv5"
234 | bottom: "pool4"
235 | top: "concat1"
236 | type: "Concat"
237 | concat_param {
238 | axis: 1
239 | }
240 | }
241 |
242 |
243 |
244 | layer {
245 | name: "fc1"
246 | type: "InnerProduct"
247 | bottom: "concat1"
248 | top: "fc1"
249 | inner_product_param {
250 | num_output: 4000
251 | }
252 | }
253 |
254 |
255 | layer {
256 | name: "relu6"
257 | type: "ReLU"
258 | bottom: "fc1"
259 | top: "fc1"
260 | }
261 |
262 |
263 | layer {
264 | name: "drop1"
265 | type: "Dropout"
266 | bottom: "fc1"
267 | top: "fc1"
268 | dropout_param {
269 | dropout_ratio: 0.5
270 | }
271 |
272 |
273 | }
274 |
275 |
276 |
277 |
278 | layer {
279 | name: "fc2"
280 | type: "InnerProduct"
281 | bottom: "fc1"
282 | top: "fc2"
283 |
284 | param{
285 | lr_mult: 10
286 | decay_mult: 1
287 | }
288 | param{
289 | lr_mult: 20
290 | decay_mult: 0
291 | }
292 | inner_product_param {
293 | num_output: 6
294 | weight_filler {
295 | type: "xavier"
296 | }
297 | bias_filler {
298 | type: "constant"
299 | value: 0.0
300 | }
301 | }
302 | }
303 |
304 | layer {
305 | name: "loss"
306 | type: "SoftmaxWithLoss"
307 | bottom:"fc2"
308 | bottom:"label"
309 | top:"loss"
310 | }
--------------------------------------------------------------------------------
/gaze_estimation/v2_tensorflow_model/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import numpy as np
4 | from opt import *
5 |
6 |
7 |
8 | def gazenetwork(features, labels, mode):
9 |
10 | # image : [batch, 100, 120, 1]
11 | input = tf.reshape(features["x"], [-1, 100, 120, 1])
12 |
13 |     # apply dropout only during training
14 | if mode == tf.estimator.ModeKeys.TRAIN:
15 | dropout = 0.5
16 | else:
17 | dropout = 1.0
18 |
19 |
20 |
21 |     # * conv layers use SAME padding by default
22 | # H0
23 | h0 = lrelu(conv2d(input, output_dim=40, ks=7, s=2, name='h0_conv'))
24 | h0 = slim.max_pool2d(h0, kernel_size=3, stride=2, scope='h0_pool')
25 |
26 | # H1
27 | h1 = lrelu(conv2d(h0, output_dim=70, ks=5, s=2, name='h1_conv'))
28 | h1 = slim.max_pool2d(h1, kernel_size=2, stride=2, scope='h1_pool')
29 |
30 | # H2
31 | h2 = lrelu(conv2d(h1, output_dim=60, ks=3, s=1, name='h2_conv'))
32 | h2 = slim.max_pool2d(h2, kernel_size=2, stride=2, scope='h2_pool')
33 |
34 | # H3
35 | h3 = lrelu(conv2d(h2, output_dim=80, ks=3, s=1, name='h3_conv'))
36 | h3 = slim.max_pool2d(h3 , kernel_size=2, stride=2, scope='h3_pool')
37 |
38 | # H4
39 | h4 = lrelu(conv2d(h3, output_dim=100, ks=3, s=1, name='h4_conv'))
40 |
41 | # h3 & h4 concatenate
42 | h3_flat = slim.flatten(h3, scope="h3_flat")
43 | h4_flat = slim.flatten(h4, scope="h4_flat")
44 | h_concat = tf.concat([h3_flat, h4_flat], 1, name='h3_h4_concat')
45 |
46 | # start of fc
47 | fc1 = slim.fully_connected(h_concat, 4000, scope="fc1")
48 | fc1_dropout = slim.dropout(fc1, dropout)
49 | logits = slim.fully_connected(fc1_dropout, 6, activation_fn=None, scope="logits")
50 | class_logits = tf.argmax(input=logits, axis=1)
51 |
52 |     # pass the logits through softmax
53 | #softmax
54 | predictions = {"classes" : tf.argmax(input=logits, axis=1),
55 | "probabilities" : tf.nn.softmax(logits, name="softmax_tensor")}
56 | #predictions = tf.nn.softmax(logits, name='predictions')
57 |
58 |
59 |
60 |
61 | if mode == tf.estimator.ModeKeys.PREDICT:
62 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
63 |
64 | loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
65 | #accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
66 |
67 |
68 | #in TRAINING mode,
69 | if mode == tf.estimator.ModeKeys.TRAIN:
70 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.005)
71 | train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
72 |
73 |         # In TRAIN mode, return an EstimatorSpec containing mode, loss, and train_op;
74 |         # train_op minimizes the loss with the optimizer
75 | return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
76 |
77 | #in PREDICT mode.
78 | if mode == tf.estimator.ModeKeys.PREDICT:
79 | out_predictions = {
80 | "classes": tf.argmax(input=logits, axis=1),
81 | "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
82 | }
83 |
84 | #out_predictions = {"logits": logits}
85 | return tf.estimator.EstimatorSpec(mode=mode, predictions=out_predictions)
86 |
87 | #in EVAL mode.
88 | print(labels)
89 | print(class_logits)
90 | eval_ops = {"accuracy" : tf.metrics.accuracy(labels=labels, predictions=class_logits)}
91 |
92 |     # In EVAL mode, return an EstimatorSpec containing mode, loss, and eval_metric_ops;
93 |     # the evaluation metric is accuracy
94 | return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_ops)
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/gaze_estimation/v2_tensorflow_model/opt.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import random
4 | import math
5 | from glob import glob
6 | from PIL import Image
7 |
8 | import tensorflow.contrib.slim as slim
9 |
10 |
11 | BATCH_SIZE = 256
12 | IMG_WIDTH = 120
13 | IMG_HEIGHT = 100
14 | CHANNEL_N = 1
15 | CLASS_N = 6
16 |
17 | def load_img_and_label_from_npy(image_npy, label_npy):
18 | images = load_np(image_npy)
19 | labels = load_np(label_npy)
20 |
21 | return images, labels
22 |
23 | def load_images(train_ratio=0.95, test_ratio=0.05):
24 | print("Loading Images...")
25 |
26 |     # read the images grouped by gaze-zone label
27 | #6 gaze zones
28 | data_list_1 = glob('*part_1.jpg') #1
29 | data_list_2 = glob('*part_3.jpg') #2
30 | data_list_3 = glob('*part_6.jpg') #3
31 | data_list_4 = glob('*part_8.jpg') #4
32 | data_list_5 = glob('*part_10.jpg') #5
33 | data_list_6 = glob('*part_12.jpg') #6
34 |
35 |
36 | batch_tuple = []
37 |
38 | n = 0
39 | #------------1
40 | for i in range(len(data_list_1)):
41 | path = data_list_1[i]
42 | img = read_image(path)
43 |
44 |         # store the image path and its label in the batch list
45 | batch_tuple.append((path, 0))
46 |
47 |
48 | #-------------- 2
49 | for i in range(len(data_list_2)):
50 | path = data_list_2[i]
51 | img = read_image(path)
52 |
53 |         # store the image path and its label in the batch list
54 | batch_tuple.append((path, 1))
55 |
56 | #--------------- 3
57 | for i in range(len(data_list_3)):
58 | path = data_list_3[i]
59 | img = read_image(path)
60 |
61 |         # store the image path and its label in the batch list
62 | batch_tuple.append((path, 2))
63 |
64 | # ---------------- 4
65 | for i in range(len(data_list_4)):
66 | path = data_list_4[i]
67 | img = read_image(path)
68 |
69 |         # store the image path and its label in the batch list
70 | batch_tuple.append((path, 3))
71 |
72 | # ---------------- 5
73 | for i in range(len(data_list_5)):
74 | path = data_list_5[i]
75 | img = read_image(path)
76 |
77 |         # store the image path and its label in the batch list
78 | batch_tuple.append((path, 4))
79 |
80 | # ----------------- 6
81 | for i in range(len(data_list_6)):
82 | path = data_list_6[i]
83 | img = read_image(path)
84 |
85 |         # store the image path and its label in the batch list
86 | batch_tuple.append((path, 5))
87 |
88 |
89 |     # shuffle the stored (path, label) tuples
90 | random.shuffle(batch_tuple)
91 | #print(batch_tuple)
92 |
93 |     # split into train and test sets
94 | num = len(batch_tuple)
95 | train_num = math.floor(train_ratio*num)
96 | test_num = num - train_num
97 |
98 |
99 |     # train/test split
100 | train_batch = batch_tuple[0:train_num]
101 | test_batch = batch_tuple[train_num:num]
102 | print(len(train_batch))
103 |
104 |     # the images must be converted to numpy arrays
105 | # BATCH_SIZE = len(data_list)
106 |
107 | train_image = np.zeros((train_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
108 | train_label = np.zeros((train_num, CLASS_N))
109 | test_image = np.zeros((test_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
110 | test_label = np.zeros((test_num, CLASS_N))
111 |
112 |     # [TRAIN] convert to numpy
113 | bat_idx = 0
114 | for path, label in train_batch:
115 | img = read_image(path)
116 | train_image[bat_idx,:, :,:] = img
117 | train_label[bat_idx, label] = 1
118 | bat_idx += 1
119 |
120 |     # [TEST] convert to numpy
121 | bat_idx = 0
122 | for path, label in test_batch:
123 | img = read_image(path)
124 | test_image[bat_idx, :, :, :] = img
125 | test_label[bat_idx, label] = 1
126 | bat_idx += 1
127 |
128 | print('[train_img]')
129 | print(train_image.shape)
130 | print('[test_img]')
131 | print(test_image.shape)
132 | print('[train_label]')
133 | print(train_label.shape)
134 | print('[test_label]')
135 | print(test_label.shape)
136 |
137 | save_np('train_img', train_image)
138 | save_np('train_label', train_label)
139 | save_np('test_img', test_image)
140 | save_np('test_label', test_label)
141 |
142 |
143 |
144 |
145 | def save_np(filename, data):
146 | np.save(filename, data)
147 |
148 | def load_np(filename):
149 | print('loading ' + filename + '......')
150 | return np.load(filename)
151 |
152 | def read_image_and_label(path):
153 |     return read_image(path), read_label(path)  # NOTE: read_label is not defined in this module
154 |
155 | def read_image(path):
156 | image = np.array(Image.open(path).convert('L'))
157 | image = image.astype(np.float32)
158 | image = image / 255.0
159 | image = np.expand_dims(image, axis=2)
160 | #image = image.reshape(IMG_HEIGHT, IMG_WIDTH, 1)
161 | return image
162 |
163 |
164 | def instance_norm(input, name="instance_norm"):
165 | with tf.variable_scope(name):
166 | depth = input.get_shape()[3]
167 | scale = tf.get_variable("scale", [depth], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
168 | offset = tf.get_variable("offset", [depth], initializer=tf.constant_initializer(0.0))
169 | mean, variance = tf.nn.moments(input, axes=[1,2], keep_dims=True)
170 | epsilon = 1e-5
171 | inv = tf.rsqrt(variance + epsilon)
172 | normalized = (input-mean)*inv
173 | return scale*normalized + offset
174 |
175 |
176 | # conv layer
177 | def conv2d(input_, output_dim, ks=4, s=2, stddev=0.02, padding='SAME', name="conv2d"):
178 | with tf.variable_scope(name):
179 | return slim.conv2d(input_, output_dim, ks, s, padding=padding, activation_fn=None,
180 | weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
181 | biases_initializer=None)
182 |
183 | # leaky ReLU activation
184 | def lrelu(x, leak=0.2, name="lrelu"):
185 | return tf.maximum(x, leak*x)
--------------------------------------------------------------------------------
/gaze_estimation/v2_tensorflow_model/test_sequences.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 | import random
5 | import math
6 | from glob import glob
7 | from PIL import Image, ImageDraw, ImageFont
8 | import matplotlib.pyplot as plt
9 | import time
10 |
11 |
12 |
13 | BATCH_SIZE = 256
14 | IMG_WIDTH = 120
15 | IMG_HEIGHT = 100
16 | CHANNEL_N = 1
17 | CLASS_N = 6
18 |
19 |
20 | def predict_imgs():
21 | tf.logging.set_verbosity(tf.logging.INFO)
22 | # to avoid cuda memory out error
23 | gpu_options = tf.GPUOptions(allow_growth=True)
24 | config = tf.ConfigProto(gpu_options=gpu_options)
25 |
26 | # data load
27 | face_npy, img_list = load_imgs()
28 | IMG_NUM = len(img_list)
29 |
30 |     # create the estimator
31 | gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
32 | config=tf.contrib.learn.RunConfig(session_config=config))
33 |
34 |
35 |
36 | # START
37 | img_template = None
38 | for i in range(IMG_NUM):
39 | test_data = face_npy[i, :, :, :]
40 | test_data = np.expand_dims(test_data, axis=0)
41 |
42 | # test
43 | test_input_fn = tf.estimator.inputs.numpy_input_fn(
44 | x={"x": test_data},
45 | shuffle=False)
46 | #test_spec = tf.estimator.EvalSpec(input_fn=test_input_fn)
47 |
48 | predictions = gaze_classifier.predict(input_fn=test_input_fn)
49 | predictor = list(predictions)
50 | label = predictor[0]['classes'] + 1
51 |
52 | #draw pic
53 | draw_pic(img_template, img_list[i], label, i)
54 |
55 | #print(list(predictions)[0]['claasses'])
56 |
57 |
58 |
59 |
60 | def draw_pic(img_template, img_path, text, frameidx):
61 |
62 |
63 | plt.gcf().clear()
64 | image = Image.open(img_path)
65 | draw = ImageDraw.Draw(image)
66 | (x, y) = (10, 10)
67 | font = ImageFont.truetype('arial', size=125)
68 | message = str(text)
69 |     color = 'rgb(255, 255, 255)'  # white text
70 | draw.text((x, y), message, fill=color, font=font)
71 | #plt.imshow(image)
72 |
73 | if img_template is None:
74 | img_template = plt.imshow(image)
75 | else:
76 | img_template.set_data(image)
77 |
78 | plt.pause(0.1)
79 |
80 | #im = plt.imshow(image, animated=True)
81 | plt.draw()
82 |
83 |
84 |
85 | # NOTE: load_imgs() is required by predict_imgs() above; BASE_DIR is machine-specific.
86 | def load_imgs():
87 | BASE_DIR = "F:/2-2/cv/proj_gaze/sequences/4"
88 | face_dir = BASE_DIR + "/face/*.jpg"
89 | img_dir = BASE_DIR + "/entire/*.jpg"
90 |
91 | face_list = glob(face_dir)
92 | img_list = glob(img_dir)
93 |
94 | IMG_NUM = len(img_list)
95 | test_image = np.zeros((IMG_NUM, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
96 |
97 | # LOOP START
98 | bat_idx = 0
99 | for path in face_list:
100 | img = read_image(path)
101 | test_image[bat_idx,:, :,:] = img
102 | bat_idx += 1
103 |
104 |
105 |
106 |
107 | return test_image, img_list
108 |
109 |
110 | def read_image(path):
111 | image = np.array(Image.open(path).convert('L'))
112 | image = image.astype(np.float32)
113 | image = image / 255.0
114 | image = np.expand_dims(image, axis=2)
115 | return image
116 |
117 | # main func
118 | def main(unused_argv):
119 | #load_imgs()
120 | predict_imgs()
121 |
122 |
123 | if __name__ == "__main__":
124 | tf.app.run()
--------------------------------------------------------------------------------
/gaze_estimation/v2_tensorflow_model/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 |
5 | def test():
6 | tf.logging.set_verbosity(tf.logging.INFO)
7 | # to avoid cuda memory out error
8 | gpu_options = tf.GPUOptions(allow_growth=True)
9 | config = tf.ConfigProto(gpu_options=gpu_options)
10 |
11 | # data load
12 | eval_data, eval_label = load_img_and_label_from_npy('test_img.npy', 'test_label.npy')
13 | eval_label = np.argmax(eval_label, axis=1)
14 | print(eval_data[3])
15 | print(eval_label[3])
16 | print('npy loaded')
17 |
18 |     # create the estimator
19 | gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
20 | config=tf.contrib.learn.RunConfig(session_config=config))
21 |
22 | # eval
23 | eval_input_fn = tf.estimator.inputs.numpy_input_fn(
24 | x={"x": eval_data},
25 | y=eval_label,
26 |
27 | num_epochs=1,
28 | shuffle=False)
29 | eval_results = gaze_classifier.evaluate(input_fn=eval_input_fn)
30 | print(eval_results)
31 |
32 | def train():
33 | # load_images()
34 |
35 | tf.logging.set_verbosity(tf.logging.INFO)
36 | # to avoid cuda memory out error
37 | gpu_options = tf.GPUOptions(allow_growth=True)
38 | config = tf.ConfigProto(gpu_options=gpu_options)
39 |
40 |     # == the input_fn passed to the Estimator also has requirements:
41 |     # it must return the feature data and the label data
42 | train_data, train_label = load_img_and_label_from_npy('train_img.npy', 'train_label.npy')
43 | train_label = np.argmax(train_label, axis=1)
44 | print('npy loaded')
45 |
46 | train_input_fn = tf.estimator.inputs.numpy_input_fn(
47 | x={"x": train_data},
48 | y=train_label,
49 | batch_size=712, num_epochs=None, shuffle=True)
50 |     print('input_fn created')
51 |
52 |     # == the model_fn used to build the Estimator also has requirements on its parameters:
53 |     #
54 |     # its signature is (features, labels, mode, params, config); features and labels are required
55 |     #
56 |     # and it must return a tf.estimator.EstimatorSpec
57 |
58 |     # == model_dir is where the learned parameters are stored; the session config is passed in as well
59 | gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
60 | config=tf.contrib.learn.RunConfig(session_config=config))
61 |     print('estimator created')
62 |
63 | # recording logs
64 | log_tensor = {"loss" : "loss"}
65 | #logging_hook = tf.train.LoggingTensorHook({"loss": loss,
66 | # "accuracy": accuracy}, every_n_iter=10)
67 |
68 | log_hook = tf.train.LoggingTensorHook(tensors=log_tensor, every_n_iter=50)
69 |
70 | # train
71 | print('start train')
72 | gaze_classifier.train(input_fn=train_input_fn, steps=100000)
73 |
74 | def make_db():
75 | load_images()
76 |
77 | #main func
78 | def main(unused_argv):
79 | #make_db()
80 | #test()
81 | train()
82 |
83 |
84 |
85 | if __name__ == "__main__":
86 | tf.app.run()
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/config.py:
--------------------------------------------------------------------------------
1 | class Config(object):
2 | lr = 0.001
3 |
4 | # 'LIGHT' or 'HEAVY' or 'HEAVY+ATT' or 'MORE_LIGHT'
5 | #use_model_type = 'HEAVY+ATT'
6 | use_model_type = 'MORE_LIGHT'
7 |
8 | alpha = 2
9 | batch_size = 200
10 | global_img_size = [100, 120]
11 | local_img_size = [100, 80]
12 | schedule = [150, 225]
13 | gamma = 0.1
14 | print_iter = 5
15 | save_epoch = 10
16 |
17 | data_path = 'D:/-----/cropped_fld_and_face'
18 | save_path = 'save_checks_more_light'
19 |
20 | max_epoch = 200
21 | gpus = "0"
22 | class_num = 6
23 | momentum= 0.9
24 | weight_decay = 5e-4
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/gaze_model_heavy_ver.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import cv2
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | class Estimator(nn.Module):
7 | def __init__(self, use_attention_map=False):
8 | super(Estimator, self).__init__()
9 |
10 | self.global_estimator = Global_Estimator(use_attention_map)
11 | self.local_estimator = Local_Estimator(use_attention_map)
12 | self.use_attention_map = use_attention_map
13 |
14 | #if use_mtcnn:
15 | # self.final_fc = nn.Linear(1024 + 512 + 136, 6)
16 | #else:
17 | self.final_fc = nn.Linear(4000 + 1000, 6)
18 |
19 |
20 |
21 | def forward(self, input_x, input_local_x, flds=None):
22 |
23 | g_output = self.global_estimator(input_x)
24 | l_output = self.local_estimator(input_local_x)
25 |
26 | output = self.final_fc(torch.cat([g_output, l_output], dim=1))
27 |
28 | return output
29 |
30 |
31 |
32 |
33 | # ------------------------------------- GLOBAL ---------------------
34 | class Global_Estimator(nn.Module):
35 | def __init__(self, use_attention=False):
36 | super(Global_Estimator, self).__init__()
37 |
38 | input_dim = 1
39 |
40 | self.use_attention = use_attention
41 | self.lrelu = nn.LeakyReLU(0.2)
42 | self.drop = nn.Dropout(0.5)
43 | self.pool = nn.MaxPool2d(2)
44 | self.pool3 = nn.MaxPool2d(3, 2)
45 |
46 |
47 | if self.use_attention:
48 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1)
49 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1)
50 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1)
51 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1)
52 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1)
53 |
54 | # 120 x 180
55 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0)
56 | self.norm_1 = nn.InstanceNorm2d(40)
57 |
58 | # 60 x 90
59 | self.conv2 = conv2d_block(40, 70, 5, 2, 1)
60 | self.norm_2 = nn.InstanceNorm2d(70)
61 |
62 | # 30 x 45
63 | self.conv3 = conv2d_block(70, 60, 3, 1, 0)
64 | self.norm_3 = nn.InstanceNorm2d(60)
65 |
66 | self.conv4 = conv2d_block(60, 80, 3, 1, 0)
67 | self.norm_4 = nn.InstanceNorm2d(80)
68 |
69 | self.conv5 = conv2d_block(80, 100, 3, 1, 0)
70 | self.norm_5 = nn.InstanceNorm2d(100)
71 |
72 | self.fc1 = nn.Linear((80 * 7 * 6) + (100 * 7 * 6), 4000)
73 |
74 |
75 | def forward(self, x):
76 |
77 | # input : B x C x 120 x 100
78 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot]
79 | x = self.lrelu(self.conv1(x))
80 | if self.use_attention:
81 | x_att1 = self.conv1_att(x)
82 | x = x_att1 * x
83 | x = self.norm_1(x)
84 | x = self.pool3(x)
85 |
86 | # B x C x 59 x 49
87 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot]
88 | x = self.lrelu(self.conv2(x))
89 | if self.use_attention:
90 | x_att2 = self.conv2_att(x)
91 | x = x_att2 * x
92 | x = self.norm_2(x)
93 | x = self.pool(x)
94 |
95 | # B x C x 29 x 24
96 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
97 | x = self.lrelu(self.conv3(x))
98 | if self.use_attention:
99 | x_att3 = self.conv3_att(x)
100 | x = x_att3 * x
101 | x = self.norm_3(x)
102 | x = self.pool(x)
103 |
104 | # B x C x 14 x 12
105 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
106 | x = self.lrelu(self.conv4(x))
107 | if self.use_attention:
108 | x_att4 = self.conv4_att(x)
109 | x = x_att4 * x
110 | x = self.norm_4(x)
111 | x = self.pool(x)
112 | x_41 = x.view(x.size()[0], -1)
113 |
114 |
115 | # B x C x 7 x 6
116 | x = F.pad(x, (1, 1, 1, 1))
117 | x = self.lrelu(self.conv5(x))
118 | if self.use_attention:
119 | x_att5 = self.conv5_att(x)
120 | x = x_att5 * x
121 | x = self.norm_5(x)
122 | x_51 = x.view(x.size()[0], -1)
123 |
124 | # concat 41 & 51
125 | x = self.fc1(torch.cat((x_41, x_51), dim=1))
126 | #x = self.fc2(x)
127 |
128 | return x
129 |
130 |
131 |
132 |
133 |
134 | # ------------------------------------- LOCAL ---------------------
135 | class Local_Estimator(nn.Module):
136 | def __init__(self, use_attention=False):
137 | super(Local_Estimator, self).__init__()
138 |
139 |
140 | input_dim = 1
141 | self.use_attention = use_attention
142 |
143 | self.lrelu = nn.LeakyReLU(0.2)
144 | self.drop = nn.Dropout(0.5)
145 | self.pool = nn.MaxPool2d(2)
146 | self.pool3 = nn.MaxPool2d(3, 2)
147 |
148 |
149 | # att maps
150 | if self.use_attention:
151 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1)
152 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1)
153 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1)
154 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1)
155 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1)
156 |
157 |
158 | # 120 x 180
159 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0)
160 | self.norm_1 = nn.InstanceNorm2d(40)
161 |
162 | # 60 x 90
163 | self.conv2 = conv2d_block(40, 70, 5, 2, 1)
164 | self.norm_2 = nn.InstanceNorm2d(70)
165 |
166 | # 30 x 45
167 | self.conv3 = conv2d_block(70, 60, 3, 1, 0)
168 | self.norm_3 = nn.InstanceNorm2d(60)
169 |
170 | self.conv4 = conv2d_block(60, 80, 3, 1, 0)
171 | self.norm_4 = nn.InstanceNorm2d(80)
172 |
173 | self.conv5 = conv2d_block(80, 100, 3, 1, 0)
174 | self.norm_5 = nn.InstanceNorm2d(100)
175 |
176 | self.fc1 = nn.Linear((80 * 5 * 6) + (100 * 5 * 6), 1000)
177 |
178 |
179 | def forward(self, x):
180 | # input : B x C x 50 x 100
181 | x = F.pad(x, (53, 53, 28, 28)) # [left, right, top, bot]
182 | x = self.lrelu(self.conv1(x))
183 | if self.use_attention:
184 | x_att1 = self.conv1_att(x)
185 | x = x_att1 * x
186 | x = self.norm_1(x)
187 | x = self.pool3(x)
188 |
189 | # B x C x 25 x 50
190 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot]
191 | x = self.lrelu(self.conv2(x))
192 | if self.use_attention:
193 | x_att2 = self.conv2_att(x)
194 | x = x_att2 * x
195 | x = self.norm_2(x)
196 | x = self.pool(x)
197 |
198 | # B x C x 12 x 25
199 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
200 | x = self.lrelu(self.conv3(x))
201 | if self.use_attention:
202 | x_att3 = self.conv3_att(x)
203 | x = x_att3 * x
204 | x = self.norm_3(x)
205 | x = self.pool(x)
206 |
207 | # B x C x 6 x 12
208 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
209 | x = self.lrelu(self.conv4(x))
210 | if self.use_attention:
211 | x_att4 = self.conv4_att(x)
212 | x = x_att4 * x
213 | x = self.norm_4(x)
214 | x = self.pool(x)
215 | x_41 = x.view(x.size()[0], -1)
216 |
217 |
218 | # B x C x 3 x 6
219 | x = F.pad(x, (1, 1, 1, 1))
220 | x = self.lrelu(self.conv5(x))
221 | if self.use_attention:
222 | x_att5 = self.conv5_att(x)
223 | x = x_att5 * x
224 | x = self.norm_5(x)
225 | #print("51b" + str(x.size()))
226 | x_51 = x.view(x.size()[0], -1)
227 |
228 | # concat 41 & 51
229 | x = self.fc1(torch.cat((x_41, x_51), dim=1))
230 |
231 | return x
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 | # ------ conv blocks -----------
241 |
242 | class conv2d_block(nn.Module):
243 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02):
244 | super(conv2d_block, self).__init__()
245 |
246 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride,
247 | padding=padding)
248 | def forward(self, x):
249 | return self.conv(x)
250 |
251 |
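
A small smoke-test sketch for the two-branch estimator above; the tensor sizes follow Config.global_img_size and Config.local_img_size (H x W of 120 x 100 and 80 x 100), and the dummy inputs are stand-ins for real IR crops.

# Sketch only: random tensors in place of real face crops.
import torch
from gaze_model_heavy_ver import Estimator

model = Estimator(use_attention_map=False)
global_crop = torch.randn(1, 1, 120, 100)   # full face crop (B x C x H x W)
local_crop = torch.randn(1, 1, 80, 100)     # upper (eye-region) crop
logits = model(global_crop, local_crop)
print(logits.shape)                          # torch.Size([1, 6]), one score per gaze zone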
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/gaze_model_light_ver.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import cv2
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | class Estimator(nn.Module):
7 | def __init__(self, use_mtcnn=False):
8 | super(Estimator, self).__init__()
9 |
10 | self.global_estimator = Global_Estimator()
11 | self.use_mtcnn = use_mtcnn
12 |
13 |
14 | def forward(self, input_x, flds=None):
15 |
16 | output = self.global_estimator(input_x)
17 | return output
18 |
19 |
20 |
21 |
22 | # ------------------------------------- GLOBAL ---------------------
23 | class Global_Estimator(nn.Module):
24 | def __init__(self):
25 | super(Global_Estimator, self).__init__()
26 |
27 | input_dim = 1
28 | cnum = 16
29 |
30 |
31 | self.lrelu = nn.LeakyReLU(0.2)
32 | self.drop = nn.Dropout(0.5)
33 | self.pool = nn.MaxPool2d(2)
34 | self.pool3 = nn.MaxPool2d(3, 2)
35 |
36 |
37 | # 120 x 180
38 | self.conv1 = conv2d_block(input_dim, 20, 7, 2, 0)
39 | self.norm_1 = nn.InstanceNorm2d(20)
40 |
41 | # 60 x 90
42 | self.conv2 = conv2d_block(20, 32, 5, 2, 1)
43 | self.norm_2 = nn.InstanceNorm2d(32)
44 |
45 | # 30 x 45
46 | self.conv3 = conv2d_block(32, 30, 3, 1, 0)
47 | self.norm_3 = nn.InstanceNorm2d(30)
48 |
49 | self.conv4 = conv2d_block(30, 20, 3, 1, 0)
50 | self.norm_4 = nn.InstanceNorm2d(20)
51 |
52 | self.conv5 = conv2d_block(20, 50, 3, 1, 0)
53 | self.norm_5 = nn.InstanceNorm2d(50)
54 |
55 | self.fc1 = nn.Linear((20 * 7 * 6) + (50 * 7 * 6), 2000)
56 | self.fc2 = nn.Linear(2000, 6)
57 |
58 |
59 | def forward(self, x):
60 | #print("ORIG -" + str(x.size()))
61 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot]
62 | x = self.lrelu(self.conv1(x))
63 | x = self.norm_1(x)
64 | x = self.pool3(x)
65 |
66 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot]
67 | x = self.lrelu(self.conv2(x))
68 | x = self.norm_2(x)
69 | x = self.pool(x)
70 |
71 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
72 | x = self.lrelu(self.conv3(x))
73 | x = self.norm_3(x)
74 | x = self.pool(x)
75 |
76 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot]
77 | x = self.lrelu(self.conv4(x))
78 | x = self.norm_4(x)
79 | x = self.pool(x)
80 | x_41 = x.view(x.size()[0], -1)
81 |
82 | x = F.pad(x, (1, 1, 1, 1))
83 | x = self.lrelu(self.conv5(x))
84 | x = self.norm_5(x)
85 | x_51 = x.view(x.size()[0], -1)
86 |
87 | # concat 41 & 51
88 | x = self.fc1(torch.cat((x_41, x_51), dim=1))
89 | x = self.fc2(x)
90 |
91 | return x
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | # ------ conv blocks -----------
100 |
101 | class conv2d_block(nn.Module):
102 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02):
103 | super(conv2d_block, self).__init__()
104 |
105 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride,
106 | padding=padding)
107 | def forward(self, x):
108 | return self.conv(x)
109 |
110 |
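
The light model takes only the global face crop; a corresponding smoke-test sketch with a dummy tensor sized per Config.global_img_size:

# Sketch only: a random tensor in place of a real 120 x 100 IR face crop.
import torch
from gaze_model_light_ver import Estimator

model = Estimator()
dummy = torch.randn(1, 1, 120, 100)   # B x C x H x W
logits = model(dummy)
print(logits.shape)                   # torch.Size([1, 6])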
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/ir_data.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import torch.utils.data as data
3 | from os import listdir
4 | import os
5 | import random
6 | import torch
7 | import cv2
8 | from PIL import Image
9 | import numpy as np
10 |
11 | import torchvision.transforms as transforms
12 |
13 | def is_image_file(filename):
14 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
15 | filename_lower = filename.lower()
16 | return any(filename_lower.endswith(extension) for extension in IMG_EXTENSIONS)
17 |
18 | def is_usable_gaze(filename):
19 | GAZE_ZONES = ['part_1', 'part_3', 'part_6', 'part_8', 'part_10', 'part_12']
20 | filename_lower = filename.lower().split('.')[0]
21 | return any(filename_lower.endswith(gaze_zone) for gaze_zone in GAZE_ZONES)
22 |
23 | def img_loader(path):
24 | try:
25 | with open(path, 'rb') as f:
26 | img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
27 |
28 | return img
29 | except IOError:
30 | print('Cannot load image ' + path)
31 |
32 | class IR_FACE_Dataset(data.Dataset):
33 | def __init__(self, data_path, img_w, img_h, img_local_h, transform, loader=img_loader,\
34 | with_subfolder=False, random_crop=True, read_fld=True, return_name=False):
35 | super(IR_FACE_Dataset, self).__init__()
36 | if with_subfolder:
37 | self.samples = self._find_samples_in_subfolders(data_path)
38 | else:
39 | self.samples = [x for x in listdir(data_path) if is_image_file(x)]
40 |
41 |
42 |
43 | #
44 | self.samples = [x for x in self.samples if is_usable_gaze(x)]
45 | '''
46 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1
47 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2
48 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3
49 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4
50 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5
51 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6
52 | '''
53 |
54 | self.data_path = data_path
55 | self.img_w = img_w
56 | self.img_h = img_h
57 | self.img_local_h = img_local_h
58 | self.transform = transform
59 | self.random_crop = random_crop
60 | self.return_name = return_name
61 | self.loader = loader
62 |
63 | # if true, read facial landmarks
64 | self.read_fld = read_fld
65 |
66 |
67 | print(str(len(self.samples)) + " items found")
68 |
69 | def __len__(self):
70 | return len(self.samples)
71 |
72 | def __getitem__(self, index):
73 | #path = os.path.join(self.data_path, self.samples[index])
74 |
75 | path = self.data_path + '/' + self.samples[index]
76 |
77 | img = self.loader(path)
78 |         w, h = img.shape[0], img.shape[1]  # NOTE: img.shape is (height, width), so these names are swapped
79 |
80 | # use fld?
81 | if self.read_fld:
82 | fld_file = path.replace("jpg", "txt")
83 | fld_fdes = open(fld_file, "r")
84 | flds = np.array(fld_fdes.read().split(), dtype=np.float32)
85 | flds = flds.reshape(68, 2)
86 | fld_fdes.close()
87 |
88 | # need resize?
89 | if w < self.img_w or h < self.img_h or w > self.img_w or h > self.img_h:
90 |
91 | if self.read_fld:
92 | w_ratio, h_ratio = self.img_w / w, self.img_h / h
93 | flds[:, 0] = flds[:, 0] * w_ratio
94 | flds[:, 1] = flds[:, 1] * h_ratio
95 |
96 | img = cv2.resize(img, (self.img_w, self.img_h), interpolation=cv2.INTER_AREA)
97 |
98 |
99 | local_img = img[0:self.img_local_h, 0:self.img_w]
100 |
101 |
102 |
103 |
104 | # pick class
105 | gaze_part = int(path.split('_')[-1].split('.')[0])
106 | label_tensor = np.zeros([6])
107 |
108 | '''
109 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1
110 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2
111 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3
112 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4
113 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5
114 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6
115 | '''
116 | if gaze_part == 1:
117 | gaze_class = 0
118 | label_tensor[0] = 1
119 | elif gaze_part == 3:
120 | gaze_class = 1
121 | label_tensor[1] = 1
122 | elif gaze_part == 6:
123 | gaze_class = 2
124 | label_tensor[2] = 1
125 | elif gaze_part == 8:
126 | gaze_class = 3
127 | label_tensor[3] = 1
128 | elif gaze_part == 10:
129 | gaze_class = 4
130 | label_tensor[4] = 1
131 | elif gaze_part == 12:
132 | gaze_class = 5
133 | label_tensor[5] = 1
134 |
135 | label_tensor = torch.LongTensor(label_tensor)
136 | #print(path + " --- " + gaze_class)
137 |
138 |
139 | if self.transform is not None:
140 | img = self.transform(img)
141 | local_img = self.transform(local_img)
142 | else:
143 | img = torch.from_numpy(img)
144 | local_img = torch.from_numpy(local_img)
145 |
146 | return img, local_img, label_tensor
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision.transforms as transforms
4 | import torch.optim as optim
5 |
6 | import os
7 | import time
8 |
9 | from ir_data import IR_FACE_Dataset
10 | from config import Config
11 | import numpy as np
12 |
13 | from torch.utils.data import Dataset, DataLoader
14 |
15 | from utils import AverageMeter
16 | # ----------------------------------
17 | if Config.use_model_type == 'LIGHT':
18 | from gaze_model_light_ver import Estimator
19 | elif Config.use_model_type == 'HEAVY' or Config.use_model_type == 'HEAVY+ATT':
20 | from gaze_model_heavy_ver import Estimator
21 |
22 | # ----------------------------------
23 |
24 | def train():
25 | torch.multiprocessing.freeze_support()
26 | train_transform = transforms.Compose([
27 | transforms.ToTensor(), # range [0, 255] -> [0.0,1.0]
28 | ])
29 |
30 | ir_dataset = IR_FACE_Dataset(data_path=Config.data_path, \
31 | img_w=Config.global_img_size[0] ,img_h=Config.global_img_size[1], img_local_h=Config.local_img_size[1], \
32 | transform=train_transform)
33 | ir_dataloader = DataLoader(ir_dataset, batch_size=Config.batch_size, \
34 | shuffle=True, num_workers=1)
35 |
36 | device = torch.device("cuda")
37 |
38 | # checkpt dir
39 | if os.path.exists(Config.save_path) == False:
40 | os.makedirs(Config.save_path)
41 |
42 | # model
43 | if Config.use_model_type == 'HEAVY+ATT':
44 | model = Estimator(use_attention_map=True).cuda()
45 | else:
46 | model = Estimator().cuda()
47 | model = model.to(device)
48 |
49 | # opt
50 | criterion = nn.CrossEntropyLoss().cuda()
51 | optimizer = optim.SGD(model.parameters(), lr=Config.lr, momentum=Config.momentum, \
52 | weight_decay=Config.weight_decay)
53 |
54 |
55 | for epoch_i in range(Config.max_epoch):
56 | model.train()
57 |
58 | #Config.lr = adjust_learning_rate_v2(optimizer, epoch_i - 1, Config)
59 | #for param_group in optimizer.param_groups:
60 | # param_group["lr"] = Config.lr
61 |
62 | iter_max = ir_dataset.__len__() // Config.batch_size
63 |
64 | # for print
65 | data_time = AverageMeter()
66 | losses = AverageMeter()
67 | top1 = AverageMeter()
68 | top5 = AverageMeter()
69 | end = time.time()
70 |
71 | dataiter = iter(ir_dataloader)
72 | steps_per_epoch = iter_max + 1
73 | #for ii, data in enumerate(ir_dataloader):
74 | for ii in range(steps_per_epoch):
75 |
76 | data_time.update(time.time() - end)
77 |
78 |             data_input, data_input_local, label = next(dataiter)
79 | data_input = data_input.to(device)
80 | targets = label.to(device)
81 | data_input_local = data_input_local.to(device)
82 |
83 |
84 |
85 | # optimizer step
86 | optimizer.zero_grad()
87 | outputs = model(data_input, data_input_local)
88 | loss = criterion(outputs, torch.argmax(targets, 1))
89 |
90 | loss.backward()
91 | optimizer.step()
92 |
93 | # measure accuracy and record loss
94 | total = data_input.size(0)
95 | _, predicted = outputs.max(1)
96 | correct = predicted.eq(torch.argmax(targets,1)).sum().item()
97 | top1.update(100.*correct/total)
98 |
99 | losses.update(loss.item(), data_input.size(0))
100 |
101 |
102 | end = time.time()
103 |
104 | if ii % Config.print_iter == 0:
105 | print('\nEpoch: [%d | %d], Iter : [%d | %d] LR: %f | Loss : %f | top1 : %.4f | batch_time : %.3f' \
106 | % (epoch_i, Config.max_epoch, ii, iter_max + 1, Config.lr, losses.avg, top1.avg, data_time.val))
107 |
108 |
109 | # measure elapsed time
110 |
111 |
112 | # save model
113 | if epoch_i % Config.save_epoch == 0:
114 | torch.save({'state_dict' : model.state_dict(), 'opt' : optimizer.state_dict()}, \
115 | Config.save_path + "/check_" + str(epoch_i) + ".pth")
116 |
117 |
118 |
119 | # not using -
120 | def adjust_learning_rate(optimizer, epoch, config):
121 | global state
122 | if epoch in config.schedule:
123 | config.lr *= config.gamma
124 | for param_group in optimizer.param_groups:
125 | param_group['lr'] = config.lr
126 |
127 | def adjust_learning_rate_v2(optimizer, epoch, config):
128 | lr = config.lr * (0.1 ** (epoch // 10))
129 | return lr
130 |
131 | if __name__ == '__main__':
132 | train()
--------------------------------------------------------------------------------
/gaze_estimation/v3_pytorch_model/utils.py:
--------------------------------------------------------------------------------
1 | class AverageMeter(object):
2 | """Computes and stores the average and current value
3 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
4 | """
5 | def __init__(self):
6 | self.reset()
7 |
8 | def reset(self):
9 | self.val = 0
10 | self.avg = 0
11 | self.sum = 0
12 | self.count = 0
13 |
14 | def update(self, val, n=1):
15 | self.val = val
16 | self.sum += val * n
17 | self.count += n
18 | self.avg = self.sum / self.count
19 |
20 |
21 | # accuracy of gaze
22 | def accuracy(output, target, topk=(1,)):
23 | """Computes the precision@k for the specified values of k"""
24 | maxk = max(topk)
25 | batch_size = target.size(0)
26 |
27 | _, pred = output.topk(maxk, 1, True, True)
28 | pred = pred.t()
29 | correct = pred.eq(target.view(1, -1).expand_as(pred))
30 |
31 |
32 | res = []
33 | for k in topk:
34 | correct_k = correct[:k].reshape(-1).float().sum(0)
35 | res.append(correct_k.mul_(100.0 / batch_size))
36 | return res
37 |
38 |
39 | def data_from_captue(img, use_fld=False):
40 |     pass  # incomplete stub
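
A short usage sketch for the helpers above with dummy tensors: accuracy() expects raw logits and integer class targets, and AverageMeter keeps a running mean across batches.

# Sketch only: dummy logits/targets stand in for real model outputs.
import torch
from utils import AverageMeter, accuracy

logits = torch.randn(8, 6)             # batch of 8, six gaze classes
targets = torch.randint(0, 6, (8,))    # integer class labels

top1_meter = AverageMeter()
(prec1,) = accuracy(logits, targets, topk=(1,))
top1_meter.update(prec1.item(), n=logits.size(0))
print(top1_meter.avg)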
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | # Using this code to force the usage of any specific GPUs
3 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
4 | import argparse
5 | import os
6 | import random
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.parallel
10 | import torch.backends.cudnn as cudnn
11 | import torch.optim as optim
12 | import torch.utils.data
13 | import torchvision.datasets as dset
14 | import torch.utils.data as data
15 | import time
16 | import numpy as np
17 | import torchvision.utils as vutils
18 | from torch.autograd import Variable
19 | from math import log10
20 | import torchvision
21 | import cv2
22 | import skimage
23 | import scipy.misc  # needed for scipy.misc.toimage() below
24 | import glob
25 | import matplotlib.image as mpimg
26 | import matplotlib.pyplot as plt
27 | from model import losses
28 | from model.networks import *
29 | from util.model_storage import save_checkpoint
30 | from data.dataloader import *
31 |
32 | parser = argparse.ArgumentParser()
33 | parser.add_argument("--pretrained", default="./pretrained/weight.pth", type=str, help="path to pretrained model (default: none)")
34 | parser.add_argument("--batch_size", default="8", type=int, help="Batch size used for testing")
35 | parser.add_argument("--image_dir", default="./data/test_img/", type=str, help="Directory containing the test images")
36 | parser.add_argument("--image_list", default="./data/test_fileList.txt", type=str, help="Text file listing the test images")
37 |
38 | global opt,model
39 | opt = parser.parse_args()
40 |
41 | fsrnet = define_G(input_nc = 3, output_nc = 3, ngf=64, which_model_netG=0)
42 |
43 | if torch.cuda.is_available():
44 | fsrnet = fsrnet.cuda()
45 |
46 | if opt.pretrained:
47 | if os.path.isfile(opt.pretrained):
48 | print("=> loading model '{}'".format(opt.pretrained))
49 | weights = torch.load(opt.pretrained)
50 |
51 | pretrained_dict = weights['model'].state_dict()
52 | model_dict = fsrnet.state_dict()
53 |
54 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
55 | model_dict.update(pretrained_dict)
56 |
57 | fsrnet.load_state_dict(model_dict)
58 | else:
59 | print("=> no model found at '{}'".format(opt.pretrained))
60 |
61 | demo_dataset = TestDatasetFromFile(
62 | opt.image_list,
63 | opt.image_dir)
64 | test_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=opt.batch_size, num_workers=8, drop_last=True,
65 | pin_memory=True)
66 |
67 | for iteration, batch in enumerate(test_data_loader):
68 | input = Variable(batch[0])
69 | input = input.cuda()
70 | upscaled,boundaries,reconstructed = fsrnet(input)
71 |
72 | if not os.path.isdir('./test_result/Coarse_SR_network'):
73 | os.makedirs('./test_result/Coarse_SR_network')
74 | if not os.path.isdir('./test_result/Prior_Estimation'):
75 | os.makedirs('./test_result/Prior_Estimation')
76 | if not os.path.isdir('./test_result/Final_SR_reconstruction'):
77 | os.makedirs('./test_result/Final_SR_reconstruction')
78 |
79 | for index in range(opt.batch_size):
80 | final_output = reconstructed.permute(0,2,3,1).detach().cpu().numpy()
81 | final_output_0 = final_output[index,:,:,:]
82 |
83 | estimated_boundary = boundaries.permute(0,2,3,1).detach().cpu().numpy()
84 | estimated_boundary_0 = estimated_boundary[index,:,:,0]
85 |
86 | output = upscaled.permute(0,2,3,1).detach().cpu().numpy()
87 | output_0 = output[index,:,:,:]
88 |
89 | img_num = iteration*opt.batch_size + index
90 |
91 | scipy.misc.toimage(output_0 * 255, high=255, low=0, cmin=0, cmax=255).save(
92 | './test_result/Coarse_SR_network/%4d.jpg'% (img_num))
93 | scipy.misc.toimage(estimated_boundary_0 * 255, high=255, low=0, cmin=0, cmax=255).save(
94 | './test_result/Prior_Estimation/%4d.jpg' % (img_num))
95 | scipy.misc.toimage(final_output_0 * 255, high=255, low=0, cmin=0, cmax=255).save(
96 | './test_result/Final_SR_reconstruction/%4d.jpg' % (img_num))
97 |
98 |
99 |
--------------------------------------------------------------------------------
/webcam_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import torch.backends.cudnn as cudnn
9 |
10 | from face_detection.model.prior_box import PriorBox
11 | from face_detection.model.retinaface import RetinaFace
12 | from face_detection.utils.misc import draw_keypoint, inference
13 |
14 | parser = argparse.ArgumentParser(description='PIMNet')
15 | parser.add_argument(
16 | '--checkpoint', type=str,
17 | default='face_detection/weights/mobilenet0.25_final.pt',
18 | help='Trained state_dict file path to open'
19 | )
20 | parser.add_argument(
21 | '--cpu', action="store_true", default=False,
22 | help='Use cpu inference'
23 | )
24 | parser.add_argument(
25 | '--jit', action="store_true", default=False,
26 | help='Use JIT'
27 | )
28 | parser.add_argument(
29 | '--confidence-threshold', type=float, default=0.02,
30 | help='confidence_threshold'
31 | )
32 | parser.add_argument(
33 | '--nms-threshold', type=float, default=0.4,
34 | help='nms_threshold'
35 | )
36 | parser.add_argument(
37 | '--vis-thres', type=float, default=0.5,
38 | help='visualization_threshold'
39 | )
40 | parser.add_argument(
41 | '-s', '--save-image', action="store_true", default=False,
42 | help='show detection results'
43 | )
44 | parser.add_argument(
45 | '--save-dir', type=str, default='demo',
46 | help='Dir to save results'
47 | )
48 |
49 |
50 | def main():
51 | args = parser.parse_args()
52 | assert os.path.isfile(args.checkpoint)
53 |
54 | checkpoint = torch.load(args.checkpoint, map_location="cpu")
55 | cfg = checkpoint["config"]
56 | device = torch.device("cpu" if args.cpu else "cuda")
57 |
58 | # net and model
59 | detector = RetinaFace(**cfg)
60 | detector.load_state_dict(checkpoint["net_state_dict"])
61 | detector.eval().requires_grad_(False)
62 | detector.to(device)
63 | print('Finished loading model!')
64 | cudnn.benchmark = True
65 |
66 | # prepare testing
67 | cap = cv2.VideoCapture(0)
68 | assert cap.isOpened()
69 | ret_val, img_tmp = cap.read()
70 | im_height, im_width, _ = img_tmp.shape
71 | scale = torch.Tensor([im_width, im_height, im_width, im_height])
72 | scale = scale.to(device)
73 |
74 | scale1 = torch.Tensor([im_width, im_height] * 5)
75 | scale1 = scale1.to(device)
76 |
77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
78 | priors = priorbox.forward()
79 | priors = priors.to(device)
80 | prior_data = priors.data
81 |
82 | if args.jit:
83 | img_tmp = img_tmp.transpose(2, 0, 1)
84 | img_tmp = np.float32(img_tmp)
85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0)
86 | dummy = img_tmp.to(device)
87 | detector = torch.jit.trace(detector, example_inputs=dummy)
88 |
89 | if args.save_image:
90 | nframe = 0
91 | fname = os.path.join(args.save_dir, "{:06d}.jpg")
92 | os.makedirs(args.save_dir, exist_ok=True)
93 |
94 | # testing begin
95 | ret_val, img_raw = cap.read()
96 | while ret_val:
97 | start = cv2.getTickCount()
98 |
99 | # NOTE preprocessing.
100 | dets = inference(
101 | detector, img_raw, scale, scale1, prior_data, cfg,
102 | args.confidence_threshold, args.nms_threshold, device
103 | )
104 |
105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start))
106 | cv2.putText(
107 | img_raw, f"FPS: {fps:.1f}", (5, 15),
108 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)
109 | )
110 |
111 | # show image
112 | draw_keypoint(img_raw, dets, args.vis_thres)
113 |
114 | if args.save_image:
115 | cv2.imwrite(fname.format(nframe), img_raw)
116 | nframe += 1
117 |
118 | cv2.imshow("Webcam Demo", img_raw)
119 | if cv2.waitKey(1) == 27: # Press ESC button to quit.
120 | break
121 |
122 | ret_val, img_raw = cap.read()
123 |
124 | cap.release()
125 | cv2.destroyAllWindows()
126 |
127 |
128 | if __name__ == "__main__":
129 | main()
--------------------------------------------------------------------------------