├── LICENSE ├── README.md ├── data ├── dataloader.py └── test_fileList.txt ├── environment.yml ├── face_alignment ├── README.md ├── figure │ ├── figure1.png │ └── figure2.png ├── models │ ├── ZF_deploy.prototxt │ ├── ZF_local_solver.prototxt │ ├── ZF_local_train.prototxt │ ├── ZF_solver.prototxt │ ├── ZF_train.prototxt │ ├── list_train_global_front.txt │ ├── list_train_global_left.txt │ ├── list_train_global_right.txt │ ├── list_train_init_semifront.txt │ ├── mean_shapes.txt │ ├── shape_parameter_U_front.txt │ ├── shape_parameter_U_left.txt │ ├── shape_parameter_U_right.txt │ ├── shape_parameter_U_wild.txt │ ├── shape_parameter_s_front.txt │ ├── shape_parameter_s_left.txt │ ├── shape_parameter_s_right.txt │ ├── shape_parameter_s_wild.txt │ ├── warped_mean_front.bmp │ ├── warped_mean_left.bmp │ └── warped_mean_right.bmp └── python │ ├── fa_util.py │ ├── fa_util_train.py │ ├── face_alignment.py │ ├── make_wild_input.py │ └── test_300w_public.py ├── face_detection ├── .gitignore ├── LICENSE.MIT ├── NOTICE ├── README.md ├── convert_to_onnx.py ├── data │ ├── FDDB │ │ └── img_list.txt │ ├── __init__.py │ ├── config.py │ ├── data_augment.py │ └── wider_face.py ├── detect.py ├── environment.yml ├── model │ ├── multibox_loss.py │ ├── networks.py │ ├── prior_box.py │ └── retinaface.py ├── test_fddb.py ├── test_widerface.py ├── train_detector.py ├── utils │ ├── __init__.py │ ├── box_utils.py │ ├── misc.py │ └── timer.py ├── webcam_demo.py ├── weights │ ├── mobilenet0.25_final.pt │ └── mobilenet0.25_pretrain.pt └── widerface_evaluate │ ├── README.md │ ├── box_overlaps.pyx │ ├── evaluation.py │ ├── ground_truth │ ├── wider_easy_val.mat │ ├── wider_face_val.mat │ ├── wider_hard_val.mat │ └── wider_medium_val.mat │ ├── setup.py │ └── widerface_txt │ ├── 24--Soldier_Firing │ ├── 24_Soldier_Firing_Soldier_Firing_24_10.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_1037.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_115.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_129.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_133.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_15.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_254.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_264.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_268.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_281.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_315.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_329.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_368.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_372.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_405.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_431.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_523.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_540.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_601.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_633.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_644.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_67.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_691.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_702.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_703.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_763.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_812.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_824.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_887.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_890.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_901.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_904.txt │ ├── 24_Soldier_Firing_Soldier_Firing_24_931.txt │ └── 24_Soldier_Firing_Soldier_Firing_24_95.txt │ └── 40--Gymnastics │ ├── 40_Gymnastics_Gymnastics_40_1022.txt │ ├── 
40_Gymnastics_Gymnastics_40_1035.txt │ ├── 40_Gymnastics_Gymnastics_40_1043.txt │ ├── 40_Gymnastics_Gymnastics_40_1044.txt │ ├── 40_Gymnastics_Gymnastics_40_108.txt │ ├── 40_Gymnastics_Gymnastics_40_115.txt │ ├── 40_Gymnastics_Gymnastics_40_138.txt │ ├── 40_Gymnastics_Gymnastics_40_156.txt │ ├── 40_Gymnastics_Gymnastics_40_161.txt │ ├── 40_Gymnastics_Gymnastics_40_171.txt │ ├── 40_Gymnastics_Gymnastics_40_175.txt │ ├── 40_Gymnastics_Gymnastics_40_197.txt │ ├── 40_Gymnastics_Gymnastics_40_24.txt │ ├── 40_Gymnastics_Gymnastics_40_255.txt │ ├── 40_Gymnastics_Gymnastics_40_260.txt │ ├── 40_Gymnastics_Gymnastics_40_273.txt │ ├── 40_Gymnastics_Gymnastics_40_274.txt │ ├── 40_Gymnastics_Gymnastics_40_285.txt │ ├── 40_Gymnastics_Gymnastics_40_331.txt │ ├── 40_Gymnastics_Gymnastics_40_361.txt │ ├── 40_Gymnastics_Gymnastics_40_364.txt │ ├── 40_Gymnastics_Gymnastics_40_389.txt │ └── 40_Gymnastics_Gymnastics_40_401.txt ├── face_recognition ├── config.py ├── model_atari.py ├── test.py └── train.py ├── gaze_estimation ├── README.md ├── example_movie │ └── media2_slow.avi ├── v1_caffe_model │ ├── ir_gaze_deploy.prototxt │ ├── ir_gaze_solver.prototxt │ └── ir_gaze_train_val.prototxt ├── v2_tensorflow_model │ ├── model.py │ ├── opt.py │ ├── test_sequences.py │ └── train.py └── v3_pytorch_model │ ├── config.py │ ├── gaze_model_heavy_ver.py │ ├── gaze_model_light_ver.py │ ├── ir_data.py │ ├── train.py │ └── utils.py ├── test.py └── webcam_demo.py /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 
43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PIMNet_Internal_Environment_Recognition 2 | ## Overview 3 | This project is open software for internal environment recognition for ADAS. 
4 | This project includes: 5 | - Face Detection 6 | - Face Landmarks Detection / Face Alignment 7 | - Gaze Estimation 8 | - Face Recognition 9 | 10 | ### 11 | Project page : http://imlab.postech.ac.kr/opensw.htm 12 | -------------------------------------------------------------------------------- /data/dataloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import torch.utils.data as data 4 | from os import listdir 5 | import os 6 | from os.path import join 7 | from PIL import Image, ImageOps 8 | import random 9 | import torchvision.transforms as transforms 10 | import cv2 11 | import numpy as np 12 | from torch.autograd import Variable 13 | import matplotlib.pyplot as plt 14 | 15 | 16 | def loadFromFile(path, datasize): 17 | if path is None: 18 | return None, None 19 | 20 | # print("Load from file %s" % path) 21 | f = open(path) 22 | data = [] 23 | for idx in range(0, datasize): 24 | line = f.readline() 25 | line = line[:-1] 26 | data.append(line) 27 | f.close() 28 | return data 29 | 30 | 31 | def load_lr_hr_prior(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False, 32 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True): 33 | if input_width is None: 34 | input_width = input_height 35 | if output_width is None: 36 | output_width = output_height 37 | 38 | img = cv2.imread(file_path) 39 | # img = Image.open(file_path) 40 | 41 | if is_gray is False: 42 | b, g, r = cv2.split(img) 43 | img = cv2.merge([r, g, b]) 44 | if is_gray is True: 45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 46 | 47 | if is_mirror and random.randint(0, 1) is 0: 48 | img = ImageOps.mirror(img) 49 | 50 | if input_height is not None: 51 | img = cv2.resize(img, (input_width, input_height), interpolation=cv2.INTER_CUBIC) 52 | 53 | if is_parsing_map: 54 | str = ['skin.png','lbrow.png','rbrow.png','leye.png','reye.png','lear.png','rear.png','nose.png','mouth.png','ulip.png','llip.png'] 55 | 56 | hms = np.zeros((64, 64, 128)) 57 | 58 | for i in range(len(str)): 59 | (onlyfilePath, img_name) = os.path.split(file_path) 60 | full_name = onlyfilePath + "/Parsing_Maps/" + img_name[:-4] + "_"+ str[i] 61 | hm = cv2.imread(full_name, cv2.IMREAD_GRAYSCALE) 62 | hm_resized = cv2.resize(hm, (64, 64), interpolation=cv2.INTER_CUBIC) / 255.0 63 | hms[:, :, i] = hm_resized 64 | hms[:, :, i+11] = hm_resized 65 | hms[:, :, i+22] = hm_resized 66 | hms[:, :, i+33] = hm_resized 67 | hms[:, :, i+44] = hm_resized 68 | hms[:, :, i+55] = hm_resized 69 | hms[:, :, i+66] = hm_resized 70 | hms[:, :, i+77] = hm_resized 71 | hms[:, :, i+88] = hm_resized 72 | hms[:, :, i+99] = hm_resized 73 | hms[:, :, i+110] = hm_resized 74 | is_bigger = i+121 < 128 75 | if is_bigger: 76 | hms[:, :, i+121] = hm_resized 77 | 78 | 79 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 80 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC) 81 | 82 | if is_scale_back: 83 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 84 | return img_lr, img, hms 85 | else: 86 | return img_lr, img, hms 87 | 88 | def load_lr(file_path, input_height=128, input_width=128, output_height=128, output_width=128, is_mirror=False, 89 | is_gray=True, scale=8.0, is_scale_back=True, is_parsing_map=True): 90 | if input_width is None: 91 | input_width = input_height 92 | if output_width is None: 93 | output_width = 
output_height 94 | 95 | img = cv2.imread(file_path) 96 | # img = Image.open(file_path) 97 | 98 | if is_gray is False: 99 | b, g, r = cv2.split(img) 100 | img = cv2.merge([r, g, b]) 101 | if is_gray is True: 102 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 103 | 104 | if is_mirror and random.randint(0, 1) is 0: 105 | img = ImageOps.mirror(img) 106 | 107 | img = cv2.resize(img, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 108 | img_lr = cv2.resize(img, (int(output_width / scale), int(output_height / scale)), interpolation=cv2.INTER_CUBIC) 109 | hms = np.zeros((64, 64, 128)) 110 | 111 | if is_scale_back: 112 | img_lr = cv2.resize(img_lr, (output_width, output_height), interpolation=cv2.INTER_CUBIC) 113 | return img_lr, img, hms 114 | else: 115 | return img_lr, img, hms 116 | 117 | 118 | class ImageDatasetFromFile(data.Dataset): 119 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128, 120 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True): 121 | super(ImageDatasetFromFile, self).__init__() 122 | 123 | self.image_filenames = image_list 124 | self.upscale = upscale 125 | self.is_mirror = is_mirror 126 | self.img_path = img_path 127 | self.input_height = input_height 128 | self.input_width = input_width 129 | self.output_height = output_height 130 | self.output_width = output_width 131 | self.is_scale_back = is_scale_back 132 | self.is_gray = is_gray 133 | self.is_parsing_map = is_parsing_map 134 | 135 | self.input_transform = transforms.Compose([ 136 | transforms.ToTensor()]) 137 | 138 | def __getitem__(self, idx): 139 | 140 | if self.is_mirror: 141 | is_mirror = random.randint(0, 1) is 0 142 | else: 143 | is_mirror = False 144 | 145 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines())) 146 | fullpath = join(self.img_path, image_filenames[idx]) 147 | 148 | lr, hr, pm = load_lr_hr_prior(fullpath, 149 | self.input_height, self.input_width, self.output_height, self.output_width, 150 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back, 151 | self.is_parsing_map) 152 | 153 | input = self.input_transform(lr) 154 | target = self.input_transform(hr) 155 | parsing_map = self.input_transform(pm) 156 | 157 | return input, target, parsing_map 158 | 159 | def __len__(self): 160 | return len(open(self.image_filenames, 'rU').readlines()) 161 | 162 | 163 | class TestDatasetFromFile(data.Dataset): 164 | def __init__(self, image_list, img_path, input_height=128, input_width=128, output_height=128, output_width=128, 165 | is_mirror=False, is_gray=False, upscale=8.0, is_scale_back=True, is_parsing_map=True): 166 | super(TestDatasetFromFile, self).__init__() 167 | 168 | self.image_filenames = image_list 169 | self.upscale = upscale 170 | self.is_mirror = is_mirror 171 | self.img_path = img_path 172 | self.input_height = input_height 173 | self.input_width = input_width 174 | self.output_height = output_height 175 | self.output_width = output_width 176 | self.is_scale_back = is_scale_back 177 | self.is_gray = is_gray 178 | self.is_parsing_map = is_parsing_map 179 | 180 | self.input_transform = transforms.Compose([ 181 | transforms.ToTensor()]) 182 | 183 | def __getitem__(self, idx): 184 | 185 | if self.is_mirror: 186 | is_mirror = random.randint(0, 1) is 0 187 | else: 188 | is_mirror = False 189 | 190 | image_filenames = loadFromFile(self.image_filenames, len(open(self.image_filenames, 'r').readlines())) 191 | fullpath = 
join(self.img_path, image_filenames[idx]) 192 | 193 | lr, hr, pm = load_lr(fullpath, 194 | self.input_height, self.input_width, self.output_height, self.output_width, 195 | self.is_mirror, self.is_gray, self.upscale, self.is_scale_back, 196 | self.is_parsing_map) 197 | 198 | input = self.input_transform(lr) 199 | target = self.input_transform(hr) 200 | parsing_map = self.input_transform(pm) 201 | 202 | 203 | return input, target, parsing_map 204 | 205 | def __len__(self): 206 | return len(open(self.image_filenames, 'rU').readlines()) 207 | 208 | 209 | # demo_dataset = ImageDatasetFromFile("/home/cydia/文档/毕业设计/make_Face_boundary/81_landmarks/fileList.txt", 210 | # "/home/cydia/图片/sample/") 211 | # 212 | # train_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=1, num_workers=8) 213 | 214 | if __name__ == '__main__': 215 | for titer, batch in enumerate(train_data_loader): 216 | input, target, heatmaps = Variable(batch[0]), Variable(batch[1]), Variable(batch[2]) 217 | 218 | Input = input.permute(0, 2, 3, 1).cpu().data.numpy() 219 | Target = target.permute(0, 2, 3, 1).cpu().data.numpy() 220 | Parsing_maps = heatmaps.permute(0, 2, 3, 1).cpu().data.numpy() 221 | 222 | plt.figure("Input Image") 223 | plt.imshow(Input[0, :, :, :]) 224 | plt.axis('on') 225 | plt.title('image') 226 | plt.show() 227 | 228 | plt.figure("Target Image") 229 | plt.imshow(Target[0, :, :, :]) 230 | plt.axis('on') 231 | plt.title('Target') 232 | plt.show() 233 | 234 | plt.figure("HMS") 235 | plt.imshow(Parsing_maps[0, :, :, 0]) 236 | plt.axis('on') 237 | plt.title('OMS') 238 | plt.show() 239 | -------------------------------------------------------------------------------- /data/test_fileList.txt: -------------------------------------------------------------------------------- 1 | 0.jpg 2 | 1.jpg 3 | 2.jpg 4 | 3.jpg 5 | 4.jpg 6 | 5.jpg 7 | 6.jpg 8 | 7.jpg 9 | 8.jpg 10 | 9.jpg 11 | 10.jpg 12 | 11.jpg 13 | 12.jpg 14 | 13.jpg 15 | 14.jpg 16 | 15.jpg 17 | 16.jpg 18 | 17.jpg 19 | 18.jpg 20 | 19.jpg 21 | 20.jpg 22 | 21.jpg 23 | 22.jpg 24 | 23.jpg 25 | 24.jpg 26 | 25.jpg 27 | 26.jpg 28 | 27.jpg 29 | 28.jpg 30 | 29.jpg 31 | 30.jpg 32 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pimnet 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - cudatoolkit=11.3 6 | - matplotlib 7 | - pip 8 | - python=3.9 9 | - pytorch::pytorch=1.10.1 10 | - pytorch::torchvision 11 | - scikit-image 12 | - scipy 13 | - tqdm 14 | - pip: 15 | - opencv-python -------------------------------------------------------------------------------- /face_alignment/README.md: -------------------------------------------------------------------------------- 1 | # Stage-wise Face Alignment using Global and Local Regressors 2 | 3 | This is a caffe-python implementation on Windows 10 for face alignment. 4 | 5 | We implemented two-kind of methods.
6 | 7 | Method 1 repeats global and local regression after the initialization regression
8 | ![Method 1](figure/figure1.png)

9 | 10 | Method 2 repeats local refinement regression after the initialization regression
11 | ![Method 2](figure/figure2.png)

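The two methods differ only in which regressors are repeated once the shape has been initialized. The sketch below illustrates the two inference loops under that reading; `global_reg` and `local_reg` stand for callables wrapping the trained global and local Caffe networks and are hypothetical names, not this repository's actual API (the implementation lives under `python/`).

```
# Rough sketch of the two inference loops (hypothetical helper interface).
# `global_reg(image, shape)` and `local_reg(image, shape)` are assumed to be
# callables that wrap the trained Caffe regressors and return an updated
# 68-point shape; they are not actual function names from this repository.

def align_face_method1(image, init_shape, global_reg, local_reg, num_stages=3):
    """Method 1: alternate global and local regression after initialization."""
    shape = init_shape
    for _ in range(num_stages):
        shape = global_reg(image, shape)  # whole-shape update from the face crop
        shape = local_reg(image, shape)   # per-landmark refinement from local patches
    return shape

def align_face_method2(image, init_shape, local_reg, num_stages=3):
    """Method 2: repeat only the local refinement after initialization."""
    shape = init_shape
    for _ in range(num_stages):
        shape = local_reg(image, shape)
    return shape
```

Three global/local rounds correspond to the Global1–3 / Local1–3 stages reported in the evaluation table below.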
12 | 13 | ## Evaluation on the 300-W public test set 14 |
15 | 16 | | Method | Common | Challenging | Full | 17 | |:-------|:--------:|:-----:|:-------:| 18 | | Stage(Projection) | 8.24 | 12.56 | 9.07 | 19 | | Stage(Adjustment) | 6.25 | 10.16 | 7.02 | 20 | | Stage(Global1) | 4.66 | 8.20 | 5.35 | 21 | | Stage(Local1) | 3.45 | 6.49 | 4.05 | 22 | | Stage(Global2) | 3.59 | 6.62 | 4.18 | 23 | | Stage(Local2) | 3.29 | 6.14 | 3.85 | 24 | | Stage(Global3) | 3.48 | 6.37 | 4.05 | 25 | | Stage(Local3) | 3.28 | 6.09 | 3.83 | 26 | | Regression(Wild, simple net) | 4.07 | 6.90 | 4.62 | 27 | | Regression(Wild, ResNet50) | 3.72 | 6.44 | 4.25 | 28 |
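The table reports landmark localization error at each stage (lower is better); the stage-wise rows show the error decreasing from the initial projection to the final local stage. The README does not spell out the metric, so as an assumption for orientation only: 300-W results like these are usually the point-to-point error averaged over the 68 landmarks and normalized by an inter-ocular (or inter-pupil) distance, i.e.

$$\mathrm{err} = \frac{100}{N}\sum_{i=1}^{N}\frac{\lVert p_i - \hat{p}_i\rVert_2}{d_{\mathrm{norm}}},\qquad N = 68,$$

where $p_i$ and $\hat{p}_i$ are the ground-truth and predicted landmark positions and $d_{\mathrm{norm}}$ is the chosen normalization distance.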
29 | 30 | ## Usage 31 | 32 | ### For Training 33 | 1. Clone the repository 34 | ``` 35 | git clone https://github.com/hyunsungP/facelignmentregression 36 | ``` 37 | 38 | 2. make data files (.h5) 39 | ``` 40 | make_wild_input.py 41 | ``` 42 | and so on. 43 | 44 | 3. make data file list \ 45 | Refer to models/list_train_*.txt 46 | 47 | 4. training \ 48 | On console window with caffe 49 | ``` 50 | caffe train --solver=models/ZF_solver.prototxt --gpu=0 51 | ``` 52 | 53 | Other network are same. 54 | 55 | ### For Testing 56 | Change prototxt path in the source code. 57 | ``` 58 | test_300w_public.py 59 | ``` 60 | 61 | Other models will be uploaded. 62 | 63 | -------------------------------------------------------------------------------- /face_alignment/figure/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure1.png -------------------------------------------------------------------------------- /face_alignment/figure/figure2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/figure/figure2.png -------------------------------------------------------------------------------- /face_alignment/models/ZF_deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_68" 2 | 3 | #------------------------------- input ---------------------------- 4 | input: "img" 5 | input_dim: 1 6 | input_dim: 3 7 | input_dim: 224 8 | input_dim: 224 9 | 10 | layer { 11 | name: "scale_and_shift" 12 | bottom: "img" 13 | top: "scale_and_shift" 14 | type: "Scale" 15 | param{ 16 | lr_mult: 0 17 | decay_mult: 0 18 | } 19 | param{ 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | scale_param{ 24 | filler{ 25 | type: "constant" 26 | value: 0.00392156862745 27 | } 28 | bias_term: true 29 | bias_filler { 30 | type: "constant" 31 | value: 0 32 | } 33 | } 34 | } 35 | # ----------------------- ZF ------------------- 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "scale_and_shift" 40 | top: "conv1" 41 | param { 42 | #learning rate 43 | lr_mult: 1.0 44 | } 45 | param { 46 | lr_mult: 2.0 47 | } 48 | convolution_param { 49 | num_output: 96 50 | kernel_size: 7 51 | pad: 3 52 | stride: 2 53 | weight_filler { 54 | type: "gaussian" 55 | std: 0.01 56 | } 57 | bias_filler { 58 | type: "constant" 59 | value: 0 60 | } 61 | } 62 | } 63 | 64 | layer { 65 | name: "relu1" 66 | type: "ReLU" 67 | bottom: "conv1" 68 | top: "conv1" 69 | } 70 | 71 | layer { 72 | name: "norm1" 73 | type: "LRN" 74 | bottom: "conv1" 75 | top: "norm1" 76 | lrn_param { 77 | local_size: 3 78 | alpha: 0.00005 79 | beta: 0.75 80 | norm_region: WITHIN_CHANNEL 81 | } 82 | } 83 | 84 | layer { 85 | name: "pool1" 86 | type: "Pooling" 87 | bottom: "norm1" 88 | top: "pool1" 89 | pooling_param { 90 | kernel_size: 3 91 | stride: 2 92 | pad: 1 93 | pool: MAX 94 | } 95 | } 96 | 97 | layer { 98 | name: "conv2" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2" 102 | param { 103 | lr_mult: 1.0 104 | } 105 | param { 106 | lr_mult: 2.0 107 | } 108 | convolution_param { 109 | num_output: 256 110 | kernel_size: 5 111 | pad: 2 112 | stride: 2 113 | weight_filler { 114 | type: "gaussian" 115 | std: 0.01 116 | } 117 | bias_filler { 118 | 
type: "constant" 119 | value: 1 120 | } 121 | } 122 | } 123 | 124 | layer { 125 | name: "relu2" 126 | type: "ReLU" 127 | bottom: "conv2" 128 | top: "conv2" 129 | } 130 | 131 | layer { 132 | name: "norm2" 133 | type: "LRN" 134 | bottom: "conv2" 135 | top: "norm2" 136 | lrn_param { 137 | local_size: 3 138 | alpha: 0.00005 139 | beta: 0.75 140 | norm_region: WITHIN_CHANNEL 141 | } 142 | } 143 | 144 | layer { 145 | name: "pool2" 146 | type: "Pooling" 147 | bottom: "norm2" 148 | top: "pool2" 149 | pooling_param { 150 | kernel_size: 3 151 | stride: 2 152 | pad: 1 153 | pool: MAX 154 | } 155 | } 156 | 157 | layer { 158 | name: "conv3" 159 | type: "Convolution" 160 | bottom: "pool2" 161 | top: "conv3" 162 | param { 163 | lr_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | } 168 | convolution_param { 169 | num_output: 384 170 | kernel_size: 3 171 | pad: 1 172 | stride: 1 173 | weight_filler { 174 | type: "gaussian" 175 | std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | value: 0 180 | } 181 | } 182 | } 183 | 184 | layer { 185 | name: "relu3" 186 | type: "ReLU" 187 | bottom: "conv3" 188 | top: "conv3" 189 | } 190 | 191 | layer { 192 | name: "conv4" 193 | type: "Convolution" 194 | bottom: "conv3" 195 | top: "conv4" 196 | param { 197 | lr_mult: 1.0 198 | } 199 | param { 200 | lr_mult: 2.0 201 | } 202 | convolution_param { 203 | num_output: 384 204 | kernel_size: 3 205 | pad: 1 206 | stride: 1 207 | weight_filler { 208 | type: "gaussian" 209 | std: 0.01 210 | } 211 | bias_filler { 212 | type: "constant" 213 | value: 1 214 | } 215 | } 216 | } 217 | 218 | layer { 219 | name: "relu4" 220 | type: "ReLU" 221 | bottom: "conv4" 222 | top: "conv4" 223 | } 224 | 225 | layer { 226 | name: "conv5" 227 | type: "Convolution" 228 | bottom: "conv4" 229 | top: "conv5" 230 | param { 231 | lr_mult: 1.0 232 | } 233 | param { 234 | lr_mult: 2.0 235 | } 236 | convolution_param { 237 | num_output: 256 238 | kernel_size: 3 239 | pad: 1 240 | stride: 1 241 | weight_filler { 242 | type: "gaussian" 243 | std: 0.01 244 | } 245 | bias_filler { 246 | type: "constant" 247 | value: 1 248 | } 249 | } 250 | } 251 | 252 | layer { 253 | name: "relu5" 254 | type: "ReLU" 255 | bottom: "conv5" 256 | top: "conv5" 257 | } 258 | 259 | #-----------------------layer +------------------------- 260 | 261 | layer { 262 | name: "ip1" 263 | type: "InnerProduct" 264 | bottom: "conv5" 265 | top: "ip1" 266 | inner_product_param { 267 | num_output: 1024 268 | weight_filler { 269 | type: "xavier" 270 | } 271 | } 272 | } 273 | layer { 274 | name: "relu1" 275 | type: "ReLU" 276 | bottom: "ip1" 277 | top: "ip1" 278 | } 279 | layer { 280 | name: "ip2" 281 | type: "InnerProduct" 282 | bottom: "ip1" 283 | top: "ip2" 284 | inner_product_param { 285 | num_output: 1024 286 | weight_filler { 287 | type: "xavier" 288 | } 289 | } 290 | } 291 | layer { 292 | name: "relu2" 293 | type: "ReLU" 294 | bottom: "ip2" 295 | top: "ip2" 296 | } 297 | 298 | 299 | layer { 300 | name: "fc136" 301 | type: "InnerProduct" 302 | bottom: "ip2" 303 | top: "fc136" 304 | inner_product_param { 305 | num_output: 136 306 | weight_filler { 307 | type: "xavier" 308 | } 309 | } 310 | } 311 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_local_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/ZF_local_train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 70000 6 | display: 20 7 | max_iter: 200000 8 | momentum: 0.9 
9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 10000 13 | snapshot_prefix: "E:/FA/FA_CNN_HS22/caffemodels/FA_ZF_local" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_local_train.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_local" 2 | 3 | #------------------------------- input ---------------------------- 4 | layer { 5 | name: "data" 6 | type: "HDF5Data" 7 | top: "patch" 8 | top: "move" 9 | hdf5_data_param { 10 | source: "models/list_train_local.txt" 11 | batch_size: 24 12 | } 13 | } 14 | 15 | layer { 16 | name: "scale_and_shift" 17 | bottom: "patch" 18 | top: "scale_and_shift" 19 | type: "Scale" 20 | param{ 21 | lr_mult: 0 22 | decay_mult: 0 23 | } 24 | param{ 25 | lr_mult: 0 26 | decay_mult: 0 27 | } 28 | scale_param{ 29 | filler{ 30 | type: "constant" 31 | value: 1 32 | } 33 | bias_term: true 34 | bias_filler { 35 | type: "constant" 36 | value: -128 37 | } 38 | } 39 | } 40 | 41 | layer { 42 | name: "flatdata" 43 | type: "Flatten" 44 | bottom: "move" 45 | top: "flatdata" 46 | } 47 | 48 | 49 | #------------------------------- split ---------------------------- 50 | layer { 51 | name: "slicer" 52 | type: "Slice" 53 | bottom: "scale_and_shift" 54 | top: "patch_slice_1" 55 | top: "patch_slice_2" 56 | top: "patch_slice_3" 57 | top: "patch_slice_4" 58 | top: "patch_slice_5" 59 | top: "patch_slice_6" 60 | top: "patch_slice_7" 61 | top: "patch_slice_8" 62 | top: "patch_slice_9" 63 | top: "patch_slice_10" 64 | top: "patch_slice_11" 65 | top: "patch_slice_12" 66 | top: "patch_slice_13" 67 | top: "patch_slice_14" 68 | top: "patch_slice_15" 69 | top: "patch_slice_16" 70 | top: "patch_slice_17" 71 | top: "patch_slice_18" 72 | top: "patch_slice_19" 73 | top: "patch_slice_20" 74 | top: "patch_slice_21" 75 | top: "patch_slice_22" 76 | top: "patch_slice_23" 77 | top: "patch_slice_24" 78 | top: "patch_slice_25" 79 | top: "patch_slice_26" 80 | top: "patch_slice_27" 81 | top: "patch_slice_28" 82 | top: "patch_slice_29" 83 | top: "patch_slice_30" 84 | top: "patch_slice_31" 85 | top: "patch_slice_32" 86 | top: "patch_slice_33" 87 | top: "patch_slice_34" 88 | top: "patch_slice_35" 89 | top: "patch_slice_36" 90 | top: "patch_slice_37" 91 | top: "patch_slice_38" 92 | top: "patch_slice_39" 93 | top: "patch_slice_40" 94 | top: "patch_slice_41" 95 | top: "patch_slice_42" 96 | top: "patch_slice_43" 97 | top: "patch_slice_44" 98 | top: "patch_slice_45" 99 | top: "patch_slice_46" 100 | top: "patch_slice_47" 101 | top: "patch_slice_48" 102 | top: "patch_slice_49" 103 | top: "patch_slice_50" 104 | top: "patch_slice_51" 105 | top: "patch_slice_52" 106 | top: "patch_slice_53" 107 | top: "patch_slice_54" 108 | top: "patch_slice_55" 109 | top: "patch_slice_56" 110 | top: "patch_slice_57" 111 | top: "patch_slice_58" 112 | top: "patch_slice_59" 113 | top: "patch_slice_60" 114 | top: "patch_slice_61" 115 | top: "patch_slice_62" 116 | top: "patch_slice_63" 117 | top: "patch_slice_64" 118 | top: "patch_slice_65" 119 | top: "patch_slice_66" 120 | top: "patch_slice_67" 121 | top: "patch_slice_68" 122 | slice_param { 123 | axis:1 124 | slice_point: 3 125 | slice_point: 6 126 | slice_point: 9 127 | slice_point: 12 128 | slice_point: 15 129 | slice_point: 18 130 | slice_point: 21 131 | slice_point: 24 132 | slice_point: 27 133 | slice_point: 30 134 | slice_point: 33 135 | slice_point: 36 
136 | slice_point: 39 137 | slice_point: 42 138 | slice_point: 45 139 | slice_point: 48 140 | slice_point: 51 141 | slice_point: 54 142 | slice_point: 57 143 | slice_point: 60 144 | slice_point: 63 145 | slice_point: 66 146 | slice_point: 69 147 | slice_point: 72 148 | slice_point: 75 149 | slice_point: 78 150 | slice_point: 81 151 | slice_point: 84 152 | slice_point: 87 153 | slice_point: 90 154 | slice_point: 93 155 | slice_point: 96 156 | slice_point: 99 157 | slice_point: 102 158 | slice_point: 105 159 | slice_point: 108 160 | slice_point: 111 161 | slice_point: 114 162 | slice_point: 117 163 | slice_point: 120 164 | slice_point: 123 165 | slice_point: 126 166 | slice_point: 129 167 | slice_point: 132 168 | slice_point: 135 169 | slice_point: 138 170 | slice_point: 141 171 | slice_point: 144 172 | slice_point: 147 173 | slice_point: 150 174 | slice_point: 153 175 | slice_point: 156 176 | slice_point: 159 177 | slice_point: 162 178 | slice_point: 165 179 | slice_point: 168 180 | slice_point: 171 181 | slice_point: 174 182 | slice_point: 177 183 | slice_point: 180 184 | slice_point: 183 185 | slice_point: 186 186 | slice_point: 189 187 | slice_point: 192 188 | slice_point: 195 189 | slice_point: 198 190 | slice_point: 201 191 | } 192 | } 193 | 194 | # ----------------------- ZF ------------------- 195 | layer { 196 | name: "conv1" 197 | type: "Convolution" 198 | bottom: "scale_and_shift" 199 | top: "conv1" 200 | param { 201 | #learning rate 202 | lr_mult: 1.0 203 | } 204 | param { 205 | lr_mult: 2.0 206 | } 207 | convolution_param { 208 | num_output: 96 209 | kernel_size: 7 210 | pad: 3 211 | stride: 2 212 | weight_filler { 213 | type: "gaussian" 214 | std: 0.001 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 0 219 | } 220 | } 221 | } 222 | 223 | layer { 224 | name: "relu1" 225 | type: "ReLU" 226 | bottom: "conv1" 227 | top: "conv1" 228 | } 229 | 230 | layer { 231 | name: "norm1" 232 | type: "LRN" 233 | bottom: "conv1" 234 | top: "norm1" 235 | lrn_param { 236 | local_size: 3 237 | alpha: 0.00005 238 | beta: 0.75 239 | norm_region: WITHIN_CHANNEL 240 | } 241 | } 242 | 243 | layer { 244 | name: "pool1" 245 | type: "Pooling" 246 | bottom: "norm1" 247 | top: "pool1" 248 | pooling_param { 249 | kernel_size: 3 250 | stride: 2 251 | pad: 1 252 | pool: MAX 253 | } 254 | } 255 | 256 | layer { 257 | name: "conv2" 258 | type: "Convolution" 259 | bottom: "pool1" 260 | top: "conv2" 261 | param { 262 | lr_mult: 1.0 263 | } 264 | param { 265 | lr_mult: 2.0 266 | } 267 | convolution_param { 268 | num_output: 256 269 | kernel_size: 5 270 | pad: 2 271 | stride: 2 272 | weight_filler { 273 | type: "gaussian" 274 | std: 0.001 275 | } 276 | bias_filler { 277 | type: "constant" 278 | value: 1 279 | } 280 | } 281 | } 282 | 283 | layer { 284 | name: "relu2" 285 | type: "ReLU" 286 | bottom: "conv2" 287 | top: "conv2" 288 | } 289 | 290 | layer { 291 | name: "norm2" 292 | type: "LRN" 293 | bottom: "conv2" 294 | top: "norm2" 295 | lrn_param { 296 | local_size: 3 297 | alpha: 0.00005 298 | beta: 0.75 299 | norm_region: WITHIN_CHANNEL 300 | } 301 | } 302 | 303 | layer { 304 | name: "pool2" 305 | type: "Pooling" 306 | bottom: "norm2" 307 | top: "pool2" 308 | pooling_param { 309 | kernel_size: 3 310 | stride: 2 311 | pad: 1 312 | pool: MAX 313 | } 314 | } 315 | 316 | layer { 317 | name: "conv3" 318 | type: "Convolution" 319 | bottom: "pool2" 320 | top: "conv3" 321 | param { 322 | lr_mult: 1.0 323 | } 324 | param { 325 | lr_mult: 2.0 326 | } 327 | convolution_param { 328 | num_output: 384 329 | 
kernel_size: 3 330 | pad: 1 331 | stride: 1 332 | weight_filler { 333 | type: "gaussian" 334 | std: 0.001 335 | } 336 | bias_filler { 337 | type: "constant" 338 | value: 0 339 | } 340 | } 341 | } 342 | 343 | layer { 344 | name: "relu3" 345 | type: "ReLU" 346 | bottom: "conv3" 347 | top: "conv3" 348 | } 349 | 350 | layer { 351 | name: "conv4" 352 | type: "Convolution" 353 | bottom: "conv3" 354 | top: "conv4" 355 | param { 356 | lr_mult: 1.0 357 | } 358 | param { 359 | lr_mult: 2.0 360 | } 361 | convolution_param { 362 | num_output: 384 363 | kernel_size: 3 364 | pad: 1 365 | stride: 1 366 | weight_filler { 367 | type: "gaussian" 368 | std: 0.001 369 | } 370 | bias_filler { 371 | type: "constant" 372 | value: 1 373 | } 374 | } 375 | } 376 | 377 | layer { 378 | name: "relu4" 379 | type: "ReLU" 380 | bottom: "conv4" 381 | top: "conv4" 382 | } 383 | 384 | layer { 385 | name: "conv5" 386 | type: "Convolution" 387 | bottom: "conv4" 388 | top: "conv5" 389 | param { 390 | lr_mult: 1.0 391 | } 392 | param { 393 | lr_mult: 2.0 394 | } 395 | convolution_param { 396 | num_output: 256 397 | kernel_size: 3 398 | pad: 1 399 | stride: 1 400 | weight_filler { 401 | type: "gaussian" 402 | std: 0.001 403 | } 404 | bias_filler { 405 | type: "constant" 406 | value: 1 407 | } 408 | } 409 | } 410 | 411 | layer { 412 | name: "relu5" 413 | type: "ReLU" 414 | bottom: "conv5" 415 | top: "conv5" 416 | } 417 | 418 | #-----------------------layer +------------------------- 419 | 420 | layer { 421 | name: "ip1" 422 | type: "InnerProduct" 423 | bottom: "conv5" 424 | top: "ip1" 425 | inner_product_param { 426 | num_output: 1024 427 | weight_filler { 428 | type: "xavier" 429 | } 430 | } 431 | } 432 | layer { 433 | name: "relu1" 434 | type: "ReLU" 435 | bottom: "ip1" 436 | top: "ip1" 437 | } 438 | layer { 439 | name: "ip2" 440 | type: "InnerProduct" 441 | bottom: "ip1" 442 | top: "ip2" 443 | inner_product_param { 444 | num_output: 1024 445 | weight_filler { 446 | type: "xavier" 447 | } 448 | } 449 | } 450 | layer { 451 | name: "relu2" 452 | type: "ReLU" 453 | bottom: "ip2" 454 | top: "ip2" 455 | } 456 | 457 | 458 | layer { 459 | name: "fc136" 460 | type: "InnerProduct" 461 | bottom: "ip2" 462 | top: "fc136" 463 | inner_product_param { 464 | num_output: 136 465 | weight_filler { 466 | type: "xavier" 467 | } 468 | } 469 | } 470 | 471 | #------------------------------- loss ---------------------------- 472 | 473 | layer { 474 | name: "out" 475 | type: "EuclideanLoss" 476 | bottom: "fc136" 477 | bottom: "flatdata" 478 | top: "out" 479 | loss_weight: 1 480 | } 481 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/ZF_train.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 70000 6 | display: 20 7 | max_iter: 200000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 10000 13 | snapshot_prefix: "E:/FA/FA_CNN_HS20/caffemodels/FA_ZF_baseline" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /face_alignment/models/ZF_train.prototxt: -------------------------------------------------------------------------------- 1 | name: "FA_ZF_68" 2 | 3 | #------------------------------- input ---------------------------- 4 | layer { 5 | name: "data" 6 | type: "HDF5Data" 7 | top: 
"img" 8 | top: "pts" 9 | hdf5_data_param { 10 | source: "models/list_train_wild.txt" 11 | batch_size: 32 12 | } 13 | } 14 | 15 | 16 | layer { 17 | name: "scale_and_shift" 18 | bottom: "img" 19 | top: "scale_and_shift" 20 | type: "Scale" 21 | param{ 22 | lr_mult: 0 23 | decay_mult: 0 24 | } 25 | param{ 26 | lr_mult: 0 27 | decay_mult: 0 28 | } 29 | scale_param{ 30 | filler{ 31 | type: "constant" 32 | value: 0.00392156862745 33 | } 34 | bias_term: true 35 | bias_filler { 36 | type: "constant" 37 | value: 0 38 | } 39 | } 40 | } 41 | 42 | layer { 43 | name: "flatdata" 44 | type: "Flatten" 45 | bottom: "pts" 46 | top: "flatdata" 47 | } 48 | # ----------------------- ZF ------------------- 49 | layer { 50 | name: "conv1" 51 | type: "Convolution" 52 | bottom: "scale_and_shift" 53 | top: "conv1" 54 | param { 55 | #learning rate 56 | lr_mult: 1.0 57 | } 58 | param { 59 | lr_mult: 2.0 60 | } 61 | convolution_param { 62 | num_output: 96 63 | kernel_size: 7 64 | pad: 3 65 | stride: 2 66 | weight_filler { 67 | type: "gaussian" 68 | std: 0.001 69 | } 70 | bias_filler { 71 | type: "constant" 72 | value: 0 73 | } 74 | } 75 | } 76 | 77 | layer { 78 | name: "relu1" 79 | type: "ReLU" 80 | bottom: "conv1" 81 | top: "conv1" 82 | } 83 | 84 | layer { 85 | name: "norm1" 86 | type: "LRN" 87 | bottom: "conv1" 88 | top: "norm1" 89 | lrn_param { 90 | local_size: 3 91 | alpha: 0.00005 92 | beta: 0.75 93 | norm_region: WITHIN_CHANNEL 94 | } 95 | } 96 | 97 | layer { 98 | name: "pool1" 99 | type: "Pooling" 100 | bottom: "norm1" 101 | top: "pool1" 102 | pooling_param { 103 | kernel_size: 3 104 | stride: 2 105 | pad: 1 106 | pool: MAX 107 | } 108 | } 109 | 110 | layer { 111 | name: "conv2" 112 | type: "Convolution" 113 | bottom: "pool1" 114 | top: "conv2" 115 | param { 116 | lr_mult: 1.0 117 | } 118 | param { 119 | lr_mult: 2.0 120 | } 121 | convolution_param { 122 | num_output: 256 123 | kernel_size: 5 124 | pad: 2 125 | stride: 2 126 | weight_filler { 127 | type: "gaussian" 128 | std: 0.001 129 | } 130 | bias_filler { 131 | type: "constant" 132 | value: 1 133 | } 134 | } 135 | } 136 | 137 | layer { 138 | name: "relu2" 139 | type: "ReLU" 140 | bottom: "conv2" 141 | top: "conv2" 142 | } 143 | 144 | layer { 145 | name: "norm2" 146 | type: "LRN" 147 | bottom: "conv2" 148 | top: "norm2" 149 | lrn_param { 150 | local_size: 3 151 | alpha: 0.00005 152 | beta: 0.75 153 | norm_region: WITHIN_CHANNEL 154 | } 155 | } 156 | 157 | layer { 158 | name: "pool2" 159 | type: "Pooling" 160 | bottom: "norm2" 161 | top: "pool2" 162 | pooling_param { 163 | kernel_size: 3 164 | stride: 2 165 | pad: 1 166 | pool: MAX 167 | } 168 | } 169 | 170 | layer { 171 | name: "conv3" 172 | type: "Convolution" 173 | bottom: "pool2" 174 | top: "conv3" 175 | param { 176 | lr_mult: 1.0 177 | } 178 | param { 179 | lr_mult: 2.0 180 | } 181 | convolution_param { 182 | num_output: 384 183 | kernel_size: 3 184 | pad: 1 185 | stride: 1 186 | weight_filler { 187 | type: "gaussian" 188 | std: 0.001 189 | } 190 | bias_filler { 191 | type: "constant" 192 | value: 0 193 | } 194 | } 195 | } 196 | 197 | layer { 198 | name: "relu3" 199 | type: "ReLU" 200 | bottom: "conv3" 201 | top: "conv3" 202 | } 203 | 204 | layer { 205 | name: "conv4" 206 | type: "Convolution" 207 | bottom: "conv3" 208 | top: "conv4" 209 | param { 210 | lr_mult: 1.0 211 | } 212 | param { 213 | lr_mult: 2.0 214 | } 215 | convolution_param { 216 | num_output: 384 217 | kernel_size: 3 218 | pad: 1 219 | stride: 1 220 | weight_filler { 221 | type: "gaussian" 222 | std: 0.001 223 | } 224 | bias_filler { 225 | 
type: "constant" 226 | value: 1 227 | } 228 | } 229 | } 230 | 231 | layer { 232 | name: "relu4" 233 | type: "ReLU" 234 | bottom: "conv4" 235 | top: "conv4" 236 | } 237 | 238 | layer { 239 | name: "conv5" 240 | type: "Convolution" 241 | bottom: "conv4" 242 | top: "conv5" 243 | param { 244 | lr_mult: 1.0 245 | } 246 | param { 247 | lr_mult: 2.0 248 | } 249 | convolution_param { 250 | num_output: 256 251 | kernel_size: 3 252 | pad: 1 253 | stride: 1 254 | weight_filler { 255 | type: "gaussian" 256 | std: 0.001 257 | } 258 | bias_filler { 259 | type: "constant" 260 | value: 1 261 | } 262 | } 263 | } 264 | 265 | layer { 266 | name: "relu5" 267 | type: "ReLU" 268 | bottom: "conv5" 269 | top: "conv5" 270 | } 271 | 272 | #-----------------------layer +------------------------- 273 | 274 | layer { 275 | name: "ip1" 276 | type: "InnerProduct" 277 | bottom: "conv5" 278 | top: "ip1" 279 | inner_product_param { 280 | num_output: 1024 281 | weight_filler { 282 | type: "xavier" 283 | } 284 | } 285 | } 286 | layer { 287 | name: "relu1" 288 | type: "ReLU" 289 | bottom: "ip1" 290 | top: "ip1" 291 | } 292 | layer { 293 | name: "ip2" 294 | type: "InnerProduct" 295 | bottom: "ip1" 296 | top: "ip2" 297 | inner_product_param { 298 | num_output: 1024 299 | weight_filler { 300 | type: "xavier" 301 | } 302 | } 303 | } 304 | layer { 305 | name: "relu2" 306 | type: "ReLU" 307 | bottom: "ip2" 308 | top: "ip2" 309 | } 310 | 311 | 312 | layer { 313 | name: "fc136" 314 | type: "InnerProduct" 315 | bottom: "ip2" 316 | top: "fc136" 317 | inner_product_param { 318 | num_output: 136 319 | weight_filler { 320 | type: "xavier" 321 | } 322 | } 323 | } 324 | 325 | #------------------------------- loss ---------------------------- 326 | 327 | layer { 328 | name: "out" 329 | type: "EuclideanLoss" 330 | bottom: "fc136" 331 | bottom: "flatdata" 332 | top: "out" 333 | loss_weight: 1 334 | } 335 | -------------------------------------------------------------------------------- /face_alignment/models/mean_shapes.txt: -------------------------------------------------------------------------------- 1 | -0.775420 -0.352592 -0.005021 -0.766519 -0.157355 -0.037416 -0.743629 0.036671 -0.055012 -0.701062 0.227707 -0.087501 -0.624895 0.409159 -0.181433 -0.507021 0.568253 -0.314666 -0.358238 0.700246 -0.461675 -0.188625 0.803156 -0.616360 0.000000 0.841022 -0.715031 0.188625 0.803156 -0.616360 0.358238 0.700246 -0.461675 0.507021 0.568253 -0.314666 0.624895 0.409159 -0.181433 0.701062 0.227707 -0.087501 0.743629 0.036671 -0.055012 0.766519 -0.157355 -0.037416 0.775420 -0.352592 -0.005021 -0.582617 -0.575646 -0.655455 -0.492129 -0.636310 -0.737820 -0.379689 -0.656483 -0.823215 -0.263923 -0.643754 -0.906170 -0.153347 -0.607971 -0.982801 0.153347 -0.607971 -0.982801 0.263923 -0.643754 -0.906170 0.379689 -0.656483 -0.823215 0.492129 -0.636310 -0.737820 0.582617 -0.575646 -0.655455 0.000000 -0.412566 -0.988062 0.000000 -0.291699 -1.070349 0.000000 -0.171456 -1.154743 0.000000 -0.051514 -1.235696 -0.122787 0.047974 -0.991719 -0.059097 0.070544 -1.029210 0.000000 0.080821 -1.063730 0.059097 0.070544 -1.029210 0.122787 0.047974 -0.991719 -0.430035 -0.396099 -0.732915 -0.358837 -0.444566 -0.761839 -0.276830 -0.446105 -0.775307 -0.204942 -0.396248 -0.783813 -0.279006 -0.333817 -0.768284 -0.357969 -0.331697 -0.754261 0.204942 -0.396248 -0.783813 0.276830 -0.446105 -0.775307 0.358837 -0.444566 -0.761839 0.430035 -0.396099 -0.732915 0.357969 -0.331697 -0.754261 0.279006 -0.333817 -0.768284 -0.262923 0.307414 -0.744026 -0.185044 0.253144 -0.863426 -0.091584 
0.216906 -0.945248 0.000000 0.228164 -0.997522 0.091584 0.216906 -0.945248 0.185044 0.253144 -0.863426 0.262923 0.307414 -0.744026 0.190844 0.384972 -0.812714 0.097535 0.434418 -0.869535 0.000000 0.449659 -0.913157 -0.097535 0.434418 -0.869535 -0.190844 0.384972 -0.812714 -0.198999 0.307340 -0.799310 -0.095989 0.288185 -0.897865 0.000000 0.291515 -0.964643 0.095989 0.288185 -0.897865 0.198999 0.307340 -0.799310 0.097949 0.335492 -0.870915 0.000000 0.350547 -0.928230 -0.097949 0.335492 -0.870915 2 | -0.775420 -0.352592 -0.766519 -0.157355 -0.743629 0.036671 -0.701062 0.227707 -0.624895 0.409159 -0.507021 0.568253 -0.358238 0.700246 -0.188625 0.803156 0.000000 0.841022 0.188625 0.803156 0.358238 0.700246 0.507021 0.568253 0.624895 0.409159 0.701062 0.227707 0.743629 0.036671 0.766519 -0.157355 0.775420 -0.352592 -0.582617 -0.575646 -0.492129 -0.636310 -0.379689 -0.656483 -0.263923 -0.643754 -0.153347 -0.607971 0.153347 -0.607971 0.263923 -0.643754 0.379689 -0.656483 0.492129 -0.636310 0.582617 -0.575646 0.000000 -0.412566 0.000000 -0.291699 0.000000 -0.171456 0.000000 -0.051514 -0.122787 0.047974 -0.059097 0.070544 0.000000 0.080821 0.059097 0.070544 0.122787 0.047974 -0.430035 -0.396099 -0.358837 -0.444566 -0.276830 -0.446105 -0.204942 -0.396248 -0.279006 -0.333817 -0.357969 -0.331697 0.204942 -0.396248 0.276830 -0.446105 0.358837 -0.444566 0.430035 -0.396099 0.357969 -0.331697 0.279006 -0.333817 -0.262923 0.307414 -0.185044 0.253144 -0.091584 0.216906 0.000000 0.228164 0.091584 0.216906 0.185044 0.253144 0.262923 0.307414 0.190844 0.384972 0.097535 0.434418 0.000000 0.449659 -0.097535 0.434418 -0.190844 0.384972 -0.198999 0.307340 -0.095989 0.288185 0.000000 0.291515 0.095989 0.288185 0.198999 0.307340 0.097949 0.335492 0.000000 0.350547 -0.097949 0.335492 3 | -0.750298 -0.352592 -0.750084 -0.157355 -0.732529 0.036671 -0.699820 0.227707 -0.650560 0.409159 -0.571186 0.568253 -0.465522 0.700246 -0.341724 0.803156 -0.185064 0.841022 0.022672 0.803156 0.226541 0.700246 0.408303 0.568253 0.556644 0.409159 0.654527 0.227707 0.704052 0.036671 0.730716 -0.157355 0.747699 -0.352592 -0.832288 -0.575646 -0.795106 -0.636310 -0.740428 -0.656483 -0.681649 -0.643754 -0.624203 -0.607971 -0.358599 -0.607971 -0.224521 -0.643754 -0.082787 -0.656483 0.057286 -0.636310 0.176834 -0.575646 -0.494031 -0.412566 -0.535174 -0.291699 -0.577372 -0.171456 -0.617848 -0.051514 -0.602196 0.047974 -0.565784 0.070544 -0.531865 0.080821 -0.463426 0.070544 -0.389523 0.047974 -0.738879 -0.396099 -0.691681 -0.444566 -0.627395 -0.446105 -0.569392 -0.396248 -0.625768 -0.333817 -0.687140 -0.331697 -0.214421 -0.396248 -0.147911 -0.446105 -0.070158 -0.444566 0.005964 -0.396099 -0.067120 -0.331697 -0.142516 -0.333817 -0.599711 0.307414 -0.591965 0.253144 -0.551938 0.216906 -0.498761 0.228164 -0.393310 0.216906 -0.271460 0.253144 -0.144315 0.307414 -0.241081 0.384972 -0.350300 0.434418 -0.456579 0.449659 -0.519235 0.434418 -0.571632 0.384972 -0.571993 0.307340 -0.532062 0.288185 -0.482321 0.291515 -0.365803 0.288185 -0.227317 0.307340 -0.350631 0.335492 -0.464115 0.350547 -0.520284 0.335492 4 | -0.747699 -0.352592 -0.730716 -0.157355 -0.704052 0.036671 -0.654527 0.227707 -0.556644 0.409159 -0.408303 0.568253 -0.226541 0.700246 -0.022672 0.803156 0.185064 0.841022 0.341724 0.803156 0.465522 0.700246 0.571186 0.568253 0.650560 0.409159 0.699820 0.227707 0.732529 0.036671 0.750084 -0.157355 0.750298 -0.352592 -0.176834 -0.575646 -0.057286 -0.636310 0.082787 -0.656483 0.224521 -0.643754 0.358599 -0.607971 0.624203 -0.607971 0.681649 
-0.643754 0.740428 -0.656483 0.795106 -0.636310 0.832288 -0.575646 0.494031 -0.412566 0.535174 -0.291699 0.577372 -0.171456 0.617848 -0.051514 0.389523 0.047974 0.463426 0.070544 0.531865 0.080821 0.565784 0.070544 0.602196 0.047974 -0.005964 -0.396099 0.070158 -0.444566 0.147911 -0.446105 0.214421 -0.396248 0.142516 -0.333817 0.067120 -0.331697 0.569392 -0.396248 0.627395 -0.446105 0.691681 -0.444566 0.738879 -0.396099 0.687140 -0.331697 0.625768 -0.333817 0.144315 0.307414 0.271460 0.253144 0.393310 0.216906 0.498761 0.228164 0.551938 0.216906 0.591965 0.253144 0.599711 0.307414 0.571632 0.384972 0.519235 0.434418 0.456579 0.449659 0.350300 0.434418 0.241081 0.384972 0.227317 0.307340 0.365803 0.288185 0.482321 0.291515 0.532062 0.288185 0.571993 0.307340 0.520284 0.335492 0.464115 0.350547 0.350631 0.335492 5 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_front.txt: -------------------------------------------------------------------------------- 1 | 1.031828079223632812e+02 2 | 9.566854095458984375e+01 3 | 6.069260025024414062e+01 4 | 4.313542938232421875e+01 5 | 3.479409408569335938e+01 6 | 2.776439666748046875e+01 7 | 2.766326141357421875e+01 8 | 2.420671272277832031e+01 9 | 2.100972938537597656e+01 10 | 1.877184486389160156e+01 11 | 1.825231742858886719e+01 12 | 1.696853065490722656e+01 13 | 1.407497215270996094e+01 14 | 1.345866584777832031e+01 15 | 1.124495315551757812e+01 16 | 1.050634860992431641e+01 17 | 1.045322513580322266e+01 18 | 1.016666221618652344e+01 19 | 9.403193473815917969e+00 20 | 8.697093963623046875e+00 21 | 8.366784095764160156e+00 22 | 7.573175430297851562e+00 23 | 7.421993732452392578e+00 24 | 7.175876617431640625e+00 25 | 6.694856166839599609e+00 26 | 6.609914302825927734e+00 27 | 6.394573211669921875e+00 28 | 6.032481670379638672e+00 29 | 6.031355381011962891e+00 30 | 5.338684558868408203e+00 31 | 5.145238876342773438e+00 32 | 5.112681388854980469e+00 33 | 5.004620075225830078e+00 34 | 4.974018573760986328e+00 35 | 4.814919948577880859e+00 36 | 4.734435558319091797e+00 37 | 4.285939216613769531e+00 38 | 3.971984148025512695e+00 39 | 3.971856117248535156e+00 40 | 3.753386497497558594e+00 41 | 3.669133424758911133e+00 42 | 3.578326463699340820e+00 43 | 3.483742237091064453e+00 44 | 3.311911344528198242e+00 45 | 3.139082670211791992e+00 46 | 3.108502388000488281e+00 47 | 3.022727251052856445e+00 48 | 2.984299659729003906e+00 49 | 2.859831809997558594e+00 50 | 2.833900928497314453e+00 51 | 2.755693197250366211e+00 52 | 2.742290258407592773e+00 53 | 2.523193836212158203e+00 54 | 2.451685428619384766e+00 55 | 2.441256761550903320e+00 56 | 2.379939079284667969e+00 57 | 2.357637166976928711e+00 58 | 2.252062797546386719e+00 59 | 2.231155633926391602e+00 60 | 2.168044567108154297e+00 61 | 2.127068758010864258e+00 62 | 2.045018196105957031e+00 63 | 2.041277647018432617e+00 64 | 2.012953281402587891e+00 65 | 2.005952835083007812e+00 66 | 1.955849528312683105e+00 67 | 1.935137510299682617e+00 68 | 1.874186635017395020e+00 69 | 1.829447269439697266e+00 70 | 1.807976007461547852e+00 71 | 1.798697710037231445e+00 72 | 1.765719175338745117e+00 73 | 1.662169933319091797e+00 74 | 1.660003185272216797e+00 75 | 1.633037924766540527e+00 76 | 1.625466108322143555e+00 77 | 1.608945488929748535e+00 78 | 1.607636570930480957e+00 79 | 1.600903630256652832e+00 80 | 1.565548300743103027e+00 81 | 1.555794477462768555e+00 82 | 1.520662426948547363e+00 83 | 1.516777276992797852e+00 84 | 1.480778694152832031e+00 85 
| 1.462243556976318359e+00 86 | 1.427065491676330566e+00 87 | 1.411217451095581055e+00 88 | 1.398631095886230469e+00 89 | 1.364845037460327148e+00 90 | 1.355186700820922852e+00 91 | 1.346644043922424316e+00 92 | 1.338635683059692383e+00 93 | 1.327934265136718750e+00 94 | 1.310287356376647949e+00 95 | 1.287073850631713867e+00 96 | 1.259063243865966797e+00 97 | 1.218294143676757812e+00 98 | 1.190768599510192871e+00 99 | 1.139584541320800781e+00 100 | 1.127703666687011719e+00 101 | 1.127283215522766113e+00 102 | 1.092749476432800293e+00 103 | 1.061315417289733887e+00 104 | 1.040784716606140137e+00 105 | 1.030719995498657227e+00 106 | 1.003454208374023438e+00 107 | 1.000035881996154785e+00 108 | 9.665775299072265625e-01 109 | 9.630764126777648926e-01 110 | 9.550484418869018555e-01 111 | 9.314393401145935059e-01 112 | 9.235842823982238770e-01 113 | 9.105998873710632324e-01 114 | 8.669779896736145020e-01 115 | 8.544918298721313477e-01 116 | 8.450148105621337891e-01 117 | 8.216010928153991699e-01 118 | 8.042898178100585938e-01 119 | 7.873371839523315430e-01 120 | 7.616593241691589355e-01 121 | 7.413730621337890625e-01 122 | 7.263383865356445312e-01 123 | 7.158536911010742188e-01 124 | 7.149648666381835938e-01 125 | 6.883103251457214355e-01 126 | 6.826061010360717773e-01 127 | 6.503386497497558594e-01 128 | 5.366221070289611816e-01 129 | 5.077308416366577148e-01 130 | 2.871714234352111816e-01 131 | 2.232837432529777288e-04 132 | 1.937306515173986554e-04 133 | 4.919727507513016462e-05 134 | 4.453564542927779257e-05 135 | 3.619944982347078621e-05 136 | 2.667792978172656149e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_left.txt: -------------------------------------------------------------------------------- 1 | 7.763617706298828125e+01 2 | 5.448361206054687500e+01 3 | 3.448307800292968750e+01 4 | 1.991018867492675781e+01 5 | 1.596661090850830078e+01 6 | 1.414914703369140625e+01 7 | 1.229103565216064453e+01 8 | 1.144200325012207031e+01 9 | 1.003643321990966797e+01 10 | 9.386501312255859375e+00 11 | 8.581890106201171875e+00 12 | 8.169677734375000000e+00 13 | 7.906897544860839844e+00 14 | 6.675380229949951172e+00 15 | 6.112782001495361328e+00 16 | 5.862775802612304688e+00 17 | 5.286133289337158203e+00 18 | 4.913509845733642578e+00 19 | 4.832731246948242188e+00 20 | 4.741940498352050781e+00 21 | 4.589621067047119141e+00 22 | 4.137164592742919922e+00 23 | 4.037960052490234375e+00 24 | 3.861081600189208984e+00 25 | 3.779168367385864258e+00 26 | 3.620183229446411133e+00 27 | 3.475615978240966797e+00 28 | 3.316045284271240234e+00 29 | 3.153186798095703125e+00 30 | 3.043802976608276367e+00 31 | 2.927801609039306641e+00 32 | 2.870085954666137695e+00 33 | 2.832670450210571289e+00 34 | 2.724978208541870117e+00 35 | 2.613666296005249023e+00 36 | 2.461195468902587891e+00 37 | 2.366128683090209961e+00 38 | 2.293519973754882812e+00 39 | 2.214362859725952148e+00 40 | 2.146535158157348633e+00 41 | 1.907979846000671387e+00 42 | 1.876132249832153320e+00 43 | 1.859354138374328613e+00 44 | 1.775403857231140137e+00 45 | 1.764379143714904785e+00 46 | 1.694374799728393555e+00 47 | 1.665422201156616211e+00 48 | 1.622999191284179688e+00 49 | 1.610870122909545898e+00 50 | 1.546877861022949219e+00 51 | 1.523749709129333496e+00 52 | 1.483136296272277832e+00 53 | 1.481248021125793457e+00 54 | 1.423740625381469727e+00 55 | 1.406941294670104980e+00 56 | 1.378324389457702637e+00 57 | 1.357655882835388184e+00 58 | 1.335111260414123535e+00 59 | 
1.306033492088317871e+00 60 | 1.282203078269958496e+00 61 | 1.257453680038452148e+00 62 | 1.242352485656738281e+00 63 | 1.201884031295776367e+00 64 | 1.184469342231750488e+00 65 | 1.166077256202697754e+00 66 | 1.114611506462097168e+00 67 | 1.102498888969421387e+00 68 | 1.085692048072814941e+00 69 | 1.060934782028198242e+00 70 | 1.029542326927185059e+00 71 | 1.017418980598449707e+00 72 | 1.005733728408813477e+00 73 | 9.654799103736877441e-01 74 | 9.343039393424987793e-01 75 | 9.260154366493225098e-01 76 | 9.126370549201965332e-01 77 | 8.995376825332641602e-01 78 | 8.933218717575073242e-01 79 | 8.765093088150024414e-01 80 | 8.631937503814697266e-01 81 | 8.594997525215148926e-01 82 | 8.442590236663818359e-01 83 | 8.324881792068481445e-01 84 | 8.141325116157531738e-01 85 | 8.030978441238403320e-01 86 | 7.934148907661437988e-01 87 | 7.738255858421325684e-01 88 | 7.693558931350708008e-01 89 | 7.474056482315063477e-01 90 | 7.435721158981323242e-01 91 | 7.271158099174499512e-01 92 | 7.163758873939514160e-01 93 | 7.030839323997497559e-01 94 | 6.789638996124267578e-01 95 | 6.737074851989746094e-01 96 | 6.597926020622253418e-01 97 | 6.343482136726379395e-01 98 | 6.245849728584289551e-01 99 | 6.192614436149597168e-01 100 | 6.046380400657653809e-01 101 | 5.935505032539367676e-01 102 | 5.786783099174499512e-01 103 | 5.713734626770019531e-01 104 | 5.640091300010681152e-01 105 | 5.604548454284667969e-01 106 | 5.492605566978454590e-01 107 | 5.269949436187744141e-01 108 | 5.183300971984863281e-01 109 | 5.088832378387451172e-01 110 | 4.984530508518218994e-01 111 | 4.879687726497650146e-01 112 | 4.835968017578125000e-01 113 | 4.738907814025878906e-01 114 | 4.641085565090179443e-01 115 | 4.461972415447235107e-01 116 | 4.422465264797210693e-01 117 | 4.404929280281066895e-01 118 | 4.266946017742156982e-01 119 | 4.236666858196258545e-01 120 | 4.139477312564849854e-01 121 | 4.108542203903198242e-01 122 | 4.007500708103179932e-01 123 | 3.902464807033538818e-01 124 | 3.863844573497772217e-01 125 | 3.740646839141845703e-01 126 | 3.641172349452972412e-01 127 | 3.496397733688354492e-01 128 | 3.446161448955535889e-01 129 | 3.107274472713470459e-01 130 | 2.185715436935424805e-01 131 | 1.521436497569084167e-04 132 | 1.100038352888077497e-04 133 | 2.406101702945306897e-05 134 | 2.068835783575195819e-05 135 | 1.945491385413333774e-05 136 | 1.268230789719382301e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_right.txt: -------------------------------------------------------------------------------- 1 | 7.763619232177734375e+01 2 | 5.448361206054687500e+01 3 | 3.448307800292968750e+01 4 | 1.991018676757812500e+01 5 | 1.596661090850830078e+01 6 | 1.414914703369140625e+01 7 | 1.229103469848632812e+01 8 | 1.144200229644775391e+01 9 | 1.003643226623535156e+01 10 | 9.386501312255859375e+00 11 | 8.581891059875488281e+00 12 | 8.169676780700683594e+00 13 | 7.906896114349365234e+00 14 | 6.675380229949951172e+00 15 | 6.112782478332519531e+00 16 | 5.862775802612304688e+00 17 | 5.286133289337158203e+00 18 | 4.913509845733642578e+00 19 | 4.832731246948242188e+00 20 | 4.741940498352050781e+00 21 | 4.589621067047119141e+00 22 | 4.137164592742919922e+00 23 | 4.037960052490234375e+00 24 | 3.861081600189208984e+00 25 | 3.779168128967285156e+00 26 | 3.620183229446411133e+00 27 | 3.475615978240966797e+00 28 | 3.316045522689819336e+00 29 | 3.153186798095703125e+00 30 | 3.043802738189697266e+00 31 | 2.927801609039306641e+00 32 | 2.870085716247558594e+00 33 | 
2.832670450210571289e+00 34 | 2.724978208541870117e+00 35 | 2.613666296005249023e+00 36 | 2.461195707321166992e+00 37 | 2.366128444671630859e+00 38 | 2.293519973754882812e+00 39 | 2.214362859725952148e+00 40 | 2.146535158157348633e+00 41 | 1.907979846000671387e+00 42 | 1.876132249832153320e+00 43 | 1.859354138374328613e+00 44 | 1.775403857231140137e+00 45 | 1.764379262924194336e+00 46 | 1.694374799728393555e+00 47 | 1.665422201156616211e+00 48 | 1.622999191284179688e+00 49 | 1.610870122909545898e+00 50 | 1.546877861022949219e+00 51 | 1.523749589920043945e+00 52 | 1.483136296272277832e+00 53 | 1.481247901916503906e+00 54 | 1.423740625381469727e+00 55 | 1.406941294670104980e+00 56 | 1.378324389457702637e+00 57 | 1.357655882835388184e+00 58 | 1.335111260414123535e+00 59 | 1.306033611297607422e+00 60 | 1.282203078269958496e+00 61 | 1.257453799247741699e+00 62 | 1.242352604866027832e+00 63 | 1.201884031295776367e+00 64 | 1.184469461441040039e+00 65 | 1.166077256202697754e+00 66 | 1.114611506462097168e+00 67 | 1.102498888969421387e+00 68 | 1.085692048072814941e+00 69 | 1.060934782028198242e+00 70 | 1.029542207717895508e+00 71 | 1.017418980598449707e+00 72 | 1.005733728408813477e+00 73 | 9.654797911643981934e-01 74 | 9.343039393424987793e-01 75 | 9.260153770446777344e-01 76 | 9.126370549201965332e-01 77 | 8.995376825332641602e-01 78 | 8.933218717575073242e-01 79 | 8.765093088150024414e-01 80 | 8.631937503814697266e-01 81 | 8.594997525215148926e-01 82 | 8.442590236663818359e-01 83 | 8.324881196022033691e-01 84 | 8.141325116157531738e-01 85 | 8.030978441238403320e-01 86 | 7.934149503707885742e-01 87 | 7.738255858421325684e-01 88 | 7.693558931350708008e-01 89 | 7.474056482315063477e-01 90 | 7.435721158981323242e-01 91 | 7.271158099174499512e-01 92 | 7.163758873939514160e-01 93 | 7.030839323997497559e-01 94 | 6.789638996124267578e-01 95 | 6.737074851989746094e-01 96 | 6.597926020622253418e-01 97 | 6.343482136726379395e-01 98 | 6.245849728584289551e-01 99 | 6.192614436149597168e-01 100 | 6.046380400657653809e-01 101 | 5.935505628585815430e-01 102 | 5.786783099174499512e-01 103 | 5.713734626770019531e-01 104 | 5.640091300010681152e-01 105 | 5.604548454284667969e-01 106 | 5.492605566978454590e-01 107 | 5.269949436187744141e-01 108 | 5.183300971984863281e-01 109 | 5.088832378387451172e-01 110 | 4.984530508518218994e-01 111 | 4.879687726497650146e-01 112 | 4.835968017578125000e-01 113 | 4.738907516002655029e-01 114 | 4.641085267066955566e-01 115 | 4.461972415447235107e-01 116 | 4.422465264797210693e-01 117 | 4.404929280281066895e-01 118 | 4.266946017742156982e-01 119 | 4.236666858196258545e-01 120 | 4.139477312564849854e-01 121 | 4.108542203903198242e-01 122 | 4.007500708103179932e-01 123 | 3.902464807033538818e-01 124 | 3.863844573497772217e-01 125 | 3.740646839141845703e-01 126 | 3.641172349452972412e-01 127 | 3.496397733688354492e-01 128 | 3.446161448955535889e-01 129 | 3.107274472713470459e-01 130 | 2.185715138912200928e-01 131 | 1.266324252355843782e-04 132 | 1.034445594996213913e-04 133 | 2.327194488316308707e-05 134 | 2.060086262645199895e-05 135 | 1.640349546505603939e-05 136 | 1.241218888026196510e-05 137 | -------------------------------------------------------------------------------- /face_alignment/models/shape_parameter_s_wild.txt: -------------------------------------------------------------------------------- 1 | 1.970495758056640625e+02 2 | 1.147242965698242188e+02 3 | 7.450263214111328125e+01 4 | 4.619513702392578125e+01 5 | 3.563884353637695312e+01 6 | 2.953556442260742188e+01 7 | 
2.518301963806152344e+01 8 | 2.209227180480957031e+01 9 | 1.681234169006347656e+01 10 | 1.483633422851562500e+01 11 | 1.392493820190429688e+01 12 | 1.294036865234375000e+01 13 | 1.165308284759521484e+01 14 | 1.114840412139892578e+01 15 | 9.776822090148925781e+00 16 | 8.556041717529296875e+00 17 | 8.135818481445312500e+00 18 | 8.057154655456542969e+00 19 | 7.755284786224365234e+00 20 | 7.051324844360351562e+00 21 | 6.971053600311279297e+00 22 | 6.026376247406005859e+00 23 | 5.712540626525878906e+00 24 | 5.444037437438964844e+00 25 | 5.380769252777099609e+00 26 | 5.245779514312744141e+00 27 | 5.084469318389892578e+00 28 | 4.777353763580322266e+00 29 | 4.659717559814453125e+00 30 | 4.281981468200683594e+00 31 | 4.220893383026123047e+00 32 | 4.155749320983886719e+00 33 | 4.146553993225097656e+00 34 | 3.788559436798095703e+00 35 | 3.753429412841796875e+00 36 | 3.653527259826660156e+00 37 | 3.541052818298339844e+00 38 | 3.238025188446044922e+00 39 | 3.161108016967773438e+00 40 | 2.992911100387573242e+00 41 | 2.964578628540039062e+00 42 | 2.928684711456298828e+00 43 | 2.812888145446777344e+00 44 | 2.672780752182006836e+00 45 | 2.297840595245361328e+00 46 | 2.162400960922241211e+00 47 | 2.125375986099243164e+00 48 | 2.069871425628662109e+00 49 | 2.011986732482910156e+00 50 | 1.985482931137084961e+00 51 | 1.916026353836059570e+00 52 | 1.875906825065612793e+00 53 | 1.837199926376342773e+00 54 | 1.768927335739135742e+00 55 | 1.732427358627319336e+00 56 | 1.714664340019226074e+00 57 | 1.667707800865173340e+00 58 | 1.633123993873596191e+00 59 | 1.602980017662048340e+00 60 | 1.585176110267639160e+00 61 | 1.543024063110351562e+00 62 | 1.507443666458129883e+00 63 | 1.486185431480407715e+00 64 | 1.423241972923278809e+00 65 | 1.388038516044616699e+00 66 | 1.372025370597839355e+00 67 | 1.328630685806274414e+00 68 | 1.319206714630126953e+00 69 | 1.295352339744567871e+00 70 | 1.281113266944885254e+00 71 | 1.258739233016967773e+00 72 | 1.223977208137512207e+00 73 | 1.198363065719604492e+00 74 | 1.166336297988891602e+00 75 | 1.107284784317016602e+00 76 | 1.077126860618591309e+00 77 | 1.055412054061889648e+00 78 | 1.015959739685058594e+00 79 | 9.891035556793212891e-01 80 | 9.715236425399780273e-01 81 | 9.593613147735595703e-01 82 | 9.326089024543762207e-01 83 | 9.173798561096191406e-01 84 | 9.023692011833190918e-01 85 | 8.879134654998779297e-01 86 | 8.786404728889465332e-01 87 | 8.638746142387390137e-01 88 | 8.559817671775817871e-01 89 | 8.491606116294860840e-01 90 | 8.322493433952331543e-01 91 | 8.189594745635986328e-01 92 | 8.083713650703430176e-01 93 | 7.968765497207641602e-01 94 | 7.838517427444458008e-01 95 | 7.731590270996093750e-01 96 | 7.560074925422668457e-01 97 | 7.524335980415344238e-01 98 | 7.510975599288940430e-01 99 | 7.457538247108459473e-01 100 | 7.109788060188293457e-01 101 | 6.914134025573730469e-01 102 | 6.808288097381591797e-01 103 | 6.761116385459899902e-01 104 | 6.582429409027099609e-01 105 | 6.515301465988159180e-01 106 | 6.346591114997863770e-01 107 | 6.292785406112670898e-01 108 | 6.262359619140625000e-01 109 | 6.150320768356323242e-01 110 | 6.042692661285400391e-01 111 | 5.963109135627746582e-01 112 | 5.879486799240112305e-01 113 | 5.791180729866027832e-01 114 | 5.688433647155761719e-01 115 | 5.627683997154235840e-01 116 | 5.566114783287048340e-01 117 | 5.424736738204956055e-01 118 | 5.290962457656860352e-01 119 | 5.219849944114685059e-01 120 | 5.168567895889282227e-01 121 | 5.056280493736267090e-01 122 | 4.916095137596130371e-01 123 | 4.908132255077362061e-01 124 | 
4.677435755729675293e-01 125 | 4.633058011531829834e-01 126 | 4.573886096477508545e-01 127 | 4.397208690643310547e-01 128 | 4.285275936126708984e-01 129 | 4.175726473331451416e-01 130 | 4.158701598644256592e-01 131 | 3.877090513706207275e-01 132 | 3.767875134944915771e-01 133 | 3.591192364692687988e-01 134 | 3.540259003639221191e-01 135 | 3.483022153377532959e-01 136 | 2.917855679988861084e-01 137 | -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_front.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_front.bmp -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_left.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_left.bmp -------------------------------------------------------------------------------- /face_alignment/models/warped_mean_right.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_alignment/models/warped_mean_right.bmp -------------------------------------------------------------------------------- /face_alignment/python/make_wild_input.py: -------------------------------------------------------------------------------- 1 | # make input of initialization network 2 | import glob 3 | import numpy as np 4 | import random 5 | from datetime import datetime 6 | import matplotlib.pyplot as plt 7 | import fa_util as fu 8 | import fa_util_train as fut 9 | import h5py 10 | import cv2 11 | 12 | 13 | list_file_name = 'K:/VGG_list/vgg_list_all_000.txt' 14 | output_prefix = 'K:/VGG_hdf5/init/VGG_wild_000' 15 | # img_folders = ['../sample_data'] 16 | img_folders = ['D:/DB/FaceAlignment/HS_distribution/front', 'D:/DB/FaceAlignment/HS_distribution/left', 'D:/DB/FaceAlignment/HS_distribution/right'] 17 | # output_prefix = 'M:/HS_hdf5/wild/HS_wild' 18 | jittering_size = 1 # should be changed 32*128 19 | chunk_size = 1536 # should be changed 20 | 21 | 22 | def get_part_pts(gt_pts, warp_mat_inv): 23 | part_centers = fu.get_part_centers(gt_pts) 24 | part_gt_pts = np.hstack((part_centers, np.ones((len(part_centers), 1), np.float32))) 25 | part_gt_pts_t = np.transpose(part_gt_pts) 26 | part_pts3 = np.dot(warp_mat_inv, part_gt_pts_t) 27 | return np.transpose(part_pts3)[:, 0:2] 28 | 29 | 30 | def main(): 31 | # files = fut.make_file_list_by_folder(img_folders, ['png', 'jpg']) # get image file list by folder 32 | files = fut.make_file_list_by_text(list_file_name) # get image file list by text file 33 | n_samples = len(files) 34 | random.seed(1234) # set random seed 35 | random.shuffle(files) # random shuffle 36 | image_data_sets = fut.make_chunk_set(files, chunk_size) # get image file chunk set 37 | print('Total number of samples: ' + str(n_samples)) 38 | 39 | cnt_all = 0 # cnt for sample images 40 | for i in range(len(image_data_sets)): 41 | current_num_img_files = len(image_data_sets[i]) 42 | img_all = np.zeros((current_num_img_files * jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8) 43 | pts_all = 
np.zeros((current_num_img_files * jittering_size, fu.n_points, 2), np.float32) 44 | # part_all = np.zeros((current_num_img_files * jittering_size, fu.n_parts, 2), np.float32) 45 | 46 | # generate data 47 | cnt = 0 # cnt for total samples with jittering 48 | for x in image_data_sets[i]: 49 | current_img_set = np.zeros((jittering_size, fu.init_h, fu.init_w, fu.channel), np.uint8) 50 | current_pts_set = np.zeros((jittering_size, fu.n_points, 2), np.float32) 51 | # current_part_set = np.zeros((jittering_size, fu.n_parts, 2), np.float32) 52 | cnt_all = cnt_all + 1 53 | print(str(datetime.now()) + ' (' + str(cnt_all) + '/' + str(n_samples) + ') ' + x) 54 | img, gt_pts = fut.load_img_pts(x) 55 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts) 56 | for k in range(jittering_size): 57 | if k == 0: 58 | face_box3_jittered = face_box3 59 | else: 60 | face_box3_jittered = fut.get_jittered_bounding_box3(face_box3) 61 | 62 | 63 | img_face, M_inv, M = fu.get_cropped_face_cv(img, face_box3_jittered) 64 | 65 | 66 | 67 | pts = cv2.transform(gt_pts.reshape((fu.n_points, 1, 2)), M) 68 | 69 | pts = pts.reshape((fu.n_points, 2)) 70 | 71 | # warp_mat_inv = np.linalg.inv(warp_mat) 72 | # pts = fu.get_warped_pts(gt_pts, warp_mat_inv.transpose()) 73 | # part_pts = get_part_pts(gt_pts, warp_mat_inv) 74 | 75 | current_img_set[k, :, :, :] = img_face 76 | current_pts_set[k, :, :] = pts 77 | # current_part_set[k, :, :] = part_pts 78 | 79 | # # draw 80 | # plt.figure(1) 81 | # plt.gcf().clear() 82 | # plt.imshow(img_face) 83 | # plt.scatter(pts[:, 0], pts[:, 1], c='b') 84 | # # plt.scatter(part_pts[:, 0], part_pts[:, 1], c='r') 85 | # plt.draw() 86 | # plt.pause(0.001) 87 | # z = 0 88 | img_all[cnt:cnt + jittering_size, :, :, :] = current_img_set 89 | pts_all[cnt:cnt + jittering_size, :, :] = current_pts_set 90 | # part_all[cnt:cnt + jittering_size, :, :] = current_part_set 91 | cnt = cnt + jittering_size 92 | img_all = img_all.transpose((0, 3, 1, 2)) # order: sample, c, m, n 93 | pts_all[:, :, 0] = pts_all[:, :, 0] / fu.init_w # normalize to 0~1 94 | pts_all[:, :, 1] = pts_all[:, :, 1] / fu.init_h # normalize to 0~1 95 | # part_all[:, :, 0] = part_all[:, :, 0] / fu.init_w # normalize to 0~1 96 | # part_all[:, :, 1] = part_all[:, :, 1] / fu.init_h # normalize to 0~1 97 | 98 | suffle_idx = np.random.permutation(current_num_img_files * jittering_size) # suffle 99 | img_all = img_all[suffle_idx, :, :, :] 100 | pts_all = pts_all[suffle_idx, :, :] 101 | # part_all = part_all[suffle_idx, :, :] 102 | 103 | current_output_path = "%s_%03d.h5" % (output_prefix, i) 104 | hf = h5py.File(current_output_path, 'w') 105 | input_face_img_name = "img" 106 | warped_img_set = hf.create_dataset(input_face_img_name, data=img_all) 107 | input_pts_name = "pts" 108 | pts_set = hf.create_dataset(input_pts_name, data=pts_all) 109 | # input_part_name = "part" 110 | # part_set = hf.create_dataset(input_part_name, data=part_all) 111 | hf.close() 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /face_alignment/python/test_300w_public.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from datetime import datetime 5 | import face_alignment as fa 6 | import fa_util_train as fut 7 | 8 | img_folders = ['N:\DB\FaceAlignment\\300W_public_test\lfpw', 'N:\DB\FaceAlignment\\300W_public_test\helen', 'N:\DB\FaceAlignment\\300W_public_test\ibug'] 
9 | # img_folders = ['D:\DB\FaceAlignment\\300W_public_test\ibug'] 10 | # img_folders = ['../sample_data2'] 11 | img_extension = ['png', 'jpg'] 12 | output_folder = '../result' 13 | max_iter = 21 14 | # max_iter = 1 15 | 16 | def main(): 17 | # fa.fa_init([[1], [0, 0], [0, 0]]) 18 | fa.fa_init([[1], [1], [0, 0], [1, 0]]) 19 | files = [] 20 | current_pts = np.zeros((max_iter+1, 68, 2), np.float32) 21 | error_IOD = np.zeros((max_iter+1, 1), np.float32) 22 | error_BOX = np.zeros((max_iter+1, 1), np.float32) 23 | for folder in img_folders: 24 | for ext in img_extension: 25 | current_files = glob.glob(folder + '/*.' + ext) 26 | files.extend(current_files) 27 | 28 | cnt = 0 29 | n_samples = len(files) 30 | for x in files: 31 | cnt += 1 32 | 33 | img, gt_pts = fut.load_img_pts(x) 34 | face_box3 = fut.get_bounding_box3_square_with_margin(gt_pts) 35 | # current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 36 | # current_pts[0, :, :], _, current_pts[1, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 37 | current_pts[0, :, :], current_pts[1, :, :], current_pts[2, :, :], pose_idx = fa.face_alignment_detection(img, face_box3, -1) 38 | 39 | for i in range(2, max_iter): 40 | current_pts[i+1, :, :], pose_idx = fa.face_alignment_detection_step(img, current_pts[i, :, :], pose_idx) 41 | 42 | print(str(datetime.now()) + ' ' + str(cnt) + '/' + str(n_samples) + ' ' + x) 43 | for i in range(0, max_iter): 44 | output_path = fut.get_output_path(x, output_folder, 'pt%d' % i) 45 | fut.save_pts(output_path, current_pts[i, :, :]) 46 | error_IOD[i, 0], error_BOX[i, 0] = fut.measurement(gt_pts, current_pts[i, :, :]) 47 | print('Error%d :' % i + str(error_IOD[i, 0])) 48 | 49 | # draw 50 | draw_idx = [0, 1, 2, 3, 4] 51 | # draw_idx = [0] 52 | plt.figure(1) 53 | plt.gcf().clear() 54 | draw_cnt = 1 55 | for i in draw_idx: 56 | plt.subplot(1, len(draw_idx), draw_cnt) 57 | plt.imshow(img) 58 | plt.scatter(current_pts[i, :, 0], current_pts[i, :, 1], s=3, c='r') 59 | draw_cnt += 1 60 | plt.draw() 61 | plt.pause(0.001) 62 | z = 1 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /face_detection/.gitignore: -------------------------------------------------------------------------------- 1 | data/FDDB/images/ 2 | data/widerface/ 3 | eval/ 4 | results/ 5 | -------------------------------------------------------------------------------- /face_detection/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /face_detection/NOTICE: -------------------------------------------------------------------------------- 1 | This project contains subcomponents with separate copyright notices and license terms. 2 | Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses. 3 | 4 | ===== 5 | 6 | biubug6/Pytorch_Retinaface 7 | https://github.com/biubug6/Pytorch_Retinaface 8 | 9 | 10 | MIT License 11 | 12 | Copyright (c) 2019 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in all 22 | copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | -------------------------------------------------------------------------------- /face_detection/README.md: -------------------------------------------------------------------------------- 1 | # Face Detection (work in progress) 2 | The code and checkpoints contained in this repository were adopted from the [biubug6/Pytorch_Retinaface](https://github.com/biubug6/Pytorch_Retinaface) repository. 3 | 4 | 5 | ## Getting Started 6 | 7 | ### Requirements / Installation 8 | - [Anaconda](https://www.anaconda.com/) 9 | - Nvidia GPU (for GPU utilization) 10 | 11 | Use the following commands to install the necessary packages and activate the environment: 12 | ```sh 13 | conda env create -f environment.yml 14 | conda activate retinaface 15 | ``` 16 | 17 | ### Data 18 | 1. Download the [WiderFace](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) dataset. 19 | 20 | 2. Download annotations (face bounding boxes & five facial landmarks) from [baidu cloud](https://pan.baidu.com/s/1Laby0EctfuJGgGMgRRgykA). 21 | 22 | 3. Organise the dataset directory as follows: 23 | 24 | ``` 25 | ./data/widerface/ 26 | ├─train/ 27 | │ ├─images/ 28 | │ └─label.txt 29 | └─val/ 30 | ├─images/ 31 | └─wider_val.txt 32 | ``` 33 | 34 | ps: wider_val.txt only include val file names but not label information. 35 | 36 | 37 | ### Test 38 | You can use the following command to detect faces in a photo and save the result as an image: 39 | ```sh 40 | python detect.py --image -s 41 | ``` 42 | See [detect.py](detect.py#L16) for available arguments. 
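### Loading the WiderFace data in Python
The WiderFace annotations organised above are consumed through the `WiderFaceDetection` dataset and the `preproc` augmentation pipeline in `data/`. The snippet below is an illustrative sketch only (it is not a script from this repository): it shows how those pieces fit together in a `DataLoader`, assuming the directory layout above exists and that it is run from the `face_detection` directory; the training script is expected to do essentially the same thing.
```python
# Illustrative sketch: build a DataLoader over the WiderFace training annotations.
# Assumes ./data/widerface/train/label.txt and the images/ folder exist as described above.
from torch.utils.data import DataLoader

from data import cfg_mnet, preproc, WiderFaceDetection

rgb_mean = (104, 117, 123)  # BGR means; the same values are subtracted in detect.py / test_fddb.py
dataset = WiderFaceDetection("./data/widerface/train/label.txt",
                             preproc(cfg_mnet["image_size"], rgb_mean))
loader = DataLoader(dataset, batch_size=cfg_mnet["batch_size"], shuffle=True,
                    collate_fn=WiderFaceDetection.collate)

images, targets = next(iter(loader))
# images: (batch, 3, 640, 640) float tensor
# targets: list of (num_faces, 15) tensors -> 4 box coords, 10 landmark coords, 1 label per face
```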
43 | 44 | 45 | ## Training 46 | We provide resnet50 and mobilenet0.25 as backbone networks for training the detector. 47 | We trained MobileNet0.25 on the ImageNet dataset and obtained 46.58% top-1 accuracy. If you do not wish to train the model yourself, we also provide trained models. The pretrained and trained models are available on [google cloud](https://drive.google.com/open?id=1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1) and [baidu cloud](https://pan.baidu.com/s/12h97Fy1RYuqMMIV-RpzdPg) (password: fstq). Place the models as follows: 48 | ```bash 49 | ./weights/ 50 | ├─mobilenet0.25_final.pt 51 | └─mobilenet0.25_pretrain.pt 52 | ``` 53 | 1. Before training, you can check the network configuration (e.g. batch_size, min_sizes, steps, etc.) in ``data/config.py`` and ``train_detector.py``. 54 | 55 | 2. Train the model on WiderFace with either of the following commands: 56 | ```Shell 57 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_detector.py --network resnet50 58 | CUDA_VISIBLE_DEVICES=0 python train_detector.py --network mobilenet0.25 59 | ``` 60 | 61 | 62 | ## Evaluation 63 | 64 | ### Evaluation on WiderFace val 65 | 1. Generate the txt result files: 66 | ```Shell 67 | python test_widerface.py --trained-model --network mobilenet0.25 or resnet50 68 | ``` 69 | 2. Evaluate the txt results. The evaluation code comes from [WiderFace-Evaluation](https://github.com/wondervictor/WiderFace-Evaluation): 70 | ```Shell 71 | cd ./widerface_evaluate 72 | python setup.py build_ext --inplace 73 | python evaluation.py 74 | ``` 75 | 3. You can also use the official WiderFace Matlab evaluation demo from [here](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html). 76 | 77 | ### Evaluation on FDDB 78 | 79 | 1. Download the [FDDB](https://drive.google.com/open?id=17t4WULUDgZgiSy5kpCax4aooyPaz3GQH) images to: 80 | ```Shell 81 | ./data/FDDB/images/ 82 | ``` 83 | 84 | 2. Evaluate the trained model using: 85 | ```Shell 86 | python test_fddb.py --checkpoint ./weights/mobilenet0.25_final.pt 87 | ``` 88 | 89 | 3. ~~Download [eval_tool](https://bitbucket.org/marcopede/face-eval) to evaluate the performance.~~ This link no longer seems to work. We found [this](https://github.com/RuisongZhou/FDDB_Evaluation) repository, but have not tested it yet.
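### Loading the trained detector in Python
All of the scripts above read the trained weights from a single checkpoint file that stores both the network configuration and the state dict. Below is a minimal loading sketch that mirrors what `detect.py` and `test_fddb.py` do; the weights path is only an example.
```python
# Minimal sketch: restore the trained RetinaFace model from a checkpoint.
# The "config" / "net_state_dict" keys follow detect.py and test_fddb.py.
import torch

from model.retinaface import RetinaFace

checkpoint = torch.load("./weights/mobilenet0.25_final.pt", map_location="cpu")
cfg = checkpoint["config"]                    # backbone, in_channel, out_channel, ...
net = RetinaFace(**cfg)
net.load_state_dict(checkpoint["net_state_dict"], strict=False)
net.eval().requires_grad_(False)              # inference mode, no gradients
```
From here the raw network outputs still need the prior-box decoding and NMS steps shown in `detect.py`.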
90 | 91 | 92 | ## References and Citation 93 | - [RetinaFace in PyTorch](https://github.com/biubug6/Pytorch_Retinaface) 94 | - [FaceBoxes](https://github.com/zisianw/FaceBoxes.PyTorch) 95 | - [Retinaface (mxnet)](https://github.com/deepinsight/insightface/tree/master/RetinaFace) 96 | 97 | ``` 98 | @inproceedings{deng2020retinaface, 99 | title={RetinaFace: Single-Shot Multi-Level Face Localisation in the wild}, 100 | author={Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos}, 101 | booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, 102 | pages={5203--5212}, 103 | year={2020} 104 | } 105 | -------------------------------------------------------------------------------- /face_detection/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import torch 6 | from model.retinaface import RetinaFace 7 | 8 | parser = argparse.ArgumentParser(description='Convert to ONNX') 9 | parser.add_argument( 10 | '--checkpoint', type=str, 11 | default='./weights/mobilenet0.25_final.pt', 12 | help='Trained state_dict file path to open' 13 | ) 14 | parser.add_argument( 15 | '--long-side', type=int, default=640, 16 | help='when origin_size is false, long_side is scaled size(320 or 640 for long side)' 17 | ) 18 | parser.add_argument( 19 | '--cpu', action="store_true", 20 | help='Use cpu inference' 21 | ) 22 | 23 | 24 | def main(): 25 | args = parser.parse_args() 26 | assert os.path.isfile(args.checkpoint) 27 | 28 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 29 | cfg = checkpoint["config"] 30 | device = torch.device("cpu" if args.cpu else "cuda") 31 | 32 | # net and model 33 | net = RetinaFace(**cfg) 34 | net.load_state_dict(checkpoint["net_state_dict"], strict=False) 35 | net.eval().requires_grad_(False) 36 | net.to(device) 37 | print('Finished loading model!') 38 | 39 | # ------------------------ export ----------------------------- 40 | output_onnx = 'face_detector.onnx' 41 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 42 | input_names = ["input0"] 43 | output_names = ["output0"] 44 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device) 45 | 46 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 47 | input_names=input_names, output_names=output_names) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /face_detection/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | from .data_augment import * 3 | from .wider_face import WiderFaceDetection 4 | -------------------------------------------------------------------------------- /face_detection/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'backbone': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'batch_size': 32, 11 | 'epoch': 250, 12 | 'decay1': 190, 13 | 'decay2': 220, 14 | 'image_size': 640, 15 | 'pretrain': True, 16 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 17 | 'in_channel': 32, 18 | 'out_channel': 64 19 | } 20 | 21 | cfg_re50 = { 22 | 'backbone': 'Resnet50', 23 | 
'min_sizes': [[16, 32], [64, 128], [256, 512]], 24 | 'steps': [8, 16, 32], 25 | 'variance': [0.1, 0.2], 26 | 'clip': False, 27 | 'loc_weight': 2.0, 28 | 'batch_size': 24, 29 | 'epoch': 100, 30 | 'decay1': 70, 31 | 'decay2': 90, 32 | 'image_size': 840, 33 | 'pretrain': True, 34 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 35 | 'in_channel': 256, 36 | 'out_channel': 256 37 | } 38 | 39 | -------------------------------------------------------------------------------- /face_detection/data/data_augment.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import cv2 4 | import numpy as np 5 | from utils.box_utils import matrix_iof 6 | 7 | 8 | def _crop(image, boxes, labels, landm, img_dim): 9 | height, width, _ = image.shape 10 | pad_image_flag = True 11 | 12 | for _ in range(250): 13 | """ 14 | if random.uniform(0, 1) <= 0.2: 15 | scale = 1.0 16 | else: 17 | scale = random.uniform(0.3, 1.0) 18 | """ 19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] 20 | scale = random.choice(PRE_SCALES) 21 | short_side = min(width, height) 22 | w = int(scale * short_side) 23 | h = w 24 | 25 | if width == w: 26 | l = 0 27 | else: 28 | l = random.randrange(width - w) 29 | if height == h: 30 | t = 0 31 | else: 32 | t = random.randrange(height - h) 33 | roi = np.array((l, t, l + w, t + h)) 34 | 35 | value = matrix_iof(boxes, roi[np.newaxis]) 36 | flag = (value >= 1) 37 | if not flag.any(): 38 | continue 39 | 40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 42 | boxes_t = boxes[mask_a].copy() 43 | labels_t = labels[mask_a].copy() 44 | landms_t = landm[mask_a].copy() 45 | landms_t = landms_t.reshape([-1, 5, 2]) 46 | 47 | if boxes_t.shape[0] == 0: 48 | continue 49 | 50 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 51 | 52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 53 | boxes_t[:, :2] -= roi[:2] 54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 55 | boxes_t[:, 2:] -= roi[:2] 56 | 57 | # landm 58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] 59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) 60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) 61 | landms_t = landms_t.reshape([-1, 10]) 62 | 63 | 64 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale 65 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim 66 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim 67 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0 68 | boxes_t = boxes_t[mask_b] 69 | labels_t = labels_t[mask_b] 70 | landms_t = landms_t[mask_b] 71 | 72 | if boxes_t.shape[0] == 0: 73 | continue 74 | 75 | pad_image_flag = False 76 | 77 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag 78 | return image, boxes, labels, landm, pad_image_flag 79 | 80 | 81 | def _distort(image): 82 | 83 | def _convert(image, alpha=1, beta=0): 84 | tmp = image.astype(float) * alpha + beta 85 | tmp[tmp < 0] = 0 86 | tmp[tmp > 255] = 255 87 | image[:] = tmp 88 | 89 | image = image.copy() 90 | 91 | if random.randrange(2): 92 | 93 | #brightness distortion 94 | if random.randrange(2): 95 | _convert(image, beta=random.uniform(-32, 32)) 96 | 97 | #contrast distortion 98 | if random.randrange(2): 99 | _convert(image, alpha=random.uniform(0.5, 1.5)) 100 | 101 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 102 | 103 | #saturation distortion 104 | if random.randrange(2): 105 | _convert(image[:, :, 1], 
alpha=random.uniform(0.5, 1.5)) 106 | 107 | #hue distortion 108 | if random.randrange(2): 109 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 110 | tmp %= 180 111 | image[:, :, 0] = tmp 112 | 113 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 114 | 115 | else: 116 | 117 | #brightness distortion 118 | if random.randrange(2): 119 | _convert(image, beta=random.uniform(-32, 32)) 120 | 121 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 122 | 123 | #saturation distortion 124 | if random.randrange(2): 125 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 126 | 127 | #hue distortion 128 | if random.randrange(2): 129 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 130 | tmp %= 180 131 | image[:, :, 0] = tmp 132 | 133 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 134 | 135 | #contrast distortion 136 | if random.randrange(2): 137 | _convert(image, alpha=random.uniform(0.5, 1.5)) 138 | 139 | return image 140 | 141 | 142 | def _expand(image, boxes, fill, p): 143 | if random.randrange(2): 144 | return image, boxes 145 | 146 | height, width, depth = image.shape 147 | 148 | scale = random.uniform(1, p) 149 | w = int(scale * width) 150 | h = int(scale * height) 151 | 152 | left = random.randint(0, w - width) 153 | top = random.randint(0, h - height) 154 | 155 | boxes_t = boxes.copy() 156 | boxes_t[:, :2] += (left, top) 157 | boxes_t[:, 2:] += (left, top) 158 | expand_image = np.empty( 159 | (h, w, depth), 160 | dtype=image.dtype) 161 | expand_image[:, :] = fill 162 | expand_image[top:top + height, left:left + width] = image 163 | image = expand_image 164 | 165 | return image, boxes_t 166 | 167 | 168 | def _mirror(image, boxes, landms): 169 | _, width, _ = image.shape 170 | if random.randrange(2): 171 | image = image[:, ::-1] 172 | boxes = boxes.copy() 173 | boxes[:, 0::2] = width - boxes[:, 2::-2] 174 | 175 | # landm 176 | landms = landms.copy() 177 | landms = landms.reshape([-1, 5, 2]) 178 | landms[:, :, 0] = width - landms[:, :, 0] 179 | tmp = landms[:, 1, :].copy() 180 | landms[:, 1, :] = landms[:, 0, :] 181 | landms[:, 0, :] = tmp 182 | tmp1 = landms[:, 4, :].copy() 183 | landms[:, 4, :] = landms[:, 3, :] 184 | landms[:, 3, :] = tmp1 185 | landms = landms.reshape([-1, 10]) 186 | 187 | return image, boxes, landms 188 | 189 | 190 | def _pad_to_square(image, rgb_mean, pad_image_flag): 191 | if not pad_image_flag: 192 | return image 193 | height, width, _ = image.shape 194 | long_side = max(width, height) 195 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) 196 | image_t[:, :] = rgb_mean 197 | image_t[0:0 + height, 0:0 + width] = image 198 | return image_t 199 | 200 | 201 | def _resize_subtract_mean(image, insize, rgb_mean): 202 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 203 | interp_method = interp_methods[random.randrange(5)] 204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method) 205 | image = image.astype(np.float32) 206 | image -= rgb_mean 207 | return image.transpose(2, 0, 1) 208 | 209 | 210 | class preproc(object): 211 | 212 | def __init__(self, img_dim, rgb_means): 213 | self.img_dim = img_dim 214 | self.rgb_means = rgb_means 215 | 216 | def __call__(self, image, targets): 217 | assert targets.shape[0] > 0, "this image does not have gt" 218 | 219 | boxes = targets[:, :4].copy() 220 | labels = targets[:, -1].copy() 221 | landm = targets[:, 4:-1].copy() 222 | 223 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, 
self.img_dim) 224 | image_t = _distort(image_t) 225 | image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag) 226 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) 227 | height, width, _ = image_t.shape 228 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) 229 | boxes_t[:, 0::2] /= width 230 | boxes_t[:, 1::2] /= height 231 | 232 | landm_t[:, 0::2] /= width 233 | landm_t[:, 1::2] /= height 234 | 235 | labels_t = np.expand_dims(labels_t, 1) 236 | targets_t = np.hstack((boxes_t, landm_t, labels_t)) 237 | 238 | return image_t, targets_t 239 | -------------------------------------------------------------------------------- /face_detection/data/wider_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torch.utils.data as data 5 | 6 | 7 | class WiderFaceDetection(data.Dataset): 8 | def __init__(self, txt_path, preproc=None): 9 | self.preproc = preproc 10 | self.imgs_path = [] 11 | self.words = [] 12 | f = open(txt_path,'r') 13 | lines = f.readlines() 14 | isFirst = True 15 | labels = [] 16 | for line in lines: 17 | line = line.rstrip() 18 | if line.startswith('#'): 19 | if isFirst is True: 20 | isFirst = False 21 | else: 22 | labels_copy = labels.copy() 23 | self.words.append(labels_copy) 24 | labels.clear() 25 | path = line[2:] 26 | path = txt_path.replace('label.txt','images/') + path 27 | self.imgs_path.append(path) 28 | else: 29 | line = line.split(' ') 30 | label = [float(x) for x in line] 31 | labels.append(label) 32 | 33 | self.words.append(labels) 34 | 35 | def __len__(self): 36 | return len(self.imgs_path) 37 | 38 | def __getitem__(self, index): 39 | img = cv2.imread(self.imgs_path[index]) 40 | height, width, _ = img.shape 41 | 42 | labels = self.words[index] 43 | annotations = np.zeros((0, 15)) 44 | if len(labels) == 0: 45 | return annotations 46 | for idx, label in enumerate(labels): 47 | annotation = np.zeros((1, 15)) 48 | # bbox 49 | annotation[0, 0] = label[0] # x1 50 | annotation[0, 1] = label[1] # y1 51 | annotation[0, 2] = label[0] + label[2] # x2 52 | annotation[0, 3] = label[1] + label[3] # y2 53 | 54 | # landmarks 55 | annotation[0, 4] = label[4] # l0_x 56 | annotation[0, 5] = label[5] # l0_y 57 | annotation[0, 6] = label[7] # l1_x 58 | annotation[0, 7] = label[8] # l1_y 59 | annotation[0, 8] = label[10] # l2_x 60 | annotation[0, 9] = label[11] # l2_y 61 | annotation[0, 10] = label[13] # l3_x 62 | annotation[0, 11] = label[14] # l3_y 63 | annotation[0, 12] = label[16] # l4_x 64 | annotation[0, 13] = label[17] # l4_y 65 | if (annotation[0, 4]<0): 66 | annotation[0, 14] = -1 67 | else: 68 | annotation[0, 14] = 1 69 | 70 | annotations = np.append(annotations, annotation, axis=0) 71 | target = np.array(annotations) 72 | if self.preproc is not None: 73 | img, target = self.preproc(img, target) 74 | 75 | return torch.from_numpy(img), target 76 | 77 | @staticmethod 78 | def collate(batch): 79 | """Custom collate fn for dealing with batches of images that have a different 80 | number of associated object annotations (bounding boxes). 
81 | 82 | Arguments: 83 | batch: (tuple) A tuple of tensor images and lists of annotations 84 | 85 | Return: 86 | A tuple containing: 87 | 1) (tensor) batch of images stacked on their 0 dim 88 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 89 | """ 90 | targets = [] 91 | imgs = [] 92 | for _, sample in enumerate(batch): 93 | for _, tup in enumerate(sample): 94 | if torch.is_tensor(tup): 95 | imgs.append(tup) 96 | elif isinstance(tup, type(np.empty(0))): 97 | annos = torch.from_numpy(tup).float() 98 | targets.append(annos) 99 | 100 | return torch.stack(imgs, 0), targets 101 | -------------------------------------------------------------------------------- /face_detection/detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import time 5 | 6 | import cv2 7 | import numpy as np 8 | import torch 9 | from torchvision.ops import nms 10 | 11 | from model.prior_box import PriorBox 12 | from model.retinaface import RetinaFace 13 | from utils.box_utils import decode, decode_landm 14 | from utils.misc import draw_keypoint 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | '--checkpoint', 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument( 23 | '--image', 24 | help='Input image file to detect' 25 | ) 26 | parser.add_argument( 27 | '--cpu', action="store_true", default=False, 28 | help='Use cpu inference' 29 | ) 30 | parser.add_argument( 31 | '--confidence-threshold', type=float, default=0.02, 32 | help='confidence_threshold' 33 | ) 34 | parser.add_argument( 35 | '--top-k', type=int, default=5000, 36 | help='top_k' 37 | ) 38 | parser.add_argument( 39 | '--nms-threshold', type=float, default=0.4, 40 | help='NMS threshold' 41 | ) 42 | parser.add_argument( 43 | '--keep-top-k', type=int, default=750, 44 | help='keep top k' 45 | ) 46 | parser.add_argument( 47 | '-s', '--save-image', action="store_true", default=False, 48 | help='show detection results' 49 | ) 50 | parser.add_argument( 51 | '--vis-thres', type=float, default=0.6, 52 | help='visualization_threshold' 53 | ) 54 | 55 | 56 | @torch.no_grad() 57 | def main(): 58 | args = parser.parse_args() 59 | assert os.path.isfile(args.checkpoint) 60 | 61 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 62 | cfg = checkpoint["config"] 63 | device = torch.device("cpu" if args.cpu else "cuda") 64 | 65 | # net and model 66 | net = RetinaFace(**cfg) 67 | net.load_state_dict(checkpoint["net_state_dict"], strict=False) 68 | net.eval().requires_grad_(False) 69 | net.to(device) 70 | print('Finished loading model!') 71 | 72 | resize = 1 73 | 74 | # testing begin 75 | img_raw = cv2.imread(args.image, cv2.IMREAD_COLOR) 76 | 77 | img = np.float32(img_raw) 78 | 79 | im_height, im_width, _ = img.shape 80 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 81 | img -= (104, 117, 123) 82 | img = img.transpose(2, 0, 1) 83 | img = torch.from_numpy(img).unsqueeze(0) 84 | img = img.to(device) 85 | scale = scale.to(device) 86 | 87 | tic = time.time() 88 | loc, conf, landms = net(img) # forward pass 89 | print('net forward time: {:.4f}'.format(time.time() - tic)) 90 | 91 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 92 | priors = priorbox.forward() 93 | priors = priors.to(device) 94 | prior_data = priors.data 95 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 96 | boxes = boxes 
* scale / resize 97 | scores = conf.squeeze(0)[:, 1] 98 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 99 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 100 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 101 | img.shape[3], img.shape[2]]) 102 | scale1 = scale1.to(device) 103 | landms = landms * scale1 / resize 104 | 105 | # ignore low scores 106 | inds = torch.where(scores > args.confidence_threshold)[0] 107 | boxes = boxes[inds] 108 | landms = landms[inds] 109 | scores = scores[inds] 110 | 111 | # keep top-K before NMS 112 | order = scores.argsort() 113 | boxes = boxes[order][:args.top_k] 114 | landms = landms[order][:args.top_k] 115 | scores = scores[order][:args.top_k] 116 | 117 | # do NMS 118 | keep = nms(boxes, scores, args.nms_threshold) 119 | 120 | boxes = boxes[keep] 121 | scores = scores[keep] 122 | landms = landms[keep] 123 | 124 | boxes = boxes.cpu().numpy() 125 | scores = scores.cpu().numpy() 126 | landms = landms.cpu().numpy() 127 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 128 | dets = np.concatenate((dets, landms), axis=1) 129 | 130 | # save image 131 | if args.save_image: 132 | draw_keypoint(img_raw, dets, args.vis_thres) 133 | 134 | splits = args.image.split(".") 135 | name = ".".join(splits[:-1]) 136 | ext = splits[-1] 137 | output = f"{name}_results.{ext}" 138 | cv2.imwrite(output, img_raw) 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /face_detection/environment.yml: -------------------------------------------------------------------------------- 1 | name: retinaface 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - cudatoolkit=11.3 6 | - matplotlib 7 | - pip 8 | - python=3.9 9 | - pytorch::pytorch=1.10.1 10 | - pytorch::torchvision 11 | - scipy 12 | - tqdm 13 | - pip: 14 | - opencv-python-headless 15 | -------------------------------------------------------------------------------- /face_detection/model/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from utils.box_utils import match, log_sum_exp 5 | 6 | 7 | class MultiBoxLoss(nn.Module): 8 | """SSD Weighted Loss Function 9 | Compute Targets: 10 | 1) Produce Confidence Target Indices by matching ground truth boxes 11 | with (default) 'priorboxes' that have jaccard index > threshold parameter 12 | (default threshold: 0.5). 13 | 2) Produce localization target by 'encoding' variance into offsets of ground 14 | truth boxes and their matched 'priorboxes'. 15 | 3) Hard negative mining to filter the excessive number of negative examples 16 | that comes with using a large number of default bounding boxes. 17 | (default negative:positive ratio 3:1) 18 | Objective Loss: 19 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 20 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 21 | weighted by α which is set to 1 by cross val. 22 | Args: 23 | c: class confidences, 24 | l: predicted boxes, 25 | g: ground truth boxes 26 | N: number of matched default boxes 27 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
28 | """ 29 | 30 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target): 31 | super().__init__() 32 | self.num_classes = num_classes 33 | self.threshold = overlap_thresh 34 | self.background_label = bkg_label 35 | self.encode_target = encode_target 36 | self.use_prior_for_matching = prior_for_matching 37 | self.do_neg_mining = neg_mining 38 | self.negpos_ratio = neg_pos 39 | self.neg_overlap = neg_overlap 40 | self.variance = [0.1, 0.2] 41 | 42 | def forward(self, predictions, priors, targets): 43 | """Multibox Loss 44 | Args: 45 | predictions (tuple): A tuple containing loc preds, conf preds, 46 | and prior boxes from SSD net. 47 | loc shape: torch.size(batch_size,num_priors,4) 48 | conf shape: torch.size(batch_size,num_priors,num_classes) 49 | landm shape: torch.size(batch_size,num_priors,10) 50 | priors shape: torch.size(num_priors,4) 51 | 52 | ground_truth (tensor): Ground truth boxes and labels for a batch, 53 | shape: [batch_size,num_objs,5] (last idx is the label). 54 | """ 55 | 56 | loc_data, conf_data, landm_data = predictions 57 | priors = priors 58 | num = loc_data.size(0) 59 | num_priors = (priors.size(0)) 60 | device = loc_data.device 61 | 62 | # match priors (default boxes) and ground truth boxes 63 | loc_t = torch.Tensor(num, num_priors, 4) 64 | landm_t = torch.Tensor(num, num_priors, 10) 65 | conf_t = torch.LongTensor(num, num_priors) 66 | for idx in range(num): 67 | truths = targets[idx][:, :4].data 68 | labels = targets[idx][:, -1].data 69 | landms = targets[idx][:, 4:14].data 70 | defaults = priors.data 71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) 72 | 73 | loc_t = loc_t.to(device) 74 | conf_t = conf_t.to(device) 75 | landm_t = landm_t.to(device) 76 | zeros = torch.tensor(0, device=device) 77 | 78 | # NOTE: landm Loss (Smooth L1) 79 | # Shape: [batch,num_priors,10] 80 | pos1 = conf_t > zeros 81 | num_pos_landm = pos1.long().sum(1, keepdim=True) 82 | N1 = max(num_pos_landm.data.sum().float(), 1) 83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 84 | landm_p = landm_data[pos_idx1].view(-1, 10) 85 | landm_t = landm_t[pos_idx1].view(-1, 10) 86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') 87 | 88 | 89 | pos = conf_t != zeros 90 | conf_t[pos] = 1 91 | 92 | # NOTE: Localization Loss (Smooth L1) 93 | # Shape: [batch,num_priors,4] 94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 95 | loc_p = loc_data[pos_idx].view(-1, 4) 96 | loc_t = loc_t[pos_idx].view(-1, 4) 97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 98 | 99 | # Compute max conf across batch for hard negative mining 100 | batch_conf = conf_data.view(-1, self.num_classes) 101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 102 | 103 | # NOTE: Hard Negative Mining 104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 105 | loss_c = loss_c.view(num, -1) 106 | _, loss_idx = loss_c.sort(1, descending=True) 107 | _, idx_rank = loss_idx.sort(1) 108 | num_pos = pos.long().sum(1, keepdim=True) 109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 110 | neg = idx_rank < num_neg.expand_as(idx_rank) 111 | 112 | # Confidence Loss Including Positive and Negative Examples 113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 116 | targets_weighted = 
conf_t[(pos+neg).gt(0)] 117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 118 | 119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 120 | N = max(num_pos.data.sum().float(), 1) 121 | loss_l /= N 122 | loss_c /= N 123 | loss_landm /= N1 124 | 125 | return loss_l, loss_c, loss_landm 126 | -------------------------------------------------------------------------------- /face_detection/model/networks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3_bn(inp, oup, stride): 7 | return nn.Sequential( 8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 9 | nn.BatchNorm2d(oup), 10 | ) 11 | 12 | 13 | def conv3_bn_lrelu(inp, oup, stride=1, leaky=0): 14 | return nn.Sequential( 15 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 16 | nn.BatchNorm2d(oup), 17 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 18 | ) 19 | 20 | 21 | def conv1_bn(inp, oup, stride, leaky=0): 22 | return nn.Sequential( 23 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 24 | nn.BatchNorm2d(oup), 25 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 26 | ) 27 | 28 | 29 | def conv_dw(inp, oup, stride, leaky=0.1): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 32 | nn.BatchNorm2d(inp), 33 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 34 | 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(oup), 37 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 38 | ) 39 | 40 | 41 | class SSH(nn.Module): 42 | # SSH: Single Stage Headless Face Detector 43 | # https://arxiv.org/abs/1708.03979 44 | def __init__(self, in_channels, out_channels): 45 | super().__init__() 46 | assert out_channels % 4 == 0 47 | leaky = 0 48 | if (out_channels <= 64): 49 | leaky = 0.1 50 | self.conv3X3 = conv3_bn(in_channels, out_channels//2, stride=1) 51 | 52 | self.conv5X5_1 = conv3_bn_lrelu(in_channels, out_channels//4, stride=1, leaky=leaky) 53 | self.conv5X5_2 = conv3_bn(out_channels//4, out_channels//4, stride=1) 54 | 55 | self.conv7X7_2 = conv3_bn_lrelu(out_channels//4, out_channels//4, stride=1, leaky=leaky) 56 | self.conv7x7_3 = conv3_bn(out_channels//4, out_channels//4, stride=1) 57 | 58 | def forward(self, input): 59 | conv3X3 = self.conv3X3(input) 60 | 61 | conv5X5_1 = self.conv5X5_1(input) 62 | conv5X5 = self.conv5X5_2(conv5X5_1) 63 | 64 | conv7X7_2 = self.conv7X7_2(conv5X5_1) 65 | conv7X7 = self.conv7x7_3(conv7X7_2) 66 | 67 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 68 | out = F.relu(out) 69 | return out 70 | 71 | 72 | class MobileNetV1(nn.Module): 73 | def __init__(self, num_classes=1000, width=0.25): 74 | super().__init__() 75 | self.stage1 = nn.Sequential( 76 | conv3_bn_lrelu(3, round(width*32), 2, leaky=0.1), # 3 77 | conv_dw(round(width*32), round(width*64), 1), # 7 78 | conv_dw(round(width*64), round(width*128), 2), # 11 79 | conv_dw(round(width*128), round(width*128), 1), # 19 80 | conv_dw(round(width*128), round(width*256), 2), # 27 81 | conv_dw(round(width*256), round(width*256), 1), # 43 82 | ) 83 | self.stage2 = nn.Sequential( 84 | conv_dw(round(width*256), round(width*512), 2), # 43 + 16 = 59 85 | conv_dw(round(width*512), round(width*512), 1), # 59 + 32 = 91 86 | conv_dw(round(width*512), round(width*512), 1), # 91 + 32 = 123 87 | conv_dw(round(width*512), round(width*512), 1), # 123 + 32 = 155 88 | conv_dw(round(width*512), round(width*512), 1), # 155 + 32 = 187 89 | 
conv_dw(round(width*512), round(width*512), 1), # 187 + 32 = 219 90 | ) 91 | self.stage3 = nn.Sequential( 92 | conv_dw(round(width*512), round(width*1024), 2), # 219 +3 2 = 241 93 | conv_dw(round(width*1024), round(width*1024), 1), # 241 + 64 = 301 94 | ) 95 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 96 | self.fc = nn.Linear(256, num_classes) 97 | 98 | def forward(self, x): 99 | x = self.stage1(x) 100 | x = self.stage2(x) 101 | x = self.stage3(x) 102 | x = self.avg(x).view(-1, 256) 103 | x = self.fc(x) 104 | return x 105 | -------------------------------------------------------------------------------- /face_detection/model/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from math import ceil 3 | 4 | import torch 5 | 6 | 7 | class PriorBox: 8 | def __init__(self, cfg, image_size=None): 9 | self.min_sizes = cfg['min_sizes'] 10 | self.steps = cfg['steps'] 11 | self.clip = cfg['clip'] 12 | self.image_size = image_size 13 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 14 | self.name = "s" 15 | 16 | def forward(self): 17 | anchors = [] 18 | for k, f in enumerate(self.feature_maps): 19 | min_sizes = self.min_sizes[k] 20 | for i, j in product(range(f[0]), range(f[1])): 21 | for min_size in min_sizes: 22 | s_kx = min_size / self.image_size[1] 23 | s_ky = min_size / self.image_size[0] 24 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 25 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 26 | for cy, cx in product(dense_cy, dense_cx): 27 | anchors += [cx, cy, s_kx, s_ky] 28 | 29 | # back to torch land 30 | output = torch.Tensor(anchors).view(-1, 4) 31 | if self.clip: 32 | output.clamp_(min=0, max=1) 33 | return output 34 | -------------------------------------------------------------------------------- /face_detection/model/retinaface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torchvision.models.feature_extraction import create_feature_extractor 5 | from torchvision.models import quantization 6 | from torchvision.ops import FeaturePyramidNetwork 7 | 8 | from .networks import SSH, MobileNetV1 9 | 10 | 11 | class ClassHead(nn.Conv2d): 12 | def __init__(self, in_channels=512, num_anchors=3): 13 | super().__init__(in_channels, num_anchors*2, kernel_size=1) 14 | self.num_anchors = num_anchors 15 | 16 | def forward(self, input): 17 | out = self._conv_forward(input, self.weight, self.bias) 18 | out = out.permute(0, 2, 3, 1).contiguous() 19 | return out.view(out.size(0), -1, 2) 20 | 21 | 22 | class BboxHead(nn.Conv2d): 23 | def __init__(self, in_channels=512, num_anchors=3): 24 | super().__init__(in_channels, num_anchors*4, kernel_size=1) 25 | 26 | def forward(self, input): 27 | out = self._conv_forward(input, self.weight, self.bias) 28 | out = out.permute(0, 2, 3, 1).contiguous() 29 | return out.view(out.size(0), -1, 4) 30 | 31 | 32 | class LandmarkHead(nn.Conv2d): 33 | def __init__(self, in_channels=512, num_anchors=3): 34 | super().__init__(in_channels, num_anchors*10, kernel_size=1) 35 | 36 | def forward(self, input): 37 | out = self._conv_forward(input, self.weight, self.bias) 38 | out = out.permute(0, 2, 3, 1).contiguous() 39 | return out.view(out.size(0), -1, 10) 40 | 41 | 42 | class RetinaFace(nn.Module): 43 | def __init__(self, backbone, in_channel, out_channel, **kwargs): 44 | 
super().__init__() 45 | assert backbone in ("mobilenet0.25", "resnet50") 46 | if backbone == "mobilenet0.25": 47 | model = MobileNetV1() 48 | ckpt_file = "./weights/mobilenet0.25_pretrain.pt" 49 | try: 50 | checkpoint = torch.load(ckpt_file, map_location="cpu") 51 | from collections import OrderedDict 52 | new_state_dict = OrderedDict() 53 | for k, v in checkpoint['state_dict'].items(): 54 | name = k[7:] # remove module. 55 | new_state_dict[name] = v 56 | # load params 57 | model.load_state_dict(new_state_dict) 58 | except: 59 | print(f"{ckpt_file} not found!") 60 | return_nodes={ 61 | "stage1": "feat0", 62 | "stage2": "feat1", 63 | "stage3": "feat2", 64 | } 65 | else: 66 | import torchvision.models as models 67 | model = models.resnet50(pretrained=True) 68 | return_nodes={ 69 | "layer2": "feat0", 70 | "layer3": "feat1", 71 | "layer4": "feat2", 72 | } 73 | 74 | self.body = create_feature_extractor(model, return_nodes=return_nodes) 75 | in_channels_stage2 = in_channel 76 | in_channels_list = [ 77 | in_channels_stage2 * 2, 78 | in_channels_stage2 * 4, 79 | in_channels_stage2 * 8, 80 | ] 81 | out_channels = out_channel 82 | self.fpn = FeaturePyramidNetwork(in_channels_list, out_channels) 83 | self.ssh1 = SSH(out_channels, out_channels) 84 | self.ssh2 = SSH(out_channels, out_channels) 85 | self.ssh3 = SSH(out_channels, out_channels) 86 | 87 | fpn_num = len(in_channels_list) 88 | self.class_head = self._make_class_head(fpn_num=fpn_num, in_channels=out_channels) 89 | self.bbox_head = self._make_bbox_head(fpn_num=fpn_num, in_channels=out_channels) 90 | self.landmark_head = self._make_landmark_head(fpn_num=fpn_num, in_channels=out_channels) 91 | 92 | def _make_class_head(self, fpn_num=3, in_channels=64, anchor_num=2): 93 | classhead = nn.ModuleList() 94 | for i in range(fpn_num): 95 | classhead.append(ClassHead(in_channels, anchor_num)) 96 | return classhead 97 | 98 | def _make_bbox_head(self, fpn_num=3, in_channels=64, anchor_num=2): 99 | bboxhead = nn.ModuleList() 100 | for i in range(fpn_num): 101 | bboxhead.append(BboxHead(in_channels, anchor_num)) 102 | return bboxhead 103 | 104 | def _make_landmark_head(self, fpn_num=3, in_channels=64, anchor_num=2): 105 | landmarkhead = nn.ModuleList() 106 | for i in range(fpn_num): 107 | landmarkhead.append(LandmarkHead(in_channels, anchor_num)) 108 | return landmarkhead 109 | 110 | def forward(self, inputs): 111 | out = self.body(inputs) 112 | 113 | # FPN 114 | out = self.fpn(out) 115 | 116 | # SSH 117 | feature0 = self.ssh1(out["feat0"]) 118 | feature1 = self.ssh2(out["feat1"]) 119 | feature2 = self.ssh3(out["feat2"]) 120 | 121 | bbox_regressions = torch.cat([ 122 | self.bbox_head[0](feature0), 123 | self.bbox_head[1](feature1), 124 | self.bbox_head[2](feature2), 125 | ], dim=1) 126 | 127 | classifications = torch.cat([ 128 | self.class_head[0](feature0), 129 | self.class_head[1](feature1), 130 | self.class_head[2](feature2), 131 | ], dim=1) 132 | 133 | lm_regressions = torch.cat([ 134 | self.landmark_head[0](feature0), 135 | self.landmark_head[1](feature1), 136 | self.landmark_head[2](feature2), 137 | ], dim=1) 138 | 139 | if not self.training: 140 | classifications = F.softmax(classifications, dim=-1) 141 | return bbox_regressions, classifications, lm_regressions 142 | 143 | def fuse_model(self) -> None: 144 | for m in self.modules(): 145 | if type(m) == quantization.mobilenetv2.QuantizableMobileNetV2: 146 | m.fuse_model() 147 | elif type(m) == quantization.mobilenetv3.QuantizableMobileNetV3: 148 | m.fuse_model() 149 | 
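For orientation, a minimal usage sketch of this module (not part of the repository): in the test scripts the cfg dict comes from checkpoint["config"], so the in_channel/out_channel values below are assumptions chosen to match the mobilenet0.25 stage widths, not values read from data/config.py.

```python
import torch
from model.retinaface import RetinaFace

# assumed values; the real ones live in data/config.py (cfg_mnet)
cfg = {"backbone": "mobilenet0.25", "in_channel": 32, "out_channel": 64}

net = RetinaFace(**cfg).eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 640, 640)   # in practice a mean-subtracted BGR tensor
    loc, conf, landms = net(dummy)
# loc:    [1, num_priors, 4]  box regression offsets
# conf:   [1, num_priors, 2]  class scores (softmax-ed because the net is in eval mode)
# landms: [1, num_priors, 10] five (x, y) landmark offsets
```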
-------------------------------------------------------------------------------- /face_detection/test_fddb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from torchvision.ops import nms 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.box_utils import decode, decode_landm 13 | from utils.misc import draw_keypoint 14 | from utils.timer import Timer 15 | 16 | parser = argparse.ArgumentParser(description='Retinaface') 17 | parser.add_argument( 18 | '--checkpoint', type=str, 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument('--save-folder', default='eval/', type=str, help='Dir to save results') 23 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 24 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT') 25 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold') 26 | parser.add_argument('--top-k', default=5000, type=int, help='top_k') 27 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold') 28 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k') 29 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results') 30 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold') 31 | 32 | 33 | def main(): 34 | args = parser.parse_args() 35 | assert os.path.isfile(args.checkpoint) 36 | 37 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 38 | cfg = checkpoint["config"] 39 | device = torch.device("cpu" if args.cpu else "cuda") 40 | 41 | # net and model 42 | net = RetinaFace(**cfg) 43 | net.load_state_dict(checkpoint["net_state_dict"]) 44 | net.eval().requires_grad_(False) 45 | net.to(device) 46 | if args.jit: 47 | net = torch.jit.script(net) 48 | print('Finished loading model!') 49 | torch.backends.cudnn.benchmark = True 50 | 51 | # save file 52 | os.makedirs(args.save_folder, exist_ok=True) 53 | fw = open(os.path.join(args.save_folder, 'FDDB_dets.txt'), 'w') 54 | 55 | # testing dataset 56 | testset_folder = 'data/FDDB/images/' 57 | testset_list = 'data/FDDB/img_list.txt' 58 | with open(testset_list, 'r') as fr: 59 | test_dataset = fr.read().split() 60 | num_images = len(test_dataset) 61 | 62 | # testing scale 63 | resize = 1 64 | 65 | _t = { 66 | "preprocess": Timer(), 67 | "forward": Timer(), 68 | "postprocess": Timer(), 69 | "misc": Timer(), 70 | } 71 | 72 | # testing begin 73 | for i, img_name in enumerate(test_dataset): 74 | image_path = testset_folder + img_name + '.jpg' 75 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 76 | 77 | # NOTE preprocessing. 78 | _t["preprocess"].tic() 79 | img = img_raw - (104, 117, 123) 80 | if resize != 1: 81 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 82 | im_height, im_width, _ = img.shape 83 | scale = torch.as_tensor( 84 | [im_width, im_height, im_width, im_height], 85 | dtype=torch.float, device=device 86 | ) 87 | img = img.transpose(2, 0, 1) 88 | img = np.float32(img) 89 | img = torch.from_numpy(img).unsqueeze(0) 90 | img = img.to(device) 91 | _t["preprocess"].toc() 92 | 93 | # NOTE forward. 
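# net(img) returns three tensors shaped by the heads in model/retinaface.py:
# loc [1, num_priors, 4] box offsets, conf [1, num_priors, 2] face scores
# (already softmax-ed since the net is in eval mode), and landms
# [1, num_priors, 10] landmark offsets; all are decoded against the PriorBox
# anchors in the postprocessing step below.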
94 | _t["forward"].tic() 95 | loc, conf, landms = net(img) # forward pass 96 | _t["forward"].toc() 97 | 98 | # NOTE misc. 99 | _t["postprocess"].tic() 100 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 101 | priors = priorbox.forward() 102 | priors = priors.to(device) 103 | prior_data = priors.data 104 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 105 | boxes = boxes * scale / resize 106 | scores = conf.squeeze(0)[:, 1] 107 | 108 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 109 | scale1 = torch.as_tensor( 110 | [im_width, im_height] * 5, dtype=torch.float, device=device 111 | ) 112 | scale1 = scale1.to(device) 113 | landms = landms * scale1 / resize 114 | 115 | # ignore low scores 116 | inds = torch.where(scores > args.confidence_threshold)[0] 117 | boxes = boxes[inds] 118 | landms = landms[inds] 119 | scores = scores[inds] 120 | 121 | # keep top-K before NMS 122 | order = scores.argsort() 123 | boxes = boxes[order][:args.top_k] 124 | landms = landms[order][:args.top_k] 125 | scores = scores[order][:args.top_k] 126 | _t["postprocess"].toc() 127 | 128 | # do NMS 129 | _t["misc"].tic() 130 | keep = nms(boxes, scores, args.nms_threshold) 131 | boxes = boxes[keep] 132 | scores = scores[keep] 133 | landms = landms[keep] 134 | 135 | boxes = boxes.cpu().numpy() 136 | scores = scores.cpu().numpy() 137 | landms = landms.cpu().numpy() 138 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 139 | dets = np.concatenate((dets, landms), axis=1) 140 | _t["misc"].toc() 141 | 142 | # save dets 143 | fw.write(f'{img_name:s}\n') 144 | fw.write(f'{dets.shape[0]:.1f}\n') 145 | for k in range(dets.shape[0]): 146 | xmin, ymin, xmax, ymax = dets[k, :4] 147 | score = dets[k, 4] 148 | w = xmax - xmin + 1 149 | h = ymax - ymin + 1 150 | # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score)) 151 | fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score)) 152 | 153 | print( 154 | f"im_detect: {i+1:d}/{num_images:d}\t" 155 | f"preprocess_time: {_t['preprocess'].average_time:.4f}s\t" 156 | f"forward_time: {_t['forward'].average_time:.4f}s\t" 157 | f"postprocess_time: {_t['postprocess'].average_time:.4f}s\t" 158 | f"misc_time: {_t['misc'].average_time:.4f}s" 159 | ) 160 | 161 | # show image 162 | if args.save_image: 163 | draw_keypoint(img_raw, dets, args.vis_thres) 164 | # save image 165 | if not os.path.exists("./results/"): 166 | os.makedirs("./results/") 167 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw) 168 | 169 | fw.close() 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /face_detection/test_widerface.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from torchvision.ops import nms 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.box_utils import decode, decode_landm 13 | from utils.misc import draw_keypoint 14 | from utils.timer import Timer 15 | 16 | parser = argparse.ArgumentParser(description='Retinaface') 17 | parser.add_argument( 18 | '--checkpoint', type=str, 19 | default='./weights/mobilenet0.25_final.pt', 20 | help='Trained state_dict file path to open' 21 | ) 22 | parser.add_argument('--origin-size', default=True, type=str, 
help='Whether use origin image size to evaluate') 23 | parser.add_argument('--save-folder', default='./widerface_evaluate/widerface_txt/', type=str, help='Dir to save txt results') 24 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 25 | parser.add_argument('--jit', action="store_true", default=False, help='Use JIT') 26 | parser.add_argument('--dataset-folder', default='./data/widerface/val/images/', type=str, help='dataset path') 27 | parser.add_argument('--confidence-threshold', default=0.02, type=float, help='confidence_threshold') 28 | parser.add_argument('--top-k', default=5000, type=int, help='top_k') 29 | parser.add_argument('--nms-threshold', default=0.4, type=float, help='nms_threshold') 30 | parser.add_argument('--keep-top-k', default=750, type=int, help='keep_top_k') 31 | parser.add_argument('-s', '--save-image', action="store_true", default=False, help='show detection results') 32 | parser.add_argument('--vis-thres', default=0.5, type=float, help='visualization_threshold') 33 | 34 | 35 | def main(): 36 | args = parser.parse_args() 37 | assert os.path.isfile(args.checkpoint) 38 | 39 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 40 | cfg = checkpoint["config"] 41 | device = torch.device("cpu" if args.cpu else "cuda") 42 | 43 | # net and model 44 | net = RetinaFace(**cfg) 45 | net.load_state_dict(checkpoint["net_state_dict"]) 46 | net.eval().requires_grad_(False) 47 | net.to(device) 48 | if args.jit: 49 | net = torch.jit.script(net) 50 | print('Finished loading model!') 51 | torch.backends.cudnn.benchmark = True 52 | 53 | # testing dataset 54 | testset_folder = args.dataset_folder 55 | testset_list = args.dataset_folder[:-7] + "wider_val.txt" 56 | 57 | with open(testset_list, 'r') as fr: 58 | test_dataset = fr.read().split() 59 | num_images = len(test_dataset) 60 | os.makedirs("./results/", exist_ok=True) 61 | 62 | target_size = 1600.0 63 | max_size = 2150.0 64 | _t = {'forward_pass': Timer(), 'misc': Timer()} 65 | # testing begin 66 | for i, img_name in enumerate(test_dataset): 67 | image_path = testset_folder + img_name 68 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 69 | img = np.float32(img_raw) 70 | 71 | # testing scale 72 | im_shape = img.shape 73 | im_size_min = np.min(im_shape[0:2]) 74 | im_size_max = np.max(im_shape[0:2]) 75 | resize = target_size / im_size_min 76 | # prevent bigger axis from being more than max_size: 77 | if np.round(resize * im_size_max) > max_size: 78 | resize = float(max_size) / float(im_size_max) 79 | if args.origin_size: 80 | resize = 1 81 | 82 | if resize != 1: 83 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 84 | im_height, im_width, _ = img.shape 85 | scale = torch.as_tensor( 86 | [im_width, im_height, im_width, im_height], 87 | dtype=torch.float, device=device 88 | ) 89 | img -= (104, 117, 123) 90 | img = img.transpose(2, 0, 1) 91 | img = torch.from_numpy(img).unsqueeze(0) 92 | img = img.to(device) 93 | 94 | _t['forward_pass'].tic() 95 | loc, conf, landms = net(img) # forward pass 96 | _t['forward_pass'].toc() 97 | _t['misc'].tic() 98 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 99 | priors = priorbox.forward() 100 | priors = priors.to(device) 101 | prior_data = priors.data 102 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 103 | boxes = boxes * scale / resize 104 | scores = conf.squeeze(0)[:, 1] 105 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 106 | scale1 = 
torch.as_tensor( 107 | [im_width, im_height] * 5, dtype=torch.float, device=device 108 | ) 109 | landms = landms * scale1 / resize 110 | 111 | # ignore low scores 112 | inds = torch.where(scores > args.confidence_threshold)[0] 113 | boxes = boxes[inds] 114 | landms = landms[inds] 115 | scores = scores[inds] 116 | 117 | # keep top-K before NMS 118 | order = scores.argsort() 119 | boxes = boxes[order][:args.top_k] 120 | landms = landms[order][:args.top_k] 121 | scores = scores[order][:args.top_k] 122 | 123 | # do NMS 124 | keep = nms(boxes, scores, args.nms_threshold) 125 | boxes = boxes[keep] 126 | scores = scores[keep] 127 | landms = landms[keep] 128 | 129 | boxes = boxes.cpu().numpy() 130 | scores = scores.cpu().numpy() 131 | landms = landms.cpu().numpy() 132 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 133 | dets = np.concatenate((dets, landms), axis=1) 134 | _t['misc'].toc() 135 | 136 | # -------------------------------------------------------------------- 137 | save_name = args.save_folder + img_name[:-4] + ".txt" 138 | dirname = os.path.dirname(save_name) 139 | if not os.path.isdir(dirname): 140 | os.makedirs(dirname) 141 | with open(save_name, "w") as fd: 142 | bboxs = dets 143 | file_name = os.path.basename(save_name)[:-4] + "\n" 144 | bboxs_num = str(len(bboxs)) + "\n" 145 | fd.write(file_name) 146 | fd.write(bboxs_num) 147 | for box in bboxs: 148 | x = int(box[0]) 149 | y = int(box[1]) 150 | w = int(box[2]) - int(box[0]) 151 | h = int(box[3]) - int(box[1]) 152 | confidence = str(box[4]) 153 | line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n" 154 | fd.write(line) 155 | 156 | print(f"im_detect: {i+1:d}/{num_images:d}" 157 | f"forward_pass_time: {_t['forward_pass'].average_time:.4f}s misc: {_t['misc'].average_time:.4f}s") 158 | 159 | # save image 160 | if args.save_image: 161 | draw_keypoint(img_raw, dets, args.vis_thres) 162 | 163 | # save image 164 | cv2.imwrite(f"./results/{i:05d}.jpg", img_raw) 165 | 166 | 167 | if __name__ == "__main__": 168 | main() 169 | -------------------------------------------------------------------------------- /face_detection/train_detector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import datetime 4 | import math 5 | import os 6 | import time 7 | 8 | import torch 9 | 10 | from data import WiderFaceDetection, cfg_mnet, cfg_re50, preproc 11 | from model.multibox_loss import MultiBoxLoss 12 | from model.prior_box import PriorBox 13 | from model.retinaface import RetinaFace 14 | 15 | parser = argparse.ArgumentParser(description='Retinaface Training') 16 | parser.add_argument('--dataset', default='./data/widerface/train/label.txt', help='Training dataset directory') 17 | parser.add_argument('--network', default='mobilenet0.25', choices={"mobilenet0.25", "resnet50"}) 18 | parser.add_argument('--batch-size', default=32, help='Batch size') 19 | parser.add_argument('--num-workers', default=4, type=int, help='Number of workers used in dataloading') 20 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') 21 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 22 | parser.add_argument('--resume-net', default=None, help='resume net for retraining') 23 | parser.add_argument('--resume-epoch', default=0, type=int, help='resume iter for retraining') 24 | parser.add_argument('--weight-decay', default=5e-4, type=float, help='Weight 
decay for SGD') 25 | parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD') 26 | parser.add_argument('--save-folder', default='./weights/', help='Location to save checkpoint models') 27 | args = parser.parse_args() 28 | 29 | 30 | os.makedirs(args.save_folder, exist_ok=True) 31 | if args.network == "mobilenet0.25": 32 | cfg = cfg_mnet 33 | elif args.network == "resnet50": 34 | cfg = cfg_re50 35 | 36 | RGB_MEAN = (104, 117, 123) # bgr order 37 | img_dim = cfg['image_size'] 38 | batch_size = cfg['batch_size'] 39 | max_epoch = cfg['epoch'] 40 | 41 | initial_lr = args.lr 42 | gamma = args.gamma 43 | training_dataset = args.dataset 44 | save_folder = args.save_folder 45 | 46 | 47 | def initialize_network(cfg, checkpoint=None, print_net=False): 48 | net = RetinaFace(**cfg) 49 | if print_net: 50 | print("Printing net...") 51 | print(net) 52 | if checkpoint is not None: 53 | print('Loading resume network...') 54 | net.load_state_dict(checkpoint["net_state_dict"]) 55 | 56 | if torch.cuda.is_available(): 57 | net.cuda() 58 | num_gpu = torch.cuda.device_count() 59 | if num_gpu > 1: 60 | net = torch.nn.DataParallel(net) 61 | return cfg, net 62 | 63 | 64 | def training_loop(net, optimizer, criterion, dataloader, cfg): 65 | assert isinstance(net, torch.nn.Module) 66 | assert isinstance(optimizer, torch.optim.Optimizer) 67 | assert isinstance(dataloader, torch.utils.data.DataLoader) 68 | assert isinstance(cfg, dict) 69 | 70 | priorbox = PriorBox(cfg, image_size=(cfg['image_size'],)*2) 71 | with torch.no_grad(): 72 | priors = priorbox.forward() 73 | priors = priors.cuda() 74 | 75 | net.train() 76 | epoch = 0 + args.resume_epoch 77 | print('Loading Dataset...') 78 | 79 | epoch_size = math.ceil(len(dataloader)) 80 | max_iter = max_epoch * epoch_size 81 | 82 | stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) 83 | step_index = 0 84 | 85 | start_iter = 0 86 | if args.resume_epoch > 0: 87 | start_iter += args.resume_epoch * epoch_size 88 | 89 | for iteration in range(start_iter, max_iter): 90 | load_t0 = time.perf_counter() 91 | if iteration in stepvalues: 92 | step_index += 1 93 | lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) 94 | 95 | # load train data 96 | try: 97 | images, targets = next(batch_iterator) 98 | except: 99 | batch_iterator = iter(dataloader) 100 | if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): 101 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict() 102 | torch.save( 103 | { 104 | "net_state_dict": net_state_dict, 105 | "epoch": epoch, 106 | "config": cfg, 107 | }, save_folder + f"{cfg['backbone']}_epoch{epoch:03d}.pt" 108 | ) 109 | epoch += 1 110 | images, targets = next(batch_iterator) 111 | 112 | images = images.cuda() 113 | targets = [anno.cuda() for anno in targets] 114 | 115 | # forward 116 | out = net(images) 117 | 118 | # backprop 119 | optimizer.zero_grad(set_to_none=True) 120 | loss_l, loss_c, loss_landm = criterion(out, priors, targets) 121 | loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm 122 | loss.backward() 123 | optimizer.step() 124 | 125 | load_t1 = time.perf_counter() 126 | if (iteration + 1) % 10 == 0: 127 | batch_time = load_t1 - load_t0 128 | eta = int(batch_time * (max_iter - iteration)) 129 | print( 130 | f"Epoch:{epoch:03d}/{max_epoch:03d} " 131 | f'|| Epochiter: {(iteration % epoch_size)+1}/{epoch_size} ' 132 | f'|| Iter: {iteration+1}/{max_iter} ' 133 | f'|| Loc: {loss_l.item():.3f} Cla: 
{loss_c.item():.3f} Landm: {loss_landm.item():.3f} ' 134 | f'|| LR: {lr:.8f} || Batchtime: {batch_time:.4f} s ' 135 | f'|| ETA: {str(datetime.timedelta(seconds=eta))}' 136 | ) 137 | 138 | net_state_dict = net.module.state_dict() if hasattr(net, "module") else net.state_dict() 139 | torch.save( 140 | { 141 | "net_state_dict": net_state_dict, 142 | "epoch": epoch, 143 | "config": cfg, 144 | }, save_folder + f"{cfg['backbone']}_final.pt" 145 | ) 146 | 147 | 148 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 149 | """Sets the learning rate 150 | # Adapted from PyTorch Imagenet example: 151 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 152 | """ 153 | warmup_epoch = -1 154 | if epoch <= warmup_epoch: 155 | lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch) 156 | else: 157 | lr = initial_lr * (gamma ** (step_index)) 158 | for param_group in optimizer.param_groups: 159 | param_group['lr'] = lr 160 | return lr 161 | 162 | 163 | def main(): 164 | if args.resume_net is not None and os.path.isfile(args.resume_net): 165 | checkpoint = torch.load(args.resume_net, map_location="cpu") 166 | cfg = checkpoint["config"] 167 | else: 168 | checkpoint = None 169 | if args.network == "mobilenet0.25": 170 | cfg = cfg_mnet 171 | elif args.network == "resnet50": 172 | cfg = cfg_re50 173 | 174 | cfg, net = initialize_network(cfg, checkpoint) 175 | torch.backends.cudnn.benchmark = True 176 | 177 | optimizer = torch.optim.SGD( 178 | net.parameters(), lr=initial_lr, 179 | momentum=args.momentum, weight_decay=args.weight_decay, 180 | ) 181 | criterion = MultiBoxLoss(2, 0.35, True, 0, True, 7, 0.35, False) 182 | 183 | dataset = WiderFaceDetection(training_dataset, preproc(img_dim, RGB_MEAN)) 184 | dataloader = torch.utils.data.DataLoader( 185 | dataset, batch_size, shuffle=True, 186 | num_workers=args.num_workers, collate_fn=dataset.collate, 187 | ) 188 | 189 | training_loop(net, optimizer, criterion, dataloader, cfg) 190 | 191 | 192 | if __name__ == '__main__': 193 | main() 194 | -------------------------------------------------------------------------------- /face_detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/utils/__init__.py -------------------------------------------------------------------------------- /face_detection/utils/misc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from torchvision.ops import nms 5 | 6 | from .box_utils import decode, decode_landm 7 | 8 | 9 | def draw_keypoint(image, dets, threshold): 10 | for b in dets: 11 | if b[4] < threshold: 12 | continue 13 | text = f"{b[4]:.4f}" 14 | b = list(map(round, b)) 15 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 16 | cx = b[0] 17 | cy = b[1] + 12 18 | cv2.putText( 19 | image, text, (cx, cy), 20 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 21 | ) 22 | 23 | # landms 24 | cv2.circle(image, (b[5], b[6]), 1, (0, 0, 255), 4) 25 | cv2.circle(image, (b[7], b[8]), 1, (0, 255, 255), 4) 26 | cv2.circle(image, (b[9], b[10]), 1, (255, 0, 255), 4) 27 | cv2.circle(image, (b[11], b[12]), 1, (0, 255, 0), 4) 28 | cv2.circle(image, (b[13], b[14]), 1, (255, 0, 0), 4) 29 | 30 | 31 | def inference( 32 | network, image, scale, scale1, prior_data, 33 | cfg, 
confidence_threshold, nms_threshold, device 34 | ): 35 | img = image - (104, 117, 123) 36 | img = img.transpose(2, 0, 1) 37 | img = np.float32(img) 38 | img = torch.from_numpy(img).unsqueeze(0) 39 | img = img.to(device) 40 | 41 | loc, conf, landms = network(img) # forward pass 42 | 43 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 44 | boxes *= scale 45 | scores = conf.squeeze(0)[:, 1] 46 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 47 | landms *= scale1 48 | 49 | # ignore low scores 50 | inds = torch.where(scores > confidence_threshold)[0] 51 | boxes = boxes[inds] 52 | landms = landms[inds] 53 | scores = scores[inds] 54 | 55 | # do NMS 56 | keep = nms(boxes, scores, nms_threshold) 57 | boxes = boxes[keep] 58 | scores = scores[keep] 59 | landms = landms[keep] 60 | 61 | boxes = boxes.cpu().numpy() 62 | scores = scores.cpu().numpy() 63 | landms = landms.cpu().numpy() 64 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 65 | dets = np.concatenate((dets, landms), axis=1) 66 | return dets -------------------------------------------------------------------------------- /face_detection/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer: 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.perf_counter() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.perf_counter() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 
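Timer accumulates per-stage timings via the tic()/toc() pair in the test scripts; a minimal sketch of that pattern (the sleep is just a stand-in workload):

```python
import time

from utils.timer import Timer  # as imported by the face_detection test scripts

t = Timer()
for _ in range(3):
    t.tic()            # start timing a block, e.g. a forward pass
    time.sleep(0.01)   # stand-in workload
    t.toc()            # updates total_time, calls and average_time
print(f"average: {t.average_time:.4f}s over {t.calls} calls")
```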
41 | -------------------------------------------------------------------------------- /face_detection/webcam_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | 10 | from model.prior_box import PriorBox 11 | from model.retinaface import RetinaFace 12 | from utils.misc import draw_keypoint, inference 13 | 14 | parser = argparse.ArgumentParser(description='Retinaface') 15 | parser.add_argument( 16 | '--checkpoint', type=str, 17 | default='./weights/mobilenet0.25_final.pt', 18 | help='Trained state_dict file path to open' 19 | ) 20 | parser.add_argument( 21 | '--cpu', action="store_true", default=False, 22 | help='Use cpu inference' 23 | ) 24 | parser.add_argument( 25 | '--jit', action="store_true", default=False, 26 | help='Use JIT' 27 | ) 28 | parser.add_argument( 29 | '--confidence-threshold', type=float, default=0.02, 30 | help='confidence_threshold' 31 | ) 32 | parser.add_argument( 33 | '--nms-threshold', type=float, default=0.4, 34 | help='nms_threshold' 35 | ) 36 | parser.add_argument( 37 | '--vis-thres', type=float, default=0.5, 38 | help='visualization_threshold' 39 | ) 40 | parser.add_argument( 41 | '-s', '--save-image', action="store_true", default=False, 42 | help='show detection results' 43 | ) 44 | parser.add_argument( 45 | '--save-dir', type=str, default='demo', 46 | help='Dir to save results' 47 | ) 48 | 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | assert os.path.isfile(args.checkpoint) 53 | 54 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 55 | cfg = checkpoint["config"] 56 | device = torch.device("cpu" if args.cpu else "cuda") 57 | 58 | # net and model 59 | net = RetinaFace(**cfg) 60 | net.load_state_dict(checkpoint["net_state_dict"]) 61 | net.eval().requires_grad_(False) 62 | net.to(device) 63 | print('Finished loading model!') 64 | cudnn.benchmark = True 65 | 66 | # prepare testing 67 | cap = cv2.VideoCapture(0) 68 | assert cap.isOpened() 69 | ret_val, img_tmp = cap.read() 70 | im_height, im_width, _ = img_tmp.shape 71 | scale = torch.Tensor([im_width, im_height, im_width, im_height]) 72 | scale = scale.to(device) 73 | 74 | scale1 = torch.Tensor([im_width, im_height] * 5) 75 | scale1 = scale1.to(device) 76 | 77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 78 | priors = priorbox.forward() 79 | priors = priors.to(device) 80 | prior_data = priors.data 81 | 82 | if args.jit: 83 | img_tmp = img_tmp.transpose(2, 0, 1) 84 | img_tmp = np.float32(img_tmp) 85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0) 86 | dummy = img_tmp.to(device) 87 | net = torch.jit.trace(net, example_inputs=dummy) 88 | 89 | if args.save_image: 90 | nframe = 0 91 | fname = os.path.join(args.save_dir, "{:06d}.jpg") 92 | os.makedirs(args.save_dir, exist_ok=True) 93 | 94 | # testing begin 95 | ret_val, img_raw = cap.read() 96 | while ret_val: 97 | start = cv2.getTickCount() 98 | 99 | # NOTE preprocessing. 
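# Preprocessing (subtracting the BGR mean (104, 117, 123), HWC -> CHW transpose,
# conversion to a float32 batch tensor) happens inside utils.misc.inference(),
# so the raw capture frame is passed in as-is.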
100 | dets = inference( 101 | net, img_raw, scale, scale1, prior_data, cfg, 102 | args.confidence_threshold, args.nms_threshold, device 103 | ) 104 | 105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start)) 106 | print(f"runtime: {fps:.1f} sec/iter") 107 | cv2.putText( 108 | img_raw, f"FPS: {fps:.1f}", (5, 15), 109 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 110 | ) 111 | 112 | # show image 113 | draw_keypoint(img_raw, dets, args.vis_thres) 114 | 115 | if args.save_image: 116 | cv2.imwrite(fname.format(nframe), img_raw) 117 | nframe += 1 118 | 119 | cv2.imshow("Face Detection Demo", img_raw) 120 | if cv2.waitKey(1) == 27: # Press ESC button to quit. 121 | break 122 | 123 | ret_val, img_raw = cap.read() 124 | 125 | cap.release() 126 | cv2.destroyAllWindows() 127 | 128 | 129 | if __name__ == "__main__": 130 | main() -------------------------------------------------------------------------------- /face_detection/weights/mobilenet0.25_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_final.pt -------------------------------------------------------------------------------- /face_detection/weights/mobilenet0.25_pretrain.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/weights/mobilenet0.25_pretrain.pt -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/README.md: -------------------------------------------------------------------------------- 1 | # WiderFace-Evaluation 2 | Python Evaluation Code for [Wider Face Dataset](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/) 3 | 4 | 5 | ## Usage 6 | 7 | 8 | ##### before evaluating .... 
9 | 10 | ```` 11 | python3 setup.py build_ext --inplace 12 | ```` 13 | 14 | ##### evaluating 15 | 16 | **GroungTruth:** `wider_face_val.mat`, `wider_easy_val.mat`, `wider_medium_val.mat`,`wider_hard_val.mat` 17 | 18 | ```` 19 | python3 evaluation.py -p -g 20 | ```` 21 | 22 | ## Bugs & Problems 23 | please issue 24 | 25 | ## Acknowledgements 26 | 27 | some code borrowed from Sergey Karayev 28 | -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/evaluation.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | import os 9 | import tqdm 10 | import pickle 11 | import argparse 12 | import numpy as np 13 | from scipy.io import loadmat 14 | from bbox import bbox_overlaps 15 | from IPython import embed 16 | 17 | 18 | def get_gt_boxes(gt_dir): 19 | """ gt dir: (wider_face_val.mat, wider_easy_val.mat, wider_medium_val.mat, wider_hard_val.mat)""" 20 | 21 | gt_mat = loadmat(os.path.join(gt_dir, 'wider_face_val.mat')) 22 | hard_mat = loadmat(os.path.join(gt_dir, 'wider_hard_val.mat')) 23 | medium_mat = loadmat(os.path.join(gt_dir, 'wider_medium_val.mat')) 24 | easy_mat = loadmat(os.path.join(gt_dir, 'wider_easy_val.mat')) 25 | 26 | facebox_list = gt_mat['face_bbx_list'] 27 | event_list = gt_mat['event_list'] 28 | file_list = gt_mat['file_list'] 29 | 30 | hard_gt_list = hard_mat['gt_list'] 31 | medium_gt_list = medium_mat['gt_list'] 32 | easy_gt_list = easy_mat['gt_list'] 33 | 34 | return facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list 35 | 36 | 37 | def 
get_gt_boxes_from_txt(gt_path, cache_dir): 38 | 39 | cache_file = os.path.join(cache_dir, 'gt_cache.pkl') 40 | if os.path.exists(cache_file): 41 | f = open(cache_file, 'rb') 42 | boxes = pickle.load(f) 43 | f.close() 44 | return boxes 45 | 46 | f = open(gt_path, 'r') 47 | state = 0 48 | lines = f.readlines() 49 | lines = list(map(lambda x: x.rstrip('\r\n'), lines)) 50 | boxes = {} 51 | print(len(lines)) 52 | f.close() 53 | current_boxes = [] 54 | current_name = None 55 | for line in lines: 56 | if state == 0 and '--' in line: 57 | state = 1 58 | current_name = line 59 | continue 60 | if state == 1: 61 | state = 2 62 | continue 63 | 64 | if state == 2 and '--' in line: 65 | state = 1 66 | boxes[current_name] = np.array(current_boxes).astype('float32') 67 | current_name = line 68 | current_boxes = [] 69 | continue 70 | 71 | if state == 2: 72 | box = [float(x) for x in line.split(' ')[:4]] 73 | current_boxes.append(box) 74 | continue 75 | 76 | f = open(cache_file, 'wb') 77 | pickle.dump(boxes, f) 78 | f.close() 79 | return boxes 80 | 81 | 82 | def read_pred_file(filepath): 83 | 84 | with open(filepath, 'r') as f: 85 | lines = f.readlines() 86 | img_file = lines[0].rstrip('\n\r') 87 | lines = lines[2:] 88 | 89 | # b = lines[0].rstrip('\r\n').split(' ')[:-1] 90 | # c = float(b) 91 | # a = map(lambda x: [[float(a[0]), float(a[1]), float(a[2]), float(a[3]), float(a[4])] for a in x.rstrip('\r\n').split(' ')], lines) 92 | boxes = [] 93 | for line in lines: 94 | line = line.rstrip('\r\n').split(' ') 95 | if line[0] is '': 96 | continue 97 | # a = float(line[4]) 98 | boxes.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), float(line[4])]) 99 | boxes = np.array(boxes) 100 | # boxes = np.array(list(map(lambda x: [float(a) for a in x.rstrip('\r\n').split(' ')], lines))).astype('float') 101 | return img_file.split('/')[-1], boxes 102 | 103 | 104 | def get_preds(pred_dir): 105 | events = os.listdir(pred_dir) 106 | boxes = dict() 107 | pbar = tqdm.tqdm(events) 108 | 109 | for event in pbar: 110 | pbar.set_description('Reading Predictions ') 111 | event_dir = os.path.join(pred_dir, event) 112 | event_images = os.listdir(event_dir) 113 | current_event = dict() 114 | for imgtxt in event_images: 115 | imgname, _boxes = read_pred_file(os.path.join(event_dir, imgtxt)) 116 | current_event[imgname.rstrip('.jpg')] = _boxes 117 | boxes[event] = current_event 118 | return boxes 119 | 120 | 121 | def norm_score(pred): 122 | """ norm score 123 | pred {key: [[x1,y1,x2,y2,s]]} 124 | """ 125 | 126 | max_score = 0 127 | min_score = 1 128 | 129 | for _, k in pred.items(): 130 | for _, v in k.items(): 131 | if len(v) == 0: 132 | continue 133 | _min = np.min(v[:, -1]) 134 | _max = np.max(v[:, -1]) 135 | max_score = max(_max, max_score) 136 | min_score = min(_min, min_score) 137 | 138 | diff = max_score - min_score 139 | for _, k in pred.items(): 140 | for _, v in k.items(): 141 | if len(v) == 0: 142 | continue 143 | v[:, -1] = (v[:, -1] - min_score)/diff 144 | 145 | 146 | def image_eval(pred, gt, ignore, iou_thresh): 147 | """ single image evaluation 148 | pred: Nx5 149 | gt: Nx4 150 | ignore: 151 | """ 152 | 153 | _pred = pred.copy() 154 | _gt = gt.copy() 155 | pred_recall = np.zeros(_pred.shape[0]) 156 | recall_list = np.zeros(_gt.shape[0]) 157 | proposal_list = np.ones(_pred.shape[0]) 158 | 159 | _pred[:, 2] = _pred[:, 2] + _pred[:, 0] 160 | _pred[:, 3] = _pred[:, 3] + _pred[:, 1] 161 | _gt[:, 2] = _gt[:, 2] + _gt[:, 0] 162 | _gt[:, 3] = _gt[:, 3] + _gt[:, 1] 163 | 164 | overlaps = 
bbox_overlaps(_pred[:, :4], _gt) 165 | 166 | for h in range(_pred.shape[0]): 167 | 168 | gt_overlap = overlaps[h] 169 | max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax() 170 | if max_overlap >= iou_thresh: 171 | if ignore[max_idx] == 0: 172 | recall_list[max_idx] = -1 173 | proposal_list[h] = -1 174 | elif recall_list[max_idx] == 0: 175 | recall_list[max_idx] = 1 176 | 177 | r_keep_index = np.where(recall_list == 1)[0] 178 | pred_recall[h] = len(r_keep_index) 179 | return pred_recall, proposal_list 180 | 181 | 182 | def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall): 183 | pr_info = np.zeros((thresh_num, 2)).astype('float') 184 | for t in range(thresh_num): 185 | 186 | thresh = 1 - (t+1)/thresh_num 187 | r_index = np.where(pred_info[:, 4] >= thresh)[0] 188 | if len(r_index) == 0: 189 | pr_info[t, 0] = 0 190 | pr_info[t, 1] = 0 191 | else: 192 | r_index = r_index[-1] 193 | p_index = np.where(proposal_list[:r_index+1] == 1)[0] 194 | pr_info[t, 0] = len(p_index) 195 | pr_info[t, 1] = pred_recall[r_index] 196 | return pr_info 197 | 198 | 199 | def dataset_pr_info(thresh_num, pr_curve, count_face): 200 | _pr_curve = np.zeros((thresh_num, 2)) 201 | for i in range(thresh_num): 202 | _pr_curve[i, 0] = pr_curve[i, 1] / pr_curve[i, 0] 203 | _pr_curve[i, 1] = pr_curve[i, 1] / count_face 204 | return _pr_curve 205 | 206 | 207 | def voc_ap(rec, prec): 208 | 209 | # correct AP calculation 210 | # first append sentinel values at the end 211 | mrec = np.concatenate(([0.], rec, [1.])) 212 | mpre = np.concatenate(([0.], prec, [0.])) 213 | 214 | # compute the precision envelope 215 | for i in range(mpre.size - 1, 0, -1): 216 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 217 | 218 | # to calculate area under PR curve, look for points 219 | # where X axis (recall) changes value 220 | i = np.where(mrec[1:] != mrec[:-1])[0] 221 | 222 | # and sum (\Delta recall) * prec 223 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 224 | return ap 225 | 226 | 227 | def evaluation(pred, gt_path, iou_thresh=0.5): 228 | pred = get_preds(pred) 229 | norm_score(pred) 230 | facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = get_gt_boxes(gt_path) 231 | event_num = len(event_list) 232 | thresh_num = 1000 233 | settings = ['easy', 'medium', 'hard'] 234 | setting_gts = [easy_gt_list, medium_gt_list, hard_gt_list] 235 | aps = [] 236 | for setting_id in range(3): 237 | # different setting 238 | gt_list = setting_gts[setting_id] 239 | count_face = 0 240 | pr_curve = np.zeros((thresh_num, 2)).astype('float') 241 | # [hard, medium, easy] 242 | pbar = tqdm.tqdm(range(event_num)) 243 | for i in pbar: 244 | pbar.set_description('Processing {}'.format(settings[setting_id])) 245 | event_name = str(event_list[i][0][0]) 246 | img_list = file_list[i][0] 247 | pred_list = pred[event_name] 248 | sub_gt_list = gt_list[i][0] 249 | # img_pr_info_list = np.zeros((len(img_list), thresh_num, 2)) 250 | gt_bbx_list = facebox_list[i][0] 251 | 252 | for j in range(len(img_list)): 253 | pred_info = pred_list[str(img_list[j][0][0])] 254 | 255 | gt_boxes = gt_bbx_list[j][0].astype('float') 256 | keep_index = sub_gt_list[j][0] 257 | count_face += len(keep_index) 258 | 259 | if len(gt_boxes) == 0 or len(pred_info) == 0: 260 | continue 261 | ignore = np.zeros(gt_boxes.shape[0]) 262 | if len(keep_index) != 0: 263 | ignore[keep_index-1] = 1 264 | pred_recall, proposal_list = image_eval(pred_info, gt_boxes, ignore, iou_thresh) 265 | 266 | _img_pr_info = img_pr_info(thresh_num, pred_info, proposal_list, 
pred_recall) 267 | 268 | pr_curve += _img_pr_info 269 | pr_curve = dataset_pr_info(thresh_num, pr_curve, count_face) 270 | 271 | propose = pr_curve[:, 0] 272 | recall = pr_curve[:, 1] 273 | 274 | ap = voc_ap(recall, propose) 275 | aps.append(ap) 276 | 277 | print("==================== Results ====================") 278 | print("Easy Val AP: {}".format(aps[0])) 279 | print("Medium Val AP: {}".format(aps[1])) 280 | print("Hard Val AP: {}".format(aps[2])) 281 | print("=================================================") 282 | 283 | 284 | if __name__ == '__main__': 285 | 286 | parser = argparse.ArgumentParser() 287 | parser.add_argument('-p', '--pred', default="./widerface_txt/") 288 | parser.add_argument('-g', '--gt', default='./ground_truth/') 289 | 290 | args = parser.parse_args() 291 | evaluation(args.pred, args.gt) 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_easy_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_face_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_face_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_hard_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/face_detection/widerface_evaluate/ground_truth/wider_medium_val.mat -------------------------------------------------------------------------------- /face_detection/widerface_evaluate/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | WiderFace evaluation code 3 | author: wondervictor 4 | mail: tianhengcheng@gmail.com 5 | copyright@wondervictor 6 | """ 7 | 8 | from distutils.core import setup, Extension 9 | from Cython.Build import cythonize 10 | import numpy 11 | 12 | package = Extension('bbox', ['box_overlaps.pyx'], include_dirs=[numpy.get_include()]) 13 | setup(ext_modules=cythonize([package])) 14 | -------------------------------------------------------------------------------- /face_recognition/config.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | configurations = { 4 | 1: dict( 5 | SEED = 1993, # random seed for reproduce results 6 | 7 | DATA_ROOT = '../DATA', # the parent root 
where your train/val/test data are stored 8 | MODEL_ROOT = '../CHECKPOINT', # the root to buffer your checkpoints 9 | LOG_ROOT = '../LOG', # the root to log your train/val status 10 | BACKBONE_RESUME_ROOT = '../CHECKPOINT/Backbone_IR_152_Epoch_112.pth', # the root to resume training from a saved checkpoint 11 | HEAD_RESUME_ROOT = '../CHECKPOINT/Head_ArcFace_Epoch_112.pth', # the root to resume training from a saved checkpoint 12 | 13 | BACKBONE_NAME = 'IR_50', # support: ['ResNet_50', 'ResNet_101', 'ResNet_152', 'IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152'] 14 | HEAD_NAME = 'ArcFace', # support: ['Softmax', 'ArcFace', 'CosFace', 'SphereFace', 'Am_softmax'] 15 | LOSS_NAME = 'Focal', # support: ['Focal', 'Softmax'] 16 | 17 | INPUT_SIZE = [112, 112], # support: [112, 112] and [224, 224] 18 | RGB_MEAN = [0.5, 0.5, 0.5], # to normalize inputs to [-1, 1] 19 | RGB_STD = [0.5, 0.5, 0.5], 20 | EMBEDDING_SIZE = 1024, # feature dimension 21 | BATCH_SIZE = 256*8, 22 | DROP_LAST = True, # whether to drop the last batch to ensure consistent batch_norm statistics 23 | LR = 0.1, # initial LR 24 | NUM_EPOCH = 125, # total epoch number (use the first 1/25 epochs to warm up) 25 | WEIGHT_DECAY = 5e-4, # do not apply to batch_norm parameters 26 | MOMENTUM = 0.9, 27 | STAGES = [35, 65, 95], # epoch stages to decay learning rate 28 | 29 | DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), 30 | MULTI_GPU = True, # flag to use multiple GPUs; if you choose to train with a single GPU, first run "export CUDA_VISIBLE_DEVICES=device_id" to specify the GPU card you want to use 31 | GPU_ID = [0, 1, 2, 3, 4, 5, 6, 7], # specify your GPU ids 32 | #GPU_ID = [0], # specify your GPU ids 33 | PIN_MEMORY = True, 34 | NUM_WORKERS = 0 35 | ) 36 | } 37 | -------------------------------------------------------------------------------- /gaze_estimation/README.md: -------------------------------------------------------------------------------- 1 | # IR_Driver_Gaze_Estimation 2 | 3 | Implementation of gaze estimation from IR camera images with a CNN. 4 | 5 | This repository provides a light model version of gaze estimation (Caffe, TensorFlow and PyTorch) and a heavy model version. 6 | 7 | * input : 120 x 100 grayscale face image 8 | * Light version : uses the 120 x 100 grayscale image for the global estimator 9 | * Heavy version : uses the 120 x 100 grayscale image for the global estimator and crops it to an 80 x 100 image for the local estimator 10 | * Heavy+Att version : adds an attention mask to the heavy version 11 | 12 | 13 | ## CAFFE version 14 | The light model version is supported. 15 | 16 | -TRAINING from Scratch- 17 | > bin\caffe train --solver=ir_gaze_solver.prototxt --gpu=0 18 | 19 | -TRAINING from Weights- 20 | > bin\caffe train --solver=ir_gaze_solver.prototxt --weights=caffemodels/***.caffemodel --gpu=0 21 | 22 | 23 | 24 | ## TENSORFLOW version 25 | The light model version is supported. 26 | 27 | -TRAINING/EVALUATION from Scratch- 28 | > python train.py 29 | 30 | -PREDICT- 31 | > python test_sequences.py 32 | 33 | 34 | 35 | ## PYTORCH version 36 | Modify config.py for various options (such as batch size, GPU index, etc.).
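For example (the option names below are illustrative placeholders only, not taken from the repository — check config.py for the actual names):

```python
# hypothetical option names, for illustration only
batch_size = 64   # reduce if GPU memory is tight
gpu_index = 0     # CUDA device used for training
```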
37 | 38 | -TRAINING- 39 | > python train.py -------------------------------------------------------------------------------- /gaze_estimation/example_movie/media2_slow.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-IMLAB/PIMNet_Internal_Environment_Recognition/c0569c94302926638238bfd7fd7859402954cc67/gaze_estimation/example_movie/media2_slow.avi -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_deploy.prototxt: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 20172258 Cha Dongmin 3 | ################################################################################ 4 | 5 | name: "IR_GAZE_ESTIMATION" 6 | input: "data" 7 | input_dim: 1 # batch size 8 | input_dim: 1 9 | input_dim: 100 10 | input_dim: 120 11 | 12 | layer { 13 | name: "data" 14 | type: "HDF5Data" 15 | top: "data" 16 | top: "label" 17 | hdf5_data_param { 18 | source: "list_train.txt" 19 | batch_size: 32 20 | } 21 | } 22 | 23 | 24 | 25 | layer { 26 | name: "conv1" 27 | type: "Convolution" 28 | bottom: "data" 29 | top: "conv1" 30 | param { 31 | lr_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | } 36 | convolution_param { 37 | num_output: 40 38 | kernel_size: 7 39 | stride: 2 40 | } 41 | } 42 | 43 | 44 | 45 | layer { 46 | name: "relu1" 47 | type: "ReLU" 48 | bottom: "conv1" 49 | top: "conv1" 50 | } 51 | 52 | 53 | layer { 54 | name: "pool1" 55 | type: "Pooling" 56 | bottom: "conv1" 57 | top: "pool1" 58 | pooling_param { 59 | kernel_size: 3 60 | stride: 2 61 | pool: MAX 62 | } 63 | } 64 | 65 | 66 | layer { 67 | name: "conv2" 68 | type: "Convolution" 69 | bottom: "pool1" 70 | top: "conv2" 71 | param { 72 | lr_mult: 1.0 73 | } 74 | param { 75 | lr_mult: 2.0 76 | } 77 | convolution_param { 78 | num_output: 70 79 | kernel_size: 5 80 | pad: 1 81 | stride: 2 82 | } 83 | } 84 | 85 | 86 | layer { 87 | name: "relu2" 88 | type: "ReLU" 89 | bottom: "conv2" 90 | top: "conv2" 91 | } 92 | 93 | 94 | 95 | 96 | layer { 97 | name: "pool2" 98 | type: "Pooling" 99 | bottom: "conv2" 100 | top: "pool2" 101 | pooling_param { 102 | kernel_size: 2 103 | stride: 2 104 | pool: MAX 105 | } 106 | } 107 | 108 | 109 | layer { 110 | name: "conv3" 111 | type: "Convolution" 112 | bottom: "pool2" 113 | top: "conv3" 114 | param { 115 | lr_mult: 1.0 116 | } 117 | param { 118 | lr_mult: 2.0 119 | } 120 | convolution_param { 121 | num_output: 60 122 | kernel_size: 3 123 | pad: 1 124 | } 125 | } 126 | 127 | 128 | 129 | layer { 130 | name: "relu3" 131 | type: "ReLU" 132 | bottom: "conv3" 133 | top: "conv3" 134 | } 135 | 136 | layer { 137 | name: "pool3" 138 | type: "Pooling" 139 | bottom: "conv3" 140 | top: "pool3" 141 | pooling_param { 142 | kernel_size: 2 143 | stride: 2 144 | pool: MAX 145 | } 146 | } 147 | 148 | 149 | 150 | layer { 151 | name: "conv4" 152 | type: "Convolution" 153 | bottom: "pool3" 154 | top: "conv4" 155 | param { 156 | lr_mult: 1.0 157 | } 158 | param { 159 | lr_mult: 2.0 160 | } 161 | convolution_param { 162 | num_output: 80 163 | kernel_size: 3 164 | pad: 1 165 | } 166 | } 167 | 168 | 169 | layer { 170 | name: "relu4" 171 | type: "ReLU" 172 | bottom: "conv4" 173 | top: "conv4" 174 | } 175 | 176 | layer { 177 | name: "pool4" 178 | type: "Pooling" 179 | bottom: "conv4" 180 | top: "pool4" 181 | pooling_param { 182 | kernel_size: 2 183 | stride: 2 184 | pool: MAX 185 | } 186 | } 
187 | 188 | 189 | 190 | layer { 191 | name: "conv5" 192 | type: "Convolution" 193 | bottom: "pool4" 194 | top: "conv5" 195 | param { 196 | lr_mult: 1.0 197 | } 198 | param { 199 | lr_mult: 2.0 200 | } 201 | convolution_param { 202 | num_output: 100 203 | kernel_size: 3 204 | pad: 1 205 | } 206 | } 207 | 208 | 209 | layer { 210 | name: "relu5" 211 | type: "ReLU" 212 | bottom: "conv5" 213 | top: "conv5" 214 | } 215 | 216 | 217 | layer { 218 | name: "pool5" 219 | type: "Pooling" 220 | bottom: "conv5" 221 | top: "pool5" 222 | pooling_param { 223 | kernel_size: 2 224 | stride: 2 225 | pool: MAX 226 | } 227 | } 228 | 229 | 230 | layer { 231 | name: "concat1" 232 | bottom: "conv5" 233 | bottom: "pool4" 234 | top: "concat1" 235 | type: "Concat" 236 | concat_param { 237 | axis: 1 238 | } 239 | } 240 | 241 | 242 | 243 | layer { 244 | name: "fc1" 245 | type: "InnerProduct" 246 | bottom: "concat1" 247 | top: "fc1" 248 | inner_product_param { 249 | num_output: 4000 250 | } 251 | } 252 | 253 | 254 | layer { 255 | name: "relu6" 256 | type: "ReLU" 257 | bottom: "fc1" 258 | top: "fc1" 259 | } 260 | 261 | 262 | layer { 263 | name: "drop1" 264 | type: "Dropout" 265 | bottom: "fc1" 266 | top: "fc1" 267 | dropout_param { 268 | dropout_ratio: 0.5 269 | } 270 | 271 | 272 | } 273 | 274 | 275 | 276 | 277 | layer { 278 | name: "fc2" 279 | type: "InnerProduct" 280 | bottom: "fc1" 281 | top: "fc2" 282 | 283 | param{ 284 | lr_mult: 10 285 | decay_mult: 1 286 | } 287 | param{ 288 | lr_mult: 20 289 | decay_mult: 0 290 | } 291 | inner_product_param { 292 | num_output: 6 293 | weight_filler { 294 | type: "xavier" 295 | } 296 | bias_filler { 297 | type: "constant" 298 | value: 0.0 299 | } 300 | } 301 | } 302 | 303 | layer { 304 | name: "prob" 305 | type: "Softmax" 306 | bottom: "fc2" 307 | top: "prob" 308 | 309 | } 310 | -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "ir_gaze_train_val.prototxt" 2 | base_lr: 0.001 3 | lr_policy: "step" 4 | gamma: 0.1 5 | stepsize: 30000 6 | display: 20 7 | max_iter: 40000 8 | momentum: 0.9 9 | weight_decay: 0.0005 10 | ## We disable standard caffe solver snapshotting and implement our own snapshot 11 | #snapshot: 0 12 | snapshot: 5000 13 | snapshot_prefix: "GAZE" 14 | #debug_info: true 15 | 16 | -------------------------------------------------------------------------------- /gaze_estimation/v1_caffe_model/ir_gaze_train_val.prototxt: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | ## 20172258 Cha Dongmin 3 | ################################################################################ 4 | 5 | name: "IR_GAZE_ESTIMATION" 6 | 7 | #input: "data" 8 | #input_dim: BATCH 9 | #input_dim: 1 10 | #input_dim: 100 11 | #input_dim: 120 12 | 13 | layer { 14 | name: "data" 15 | type: "HDF5Data" 16 | top: "data" 17 | top: "label" 18 | hdf5_data_param { 19 | source: "list_train.txt" 20 | batch_size: 32 21 | } 22 | } 23 | 24 | 25 | 26 | layer { 27 | name: "conv1" 28 | type: "Convolution" 29 | bottom: "data" 30 | top: "conv1" 31 | param { 32 | lr_mult: 1.0 33 | } 34 | param { 35 | lr_mult: 2.0 36 | } 37 | convolution_param { 38 | num_output: 40 39 | kernel_size: 7 40 | stride: 2 41 | } 42 | } 43 | 44 | 45 | 46 | layer { 47 | name: "relu1" 48 | type: "ReLU" 49 | bottom: "conv1" 50 | top: "conv1" 51 | } 52 | 53 
| 54 | layer { 55 | name: "pool1" 56 | type: "Pooling" 57 | bottom: "conv1" 58 | top: "pool1" 59 | pooling_param { 60 | kernel_size: 3 61 | stride: 2 62 | pool: MAX 63 | } 64 | } 65 | 66 | 67 | layer { 68 | name: "conv2" 69 | type: "Convolution" 70 | bottom: "pool1" 71 | top: "conv2" 72 | param { 73 | lr_mult: 1.0 74 | } 75 | param { 76 | lr_mult: 2.0 77 | } 78 | convolution_param { 79 | num_output: 70 80 | kernel_size: 5 81 | pad: 1 82 | stride: 2 83 | } 84 | } 85 | 86 | 87 | layer { 88 | name: "relu2" 89 | type: "ReLU" 90 | bottom: "conv2" 91 | top: "conv2" 92 | } 93 | 94 | 95 | 96 | 97 | layer { 98 | name: "pool2" 99 | type: "Pooling" 100 | bottom: "conv2" 101 | top: "pool2" 102 | pooling_param { 103 | kernel_size: 2 104 | stride: 2 105 | pool: MAX 106 | } 107 | } 108 | 109 | 110 | layer { 111 | name: "conv3" 112 | type: "Convolution" 113 | bottom: "pool2" 114 | top: "conv3" 115 | param { 116 | lr_mult: 1.0 117 | } 118 | param { 119 | lr_mult: 2.0 120 | } 121 | convolution_param { 122 | num_output: 60 123 | kernel_size: 3 124 | pad: 1 125 | } 126 | } 127 | 128 | 129 | 130 | layer { 131 | name: "relu3" 132 | type: "ReLU" 133 | bottom: "conv3" 134 | top: "conv3" 135 | } 136 | 137 | layer { 138 | name: "pool3" 139 | type: "Pooling" 140 | bottom: "conv3" 141 | top: "pool3" 142 | pooling_param { 143 | kernel_size: 2 144 | stride: 2 145 | pool: MAX 146 | } 147 | } 148 | 149 | 150 | 151 | layer { 152 | name: "conv4" 153 | type: "Convolution" 154 | bottom: "pool3" 155 | top: "conv4" 156 | param { 157 | lr_mult: 1.0 158 | } 159 | param { 160 | lr_mult: 2.0 161 | } 162 | convolution_param { 163 | num_output: 80 164 | kernel_size: 3 165 | pad: 1 166 | } 167 | } 168 | 169 | 170 | layer { 171 | name: "relu4" 172 | type: "ReLU" 173 | bottom: "conv4" 174 | top: "conv4" 175 | } 176 | 177 | layer { 178 | name: "pool4" 179 | type: "Pooling" 180 | bottom: "conv4" 181 | top: "pool4" 182 | pooling_param { 183 | kernel_size: 2 184 | stride: 2 185 | pool: MAX 186 | } 187 | } 188 | 189 | 190 | 191 | layer { 192 | name: "conv5" 193 | type: "Convolution" 194 | bottom: "pool4" 195 | top: "conv5" 196 | param { 197 | lr_mult: 1.0 198 | } 199 | param { 200 | lr_mult: 2.0 201 | } 202 | convolution_param { 203 | num_output: 100 204 | kernel_size: 3 205 | pad: 1 206 | } 207 | } 208 | 209 | 210 | layer { 211 | name: "relu5" 212 | type: "ReLU" 213 | bottom: "conv5" 214 | top: "conv5" 215 | } 216 | 217 | 218 | layer { 219 | name: "pool5" 220 | type: "Pooling" 221 | bottom: "conv5" 222 | top: "pool5" 223 | pooling_param { 224 | kernel_size: 2 225 | stride: 2 226 | pool: MAX 227 | } 228 | } 229 | 230 | 231 | layer { 232 | name: "concat1" 233 | bottom: "conv5" 234 | bottom: "pool4" 235 | top: "concat1" 236 | type: "Concat" 237 | concat_param { 238 | axis: 1 239 | } 240 | } 241 | 242 | 243 | 244 | layer { 245 | name: "fc1" 246 | type: "InnerProduct" 247 | bottom: "concat1" 248 | top: "fc1" 249 | inner_product_param { 250 | num_output: 4000 251 | } 252 | } 253 | 254 | 255 | layer { 256 | name: "relu6" 257 | type: "ReLU" 258 | bottom: "fc1" 259 | top: "fc1" 260 | } 261 | 262 | 263 | layer { 264 | name: "drop1" 265 | type: "Dropout" 266 | bottom: "fc1" 267 | top: "fc1" 268 | dropout_param { 269 | dropout_ratio: 0.5 270 | } 271 | 272 | 273 | } 274 | 275 | 276 | 277 | 278 | layer { 279 | name: "fc2" 280 | type: "InnerProduct" 281 | bottom: "fc1" 282 | top: "fc2" 283 | 284 | param{ 285 | lr_mult: 10 286 | decay_mult: 1 287 | } 288 | param{ 289 | lr_mult: 20 290 | decay_mult: 0 291 | } 292 | inner_product_param { 293 | num_output: 6 
294 |     weight_filler {
295 |       type: "xavier"
296 |     }
297 |     bias_filler {
298 |       type: "constant"
299 |       value: 0.0
300 |     }
301 |   }
302 | }
303 | 
304 | layer {
305 |   name: "loss"
306 |   type: "SoftmaxWithLoss"
307 |   bottom: "fc2"
308 |   bottom: "label"
309 |   top: "loss"
310 | }
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/model.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | import numpy as np
4 | from opt import *
5 | 
6 | 
7 | 
8 | def gazenetwork(features, labels, mode):
9 | 
10 |     # image : [batch, 100, 120, 1]
11 |     input = tf.reshape(features["x"], [-1, 100, 120, 1])
12 | 
13 |     # dropout keep probability: 0.5 while training, 1.0 otherwise
14 |     if mode == tf.estimator.ModeKeys.TRAIN:
15 |         dropout = 0.5
16 |     else:
17 |         dropout = 1.0
18 | 
19 | 
20 | 
21 |     # * conv2d uses SAME padding by default
22 |     # H0
23 |     h0 = lrelu(conv2d(input, output_dim=40, ks=7, s=2, name='h0_conv'))
24 |     h0 = slim.max_pool2d(h0, kernel_size=3, stride=2, scope='h0_pool')
25 | 
26 |     # H1
27 |     h1 = lrelu(conv2d(h0, output_dim=70, ks=5, s=2, name='h1_conv'))
28 |     h1 = slim.max_pool2d(h1, kernel_size=2, stride=2, scope='h1_pool')
29 | 
30 |     # H2
31 |     h2 = lrelu(conv2d(h1, output_dim=60, ks=3, s=1, name='h2_conv'))
32 |     h2 = slim.max_pool2d(h2, kernel_size=2, stride=2, scope='h2_pool')
33 | 
34 |     # H3
35 |     h3 = lrelu(conv2d(h2, output_dim=80, ks=3, s=1, name='h3_conv'))
36 |     h3 = slim.max_pool2d(h3, kernel_size=2, stride=2, scope='h3_pool')
37 | 
38 |     # H4
39 |     h4 = lrelu(conv2d(h3, output_dim=100, ks=3, s=1, name='h4_conv'))
40 | 
41 |     # concatenate h3 & h4
42 |     h3_flat = slim.flatten(h3, scope="h3_flat")
43 |     h4_flat = slim.flatten(h4, scope="h4_flat")
44 |     h_concat = tf.concat([h3_flat, h4_flat], 1, name='h3_h4_concat')
45 | 
46 |     # start of fc
47 |     fc1 = slim.fully_connected(h_concat, 4000, scope="fc1")
48 |     fc1_dropout = slim.dropout(fc1, dropout)
49 |     logits = slim.fully_connected(fc1_dropout, 6, activation_fn=None, scope="logits")
50 |     class_logits = tf.argmax(input=logits, axis=1)
51 | 
52 |     # class ids and softmax probabilities
53 |     #softmax
54 |     predictions = {"classes" : tf.argmax(input=logits, axis=1),
55 |                    "probabilities" : tf.nn.softmax(logits, name="softmax_tensor")}
56 |     #predictions = tf.nn.softmax(logits, name='predictions')
57 | 
58 | 
59 | 
60 | 
61 |     if mode == tf.estimator.ModeKeys.PREDICT:
62 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
63 | 
64 |     loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
65 |     #accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)
66 | 
67 | 
68 |     # in TRAIN mode,
69 |     if mode == tf.estimator.ModeKeys.TRAIN:
70 |         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.005)
71 |         train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
72 | 
73 |         # In TRAIN mode, return an EstimatorSpec that contains mode, loss and train_op.
74 |         # train_op minimizes the loss with the optimizer.
75 |         return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
76 | 
77 |     # in PREDICT mode (note: unreachable, the PREDICT case already returned above)
78 |     if mode == tf.estimator.ModeKeys.PREDICT:
79 |         out_predictions = {
80 |             "classes": tf.argmax(input=logits, axis=1),
81 |             "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
82 |         }
83 | 
84 |         #out_predictions = {"logits": logits}
85 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=out_predictions)
86 | 
87 |     # in EVAL mode
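    # (estimator.evaluate() reports each entry of eval_metric_ops under its dict key, together
    #  with loss and global_step, e.g. {'accuracy': 0.93, 'loss': 0.21, 'global_step': 50000};
    #  the numbers are purely illustrative.)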
88 |     print(labels)
89 |     print(class_logits)
90 |     eval_ops = {"accuracy" : tf.metrics.accuracy(labels=labels, predictions=class_logits)}
91 | 
92 |     # In EVAL mode, return an EstimatorSpec that contains mode, loss and eval_metric_ops.
93 |     # The evaluation metric is accuracy.
94 |     return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_ops)
95 | 
96 | 
97 | 
98 | 
99 | 
100 | 
101 | 
102 | 
103 | 
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/opt.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import random
4 | import math
5 | from glob import glob
6 | from PIL import Image
7 | 
8 | import tensorflow.contrib.slim as slim
9 | 
10 | 
11 | BATCH_SIZE = 256
12 | IMG_WIDTH = 120
13 | IMG_HEIGHT = 100
14 | CHANNEL_N = 1
15 | CLASS_N = 6
16 | 
17 | def load_img_and_label_from_npy(image_npy, label_npy):
18 |     images = load_np(image_npy)
19 |     labels = load_np(label_npy)
20 | 
21 |     return images, labels
22 | 
23 | def load_images(train_ratio=0.95, test_ratio=0.05):
24 |     print("Loading Images...")
25 | 
26 |     # read the images grouped by gaze-zone label
27 |     # 6 gaze zones
28 |     data_list_1 = glob('*part_1.jpg') #1
29 |     data_list_2 = glob('*part_3.jpg') #2
30 |     data_list_3 = glob('*part_6.jpg') #3
31 |     data_list_4 = glob('*part_8.jpg') #4
32 |     data_list_5 = glob('*part_10.jpg') #5
33 |     data_list_6 = glob('*part_12.jpg') #6
34 | 
35 | 
36 |     batch_tuple = []
37 | 
38 |     n = 0
39 |     #------------1
40 |     for i in range(len(data_list_1)):
41 |         path = data_list_1[i]
42 |         img = read_image(path)
43 | 
44 |         # store (path, label) for the loaded image
45 |         batch_tuple.append((path, 0))
46 | 
47 | 
48 |     #-------------- 2
49 |     for i in range(len(data_list_2)):
50 |         path = data_list_2[i]
51 |         img = read_image(path)
52 | 
53 |         # store (path, label) for the loaded image
54 |         batch_tuple.append((path, 1))
55 | 
56 |     #--------------- 3
57 |     for i in range(len(data_list_3)):
58 |         path = data_list_3[i]
59 |         img = read_image(path)
60 | 
61 |         # store (path, label) for the loaded image
62 |         batch_tuple.append((path, 2))
63 | 
64 |     # ---------------- 4
65 |     for i in range(len(data_list_4)):
66 |         path = data_list_4[i]
67 |         img = read_image(path)
68 | 
69 |         # store (path, label) for the loaded image
70 |         batch_tuple.append((path, 3))
71 | 
72 |     # ---------------- 5
73 |     for i in range(len(data_list_5)):
74 |         path = data_list_5[i]
75 |         img = read_image(path)
76 | 
77 |         # store (path, label) for the loaded image
78 |         batch_tuple.append((path, 4))
79 | 
80 |     # ----------------- 6
81 |     for i in range(len(data_list_6)):
82 |         path = data_list_6[i]
83 |         img = read_image(path)
84 | 
85 |         # store (path, label) for the loaded image
86 |         batch_tuple.append((path, 5))
87 | 
88 | 
89 |     # shuffle the stored (path, label) tuples before splitting
90 |     random.shuffle(batch_tuple)
91 |     #print(batch_tuple)
92 | 
93 |     # split into train : test
94 |     num = len(batch_tuple)
95 |     train_num = math.floor(train_ratio*num)
96 |     test_num = num - train_num
97 | 
98 | 
99 |     # train / test split
100 |     train_batch = batch_tuple[0:train_num]
101 |     test_batch = batch_tuple[train_num:num]
102 |     print(len(train_batch))
103 | 
104 |     # the images have to be converted to numpy arrays
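    # (Labels are written one-hot with shape [N, CLASS_N]; train.py later argmaxes them back to
    #  class indices, since the model's loss is tf.losses.sparse_softmax_cross_entropy.)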
105 |     # BATCH_SIZE = len(data_list)
106 | 
107 |     train_image = np.zeros((train_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
108 |     train_label = np.zeros((train_num, CLASS_N))
109 |     test_image = np.zeros((test_num, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
110 |     test_label = np.zeros((test_num, CLASS_N))
111 | 
112 |     # [TRAIN] convert to numpy
113 |     bat_idx = 0
114 |     for path, label in train_batch:
115 |         img = read_image(path)
116 |         train_image[bat_idx, :, :, :] = img
117 |         train_label[bat_idx, label] = 1
118 |         bat_idx += 1
119 | 
120 |     # [TEST] convert to numpy
121 |     bat_idx = 0
122 |     for path, label in test_batch:
123 |         img = read_image(path)
124 |         test_image[bat_idx, :, :, :] = img
125 |         test_label[bat_idx, label] = 1
126 |         bat_idx += 1
127 | 
128 |     print('[train_img]')
129 |     print(train_image.shape)
130 |     print('[test_img]')
131 |     print(test_image.shape)
132 |     print('[train_label]')
133 |     print(train_label.shape)
134 |     print('[test_label]')
135 |     print(test_label.shape)
136 | 
137 |     save_np('train_img', train_image)
138 |     save_np('train_label', train_label)
139 |     save_np('test_img', test_image)
140 |     save_np('test_label', test_label)
141 | 
142 | 
143 | 
144 | 
145 | def save_np(filename, data):
146 |     np.save(filename, data)
147 | 
148 | def load_np(filename):
149 |     print('loading ' + filename + '......')
150 |     return np.load(filename)
151 | 
152 | def read_image_and_label(path):
153 |     return read_image(path), read_label(path)  # note: read_label is not defined in this file; this helper appears unused
154 | 
155 | def read_image(path):
156 |     image = np.array(Image.open(path).convert('L'))
157 |     image = image.astype(np.float32)
158 |     image = image / 255.0
159 |     image = np.expand_dims(image, axis=2)
160 |     #image = image.reshape(IMG_HEIGHT, IMG_WIDTH, 1)
161 |     return image
162 | 
163 | 
164 | def instance_norm(input, name="instance_norm"):
165 |     with tf.variable_scope(name):
166 |         depth = input.get_shape()[3]
167 |         scale = tf.get_variable("scale", [depth], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
168 |         offset = tf.get_variable("offset", [depth], initializer=tf.constant_initializer(0.0))
169 |         mean, variance = tf.nn.moments(input, axes=[1,2], keep_dims=True)
170 |         epsilon = 1e-5
171 |         inv = tf.rsqrt(variance + epsilon)
172 |         normalized = (input-mean)*inv
173 |         return scale*normalized + offset
174 | 
175 | 
176 | # conv layer
177 | def conv2d(input_, output_dim, ks=4, s=2, stddev=0.02, padding='SAME', name="conv2d"):
178 |     with tf.variable_scope(name):
179 |         return slim.conv2d(input_, output_dim, ks, s, padding=padding, activation_fn=None,
180 |                            weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
181 |                            biases_initializer=None)
182 | 
183 | # leaky ReLU activation
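# (keeps a small slope for negative inputs: f(x) = max(x, leak * x), with leak = 0.2 by default)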
184 | def lrelu(x, leak=0.2, name="lrelu"):
185 |     return tf.maximum(x, leak*x)
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/test_sequences.py: --------------------------------------------------------------------------------
1 | #import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 | import random
5 | import math
6 | from glob import glob
7 | from PIL import Image, ImageDraw, ImageFont
8 | import matplotlib.pyplot as plt
9 | import time
10 | 
11 | 
12 | 
13 | BATCH_SIZE = 256
14 | IMG_WIDTH = 120
15 | IMG_HEIGHT = 100
16 | CHANNEL_N = 1
17 | CLASS_N = 6
18 | 
19 | 
20 | def predict_imgs():
21 |     tf.logging.set_verbosity(tf.logging.INFO)
22 |     # to avoid cuda memory out error
23 |     gpu_options = tf.GPUOptions(allow_growth=True)
24 |     config = tf.ConfigProto(gpu_options=gpu_options)
25 | 
26 |     # data load
27 |     face_npy, img_list = load_imgs()
28 |     IMG_NUM = len(img_list)
29 | 
30 |     # create the estimator
31 |     gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
32 |                                              config=tf.contrib.learn.RunConfig(session_config=config))
33 | 
34 | 
35 | 
36 |     # START
37 |     img_template = None
38 |     for i in range(IMG_NUM):
39 |         test_data = face_npy[i, :, :, :]
40 |         test_data = np.expand_dims(test_data, axis=0)
41 | 
42 |         # test
43 |         test_input_fn = tf.estimator.inputs.numpy_input_fn(
44 |             x={"x": test_data},
45 |             shuffle=False)
46 |         #test_spec = tf.estimator.EvalSpec(input_fn=test_input_fn)
47 | 
48 |         predictions = gaze_classifier.predict(input_fn=test_input_fn)
49 |         predictor = list(predictions)
50 |         label = predictor[0]['classes'] + 1
51 | 
52 |         #draw pic
53 |         draw_pic(img_template, img_list[i], label, i)
54 | 
55 |         #print(list(predictions)[0]['classes'])
56 | 
57 | 
58 | 
59 | 
60 | def draw_pic(img_template, img_path, text, frameidx):
61 | 
62 | 
63 |     plt.gcf().clear()
64 |     image = Image.open(img_path)
65 |     draw = ImageDraw.Draw(image)
66 |     (x, y) = (10, 10)
67 |     font = ImageFont.truetype('arial', size=125)
68 |     message = str(text)
69 |     color = 'rgb(255, 255, 255)' # white
70 |     draw.text((x, y), message, fill=color, font=font)
71 |     #plt.imshow(image)
72 | 
73 |     if img_template is None:
74 |         img_template = plt.imshow(image)
75 |     else:
76 |         img_template.set_data(image)
77 | 
78 |     plt.pause(0.1)
79 | 
80 |     #im = plt.imshow(image, animated=True)
81 |     plt.draw()
82 | 
83 | 
84 | 
85 | # loads face crops and the corresponding full frames of a recorded sequence (needed by predict_imgs)
86 | def load_imgs():
87 |     BASE_DIR = "F:/2-2/cv/proj_gaze/sequences/4"
88 |     face_dir = BASE_DIR + "/face/*.jpg"
89 |     img_dir = BASE_DIR + "/entire/*.jpg"
90 | 
91 |     face_list = glob(face_dir)
92 |     img_list = glob(img_dir)
93 | 
94 |     IMG_NUM = len(img_list)
95 |     test_image = np.zeros((IMG_NUM, IMG_HEIGHT, IMG_WIDTH, CHANNEL_N))
96 | 
97 |     # LOOP START
98 |     bat_idx = 0
99 |     for path in face_list:
100 |         img = read_image(path)
101 |         test_image[bat_idx, :, :, :] = img
102 |         bat_idx += 1
103 | 
104 | 
105 | 
106 | 
107 |     return test_image, img_list
108 | 
109 | 
110 | def read_image(path):
111 |     image = np.array(Image.open(path).convert('L'))
112 |     image = image.astype(np.float32)
113 |     image = image / 255.0
114 |     image = np.expand_dims(image, axis=2)
115 |     return image
116 | 
117 | # main func
118 | def main(unused_argv):
119 |     #load_imgs()
120 |     predict_imgs()
121 | 
122 | 
123 | if __name__ == "__main__":
124 |     tf.app.run()
-------------------------------------------------------------------------------- /gaze_estimation/v2_tensorflow_model/train.py: --------------------------------------------------------------------------------
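# Builds the .npy dataset (make_db), trains the gaze-zone classifier (train), or evaluates a
# saved checkpoint (test); switch between these by editing main() at the bottom of this file.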
1 | import tensorflow as tf
2 | from opt import *
3 | from model import gazenetwork
4 | 
5 | def test():
6 |     tf.logging.set_verbosity(tf.logging.INFO)
7 |     # to avoid cuda memory out error
8 |     gpu_options = tf.GPUOptions(allow_growth=True)
9 |     config = tf.ConfigProto(gpu_options=gpu_options)
10 | 
11 |     # data load
12 |     eval_data, eval_label = load_img_and_label_from_npy('test_img.npy', 'test_label.npy')
13 |     eval_label = np.argmax(eval_label, axis=1)
14 |     print(eval_data[3])
15 |     print(eval_label[3])
16 |     print('npy loaded')
17 | 
18 |     # create the estimator
19 |     gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model",
20 |                                              config=tf.contrib.learn.RunConfig(session_config=config))
21 | 
22 |     # eval
23 |     eval_input_fn = tf.estimator.inputs.numpy_input_fn(
24 |         x={"x": eval_data},
25 |         y=eval_label,
26 | 
27 |         num_epochs=1,
28 |         shuffle=False)
29 |     eval_results = gaze_classifier.evaluate(input_fn=eval_input_fn)
30 |     print(eval_results)
31 | 
32 | def train():
33 |     # load_images()
34 | 
35 |     tf.logging.set_verbosity(tf.logging.INFO)
36 |     # to avoid cuda memory out error
37 |     gpu_options = tf.GPUOptions(allow_growth=True)
38 |     config = tf.ConfigProto(gpu_options=gpu_options)
39 | 
40 |     # == the input_fn handed to the ESTIMATOR also has a contract:
41 |     # it has to return the feature data and the label data
42 |     train_data, train_label = load_img_and_label_from_npy('train_img.npy', 'train_label.npy')
43 |     train_label = np.argmax(train_label, axis=1)
44 |     print('npy loaded')
45 | 
46 |     train_input_fn = tf.estimator.inputs.numpy_input_fn(
47 |         x={"x": train_data},
48 |         y=train_label,
49 |         batch_size=712, num_epochs=None, shuffle=True)
50 |     print('input_fn created')
51 | 
52 |     # == the model_fn used for ESTIMATOR training also has a contract on its parameters:
53 |     #
54 |     # its signature is (features, labels, mode, params, config); features and labels are required
55 |     #
56 |     # and it has to return a tf.estimator.EstimatorSpec
57 | 
58 |     # == the ESTIMATOR's model_dir is where the trained parameters (checkpoints) are stored; the run config is passed in here as well
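    # (A possible refinement, sketched only as commented-out code and assuming the test .npy files
    #  written by opt.load_images are available: alternate training and evaluation in one run with
    #  tf.estimator.train_and_evaluate instead of invoking train() and test() separately.
    #      train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=100000)
    #      eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
    #      tf.estimator.train_and_evaluate(gaze_classifier, train_spec, eval_spec)
    #  Here eval_input_fn would be built the same way as in test() above.)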
59 | gaze_classifier = tf.estimator.Estimator(model_fn=gazenetwork, model_dir="./model", 60 | config=tf.contrib.learn.RunConfig(session_config=config)) 61 | print('estimator craeated') 62 | 63 | # recording logs 64 | log_tensor = {"loss" : "loss"} 65 | #logging_hook = tf.train.LoggingTensorHook({"loss": loss, 66 | # "accuracy": accuracy}, every_n_iter=10) 67 | 68 | log_hook = tf.train.LoggingTensorHook(tensors=log_tensor, every_n_iter=50) 69 | 70 | # train 71 | print('start train') 72 | gaze_classifier.train(input_fn=train_input_fn, steps=100000) 73 | 74 | def make_db(): 75 | load_images() 76 | 77 | #main func 78 | def main(unused_argv): 79 | #make_db() 80 | #test() 81 | train() 82 | 83 | 84 | 85 | if __name__ == "__main__": 86 | tf.app.run() -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | lr = 0.001 3 | 4 | # 'LIGHT' or 'HEAVY' or 'HEAVY+ATT' or 'MORE_LIGHT' 5 | #use_model_type = 'HEAVY+ATT' 6 | use_model_type = 'MORE_LIGHT' 7 | 8 | alpha = 2 9 | batch_size = 200 10 | global_img_size = [100, 120] 11 | local_img_size = [100, 80] 12 | schedule = [150, 225] 13 | gamma = 0.1 14 | print_iter = 5 15 | save_epoch = 10 16 | 17 | data_path = 'D:/-----/cropped_fld_and_face' 18 | save_path = 'save_checks_more_light' 19 | 20 | max_epoch = 200 21 | gpus = "0" 22 | class_num = 6 23 | momentum= 0.9 24 | weight_decay = 5e-4 -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/gaze_model_heavy_ver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class Estimator(nn.Module): 7 | def __init__(self, use_attention_map=False): 8 | super(Estimator, self).__init__() 9 | 10 | self.global_estimator = Global_Estimator(use_attention_map) 11 | self.local_estimator = Local_Estimator(use_attention_map) 12 | self.use_attention_map = use_attention_map 13 | 14 | #if use_mtcnn: 15 | # self.final_fc = nn.Linear(1024 + 512 + 136, 6) 16 | #else: 17 | self.final_fc = nn.Linear(4000 + 1000, 6) 18 | 19 | 20 | 21 | def forward(self, input_x, input_local_x, flds=None): 22 | 23 | g_output = self.global_estimator(input_x) 24 | l_output = self.local_estimator(input_local_x) 25 | 26 | output = self.final_fc(torch.cat([g_output, l_output], dim=1)) 27 | 28 | return output 29 | 30 | 31 | 32 | 33 | # ------------------------------------- GLOBAL --------------------- 34 | class Global_Estimator(nn.Module): 35 | def __init__(self, use_attention=False): 36 | super(Global_Estimator, self).__init__() 37 | 38 | input_dim = 1 39 | 40 | self.use_attention = use_attention 41 | self.lrelu = nn.LeakyReLU(0.2) 42 | self.drop = nn.Dropout(0.5) 43 | self.pool = nn.MaxPool2d(2) 44 | self.pool3 = nn.MaxPool2d(3, 2) 45 | 46 | 47 | if self.use_attention: 48 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1) 49 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1) 50 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1) 51 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1) 52 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1) 53 | 54 | # 120 x 180 55 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0) 56 | self.norm_1 = nn.InstanceNorm2d(40) 57 | 58 | # 60 x 90 59 | self.conv2 = conv2d_block(40, 70, 5, 2, 1) 60 | self.norm_2 = nn.InstanceNorm2d(70) 61 | 62 | # 30 x 45 63 | self.conv3 = 
conv2d_block(70, 60, 3, 1, 0) 64 | self.norm_3 = nn.InstanceNorm2d(60) 65 | 66 | self.conv4 = conv2d_block(60, 80, 3, 1, 0) 67 | self.norm_4 = nn.InstanceNorm2d(80) 68 | 69 | self.conv5 = conv2d_block(80, 100, 3, 1, 0) 70 | self.norm_5 = nn.InstanceNorm2d(100) 71 | 72 | self.fc1 = nn.Linear((80 * 7 * 6) + (100 * 7 * 6), 4000) 73 | 74 | 75 | def forward(self, x): 76 | 77 | # input : B x C x 120 x 100 78 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot] 79 | x = self.lrelu(self.conv1(x)) 80 | if self.use_attention: 81 | x_att1 = self.conv1_att(x) 82 | x = x_att1 * x 83 | x = self.norm_1(x) 84 | x = self.pool3(x) 85 | 86 | # B x C x 59 x 49 87 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot] 88 | x = self.lrelu(self.conv2(x)) 89 | if self.use_attention: 90 | x_att2 = self.conv2_att(x) 91 | x = x_att2 * x 92 | x = self.norm_2(x) 93 | x = self.pool(x) 94 | 95 | # B x C x 29 x 24 96 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 97 | x = self.lrelu(self.conv3(x)) 98 | if self.use_attention: 99 | x_att3 = self.conv3_att(x) 100 | x = x_att3 * x 101 | x = self.norm_3(x) 102 | x = self.pool(x) 103 | 104 | # B x C x 14 x 12 105 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 106 | x = self.lrelu(self.conv4(x)) 107 | if self.use_attention: 108 | x_att4 = self.conv4_att(x) 109 | x = x_att4 * x 110 | x = self.norm_4(x) 111 | x = self.pool(x) 112 | x_41 = x.view(x.size()[0], -1) 113 | 114 | 115 | # B x C x 7 x 6 116 | x = F.pad(x, (1, 1, 1, 1)) 117 | x = self.lrelu(self.conv5(x)) 118 | if self.use_attention: 119 | x_att5 = self.conv5_att(x) 120 | x = x_att5 * x 121 | x = self.norm_5(x) 122 | x_51 = x.view(x.size()[0], -1) 123 | 124 | # concat 41 & 51 125 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 126 | #x = self.fc2(x) 127 | 128 | return x 129 | 130 | 131 | 132 | 133 | 134 | # ------------------------------------- LOCAL --------------------- 135 | class Local_Estimator(nn.Module): 136 | def __init__(self, use_attention=False): 137 | super(Local_Estimator, self).__init__() 138 | 139 | 140 | input_dim = 1 141 | self.use_attention = use_attention 142 | 143 | self.lrelu = nn.LeakyReLU(0.2) 144 | self.drop = nn.Dropout(0.5) 145 | self.pool = nn.MaxPool2d(2) 146 | self.pool3 = nn.MaxPool2d(3, 2) 147 | 148 | 149 | # att maps 150 | if self.use_attention: 151 | self.conv1_att = conv2d_block(40, 1, 3, 1, 1) 152 | self.conv2_att = conv2d_block(70, 1, 3, 1, 1) 153 | self.conv3_att = conv2d_block(60, 1, 3, 1, 1) 154 | self.conv4_att = conv2d_block(80, 1, 3, 1, 1) 155 | self.conv5_att = conv2d_block(100, 1, 3, 1, 1) 156 | 157 | 158 | # 120 x 180 159 | self.conv1 = conv2d_block(input_dim, 40, 7, 2, 0) 160 | self.norm_1 = nn.InstanceNorm2d(40) 161 | 162 | # 60 x 90 163 | self.conv2 = conv2d_block(40, 70, 5, 2, 1) 164 | self.norm_2 = nn.InstanceNorm2d(70) 165 | 166 | # 30 x 45 167 | self.conv3 = conv2d_block(70, 60, 3, 1, 0) 168 | self.norm_3 = nn.InstanceNorm2d(60) 169 | 170 | self.conv4 = conv2d_block(60, 80, 3, 1, 0) 171 | self.norm_4 = nn.InstanceNorm2d(80) 172 | 173 | self.conv5 = conv2d_block(80, 100, 3, 1, 0) 174 | self.norm_5 = nn.InstanceNorm2d(100) 175 | 176 | self.fc1 = nn.Linear((80 * 5 * 6) + (100 * 5 * 6), 1000) 177 | 178 | 179 | def forward(self, x): 180 | # input : B x C x 50 x 100 181 | x = F.pad(x, (53, 53, 28, 28)) # [left, right, top, bot] 182 | x = self.lrelu(self.conv1(x)) 183 | if self.use_attention: 184 | x_att1 = self.conv1_att(x) 185 | x = x_att1 * x 186 | x = self.norm_1(x) 187 | x = self.pool3(x) 188 | 189 | # B x C x 25 x 50 190 | x = F.pad(x, (25, 25, 
30, 30)) # [left, right, top, bot] 191 | x = self.lrelu(self.conv2(x)) 192 | if self.use_attention: 193 | x_att2 = self.conv2_att(x) 194 | x = x_att2 * x 195 | x = self.norm_2(x) 196 | x = self.pool(x) 197 | 198 | # B x C x 12 x 25 199 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 200 | x = self.lrelu(self.conv3(x)) 201 | if self.use_attention: 202 | x_att3 = self.conv3_att(x) 203 | x = x_att3 * x 204 | x = self.norm_3(x) 205 | x = self.pool(x) 206 | 207 | # B x C x 6 x 12 208 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 209 | x = self.lrelu(self.conv4(x)) 210 | if self.use_attention: 211 | x_att4 = self.conv4_att(x) 212 | x = x_att4 * x 213 | x = self.norm_4(x) 214 | x = self.pool(x) 215 | x_41 = x.view(x.size()[0], -1) 216 | 217 | 218 | # B x C x 3 x 6 219 | x = F.pad(x, (1, 1, 1, 1)) 220 | x = self.lrelu(self.conv5(x)) 221 | if self.use_attention: 222 | x_att5 = self.conv5_att(x) 223 | x = x_att5 * x 224 | x = self.norm_5(x) 225 | #print("51b" + str(x.size())) 226 | x_51 = x.view(x.size()[0], -1) 227 | 228 | # concat 41 & 51 229 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 230 | 231 | return x 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | # ------ conv blocks ----------- 241 | 242 | class conv2d_block(nn.Module): 243 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02): 244 | super(conv2d_block, self).__init__() 245 | 246 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride, 247 | padding=padding) 248 | def forward(self, x): 249 | return self.conv(x) 250 | 251 | -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/gaze_model_light_ver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class Estimator(nn.Module): 7 | def __init__(self, use_mtcnn=False): 8 | super(Estimator, self).__init__() 9 | 10 | self.global_estimator = Global_Estimator() 11 | self.use_mtcnn = use_mtcnn 12 | 13 | 14 | def forward(self, input_x, flds=None): 15 | 16 | output = self.global_estimator(input_x) 17 | return output 18 | 19 | 20 | 21 | 22 | # ------------------------------------- GLOBAL --------------------- 23 | class Global_Estimator(nn.Module): 24 | def __init__(self): 25 | super(Global_Estimator, self).__init__() 26 | 27 | input_dim = 1 28 | cnum = 16 29 | 30 | 31 | self.lrelu = nn.LeakyReLU(0.2) 32 | self.drop = nn.Dropout(0.5) 33 | self.pool = nn.MaxPool2d(2) 34 | self.pool3 = nn.MaxPool2d(3, 2) 35 | 36 | 37 | # 120 x 180 38 | self.conv1 = conv2d_block(input_dim, 20, 7, 2, 0) 39 | self.norm_1 = nn.InstanceNorm2d(20) 40 | 41 | # 60 x 90 42 | self.conv2 = conv2d_block(20, 32, 5, 2, 1) 43 | self.norm_2 = nn.InstanceNorm2d(32) 44 | 45 | # 30 x 45 46 | self.conv3 = conv2d_block(32, 30, 3, 1, 0) 47 | self.norm_3 = nn.InstanceNorm2d(30) 48 | 49 | self.conv4 = conv2d_block(30, 20, 3, 1, 0) 50 | self.norm_4 = nn.InstanceNorm2d(20) 51 | 52 | self.conv5 = conv2d_block(20, 50, 3, 1, 0) 53 | self.norm_5 = nn.InstanceNorm2d(50) 54 | 55 | self.fc1 = nn.Linear((20 * 7 * 6) + (50 * 7 * 6), 2000) 56 | self.fc2 = nn.Linear(2000, 6) 57 | 58 | 59 | def forward(self, x): 60 | #print("ORIG -" + str(x.size())) 61 | x = F.pad(x, (53, 53, 63, 63)) # [left, right, top, bot] 62 | x = self.lrelu(self.conv1(x)) 63 | x = self.norm_1(x) 64 | x = self.pool3(x) 65 | 66 | x = F.pad(x, (25, 25, 30, 30)) # [left, right, top, bot] 67 | x = self.lrelu(self.conv2(x)) 
68 | x = self.norm_2(x) 69 | x = self.pool(x) 70 | 71 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 72 | x = self.lrelu(self.conv3(x)) 73 | x = self.norm_3(x) 74 | x = self.pool(x) 75 | 76 | x = F.pad(x, (1, 1, 1, 1)) # [left, right, top, bot] 77 | x = self.lrelu(self.conv4(x)) 78 | x = self.norm_4(x) 79 | x = self.pool(x) 80 | x_41 = x.view(x.size()[0], -1) 81 | 82 | x = F.pad(x, (1, 1, 1, 1)) 83 | x = self.lrelu(self.conv5(x)) 84 | x = self.norm_5(x) 85 | x_51 = x.view(x.size()[0], -1) 86 | 87 | # concat 41 & 51 88 | x = self.fc1(torch.cat((x_41, x_51), dim=1)) 89 | x = self.fc2(x) 90 | 91 | return x 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | # ------ conv blocks ----------- 100 | 101 | class conv2d_block(nn.Module): 102 | def __init__(self, input_dim, output_dim, kernel_size=4, stride=2, padding=0, stddev=0.02): 103 | super(conv2d_block, self).__init__() 104 | 105 | self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride, 106 | padding=padding) 107 | def forward(self, x): 108 | return self.conv(x) 109 | 110 | -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/ir_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch.utils.data as data 3 | from os import listdir 4 | import os 5 | import random 6 | import torch 7 | import cv2 8 | from PIL import Image 9 | import numpy as np 10 | 11 | import torchvision.transforms as transforms 12 | 13 | def is_image_file(filename): 14 | IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'] 15 | filename_lower = filename.lower() 16 | return any(filename_lower.endswith(extension) for extension in IMG_EXTENSIONS) 17 | 18 | def is_usable_gaze(filename): 19 | GAZE_ZONES = ['part_1', 'part_3', 'part_6', 'part_8', 'part_10', 'part_12'] 20 | filename_lower = filename.lower().split('.')[0] 21 | return any(filename_lower.endswith(gaze_zone) for gaze_zone in GAZE_ZONES) 22 | 23 | def img_loader(path): 24 | try: 25 | with open(path, 'rb') as f: 26 | img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) 27 | 28 | return img 29 | except IOError: 30 | print('Cannot load image ' + path) 31 | 32 | class IR_FACE_Dataset(data.Dataset): 33 | def __init__(self, data_path, img_w, img_h, img_local_h, transform, loader=img_loader,\ 34 | with_subfolder=False, random_crop=True, read_fld=True, return_name=False): 35 | super(IR_FACE_Dataset, self).__init__() 36 | if with_subfolder: 37 | self.samples = self._find_samples_in_subfolders(data_path) 38 | else: 39 | self.samples = [x for x in listdir(data_path) if is_image_file(x)] 40 | 41 | 42 | 43 | # 44 | self.samples = [x for x in self.samples if is_usable_gaze(x)] 45 | ''' 46 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1 47 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2 48 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3 49 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4 50 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5 51 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6 52 | ''' 53 | 54 | self.data_path = data_path 55 | self.img_w = img_w 56 | self.img_h = img_h 57 | self.img_local_h = img_local_h 58 | self.transform = transform 59 | self.random_crop = random_crop 60 | self.return_name = return_name 61 | self.loader = loader 62 | 63 | # if true, read facial landmarks 64 | self.read_fld = read_fld 65 | 66 | 67 | print(str(len(self.samples)) + " items found") 68 | 69 | def __len__(self): 70 | return 
len(self.samples) 71 | 72 | def __getitem__(self, index): 73 | #path = os.path.join(self.data_path, self.samples[index]) 74 | 75 | path = self.data_path + '/' + self.samples[index] 76 | 77 | img = self.loader(path) 78 | w, h = img.shape[0], img.shape[1] 79 | 80 | # use fld? 81 | if self.read_fld: 82 | fld_file = path.replace("jpg", "txt") 83 | fld_fdes = open(fld_file, "r") 84 | flds = np.array(fld_fdes.read().split(), dtype=np.float32) 85 | flds = flds.reshape(68, 2) 86 | fld_fdes.close() 87 | 88 | # need resize? 89 | if w < self.img_w or h < self.img_h or w > self.img_w or h > self.img_h: 90 | 91 | if self.read_fld: 92 | w_ratio, h_ratio = self.img_w / w, self.img_h / h 93 | flds[:, 0] = flds[:, 0] * w_ratio 94 | flds[:, 1] = flds[:, 1] * h_ratio 95 | 96 | img = cv2.resize(img, (self.img_w, self.img_h), interpolation=cv2.INTER_AREA) 97 | 98 | 99 | local_img = img[0:self.img_local_h, 0:self.img_w] 100 | 101 | 102 | 103 | 104 | # pick class 105 | gaze_part = int(path.split('_')[-1].split('.')[0]) 106 | label_tensor = np.zeros([6]) 107 | 108 | ''' 109 | data_list_1 = glob('F:/DB/MOBIS/CROPPED_2/*part_1.jpg') #1 110 | data_list_2 = glob('F:/DB/MOBIS/CROPPED_2/*part_3.jpg') #2 111 | data_list_3 = glob('F:/DB/MOBIS/CROPPED_2/*part_6.jpg') #3 112 | data_list_4 = glob('F:/DB/MOBIS/CROPPED_2/*part_8.jpg') #4 113 | data_list_5 = glob('F:/DB/MOBIS/CROPPED_2/*part_10.jpg') #5 114 | data_list_6 = glob('F:/DB/MOBIS/CROPPED_2/*part_12.jpg') #6 115 | ''' 116 | if gaze_part == 1: 117 | gaze_class = 0 118 | label_tensor[0] = 1 119 | elif gaze_part == 3: 120 | gaze_class = 1 121 | label_tensor[1] = 1 122 | elif gaze_part == 6: 123 | gaze_class = 2 124 | label_tensor[2] = 1 125 | elif gaze_part == 8: 126 | gaze_class = 3 127 | label_tensor[3] = 1 128 | elif gaze_part == 10: 129 | gaze_class = 4 130 | label_tensor[4] = 1 131 | elif gaze_part == 12: 132 | gaze_class = 5 133 | label_tensor[5] = 1 134 | 135 | label_tensor = torch.LongTensor(label_tensor) 136 | #print(path + " --- " + gaze_class) 137 | 138 | 139 | if self.transform is not None: 140 | img = self.transform(img) 141 | local_img = self.transform(local_img) 142 | else: 143 | img = torch.from_numpy(img) 144 | local_img = torch.from_numpy(local_img) 145 | 146 | return img, local_img, label_tensor -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.transforms as transforms 4 | import torch.optim as optim 5 | 6 | import os 7 | import time 8 | 9 | from ir_data import IR_FACE_Dataset 10 | from config import Config 11 | import numpy as np 12 | 13 | from torch.utils.data import Dataset, DataLoader 14 | 15 | from utils import AverageMeter 16 | # ---------------------------------- 17 | if Config.use_model_type == 'LIGHT': 18 | from gaze_model_light_ver import Estimator 19 | elif Config.use_model_type == 'HEAVY' or Config.use_model_type == 'HEAVY+ATT': 20 | from gaze_model_heavy_ver import Estimator 21 | 22 | # ---------------------------------- 23 | 24 | def train(): 25 | torch.multiprocessing.freeze_support() 26 | train_transform = transforms.Compose([ 27 | transforms.ToTensor(), # range [0, 255] -> [0.0,1.0] 28 | ]) 29 | 30 | ir_dataset = IR_FACE_Dataset(data_path=Config.data_path, \ 31 | img_w=Config.global_img_size[0] ,img_h=Config.global_img_size[1], img_local_h=Config.local_img_size[1], \ 32 | transform=train_transform) 33 | ir_dataloader 
= DataLoader(ir_dataset, batch_size=Config.batch_size, \ 34 | shuffle=True, num_workers=1) 35 | 36 | device = torch.device("cuda") 37 | 38 | # checkpt dir 39 | if os.path.exists(Config.save_path) == False: 40 | os.makedirs(Config.save_path) 41 | 42 | # model 43 | if Config.use_model_type == 'HEAVY+ATT': 44 | model = Estimator(use_attention_map=True).cuda() 45 | else: 46 | model = Estimator().cuda() 47 | model = model.to(device) 48 | 49 | # opt 50 | criterion = nn.CrossEntropyLoss().cuda() 51 | optimizer = optim.SGD(model.parameters(), lr=Config.lr, momentum=Config.momentum, \ 52 | weight_decay=Config.weight_decay) 53 | 54 | 55 | for epoch_i in range(Config.max_epoch): 56 | model.train() 57 | 58 | #Config.lr = adjust_learning_rate_v2(optimizer, epoch_i - 1, Config) 59 | #for param_group in optimizer.param_groups: 60 | # param_group["lr"] = Config.lr 61 | 62 | iter_max = ir_dataset.__len__() // Config.batch_size 63 | 64 | # for print 65 | data_time = AverageMeter() 66 | losses = AverageMeter() 67 | top1 = AverageMeter() 68 | top5 = AverageMeter() 69 | end = time.time() 70 | 71 | dataiter = iter(ir_dataloader) 72 | steps_per_epoch = iter_max + 1 73 | #for ii, data in enumerate(ir_dataloader): 74 | for ii in range(steps_per_epoch): 75 | 76 | data_time.update(time.time() - end) 77 | 78 | data_input, data_input_local, label = dataiter.next() 79 | data_input = data_input.to(device) 80 | targets = label.to(device) 81 | data_input_local = data_input_local.to(device) 82 | 83 | 84 | 85 | # optimizer step 86 | optimizer.zero_grad() 87 | outputs = model(data_input, data_input_local) 88 | loss = criterion(outputs, torch.argmax(targets, 1)) 89 | 90 | loss.backward() 91 | optimizer.step() 92 | 93 | # measure accuracy and record loss 94 | total = data_input.size(0) 95 | _, predicted = outputs.max(1) 96 | correct = predicted.eq(torch.argmax(targets,1)).sum().item() 97 | top1.update(100.*correct/total) 98 | 99 | losses.update(loss.item(), data_input.size(0)) 100 | 101 | 102 | end = time.time() 103 | 104 | if ii % Config.print_iter == 0: 105 | print('\nEpoch: [%d | %d], Iter : [%d | %d] LR: %f | Loss : %f | top1 : %.4f | batch_time : %.3f' \ 106 | % (epoch_i, Config.max_epoch, ii, iter_max + 1, Config.lr, losses.avg, top1.avg, data_time.val)) 107 | 108 | 109 | # measure elapsed time 110 | 111 | 112 | # save model 113 | if epoch_i % Config.save_epoch == 0: 114 | torch.save({'state_dict' : model.state_dict(), 'opt' : optimizer.state_dict()}, \ 115 | Config.save_path + "/check_" + str(epoch_i) + ".pth") 116 | 117 | 118 | 119 | # not using - 120 | def adjust_learning_rate(optimizer, epoch, config): 121 | global state 122 | if epoch in config.schedule: 123 | config.lr *= config.gamma 124 | for param_group in optimizer.param_groups: 125 | param_group['lr'] = config.lr 126 | 127 | def adjust_learning_rate_v2(optimizer, epoch, config): 128 | lr = config.lr * (0.1 ** (epoch // 10)) 129 | return lr 130 | 131 | if __name__ == '__main__': 132 | train() -------------------------------------------------------------------------------- /gaze_estimation/v3_pytorch_model/utils.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | """Computes and stores the average and current value 3 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 4 | """ 5 | def __init__(self): 6 | self.reset() 7 | 8 | def reset(self): 9 | self.val = 0 10 | self.avg = 0 11 | self.sum = 0 12 | self.count = 0 13 | 14 | def update(self, val, n=1): 15 | 
self.val = val 16 | self.sum += val * n 17 | self.count += n 18 | self.avg = self.sum / self.count 19 | 20 | 21 | # accuracy of gaze 22 | def accuracy(output, target, topk=(1,)): 23 | """Computes the precision@k for the specified values of k""" 24 | maxk = max(topk) 25 | batch_size = target.size(0) 26 | 27 | _, pred = output.topk(maxk, 1, True, True) 28 | pred = pred.t() 29 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 30 | 31 | 32 | res = [] 33 | for k in topk: 34 | correct_k = correct[:k].reshape(-1).float().sum(0) 35 | res.append(correct_k.mul_(100.0 / batch_size)) 36 | return res 37 | 38 | 39 | def data_from_captue(img, use_fld=False): 40 | img -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | # Using this code to force the usage of any specific GPUs 3 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 4 | import argparse 5 | import os 6 | import random 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.parallel 10 | import torch.backends.cudnn as cudnn 11 | import torch.optim as optim 12 | import torch.utils.data 13 | import torchvision.datasets as dset 14 | import torch.utils.data as data 15 | import time 16 | import numpy as np 17 | import torchvision.utils as vutils 18 | from torch.autograd import Variable 19 | from math import log10 20 | import torchvision 21 | import cv2 22 | import skimage 23 | import scipy.io 24 | import glob 25 | import matplotlib.image as mpimg 26 | import matplotlib.pyplot as plt 27 | from model import losses 28 | from model.networks import * 29 | from util.model_storage import save_checkpoint 30 | from data.dataloader import * 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument("--pretrained", default="./pretrained/weight.pth", type=str, help="path to pretrained model (default: none)") 34 | parser.add_argument("--batch_size", default="8", type=int, help="The path to store our batch_size") 35 | parser.add_argument("--image_dir", default="./data/test_img/", type=str, help="The path to store our batch_size") 36 | parser.add_argument("--image_list", default="./data/test_fileList.txt", type=str, help="The path to store our batch_size") 37 | 38 | global opt,model 39 | opt = parser.parse_args() 40 | 41 | fsrnet = define_G(input_nc = 3, output_nc = 3, ngf=64, which_model_netG=0) 42 | 43 | if torch.cuda.is_available(): 44 | fsrnet = fsrnet.cuda() 45 | 46 | if opt.pretrained: 47 | if os.path.isfile(opt.pretrained): 48 | print("=> loading model '{}'".format(opt.pretrained)) 49 | weights = torch.load(opt.pretrained) 50 | 51 | pretrained_dict = weights['model'].state_dict() 52 | model_dict = fsrnet.state_dict() 53 | 54 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 55 | model_dict.update(pretrained_dict) 56 | 57 | fsrnet.load_state_dict(model_dict) 58 | else: 59 | print("=> no model found at '{}'".format(opt.pretrained)) 60 | 61 | demo_dataset = TestDatasetFromFile( 62 | opt.image_list, 63 | opt.image_dir) 64 | test_data_loader = data.DataLoader(dataset=demo_dataset, batch_size=opt.batch_size, num_workers=8, drop_last=True, 65 | pin_memory=True) 66 | 67 | for iteration, batch in enumerate(test_data_loader): 68 | input = Variable(batch[0]) 69 | input = input.cuda() 70 | upscaled,boundaries,reconstructed = fsrnet(input) 71 | 72 | if not os.path.isdir('./test_result/Coarse_SR_network'): 73 | os.makedirs('./test_result/Coarse_SR_network') 74 | if not 
os.path.isdir('./test_result/Prior_Estimation'): 75 | os.makedirs('./test_result/Prior_Estimation') 76 | if not os.path.isdir('./test_result/Final_SR_reconstruction'): 77 | os.makedirs('./test_result/Final_SR_reconstruction') 78 | 79 | for index in range(opt.batch_size): 80 | final_output = reconstructed.permute(0,2,3,1).detach().cpu().numpy() 81 | final_output_0 = final_output[index,:,:,:] 82 | 83 | estimated_boundary = boundaries.permute(0,2,3,1).detach().cpu().numpy() 84 | estimated_boundary_0 = estimated_boundary[index,:,:,0] 85 | 86 | output = upscaled.permute(0,2,3,1).detach().cpu().numpy() 87 | output_0 = output[index,:,:,:] 88 | 89 | img_num = iteration*opt.batch_size + index 90 | 91 | scipy.misc.toimage(output_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 92 | './test_result/Coarse_SR_network/%4d.jpg'% (img_num)) 93 | scipy.misc.toimage(estimated_boundary_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 94 | './test_result/Prior_Estimation/%4d.jpg' % (img_num)) 95 | scipy.misc.toimage(final_output_0 * 255, high=255, low=0, cmin=0, cmax=255).save( 96 | './test_result/Final_SR_reconstruction/%4d.jpg' % (img_num)) 97 | #code minor changeB10 98 | 99 | -------------------------------------------------------------------------------- /webcam_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | 10 | from face_detection.model.prior_box import PriorBox 11 | from face_detection.model.retinaface import RetinaFace 12 | from face_detection.utils.misc import draw_keypoint, inference 13 | 14 | parser = argparse.ArgumentParser(description='PIMNet') 15 | parser.add_argument( 16 | '--checkpoint', type=str, 17 | default='face_detection/weights/mobilenet0.25_final.pt', 18 | help='Trained state_dict file path to open' 19 | ) 20 | parser.add_argument( 21 | '--cpu', action="store_true", default=False, 22 | help='Use cpu inference' 23 | ) 24 | parser.add_argument( 25 | '--jit', action="store_true", default=False, 26 | help='Use JIT' 27 | ) 28 | parser.add_argument( 29 | '--confidence-threshold', type=float, default=0.02, 30 | help='confidence_threshold' 31 | ) 32 | parser.add_argument( 33 | '--nms-threshold', type=float, default=0.4, 34 | help='nms_threshold' 35 | ) 36 | parser.add_argument( 37 | '--vis-thres', type=float, default=0.5, 38 | help='visualization_threshold' 39 | ) 40 | parser.add_argument( 41 | '-s', '--save-image', action="store_true", default=False, 42 | help='show detection results' 43 | ) 44 | parser.add_argument( 45 | '--save-dir', type=str, default='demo', 46 | help='Dir to save results' 47 | ) 48 | 49 | 50 | def main(): 51 | args = parser.parse_args() 52 | assert os.path.isfile(args.checkpoint) 53 | 54 | checkpoint = torch.load(args.checkpoint, map_location="cpu") 55 | cfg = checkpoint["config"] 56 | device = torch.device("cpu" if args.cpu else "cuda") 57 | 58 | # net and model 59 | detector = RetinaFace(**cfg) 60 | detector.load_state_dict(checkpoint["net_state_dict"]) 61 | detector.eval().requires_grad_(False) 62 | detector.to(device) 63 | print('Finished loading model!') 64 | cudnn.benchmark = True 65 | 66 | # prepare testing 67 | cap = cv2.VideoCapture(0) 68 | assert cap.isOpened() 69 | ret_val, img_tmp = cap.read() 70 | im_height, im_width, _ = img_tmp.shape 71 | scale = torch.Tensor([im_width, im_height, im_width, im_height]) 72 | scale = scale.to(device) 73 | 74 | scale1 = 
torch.Tensor([im_width, im_height] * 5) 75 | scale1 = scale1.to(device) 76 | 77 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 78 | priors = priorbox.forward() 79 | priors = priors.to(device) 80 | prior_data = priors.data 81 | 82 | if args.jit: 83 | img_tmp = img_tmp.transpose(2, 0, 1) 84 | img_tmp = np.float32(img_tmp) 85 | img_tmp = torch.from_numpy(img_tmp).unsqueeze(0) 86 | dummy = img_tmp.to(device) 87 | detector = torch.jit.trace(detector, example_inputs=dummy) 88 | 89 | if args.save_image: 90 | nframe = 0 91 | fname = os.path.join(args.save_dir, "{:06d}.jpg") 92 | os.makedirs(args.save_dir, exist_ok=True) 93 | 94 | # testing begin 95 | ret_val, img_raw = cap.read() 96 | while ret_val: 97 | start = cv2.getTickCount() 98 | 99 | # NOTE preprocessing. 100 | dets = inference( 101 | detector, img_raw, scale, scale1, prior_data, cfg, 102 | args.confidence_threshold, args.nms_threshold, device 103 | ) 104 | 105 | fps = float(cv2.getTickFrequency() / (cv2.getTickCount() - start)) 106 | cv2.putText( 107 | img_raw, f"FPS: {fps:.1f}", (5, 15), 108 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255) 109 | ) 110 | 111 | # show image 112 | draw_keypoint(img_raw, dets, args.vis_thres) 113 | 114 | if args.save_image: 115 | cv2.imwrite(fname.format(nframe), img_raw) 116 | nframe += 1 117 | 118 | cv2.imshow("Webcam Demo", img_raw) 119 | if cv2.waitKey(1) == 27: # Press ESC button to quit. 120 | break 121 | 122 | ret_val, img_raw = cap.read() 123 | 124 | cap.release() 125 | cv2.destroyAllWindows() 126 | 127 | 128 | if __name__ == "__main__": 129 | main() --------------------------------------------------------------------------------