├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── align ├── __init__.py ├── align_trans.py ├── box_utils.py ├── detector.py ├── face_align.py ├── face_resize.py ├── first_stage.py ├── get_nets.py ├── matlab_cp2tform.py ├── onet.npy ├── pnet.npy ├── rnet.npy └── visualization_utils.py ├── backbone ├── __init__.py ├── model_irse.py └── model_resnet.py ├── imgs ├── 9.jpg ├── align.jpg ├── detect_landmark.png ├── parsing.jpg ├── parsing_maps.png ├── person_1 │ ├── 17.jpg │ ├── 18.jpg │ ├── 19.jpg │ └── 20.jpg ├── person_2 │ ├── 151.jpg │ ├── 152.jpg │ ├── 153.jpg │ └── 154.jpg └── single.jpg ├── parsing ├── __init__.py ├── face_parsing.py ├── model.py └── resnet.py └── util ├── __init__.py ├── extract_feature.py ├── utils.py └── verification.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # specified 107 | .idea 108 | just_for_test.py 109 | test_imgs/ 110 | result/ 111 | checkpoint/backbone_ir50_ms1m_epoch120.pth 112 | checkpoint/face_parsing.pth -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 zll 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # facetools 2 | Easy-to-use face related tools, including face detection, landmark localization, alignment & recognition, based on **PyTorch**. 3 | 4 | ## Quick start 5 | * **Do face detection and landmark localization using MTCNN** 6 | ```python 7 | from PIL import Image 8 | from align.detector import detect_faces 9 | from align.visualization_utils import show_results 10 | 11 | img = Image.open('imgs/single.jpg') # modify the image path to yours 12 | bounding_boxes, landmarks = detect_faces(img) # detect bboxes and landmarks for all faces in the image 13 | show_results(img, bounding_boxes, landmarks) # visualize the results 14 | ``` 15 | ![](imgs/single.jpg) ![](imgs/detect_landmark.png) 16 | 17 | * **Do alignment** 18 | ```python 19 | from align.face_align import align 20 | res = align('imgs/single.jpg', save_path='./result', vis=False) 21 | res.show() 22 | ``` 23 | ![](imgs/align.jpg) 24 | 25 | * **Do face encoding using IR50 model** ([download pretrained model](https://pan.baidu.com/s/1L8yOF1oZf6JHfeY9iN59Mg#list/path=%2F)) 26 | ```python 27 | from PIL import Image 28 | from util.extract_feature import extract_feature 29 | from backbone.model_irse import IR_50 30 | 31 | image_1 = Image.open('imgs/align.jpg') # modify the image path to yours 32 | 33 | model = IR_50([112, 112]) 34 | model_cp = 'checkpoint/backbone_ir50_ms1m_epoch120.pth' 35 | 36 | features = extract_feature(image_1, model, model_cp) 37 | print(features.size()) # output : torch.Size([1, 512]) 38 | 39 | ``` 40 | 41 | * **Calculate the distance between two images** 42 | ```python 43 | import numpy as np 44 | from PIL import Image 45 | from util.extract_feature import extract_feature 46 | from backbone.model_irse import IR_50 47 | from scipy.spatial.distance import pdist 48 | 49 | 50 | face_1 = Image.open('imgs/person_1/17.jpg') 51 | face_2 = Image.open('imgs/person_1/18.jpg') # face_1 and face_2 belong to the same one 52 | 53 | face_3 = Image.open('imgs/person_2/151.jpg') 54 | face_4 = Image.open('imgs/person_2/152.jpg') # face_3 and face_4 belong to the same one 55 | 56 | model = IR_50([112, 112]) 57 | model_cp = 'checkpoint/backbone_ir50_ms1m_epoch120.pth' 58 | 59 | data = [face_1, face_2, face_3, face_4] 60 | 61 | features = extract_feature(data, model, model_cp) 62 | features = [i.numpy() for i in features] # embeddings for face_1, face_2, face_3 and face_4 63 | 64 | diff = np.subtract(features[0], features[1]) 65 | dist = np.sum(np.square(diff), 1) 66 | print(dist) # output : 1984.6016 67 | 68 | diff = np.subtract(features[2], features[3]) 69 | dist = np.sum(np.square(diff), 1) 70 | print(dist) # output : 1921.2222 71 | 72 | diff = np.subtract(features[0], features[2]) 73 | dist = np.sum(np.square(diff), 1) 74 | print(dist) # output : 16876.32 75 | 76 | diff = np.subtract(features[1], features[3]) 77 | dist = np.sum(np.square(diff), 1) 78 | 
print(dist) # output : 17107.396 79 | 80 | dist = pdist(np.vstack([features[0], features[1]]), 'cosine') 81 | print(dist) # output : 0.12932935 82 | 83 | dist = pdist(np.vstack([features[2], features[3]]), 'cosine') 84 | print(dist) # output : 0.11706942 85 | 86 | dist = pdist(np.vstack([features[0], features[2]]), 'cosine') 87 | print(dist) # output : 1.09022914 88 | 89 | dist = pdist(np.vstack([features[1], features[3]]), 'cosine') 90 | print(dist) # output : 1.07447068 91 | ``` 92 | 93 | * **Do face parsing** 94 | ```python 95 | from PIL import Image 96 | from parsing.face_parsing import parsing, vis_parsing_maps 97 | 98 | image = Image.open('imgs/9.jpg') 99 | 100 | res = parsing(image) 101 | vis_parsing_maps(image, res, show=True, save_im=True) 102 | ``` 103 | ![](imgs/9.jpg) ![](imgs/parsing.jpg) 104 | 105 | ### Using facetools in Your Project 106 | It is easy to use facetools in your project. 107 | ``` 108 | Your project 109 | │ README.md 110 | │ ... 111 | │ foo.py 112 | │ 113 | └───facetools 114 | │ 115 | └───directory1 116 | │ 117 | └───... 118 | ``` 119 | 120 | In `foo.py`, you can easily import facetools by adding: 121 | ```python 122 | from facetools import detect_faces, show_results 123 | from PIL import Image 124 | 125 | def foo(): 126 | img = Image.open('/path/to/your/image') 127 | bounding_boxes, landmarks = detect_faces(img) 128 | show_results(img, bounding_boxes, landmarks) 129 | ``` 130 | 131 | ## Acknowledgement 132 | - This repo is based on [face.evoLVe.PyTorch](https://github.com/ZhaoJ9014/face.evoLVe.PyTorch) and [face-parsing.PyTorch](https://github.com/zllrunning/face-parsing.PyTorch). Many thanks to the excellent repo. 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from util.extract_feature import extract_feature 2 | from backbone.model_irse import IR_50 3 | from align.detector import detect_faces 4 | from align.visualization_utils import show_results 5 | from align.face_align import align 6 | from parsing.face_parsing import parsing, vis_parsing_maps -------------------------------------------------------------------------------- /align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /align/align_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from .matlab_cp2tform import get_similarity_transform_for_cv2 4 | 5 | 6 | # reference facial points, a list of coordinates (x,y) 7 | REFERENCE_FACIAL_POINTS = [ # default reference facial points for crop_size = (112, 112); should adjust REFERENCE_FACIAL_POINTS accordingly for other crop_size 8 | [30.29459953, 51.69630051], 9 | [65.53179932, 51.50139999], 10 | [48.02519989, 71.73660278], 11 | [33.54930115, 92.3655014], 12 | [62.72990036, 92.20410156] 13 | ] 14 | 15 | DEFAULT_CROP_SIZE = (96, 112) 16 | 17 | 18 | class FaceWarpException(Exception): 19 | def __str__(self): 20 | return 'In File {}:{}'.format( 21 | __file__, super.__str__(self)) 22 | 23 | 24 | def get_reference_facial_points(output_size = None, 25 | inner_padding_factor = 0.0, 26 | outer_padding=(0, 0), 27 | default_square = False): 28 | """ 29 | Function: 30 | ---------- 31 | get reference 5 key points according to crop settings: 32 | 0. 
Set default crop_size: 33 | if default_square: 34 | crop_size = (112, 112) 35 | else: 36 | crop_size = (96, 112) 37 | 1. Pad the crop_size by inner_padding_factor in each side; 38 | 2. Resize crop_size into (output_size - outer_padding*2), 39 | pad into output_size with outer_padding; 40 | 3. Output reference_5point; 41 | Parameters: 42 | ---------- 43 | @output_size: (w, h) or None 44 | size of aligned face image 45 | @inner_padding_factor: (w_factor, h_factor) 46 | padding factor for inner (w, h) 47 | @outer_padding: (w_pad, h_pad) 48 | each row is a pair of coordinates (x, y) 49 | @default_square: True or False 50 | if True: 51 | default crop_size = (112, 112) 52 | else: 53 | default crop_size = (96, 112); 54 | !!! make sure, if output_size is not None: 55 | (output_size - outer_padding) 56 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 57 | Returns: 58 | ---------- 59 | @reference_5point: 5x2 np.array 60 | each row is a pair of transformed coordinates (x, y) 61 | """ 62 | #print('\n===> get_reference_facial_points():') 63 | 64 | #print('---> Params:') 65 | #print(' output_size: ', output_size) 66 | #print(' inner_padding_factor: ', inner_padding_factor) 67 | #print(' outer_padding:', outer_padding) 68 | #print(' default_square: ', default_square) 69 | 70 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 71 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 72 | 73 | # 0) make the inner region a square 74 | if default_square: 75 | size_diff = max(tmp_crop_size) - tmp_crop_size 76 | tmp_5pts += size_diff / 2 77 | tmp_crop_size += size_diff 78 | 79 | #print('---> default:') 80 | #print(' crop_size = ', tmp_crop_size) 81 | #print(' reference_5pts = ', tmp_5pts) 82 | 83 | if (output_size and 84 | output_size[0] == tmp_crop_size[0] and 85 | output_size[1] == tmp_crop_size[1]): 86 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 87 | return tmp_5pts 88 | 89 | if (inner_padding_factor == 0 and 90 | outer_padding == (0, 0)): 91 | if output_size is None: 92 | #print('No paddings to do: return default reference points') 93 | return tmp_5pts 94 | else: 95 | raise FaceWarpException( 96 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 97 | 98 | # check output size 99 | if not (0 <= inner_padding_factor <= 1.0): 100 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 101 | 102 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 103 | and output_size is None): 104 | output_size = tmp_crop_size * \ 105 | (1 + inner_padding_factor * 2).astype(np.int32) 106 | output_size += np.array(outer_padding) 107 | #print(' deduced from paddings, output_size = ', output_size) 108 | 109 | if not (outer_padding[0] < output_size[0] 110 | and outer_padding[1] < output_size[1]): 111 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 112 | 'and outer_padding[1] < output_size[1])') 113 | 114 | # 1) pad the inner region according inner_padding_factor 115 | #print('---> STEP1: pad the inner region according inner_padding_factor') 116 | if inner_padding_factor > 0: 117 | size_diff = tmp_crop_size * inner_padding_factor * 2 118 | tmp_5pts += size_diff / 2 119 | tmp_crop_size += np.round(size_diff).astype(np.int32) 120 | 121 | #print(' crop_size = ', tmp_crop_size) 122 | #print(' reference_5pts = ', tmp_5pts) 123 | 124 | # 2) resize the padded inner region 125 | #print('---> STEP2: resize the padded inner region') 126 | size_bf_outer_pad = np.array(output_size) - 
np.array(outer_padding) * 2 127 | #print(' crop_size = ', tmp_crop_size) 128 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 129 | 130 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 131 | raise FaceWarpException('Must have (output_size - outer_padding)' 132 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 133 | 134 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 135 | #print(' resize scale_factor = ', scale_factor) 136 | tmp_5pts = tmp_5pts * scale_factor 137 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 138 | # tmp_5pts = tmp_5pts + size_diff / 2 139 | tmp_crop_size = size_bf_outer_pad 140 | #print(' crop_size = ', tmp_crop_size) 141 | #print(' reference_5pts = ', tmp_5pts) 142 | 143 | # 3) add outer_padding to make output_size 144 | reference_5point = tmp_5pts + np.array(outer_padding) 145 | tmp_crop_size = output_size 146 | #print('---> STEP3: add outer_padding to make output_size') 147 | #print(' crop_size = ', tmp_crop_size) 148 | #print(' reference_5pts = ', tmp_5pts) 149 | 150 | #print('===> end get_reference_facial_points\n') 151 | 152 | return reference_5point 153 | 154 | 155 | def get_affine_transform_matrix(src_pts, dst_pts): 156 | """ 157 | Function: 158 | ---------- 159 | get affine transform matrix 'tfm' from src_pts to dst_pts 160 | Parameters: 161 | ---------- 162 | @src_pts: Kx2 np.array 163 | source points matrix, each row is a pair of coordinates (x, y) 164 | @dst_pts: Kx2 np.array 165 | destination points matrix, each row is a pair of coordinates (x, y) 166 | Returns: 167 | ---------- 168 | @tfm: 2x3 np.array 169 | transform matrix from src_pts to dst_pts 170 | """ 171 | 172 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 173 | n_pts = src_pts.shape[0] 174 | ones = np.ones((n_pts, 1), src_pts.dtype) 175 | src_pts_ = np.hstack([src_pts, ones]) 176 | dst_pts_ = np.hstack([dst_pts, ones]) 177 | 178 | # #print(('src_pts_:\n' + str(src_pts_)) 179 | # #print(('dst_pts_:\n' + str(dst_pts_)) 180 | 181 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 182 | 183 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 184 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 185 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 186 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 187 | 188 | if rank == 3: 189 | tfm = np.float32([ 190 | [A[0, 0], A[1, 0], A[2, 0]], 191 | [A[0, 1], A[1, 1], A[2, 1]] 192 | ]) 193 | elif rank == 2: 194 | tfm = np.float32([ 195 | [A[0, 0], A[1, 0], 0], 196 | [A[0, 1], A[1, 1], 0] 197 | ]) 198 | 199 | return tfm 200 | 201 | 202 | def warp_and_crop_face(src_img, 203 | facial_pts, 204 | reference_pts = None, 205 | crop_size=(96, 112), 206 | align_type = 'smilarity'): 207 | """ 208 | Function: 209 | ---------- 210 | apply affine transform 'trans' to uv 211 | Parameters: 212 | ---------- 213 | @src_img: 3x3 np.array 214 | input image 215 | @facial_pts: could be 216 | 1)a list of K coordinates (x,y) 217 | or 218 | 2) Kx2 or 2xK np.array 219 | each row or col is a pair of coordinates (x, y) 220 | @reference_pts: could be 221 | 1) a list of K coordinates (x,y) 222 | or 223 | 2) Kx2 or 2xK np.array 224 | each row or col is a pair of coordinates (x, y) 225 | or 226 | 3) None 227 | if None, use default reference facial points 228 | @crop_size: (w, h) 229 | output face image size 230 | @align_type: transform type, could be one of 231 | 1) 'similarity': use similarity transform 232 | 2) 'cv2_affine': use the first 3 points to do affine 
transform, 233 | by calling cv2.getAffineTransform() 234 | 3) 'affine': use all points to do affine transform 235 | Returns: 236 | ---------- 237 | @face_img: output face image with size (w, h) = @crop_size 238 | """ 239 | 240 | if reference_pts is None: 241 | if crop_size[0] == 96 and crop_size[1] == 112: 242 | reference_pts = REFERENCE_FACIAL_POINTS 243 | else: 244 | default_square = False 245 | inner_padding_factor = 0 246 | outer_padding = (0, 0) 247 | output_size = crop_size 248 | 249 | reference_pts = get_reference_facial_points(output_size, 250 | inner_padding_factor, 251 | outer_padding, 252 | default_square) 253 | 254 | ref_pts = np.float32(reference_pts) 255 | ref_pts_shp = ref_pts.shape 256 | if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2: 257 | raise FaceWarpException( 258 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 259 | 260 | if ref_pts_shp[0] == 2: 261 | ref_pts = ref_pts.T 262 | 263 | src_pts = np.float32(facial_pts) 264 | src_pts_shp = src_pts.shape 265 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 266 | raise FaceWarpException( 267 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 268 | 269 | if src_pts_shp[0] == 2: 270 | src_pts = src_pts.T 271 | 272 | # #print('--->src_pts:\n', src_pts 273 | # #print('--->ref_pts\n', ref_pts 274 | 275 | if src_pts.shape != ref_pts.shape: 276 | raise FaceWarpException( 277 | 'facial_pts and reference_pts must have the same shape') 278 | 279 | if align_type is 'cv2_affine': 280 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 281 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 282 | elif align_type is 'affine': 283 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 284 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 285 | else: 286 | tfm = get_similarity_transform_for_cv2(src_pts, ref_pts) 287 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 288 | 289 | # #print('--->Transform matrix: ' 290 | # #print(('type(tfm):' + str(type(tfm))) 291 | # #print(('tfm.dtype:' + str(tfm.dtype)) 292 | # #print( tfm 293 | 294 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 295 | 296 | return face_img -------------------------------------------------------------------------------- /align/box_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | def nms(boxes, overlap_threshold = 0.5, mode = 'union'): 6 | """Non-maximum suppression. 7 | 8 | Arguments: 9 | boxes: a float numpy array of shape [n, 5], 10 | where each row is (xmin, ymin, xmax, ymax, score). 11 | overlap_threshold: a float number. 12 | mode: 'union' or 'min'. 
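Note: with mode 'union' the overlap measure is intersection-over-union, inter / (area_i + area_j - inter); with mode 'min' it is inter / min(area_i, area_j), which suppresses boxes nested inside larger ones more aggressively. This matches the computation in the loop below.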
13 | 14 | Returns: 15 | list with indices of the selected boxes 16 | """ 17 | 18 | # if there are no boxes, return the empty list 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | # list of picked indices 23 | pick = [] 24 | 25 | # grab the coordinates of the bounding boxes 26 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] 27 | 28 | area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 29 | ids = np.argsort(score) # in increasing order 30 | 31 | while len(ids) > 0: 32 | 33 | # grab index of the largest value 34 | last = len(ids) - 1 35 | i = ids[last] 36 | pick.append(i) 37 | 38 | # compute intersections 39 | # of the box with the largest score 40 | # with the rest of boxes 41 | 42 | # left top corner of intersection boxes 43 | ix1 = np.maximum(x1[i], x1[ids[:last]]) 44 | iy1 = np.maximum(y1[i], y1[ids[:last]]) 45 | 46 | # right bottom corner of intersection boxes 47 | ix2 = np.minimum(x2[i], x2[ids[:last]]) 48 | iy2 = np.minimum(y2[i], y2[ids[:last]]) 49 | 50 | # width and height of intersection boxes 51 | w = np.maximum(0.0, ix2 - ix1 + 1.0) 52 | h = np.maximum(0.0, iy2 - iy1 + 1.0) 53 | 54 | # intersections' areas 55 | inter = w * h 56 | if mode == 'min': 57 | overlap = inter/np.minimum(area[i], area[ids[:last]]) 58 | elif mode == 'union': 59 | # intersection over union (IoU) 60 | overlap = inter/(area[i] + area[ids[:last]] - inter) 61 | 62 | # delete all boxes where overlap is too big 63 | ids = np.delete( 64 | ids, 65 | np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) 66 | ) 67 | 68 | return pick 69 | 70 | 71 | def convert_to_square(bboxes): 72 | """Convert bounding boxes to a square form. 73 | 74 | Arguments: 75 | bboxes: a float numpy array of shape [n, 5]. 76 | 77 | Returns: 78 | a float numpy array of shape [n, 5], 79 | squared bounding boxes. 80 | """ 81 | 82 | square_bboxes = np.zeros_like(bboxes) 83 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 84 | h = y2 - y1 + 1.0 85 | w = x2 - x1 + 1.0 86 | max_side = np.maximum(h, w) 87 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 88 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 89 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 90 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 91 | return square_bboxes 92 | 93 | 94 | def calibrate_box(bboxes, offsets): 95 | """Transform bounding boxes to be more like true bounding boxes. 96 | 'offsets' is one of the outputs of the nets. 97 | 98 | Arguments: 99 | bboxes: a float numpy array of shape [n, 5]. 100 | offsets: a float numpy array of shape [n, 4]. 101 | 102 | Returns: 103 | a float numpy array of shape [n, 5]. 104 | """ 105 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 106 | w = x2 - x1 + 1.0 107 | h = y2 - y1 + 1.0 108 | w = np.expand_dims(w, 1) 109 | h = np.expand_dims(h, 1) 110 | 111 | # this is what happening here: 112 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 113 | # x1_true = x1 + tx1*w 114 | # y1_true = y1 + ty1*h 115 | # x2_true = x2 + tx2*w 116 | # y2_true = y2 + ty2*h 117 | # below is just more compact form of this 118 | 119 | # are offsets always such that 120 | # x1 < x2 and y1 < y2 ? 121 | 122 | translation = np.hstack([w, h, w, h])*offsets 123 | bboxes[:, 0:4] = bboxes[:, 0:4] + translation 124 | return bboxes 125 | 126 | 127 | def get_image_boxes(bounding_boxes, img, size = 24): 128 | """Cut out boxes from the image. 129 | 130 | Arguments: 131 | bounding_boxes: a float numpy array of shape [n, 5]. 132 | img: an instance of PIL.Image. 133 | size: an integer, size of cutouts. 
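Note: boxes are first clipped to the image by correct_bboxes(); any part of a box that falls outside the image is zero-padded, and every cutout is resized to (size, size) with bilinear interpolation and normalized by _preprocess() before being returned.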
134 | 135 | Returns: 136 | a float numpy array of shape [n, 3, size, size]. 137 | """ 138 | 139 | num_boxes = len(bounding_boxes) 140 | width, height = img.size 141 | 142 | [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) 143 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 144 | 145 | for i in range(num_boxes): 146 | img_box = np.zeros((h[i], w[i], 3), 'uint8') 147 | 148 | img_array = np.asarray(img, 'uint8') 149 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ 150 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 151 | 152 | # resize 153 | img_box = Image.fromarray(img_box) 154 | img_box = img_box.resize((size, size), Image.BILINEAR) 155 | img_box = np.asarray(img_box, 'float32') 156 | 157 | img_boxes[i, :, :, :] = _preprocess(img_box) 158 | 159 | return img_boxes 160 | 161 | 162 | def correct_bboxes(bboxes, width, height): 163 | """Crop boxes that are too big and get coordinates 164 | with respect to cutouts. 165 | 166 | Arguments: 167 | bboxes: a float numpy array of shape [n, 5], 168 | where each row is (xmin, ymin, xmax, ymax, score). 169 | width: a float number. 170 | height: a float number. 171 | 172 | Returns: 173 | dy, dx, edy, edx: a int numpy arrays of shape [n], 174 | coordinates of the boxes with respect to the cutouts. 175 | y, x, ey, ex: a int numpy arrays of shape [n], 176 | corrected ymin, xmin, ymax, xmax. 177 | h, w: a int numpy arrays of shape [n], 178 | just heights and widths of boxes. 179 | 180 | in the following order: 181 | [dy, edy, dx, edx, y, ey, x, ex, w, h]. 182 | """ 183 | 184 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 185 | w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 186 | num_boxes = bboxes.shape[0] 187 | 188 | # 'e' stands for end 189 | # (x, y) -> (ex, ey) 190 | x, y, ex, ey = x1, y1, x2, y2 191 | 192 | # we need to cut out a box from the image. 193 | # (x, y, ex, ey) are corrected coordinates of the box 194 | # in the image. 195 | # (dx, dy, edx, edy) are coordinates of the box in the cutout 196 | # from the image. 197 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 198 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 199 | 200 | # if box's bottom right corner is too far right 201 | ind = np.where(ex > width - 1.0)[0] 202 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 203 | ex[ind] = width - 1.0 204 | 205 | # if box's bottom right corner is too low 206 | ind = np.where(ey > height - 1.0)[0] 207 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 208 | ey[ind] = height - 1.0 209 | 210 | # if box's top left corner is too far left 211 | ind = np.where(x < 0.0)[0] 212 | dx[ind] = 0.0 - x[ind] 213 | x[ind] = 0.0 214 | 215 | # if box's top left corner is too high 216 | ind = np.where(y < 0.0)[0] 217 | dy[ind] = 0.0 - y[ind] 218 | y[ind] = 0.0 219 | 220 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 221 | return_list = [i.astype('int32') for i in return_list] 222 | 223 | return return_list 224 | 225 | 226 | def _preprocess(img): 227 | """Preprocessing step before feeding the network. 228 | 229 | Arguments: 230 | img: a float numpy array of shape [h, w, c]. 231 | 232 | Returns: 233 | a float numpy array of shape [1, c, h, w]. 
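Note: the array is transposed from HWC to CHW, a batch dimension is added, and pixel values are scaled by (img - 127.5) * 0.0078125, i.e. (img - 127.5) / 128, mapping [0, 255] into roughly [-1, 1].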
234 | """ 235 | img = img.transpose((2, 0, 1)) 236 | img = np.expand_dims(img, 0) 237 | img = (img - 127.5) * 0.0078125 238 | return img 239 | -------------------------------------------------------------------------------- /align/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | from .get_nets import PNet, RNet, ONet 5 | from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 6 | from .first_stage import run_first_stage 7 | 8 | 9 | def detect_faces(image, min_face_size=20.0, 10 | thresholds=[0.6, 0.7, 0.8], 11 | nms_thresholds=[0.7, 0.7, 0.7]): 12 | """ 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | min_face_size: a float number. 16 | thresholds: a list of length 3. 17 | nms_thresholds: a list of length 3. 18 | 19 | Returns: 20 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 21 | bounding boxes and facial landmarks. 22 | """ 23 | 24 | # LOAD MODELS 25 | pnet = PNet() 26 | rnet = RNet() 27 | onet = ONet() 28 | onet.eval() 29 | 30 | # BUILD AN IMAGE PYRAMID 31 | width, height = image.size 32 | min_length = min(height, width) 33 | 34 | min_detection_size = 12 35 | factor = 0.707 # sqrt(0.5) 36 | 37 | # scales for scaling the image 38 | scales = [] 39 | 40 | # scales the image so that 41 | # minimum size that we can detect equals to 42 | # minimum face size that we want to detect 43 | m = min_detection_size/min_face_size 44 | min_length *= m 45 | 46 | factor_count = 0 47 | while min_length > min_detection_size: 48 | scales.append(m*factor**factor_count) 49 | min_length *= factor 50 | factor_count += 1 51 | 52 | # STAGE 1 53 | 54 | # it will be returned 55 | bounding_boxes = [] 56 | 57 | # run P-Net on different scales 58 | for s in scales: 59 | boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0]) 60 | bounding_boxes.append(boxes) 61 | 62 | # collect boxes (and offsets, and scores) from different scales 63 | bounding_boxes = [i for i in bounding_boxes if i is not None] 64 | bounding_boxes = np.vstack(bounding_boxes) 65 | 66 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 67 | bounding_boxes = bounding_boxes[keep] 68 | 69 | # use offsets predicted by pnet to transform bounding boxes 70 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 71 | # shape [n_boxes, 5] 72 | 73 | bounding_boxes = convert_to_square(bounding_boxes) 74 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 75 | 76 | # STAGE 2 77 | with torch.no_grad(): 78 | img_boxes = get_image_boxes(bounding_boxes, image, size = 24) 79 | img_boxes = torch.FloatTensor(img_boxes) 80 | output = rnet(img_boxes) 81 | offsets = output[0].data.numpy() # shape [n_boxes, 4] 82 | probs = output[1].data.numpy() # shape [n_boxes, 2] 83 | 84 | keep = np.where(probs[:, 1] > thresholds[1])[0] 85 | bounding_boxes = bounding_boxes[keep] 86 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 87 | offsets = offsets[keep] 88 | 89 | keep = nms(bounding_boxes, nms_thresholds[1]) 90 | bounding_boxes = bounding_boxes[keep] 91 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 92 | bounding_boxes = convert_to_square(bounding_boxes) 93 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 94 | 95 | # STAGE 3 96 | with torch.no_grad(): 97 | img_boxes = get_image_boxes(bounding_boxes, image, size=48) 98 | if len(img_boxes) == 0: 99 | return [], [] 100 | img_boxes = torch.FloatTensor(img_boxes) 101 | output 
= onet(img_boxes) 102 | landmarks = output[0].data.numpy() # shape [n_boxes, 10] 103 | offsets = output[1].data.numpy() # shape [n_boxes, 4] 104 | probs = output[2].data.numpy() # shape [n_boxes, 2] 105 | 106 | keep = np.where(probs[:, 1] > thresholds[2])[0] 107 | bounding_boxes = bounding_boxes[keep] 108 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 109 | offsets = offsets[keep] 110 | landmarks = landmarks[keep] 111 | 112 | # compute landmark points 113 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 114 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 115 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 116 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 117 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 118 | 119 | bounding_boxes = calibrate_box(bounding_boxes, offsets) 120 | keep = nms(bounding_boxes, nms_thresholds[2], mode='min') 121 | bounding_boxes = bounding_boxes[keep] 122 | landmarks = landmarks[keep] 123 | 124 | return bounding_boxes, landmarks 125 | 126 | -------------------------------------------------------------------------------- /align/face_align.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from .detector import detect_faces 3 | from .align_trans import get_reference_facial_points, warp_and_crop_face 4 | import numpy as np 5 | import os 6 | from tqdm import tqdm 7 | 8 | 9 | def align(image_or_folder, crop_size=112, save_path=None, vis=True): # 传入已读的图像 10 | scale = crop_size / 112. 11 | reference = get_reference_facial_points(default_square=True) * scale 12 | 13 | if save_path is not None and not os.path.isdir(save_path) : 14 | os.makedirs(save_path) 15 | 16 | if os.path.isdir(image_or_folder): 17 | for subfolder in tqdm(os.listdir(image_or_folder)): 18 | if not os.path.isdir(os.path.join(save_path, subfolder)): 19 | os.makedirs(os.path.join(save_path, subfolder)) 20 | for image_name in os.listdir(os.path.join(image_or_folder, subfolder)): 21 | print("Processing\t{}".format(os.path.join(image_or_folder, subfolder, image_name))) 22 | img = Image.open(os.path.join(image_or_folder, subfolder, image_name)) 23 | try: # Handle exception 24 | _, landmarks = detect_faces(img) 25 | except Exception: 26 | print("{} is discarded due to exception!".format(os.path.join(image_or_folder, subfolder, image_name))) 27 | continue 28 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 29 | print("{} is discarded due to non-detected landmarks!".format(os.path.join(image_or_folder, subfolder, image_name))) 30 | continue 31 | facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 32 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 33 | img_warped = Image.fromarray(warped_face) 34 | if image_name.split('.')[-1].lower() not in ['jpg', 'jpeg']: # not from jpg 35 | image_name = '.'.join(image_name.split('.')[:-1]) + '.jpg' 36 | img_warped.save(os.path.join(save_path, subfolder, image_name)) 37 | else: 38 | img = Image.open(image_or_folder) 39 | try: # Handle exception 40 | _, landmarks = detect_faces(img) 41 | except Exception: 42 | print("{} is discarded due to exception!".format(image_or_folder)) 43 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 44 | print("{} is discarded due to non-detected landmarks!".format(image_or_folder)) 45 | 
facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 46 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 47 | img_warped = Image.fromarray(warped_face) 48 | if vis: 49 | img_warped.show() 50 | if save_path is not None: 51 | if image_or_folder.split('.')[-1].lower() not in ['jpg', 'jpeg']: # not from jpg 52 | image_or_folder = '.'.join(os.path.basename(image_or_folder).split('.')[:-1]) + '.jpg' 53 | else: 54 | image_or_folder = os.path.basename(image_or_folder) 55 | 56 | img_warped.save(os.path.join(save_path, image_or_folder)) 57 | return img_warped 58 | -------------------------------------------------------------------------------- /align/face_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from tqdm import tqdm 4 | 5 | 6 | def mkdir(path): 7 | if not os.path.exists(path): 8 | os.mkdir(path) 9 | 10 | 11 | def process_image(img): 12 | 13 | size = img.shape 14 | h, w = size[0], size[1] 15 | scale = max(w, h) / float(min_side) 16 | new_w, new_h = int(w / scale), int(h / scale) 17 | resize_img = cv2.resize(img, (new_w, new_h)) 18 | if new_w % 2 != 0 and new_h % 2 == 0: 19 | top, bottom, left, right = (min_side - new_h) / 2, (min_side - new_h) / 2, (min_side - new_w) / 2 + 1, ( 20 | min_side - new_w) / 2 21 | elif new_h % 2 != 0 and new_w % 2 == 0: 22 | top, bottom, left, right = (min_side - new_h) / 2 + 1, (min_side - new_h) / 2, (min_side - new_w) / 2, ( 23 | min_side - new_w) / 2 24 | elif new_h % 2 == 0 and new_w % 2 == 0: 25 | top, bottom, left, right = (min_side - new_h) / 2, (min_side - new_h) / 2, (min_side - new_w) / 2, ( 26 | min_side - new_w) / 2 27 | else: 28 | top, bottom, left, right = (min_side - new_h) / 2 + 1, (min_side - new_h) / 2, (min_side - new_w) / 2 + 1, ( 29 | min_side - new_w) / 2 30 | pad_img = cv2.copyMakeBorder(resize_img, int(top), int(bottom), int(left), int(right), cv2.BORDER_CONSTANT, # cast to int: cv2.copyMakeBorder requires integer border widths, and '/' above yields floats in Python 3 31 | value=[0, 0, 0]) 32 | 33 | return pad_img 34 | 35 | 36 | def main(source_root): 37 | 38 | dest_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M_Resized" 39 | mkdir(dest_root) 40 | cwd = os.getcwd() # delete any '.DS_Store' files in the source_root 41 | os.chdir(source_root) 42 | os.system("find .
-name '*.DS_Store' -type f -delete") 43 | os.chdir(cwd) 44 | 45 | if not os.path.isdir(dest_root): 46 | os.mkdir(dest_root) 47 | 48 | for subfolder in tqdm(os.listdir(source_root)): 49 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 50 | os.mkdir(os.path.join(dest_root, subfolder)) 51 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 52 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 53 | img = cv2.imread(os.path.join(source_root, subfolder, image_name)) 54 | if type(img) == type(None): 55 | print("damaged image %s, del it" % (img)) 56 | os.remove(img) 57 | continue 58 | size = img.shape 59 | h, w = size[0], size[1] 60 | if max(w, h) > 512: 61 | img_pad = process_image(img) 62 | else: 63 | img_pad = img 64 | cv2.imwrite(os.path.join(dest_root, subfolder, image_name.split('.')[0] + '.jpg'), img_pad) 65 | 66 | 67 | if __name__ == "__main__": 68 | min_side = 512 69 | main(source_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M/database/base") -------------------------------------------------------------------------------- /align/first_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import math 4 | from PIL import Image 5 | import numpy as np 6 | from .box_utils import nms, _preprocess 7 | 8 | 9 | def run_first_stage(image, net, scale, threshold): 10 | """Run P-Net, generate bounding boxes, and do NMS. 11 | 12 | Arguments: 13 | image: an instance of PIL.Image. 14 | net: an instance of pytorch's nn.Module, P-Net. 15 | scale: a float number, 16 | scale width and height of the image by this number. 17 | threshold: a float number, 18 | threshold on the probability of a face when generating 19 | bounding boxes from predictions of the net. 20 | 21 | Returns: 22 | a float numpy array of shape [n_boxes, 9], 23 | bounding boxes with scores and offsets (4 + 1 + 4). 24 | """ 25 | 26 | # scale the image and convert it to a float array 27 | width, height = image.size 28 | sw, sh = math.ceil(width*scale), math.ceil(height*scale) 29 | img = image.resize((sw, sh), Image.BILINEAR) 30 | img = np.asarray(img, 'float32') 31 | with torch.no_grad(): 32 | img = torch.FloatTensor(_preprocess(img)) 33 | output = net(img) 34 | probs = output[1].data.numpy()[0, 1, :, :] 35 | offsets = output[0].data.numpy() 36 | # probs: probability of a face at each sliding window 37 | # offsets: transformations to true bounding boxes 38 | 39 | boxes = _generate_bboxes(probs, offsets, scale, threshold) 40 | if len(boxes) == 0: 41 | return None 42 | 43 | keep = nms(boxes[:, 0:5], overlap_threshold = 0.5) 44 | return boxes[keep] 45 | 46 | 47 | def _generate_bboxes(probs, offsets, scale, threshold): 48 | """Generate bounding boxes at places 49 | where there is probably a face. 50 | 51 | Arguments: 52 | probs: a float numpy array of shape [n, m]. 53 | offsets: a float numpy array of shape [1, 4, n, m]. 54 | scale: a float number, 55 | width and height of the image were scaled by this number. 56 | threshold: a float number. 
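Note: each output cell of P-Net corresponds to a 12x12 window (cell_size) taken with stride 2 in the scaled image, so the returned corners are computed as (stride * index + 1) / scale and (stride * index + 1 + cell_size) / scale to map back to original-image coordinates.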
57 | 58 | Returns: 59 | a float numpy array of shape [n_boxes, 9] 60 | """ 61 | 62 | # applying P-Net is equivalent, in some sense, to 63 | # moving 12x12 window with stride 2 64 | stride = 2 65 | cell_size = 12 66 | 67 | # indices of boxes where there is probably a face 68 | inds = np.where(probs > threshold) 69 | 70 | if inds[0].size == 0: 71 | return np.array([]) 72 | 73 | # transformations of bounding boxes 74 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] 75 | # they are defined as: 76 | # w = x2 - x1 + 1 77 | # h = y2 - y1 + 1 78 | # x1_true = x1 + tx1*w 79 | # x2_true = x2 + tx2*w 80 | # y1_true = y1 + ty1*h 81 | # y2_true = y2 + ty2*h 82 | 83 | offsets = np.array([tx1, ty1, tx2, ty2]) 84 | score = probs[inds[0], inds[1]] 85 | 86 | # P-Net is applied to scaled images 87 | # so we need to rescale bounding boxes back 88 | bounding_boxes = np.vstack([ 89 | np.round((stride*inds[1] + 1.0)/scale), 90 | np.round((stride*inds[0] + 1.0)/scale), 91 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 92 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 93 | score, offsets 94 | ]) 95 | # why one is added? 96 | 97 | return bounding_boxes.T -------------------------------------------------------------------------------- /align/get_nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | import numpy as np 6 | 7 | 8 | class Flatten(nn.Module): 9 | 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """ 15 | Arguments: 16 | x: a float tensor with shape [batch_size, c, h, w]. 17 | Returns: 18 | a float tensor with shape [batch_size, c*h*w]. 19 | """ 20 | 21 | # without this pretrained model isn't working 22 | x = x.transpose(3, 2).contiguous() 23 | 24 | return x.view(x.size(0), -1) 25 | 26 | 27 | class PNet(nn.Module): 28 | 29 | def __init__(self): 30 | 31 | super(PNet, self).__init__() 32 | 33 | # suppose we have input with size HxW, then 34 | # after first layer: H - 2, 35 | # after pool: ceil((H - 2)/2), 36 | # after second conv: ceil((H - 2)/2) - 2, 37 | # after last conv: ceil((H - 2)/2) - 4, 38 | # and the same for W 39 | 40 | self.features = nn.Sequential(OrderedDict([ 41 | ('conv1', nn.Conv2d(3, 10, 3, 1)), 42 | ('prelu1', nn.PReLU(10)), 43 | ('pool1', nn.MaxPool2d(2, 2, ceil_mode = True)), 44 | 45 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 46 | ('prelu2', nn.PReLU(16)), 47 | 48 | ('conv3', nn.Conv2d(16, 32, 3, 1)), 49 | ('prelu3', nn.PReLU(32)) 50 | ])) 51 | 52 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 53 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 54 | 55 | weights = np.load("./align/pnet.npy", allow_pickle=True)[()] 56 | for n, p in self.named_parameters(): 57 | p.data = torch.FloatTensor(weights[n]) 58 | 59 | def forward(self, x): 60 | """ 61 | Arguments: 62 | x: a float tensor with shape [batch_size, 3, h, w]. 63 | Returns: 64 | b: a float tensor with shape [batch_size, 4, h', w']. 65 | a: a float tensor with shape [batch_size, 2, h', w']. 
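Note: softmax is applied over the channel dimension, so a[:, 1, :, :] is the face-probability map that run_first_stage() thresholds, and b holds the corresponding box-regression offsets.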
66 | """ 67 | x = self.features(x) 68 | a = self.conv4_1(x) 69 | b = self.conv4_2(x) 70 | a = F.softmax(a, dim=1) 71 | return b, a 72 | 73 | 74 | class RNet(nn.Module): 75 | 76 | def __init__(self): 77 | 78 | super(RNet, self).__init__() 79 | 80 | self.features = nn.Sequential(OrderedDict([ 81 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 82 | ('prelu1', nn.PReLU(28)), 83 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 84 | 85 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 86 | ('prelu2', nn.PReLU(48)), 87 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 88 | 89 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 90 | ('prelu3', nn.PReLU(64)), 91 | 92 | ('flatten', Flatten()), 93 | ('conv4', nn.Linear(576, 128)), 94 | ('prelu4', nn.PReLU(128)) 95 | ])) 96 | 97 | self.conv5_1 = nn.Linear(128, 2) 98 | self.conv5_2 = nn.Linear(128, 4) 99 | 100 | weights = np.load("./align/rnet.npy", allow_pickle=True)[()] 101 | for n, p in self.named_parameters(): 102 | p.data = torch.FloatTensor(weights[n]) 103 | 104 | def forward(self, x): 105 | """ 106 | Arguments: 107 | x: a float tensor with shape [batch_size, 3, h, w]. 108 | Returns: 109 | b: a float tensor with shape [batch_size, 4]. 110 | a: a float tensor with shape [batch_size, 2]. 111 | """ 112 | x = self.features(x) 113 | a = self.conv5_1(x) 114 | b = self.conv5_2(x) 115 | a = F.softmax(a, dim=1) 116 | return b, a 117 | 118 | 119 | class ONet(nn.Module): 120 | 121 | def __init__(self): 122 | 123 | super(ONet, self).__init__() 124 | 125 | self.features = nn.Sequential(OrderedDict([ 126 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 127 | ('prelu1', nn.PReLU(32)), 128 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 129 | 130 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 131 | ('prelu2', nn.PReLU(64)), 132 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 133 | 134 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 135 | ('prelu3', nn.PReLU(64)), 136 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode = True)), 137 | 138 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 139 | ('prelu4', nn.PReLU(128)), 140 | 141 | ('flatten', Flatten()), 142 | ('conv5', nn.Linear(1152, 256)), 143 | ('drop5', nn.Dropout(0.25)), 144 | ('prelu5', nn.PReLU(256)), 145 | ])) 146 | 147 | self.conv6_1 = nn.Linear(256, 2) 148 | self.conv6_2 = nn.Linear(256, 4) 149 | self.conv6_3 = nn.Linear(256, 10) 150 | 151 | weights = np.load("./align/onet.npy", allow_pickle=True)[()] 152 | for n, p in self.named_parameters(): 153 | p.data = torch.FloatTensor(weights[n]) 154 | 155 | def forward(self, x): 156 | """ 157 | Arguments: 158 | x: a float tensor with shape [batch_size, 3, h, w]. 159 | Returns: 160 | c: a float tensor with shape [batch_size, 10]. 161 | b: a float tensor with shape [batch_size, 4]. 162 | a: a float tensor with shape [batch_size, 2]. 
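Note: c holds the 10 landmark values laid out as (x1..x5, y1..y5), normalized to the bounding box; detector.py rescales them with the box width/height and adds the box origin. b are box-regression offsets and a is the softmaxed face probability.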
163 | """ 164 | x = self.features(x) 165 | a = self.conv6_1(x) 166 | b = self.conv6_2(x) 167 | c = self.conv6_3(x) 168 | a = F.softmax(a, dim=1) 169 | return c, b, a -------------------------------------------------------------------------------- /align/matlab_cp2tform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv, norm, lstsq 3 | from numpy.linalg import matrix_rank as rank 4 | 5 | 6 | class MatlabCp2tormException(Exception): 7 | def __str__(self): 8 | return "In File {}:{}".format( 9 | __file__, super.__str__(self)) 10 | 11 | def tformfwd(trans, uv): 12 | """ 13 | Function: 14 | ---------- 15 | apply affine transform 'trans' to uv 16 | 17 | Parameters: 18 | ---------- 19 | @trans: 3x3 np.array 20 | transform matrix 21 | @uv: Kx2 np.array 22 | each row is a pair of coordinates (x, y) 23 | 24 | Returns: 25 | ---------- 26 | @xy: Kx2 np.array 27 | each row is a pair of transformed coordinates (x, y) 28 | """ 29 | uv = np.hstack(( 30 | uv, np.ones((uv.shape[0], 1)) 31 | )) 32 | xy = np.dot(uv, trans) 33 | xy = xy[:, 0:-1] 34 | return xy 35 | 36 | 37 | def tforminv(trans, uv): 38 | """ 39 | Function: 40 | ---------- 41 | apply the inverse of affine transform 'trans' to uv 42 | 43 | Parameters: 44 | ---------- 45 | @trans: 3x3 np.array 46 | transform matrix 47 | @uv: Kx2 np.array 48 | each row is a pair of coordinates (x, y) 49 | 50 | Returns: 51 | ---------- 52 | @xy: Kx2 np.array 53 | each row is a pair of inverse-transformed coordinates (x, y) 54 | """ 55 | Tinv = inv(trans) 56 | xy = tformfwd(Tinv, uv) 57 | return xy 58 | 59 | 60 | def findNonreflectiveSimilarity(uv, xy, options=None): 61 | 62 | options = {'K': 2} 63 | 64 | K = options['K'] 65 | M = xy.shape[0] 66 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 67 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 68 | # print('--->x, y:\n', x, y 69 | 70 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 71 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 72 | X = np.vstack((tmp1, tmp2)) 73 | # print('--->X.shape: ', X.shape 74 | # print('X:\n', X 75 | 76 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 77 | v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 78 | U = np.vstack((u, v)) 79 | # print('--->U.shape: ', U.shape 80 | # print('U:\n', U 81 | 82 | # We know that X * r = U 83 | if rank(X) >= 2 * K: 84 | r, _, _, _ = lstsq(X, U) 85 | r = np.squeeze(r) 86 | else: 87 | raise Exception("cp2tform: two Unique Points Req") 88 | 89 | # print('--->r:\n', r 90 | 91 | sc = r[0] 92 | ss = r[1] 93 | tx = r[2] 94 | ty = r[3] 95 | 96 | Tinv = np.array([ 97 | [sc, -ss, 0], 98 | [ss, sc, 0], 99 | [tx, ty, 1] 100 | ]) 101 | 102 | # print('--->Tinv:\n', Tinv 103 | 104 | T = inv(Tinv) 105 | # print('--->T:\n', T 106 | 107 | T[:, 2] = np.array([0, 0, 1]) 108 | 109 | return T, Tinv 110 | 111 | 112 | def findSimilarity(uv, xy, options=None): 113 | 114 | options = {'K': 2} 115 | 116 | # uv = np.array(uv) 117 | # xy = np.array(xy) 118 | 119 | # Solve for trans1 120 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 121 | 122 | # Solve for trans2 123 | 124 | # manually reflect the xy data across the Y-axis 125 | xyR = xy 126 | xyR[:, 0] = -1 * xyR[:, 0] 127 | 128 | trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) 129 | 130 | # manually reflect the tform to undo the reflection done on xyR 131 | TreflectY = np.array([ 132 | [-1, 
0, 0], 133 | [0, 1, 0], 134 | [0, 0, 1] 135 | ]) 136 | 137 | trans2 = np.dot(trans2r, TreflectY) 138 | 139 | # Figure out if trans1 or trans2 is better 140 | xy1 = tformfwd(trans1, uv) 141 | norm1 = norm(xy1 - xy) 142 | 143 | xy2 = tformfwd(trans2, uv) 144 | norm2 = norm(xy2 - xy) 145 | 146 | if norm1 <= norm2: 147 | return trans1, trans1_inv 148 | else: 149 | trans2_inv = inv(trans2) 150 | return trans2, trans2_inv 151 | 152 | 153 | def get_similarity_transform(src_pts, dst_pts, reflective = True): 154 | """ 155 | Function: 156 | ---------- 157 | Find Similarity Transform Matrix 'trans': 158 | u = src_pts[:, 0] 159 | v = src_pts[:, 1] 160 | x = dst_pts[:, 0] 161 | y = dst_pts[:, 1] 162 | [x, y, 1] = [u, v, 1] * trans 163 | 164 | Parameters: 165 | ---------- 166 | @src_pts: Kx2 np.array 167 | source points, each row is a pair of coordinates (x, y) 168 | @dst_pts: Kx2 np.array 169 | destination points, each row is a pair of transformed 170 | coordinates (x, y) 171 | @reflective: True or False 172 | if True: 173 | use reflective similarity transform 174 | else: 175 | use non-reflective similarity transform 176 | 177 | Returns: 178 | ---------- 179 | @trans: 3x3 np.array 180 | transform matrix from uv to xy 181 | trans_inv: 3x3 np.array 182 | inverse of trans, transform matrix from xy to uv 183 | """ 184 | 185 | if reflective: 186 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 187 | else: 188 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 189 | 190 | return trans, trans_inv 191 | 192 | 193 | def cvt_tform_mat_for_cv2(trans): 194 | """ 195 | Function: 196 | ---------- 197 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 198 | directly used by cv2.warpAffine(): 199 | u = src_pts[:, 0] 200 | v = src_pts[:, 1] 201 | x = dst_pts[:, 0] 202 | y = dst_pts[:, 1] 203 | [x, y].T = cv_trans * [u, v, 1].T 204 | 205 | Parameters: 206 | ---------- 207 | @trans: 3x3 np.array 208 | transform matrix from uv to xy 209 | 210 | Returns: 211 | ---------- 212 | @cv2_trans: 2x3 np.array 213 | transform matrix from src_pts to dst_pts, could be directly used 214 | for cv2.warpAffine() 215 | """ 216 | cv2_trans = trans[:, 0:2].T 217 | 218 | return cv2_trans 219 | 220 | 221 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective = True): 222 | """ 223 | Function: 224 | ---------- 225 | Find Similarity Transform Matrix 'cv2_trans' which could be 226 | directly used by cv2.warpAffine(): 227 | u = src_pts[:, 0] 228 | v = src_pts[:, 1] 229 | x = dst_pts[:, 0] 230 | y = dst_pts[:, 1] 231 | [x, y].T = cv_trans * [u, v, 1].T 232 | 233 | Parameters: 234 | ---------- 235 | @src_pts: Kx2 np.array 236 | source points, each row is a pair of coordinates (x, y) 237 | @dst_pts: Kx2 np.array 238 | destination points, each row is a pair of transformed 239 | coordinates (x, y) 240 | reflective: True or False 241 | if True: 242 | use reflective similarity transform 243 | else: 244 | use non-reflective similarity transform 245 | 246 | Returns: 247 | ---------- 248 | @cv2_trans: 2x3 np.array 249 | transform matrix from src_pts to dst_pts, could be directly used 250 | for cv2.warpAffine() 251 | """ 252 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 253 | cv2_trans = cvt_tform_mat_for_cv2(trans) 254 | 255 | return cv2_trans 256 | 257 | 258 | if __name__ == '__main__': 259 | """ 260 | u = [0, 6, -2] 261 | v = [0, 3, 5] 262 | x = [-1, 0, 4] 263 | y = [-1, -10, 4] 264 | 265 | # In Matlab, run: 266 | # 267 | # uv = [u'; v']; 268 | # xy = [x'; y']; 269 | # 
tform_sim=cp2tform(uv,xy,'similarity'); 270 | # 271 | # trans = tform_sim.tdata.T 272 | # ans = 273 | # -0.0764 -1.6190 0 274 | # 1.6190 -0.0764 0 275 | # -3.2156 0.0290 1.0000 276 | # trans_inv = tform_sim.tdata.Tinv 277 | # ans = 278 | # 279 | # -0.0291 0.6163 0 280 | # -0.6163 -0.0291 0 281 | # -0.0756 1.9826 1.0000 282 | # xy_m=tformfwd(tform_sim, u,v) 283 | # 284 | # xy_m = 285 | # 286 | # -3.2156 0.0290 287 | # 1.1833 -9.9143 288 | # 5.0323 2.8853 289 | # uv_m=tforminv(tform_sim, x,y) 290 | # 291 | # uv_m = 292 | # 293 | # 0.5698 1.3953 294 | # 6.0872 2.2733 295 | # -2.6570 4.3314 296 | """ 297 | u = [0, 6, -2] 298 | v = [0, 3, 5] 299 | x = [-1, 0, 4] 300 | y = [-1, -10, 4] 301 | 302 | uv = np.array((u, v)).T 303 | xy = np.array((x, y)).T 304 | 305 | print("\n--->uv:") 306 | print(uv) 307 | print("\n--->xy:") 308 | print(xy) 309 | 310 | trans, trans_inv = get_similarity_transform(uv, xy) 311 | 312 | print("\n--->trans matrix:") 313 | print(trans) 314 | 315 | print("\n--->trans_inv matrix:") 316 | print(trans_inv) 317 | 318 | print("\n---> apply transform to uv") 319 | print("\nxy_m = uv_augmented * trans") 320 | uv_aug = np.hstack(( 321 | uv, np.ones((uv.shape[0], 1)) 322 | )) 323 | xy_m = np.dot(uv_aug, trans) 324 | print(xy_m) 325 | 326 | print("\nxy_m = tformfwd(trans, uv)") 327 | xy_m = tformfwd(trans, uv) 328 | print(xy_m) 329 | 330 | print("\n---> apply inverse transform to xy") 331 | print("\nuv_m = xy_augmented * trans_inv") 332 | xy_aug = np.hstack(( 333 | xy, np.ones((xy.shape[0], 1)) 334 | )) 335 | uv_m = np.dot(xy_aug, trans_inv) 336 | print(uv_m) 337 | 338 | print("\nuv_m = tformfwd(trans_inv, xy)") 339 | uv_m = tformfwd(trans_inv, xy) 340 | print(uv_m) 341 | 342 | uv_m = tforminv(trans, xy) 343 | print("\nuv_m = tforminv(trans, xy)") 344 | print(uv_m) -------------------------------------------------------------------------------- /align/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/onet.npy -------------------------------------------------------------------------------- /align/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/pnet.npy -------------------------------------------------------------------------------- /align/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/rnet.npy -------------------------------------------------------------------------------- /align/visualization_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | 4 | def show_results(img, bounding_boxes, facial_landmarks = []): 5 | """Draw bounding boxes and facial landmarks. 6 | Arguments: 7 | img: an instance of PIL.Image. 8 | bounding_boxes: a float numpy array of shape [n, 5]. 9 | facial_landmarks: a float numpy array of shape [n, 10]. 10 | Returns: 11 | an instance of PIL.Image. 
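Note: each row of facial_landmarks is laid out as (x1..x5, y1..y5), which is why the drawing loop below pairs p[i] with p[i + 5] when plotting the five landmark points.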
12 | """ 13 | img_copy = img.copy() 14 | draw = ImageDraw.Draw(img_copy) 15 | 16 | for b in bounding_boxes: 17 | draw.rectangle([ 18 | (b[0], b[1]), (b[2], b[3]) 19 | ], outline='blue') 20 | 21 | inx = 0 22 | for p in facial_landmarks: 23 | for i in range(5): 24 | draw.ellipse([ 25 | (p[i] - 1.0, p[i + 5] - 1.0), 26 | (p[i] + 1.0, p[i + 5] + 1.0) 27 | ], fill='red') 28 | 29 | img_copy.show() 30 | 31 | 32 | -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backbone/model_irse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \ 4 | AdaptiveAvgPool2d, Sequential, Module 5 | from collections import namedtuple 6 | 7 | 8 | # Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152'] 9 | 10 | 11 | class Flatten(Module): 12 | def forward(self, input): 13 | return input.view(input.size(0), -1) 14 | 15 | 16 | def l2_norm(input, axis=1): 17 | norm = torch.norm(input, 2, axis, True) 18 | output = torch.div(input, norm) 19 | 20 | return output 21 | 22 | 23 | class SEModule(Module): 24 | def __init__(self, channels, reduction): 25 | super(SEModule, self).__init__() 26 | self.avg_pool = AdaptiveAvgPool2d(1) 27 | self.fc1 = Conv2d( 28 | channels, channels // reduction, kernel_size=1, padding=0, bias=False) 29 | 30 | nn.init.xavier_uniform_(self.fc1.weight.data) 31 | 32 | self.relu = ReLU(inplace=True) 33 | self.fc2 = Conv2d( 34 | channels // reduction, channels, kernel_size=1, padding=0, bias=False) 35 | 36 | self.sigmoid = Sigmoid() 37 | 38 | def forward(self, x): 39 | module_input = x 40 | x = self.avg_pool(x) 41 | x = self.fc1(x) 42 | x = self.relu(x) 43 | x = self.fc2(x) 44 | x = self.sigmoid(x) 45 | 46 | return module_input * x 47 | 48 | 49 | class bottleneck_IR(Module): 50 | def __init__(self, in_channel, depth, stride): 51 | super(bottleneck_IR, self).__init__() 52 | if in_channel == depth: 53 | self.shortcut_layer = MaxPool2d(1, stride) 54 | else: 55 | self.shortcut_layer = Sequential( 56 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth)) 57 | self.res_layer = Sequential( 58 | BatchNorm2d(in_channel), 59 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), 60 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)) 61 | 62 | def forward(self, x): 63 | shortcut = self.shortcut_layer(x) 64 | res = self.res_layer(x) 65 | 66 | return res + shortcut 67 | 68 | 69 | class bottleneck_IR_SE(Module): 70 | def __init__(self, in_channel, depth, stride): 71 | super(bottleneck_IR_SE, self).__init__() 72 | if in_channel == depth: 73 | self.shortcut_layer = MaxPool2d(1, stride) 74 | else: 75 | self.shortcut_layer = Sequential( 76 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 77 | BatchNorm2d(depth)) 78 | self.res_layer = Sequential( 79 | BatchNorm2d(in_channel), 80 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), 81 | PReLU(depth), 82 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), 83 | BatchNorm2d(depth), 84 | SEModule(depth, 16) 85 | ) 86 | 87 | def forward(self, x): 88 | shortcut = self.shortcut_layer(x) 89 | res = self.res_layer(x) 90 | 91 | return res + shortcut 92 | 93 | 94 | class 
Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 95 | '''A named tuple describing a ResNet block.''' 96 | 97 | 98 | def get_block(in_channel, depth, num_units, stride=2): 99 | 100 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] 101 | 102 | 103 | def get_blocks(num_layers): 104 | if num_layers == 50: 105 | blocks = [ 106 | get_block(in_channel=64, depth=64, num_units=3), 107 | get_block(in_channel=64, depth=128, num_units=4), 108 | get_block(in_channel=128, depth=256, num_units=14), 109 | get_block(in_channel=256, depth=512, num_units=3) 110 | ] 111 | elif num_layers == 100: 112 | blocks = [ 113 | get_block(in_channel=64, depth=64, num_units=3), 114 | get_block(in_channel=64, depth=128, num_units=13), 115 | get_block(in_channel=128, depth=256, num_units=30), 116 | get_block(in_channel=256, depth=512, num_units=3) 117 | ] 118 | elif num_layers == 152: 119 | blocks = [ 120 | get_block(in_channel=64, depth=64, num_units=3), 121 | get_block(in_channel=64, depth=128, num_units=8), 122 | get_block(in_channel=128, depth=256, num_units=36), 123 | get_block(in_channel=256, depth=512, num_units=3) 124 | ] 125 | 126 | return blocks 127 | 128 | 129 | class Backbone(Module): 130 | def __init__(self, input_size, num_layers, mode='ir'): 131 | super(Backbone, self).__init__() 132 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 133 | assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152" 134 | assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se" 135 | blocks = get_blocks(num_layers) 136 | if mode == 'ir': 137 | unit_module = bottleneck_IR 138 | elif mode == 'ir_se': 139 | unit_module = bottleneck_IR_SE 140 | self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), 141 | BatchNorm2d(64), 142 | PReLU(64)) 143 | if input_size[0] == 112: 144 | self.output_layer = Sequential(BatchNorm2d(512), 145 | Dropout(), 146 | Flatten(), 147 | Linear(512 * 7 * 7, 512), 148 | BatchNorm1d(512)) 149 | else: 150 | self.output_layer = Sequential(BatchNorm2d(512), 151 | Dropout(), 152 | Flatten(), 153 | Linear(512 * 14 * 14, 512), 154 | BatchNorm1d(512)) 155 | 156 | modules = [] 157 | for block in blocks: 158 | for bottleneck in block: 159 | modules.append( 160 | unit_module(bottleneck.in_channel, 161 | bottleneck.depth, 162 | bottleneck.stride)) 163 | self.body = Sequential(*modules) 164 | 165 | self._initialize_weights() 166 | 167 | def forward(self, x): 168 | x = self.input_layer(x) 169 | x = self.body(x) 170 | x = self.output_layer(x) 171 | 172 | return x 173 | 174 | def _initialize_weights(self): 175 | for m in self.modules(): 176 | if isinstance(m, nn.Conv2d): 177 | nn.init.xavier_uniform_(m.weight.data) 178 | if m.bias is not None: 179 | m.bias.data.zero_() 180 | elif isinstance(m, nn.BatchNorm2d): 181 | m.weight.data.fill_(1) 182 | m.bias.data.zero_() 183 | elif isinstance(m, nn.BatchNorm1d): 184 | m.weight.data.fill_(1) 185 | m.bias.data.zero_() 186 | elif isinstance(m, nn.Linear): 187 | nn.init.xavier_uniform_(m.weight.data) 188 | if m.bias is not None: 189 | m.bias.data.zero_() 190 | 191 | 192 | def IR_50(input_size): 193 | """Constructs a ir-50 model. 194 | """ 195 | model = Backbone(input_size, 50, 'ir') 196 | 197 | return model 198 | 199 | 200 | def IR_101(input_size): 201 | """Constructs a ir-101 model. 
202 | """ 203 | model = Backbone(input_size, 100, 'ir') 204 | 205 | return model 206 | 207 | 208 | def IR_152(input_size): 209 | """Constructs a ir-152 model. 210 | """ 211 | model = Backbone(input_size, 152, 'ir') 212 | 213 | return model 214 | 215 | 216 | def IR_SE_50(input_size): 217 | """Constructs a ir_se-50 model. 218 | """ 219 | model = Backbone(input_size, 50, 'ir_se') 220 | 221 | return model 222 | 223 | 224 | def IR_SE_101(input_size): 225 | """Constructs a ir_se-101 model. 226 | """ 227 | model = Backbone(input_size, 100, 'ir_se') 228 | 229 | return model 230 | 231 | 232 | def IR_SE_152(input_size): 233 | """Constructs a ir_se-152 model. 234 | """ 235 | model = Backbone(input_size, 152, 'ir_se') 236 | 237 | return model 238 | -------------------------------------------------------------------------------- /backbone/model_resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, ReLU, Dropout, MaxPool2d, Sequential, Module 3 | 4 | 5 | # Support: ['ResNet_50', 'ResNet_101', 'ResNet_152'] 6 | 7 | 8 | def conv3x3(in_planes, out_planes, stride = 1): 9 | """3x3 convolution with padding""" 10 | 11 | return Conv2d(in_planes, out_planes, kernel_size = 3, stride = stride, 12 | padding = 1, bias = False) 13 | 14 | 15 | def conv1x1(in_planes, out_planes, stride = 1): 16 | """1x1 convolution""" 17 | 18 | return Conv2d(in_planes, out_planes, kernel_size = 1, stride = stride, bias = False) 19 | 20 | 21 | class BasicBlock(Module): 22 | expansion = 1 23 | 24 | def __init__(self, inplanes, planes, stride = 1, downsample = None): 25 | super(BasicBlock, self).__init__() 26 | self.conv1 = conv3x3(inplanes, planes, stride) 27 | self.bn1 = BatchNorm2d(planes) 28 | self.relu = ReLU(inplace = True) 29 | self.conv2 = conv3x3(planes, planes) 30 | self.bn2 = BatchNorm2d(planes) 31 | self.downsample = downsample 32 | self.stride = stride 33 | 34 | def forward(self, x): 35 | identity = x 36 | 37 | out = self.conv1(x) 38 | out = self.bn1(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv2(out) 42 | out = self.bn2(out) 43 | 44 | if self.downsample is not None: 45 | identity = self.downsample(x) 46 | 47 | out += identity 48 | out = self.relu(out) 49 | 50 | return out 51 | 52 | 53 | class Bottleneck(Module): 54 | expansion = 4 55 | 56 | def __init__(self, inplanes, planes, stride = 1, downsample = None): 57 | super(Bottleneck, self).__init__() 58 | self.conv1 = conv1x1(inplanes, planes) 59 | self.bn1 = BatchNorm2d(planes) 60 | self.conv2 = conv3x3(planes, planes, stride) 61 | self.bn2 = BatchNorm2d(planes) 62 | self.conv3 = conv1x1(planes, planes * self.expansion) 63 | self.bn3 = BatchNorm2d(planes * self.expansion) 64 | self.relu = ReLU(inplace = True) 65 | self.downsample = downsample 66 | self.stride = stride 67 | 68 | def forward(self, x): 69 | identity = x 70 | 71 | out = self.conv1(x) 72 | out = self.bn1(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv2(out) 76 | out = self.bn2(out) 77 | out = self.relu(out) 78 | 79 | out = self.conv3(out) 80 | out = self.bn3(out) 81 | 82 | if self.downsample is not None: 83 | identity = self.downsample(x) 84 | 85 | out += identity 86 | out = self.relu(out) 87 | 88 | return out 89 | 90 | 91 | class ResNet(Module): 92 | 93 | def __init__(self, input_size, block, layers, zero_init_residual = True): 94 | super(ResNet, self).__init__() 95 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 96 | 
self.inplanes = 64 97 | self.conv1 = Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False) 98 | self.bn1 = BatchNorm2d(64) 99 | self.relu = ReLU(inplace = True) 100 | self.maxpool = MaxPool2d(kernel_size = 3, stride = 2, padding = 1) 101 | self.layer1 = self._make_layer(block, 64, layers[0]) 102 | self.layer2 = self._make_layer(block, 128, layers[1], stride = 2) 103 | self.layer3 = self._make_layer(block, 256, layers[2], stride = 2) 104 | self.layer4 = self._make_layer(block, 512, layers[3], stride = 2) 105 | 106 | self.bn_o1 = BatchNorm2d(2048) 107 | self.dropout = Dropout() 108 | if input_size[0] == 112: 109 | self.fc = Linear(2048 * 4 * 4, 512) 110 | else: 111 | self.fc = Linear(2048 * 8 * 8, 512) 112 | self.bn_o2 = BatchNorm1d(512) 113 | 114 | for m in self.modules(): 115 | if isinstance(m, Conv2d): 116 | nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu') 117 | elif isinstance(m, BatchNorm2d): 118 | nn.init.constant_(m.weight, 1) 119 | nn.init.constant_(m.bias, 0) 120 | 121 | # Zero-initialize the last BN in each residual branch, 122 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 123 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 124 | if zero_init_residual: 125 | for m in self.modules(): 126 | if isinstance(m, Bottleneck): 127 | nn.init.constant_(m.bn3.weight, 0) 128 | elif isinstance(m, BasicBlock): 129 | nn.init.constant_(m.bn2.weight, 0) 130 | 131 | def _make_layer(self, block, planes, blocks, stride = 1): 132 | downsample = None 133 | if stride != 1 or self.inplanes != planes * block.expansion: 134 | downsample = Sequential( 135 | conv1x1(self.inplanes, planes * block.expansion, stride), 136 | BatchNorm2d(planes * block.expansion), 137 | ) 138 | 139 | layers = [] 140 | layers.append(block(self.inplanes, planes, stride, downsample)) 141 | self.inplanes = planes * block.expansion 142 | for _ in range(1, blocks): 143 | layers.append(block(self.inplanes, planes)) 144 | 145 | return Sequential(*layers) 146 | 147 | def forward(self, x): 148 | x = self.conv1(x) 149 | x = self.bn1(x) 150 | x = self.relu(x) 151 | x = self.maxpool(x) 152 | 153 | x = self.layer1(x) 154 | x = self.layer2(x) 155 | x = self.layer3(x) 156 | x = self.layer4(x) 157 | 158 | x = self.bn_o1(x) 159 | x = self.dropout(x) 160 | x = x.view(x.size(0), -1) 161 | x = self.fc(x) 162 | x = self.bn_o2(x) 163 | 164 | return x 165 | 166 | 167 | def ResNet_50(input_size, **kwargs): 168 | """Constructs a ResNet-50 model. 169 | """ 170 | model = ResNet(input_size, Bottleneck, [3, 4, 6, 3], **kwargs) 171 | 172 | return model 173 | 174 | 175 | def ResNet_101(input_size, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | """ 178 | model = ResNet(input_size, Bottleneck, [3, 4, 23, 3], **kwargs) 179 | 180 | return model 181 | 182 | 183 | def ResNet_152(input_size, **kwargs): 184 | """Constructs a ResNet-152 model. 
185 | """ 186 | model = ResNet(input_size, Bottleneck, [3, 8, 36, 3], **kwargs) 187 | 188 | return model 189 | -------------------------------------------------------------------------------- /imgs/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/9.jpg -------------------------------------------------------------------------------- /imgs/align.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/align.jpg -------------------------------------------------------------------------------- /imgs/detect_landmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/detect_landmark.png -------------------------------------------------------------------------------- /imgs/parsing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/parsing.jpg -------------------------------------------------------------------------------- /imgs/parsing_maps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/parsing_maps.png -------------------------------------------------------------------------------- /imgs/person_1/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/17.jpg -------------------------------------------------------------------------------- /imgs/person_1/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/18.jpg -------------------------------------------------------------------------------- /imgs/person_1/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/19.jpg -------------------------------------------------------------------------------- /imgs/person_1/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/20.jpg -------------------------------------------------------------------------------- /imgs/person_2/151.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/151.jpg -------------------------------------------------------------------------------- /imgs/person_2/152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/152.jpg -------------------------------------------------------------------------------- /imgs/person_2/153.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/153.jpg -------------------------------------------------------------------------------- /imgs/person_2/154.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/154.jpg -------------------------------------------------------------------------------- /imgs/single.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/single.jpg -------------------------------------------------------------------------------- /parsing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/parsing/__init__.py -------------------------------------------------------------------------------- /parsing/face_parsing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import os 5 | import cv2 6 | import torch 7 | import os.path as osp 8 | import numpy as np 9 | from PIL import Image 10 | import torchvision.transforms as transforms 11 | from .model import BiSeNet 12 | 13 | 14 | def vis_parsing_maps(im, parsing_anno, stride=1, show=False, save_im=False, save_path='imgs/'): 15 | 16 | part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], 17 | [255, 0, 85], [255, 0, 170], 18 | [0, 255, 0], [85, 255, 0], [170, 255, 0], 19 | [0, 255, 85], [0, 255, 170], 20 | [0, 0, 255], [85, 0, 255], [170, 0, 255], 21 | [0, 85, 255], [0, 170, 255], 22 | [255, 255, 0], [255, 255, 85], [255, 255, 170], 23 | [255, 0, 255], [255, 85, 255], [255, 170, 255], 24 | [0, 255, 255], [85, 255, 255], [170, 255, 255]] 25 | 26 | im = np.array(im) 27 | vis_im = im.copy().astype(np.uint8) 28 | vis_parsing_anno = parsing_anno.copy().astype(np.uint8) 29 | vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) 30 | vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255 31 | 32 | num_of_class = np.max(vis_parsing_anno) 33 | 34 | for pi in range(1, num_of_class + 1): 35 | index = np.where(vis_parsing_anno == pi) 36 | vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi] 37 | 38 | vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) 39 | # print(vis_parsing_anno_color.shape, vis_im.shape) 40 | vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0) 41 | 42 | if show: 43 | cv2.imshow('parsing res', vis_im) 44 | cv2.waitKey(0) 45 | cv2.destroyAllWindows() 46 | 47 | # Save result or not 48 | if save_im: 49 | if not os.path.exists(save_path): 50 | os.makedirs(save_path) 51 | cv2.imwrite(osp.join(save_path, 'parsing_maps.png'), vis_parsing_anno) 52 | cv2.imwrite(osp.join(save_path, 'parsing.jpg'), vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 53 | 54 | # return vis_im 55 | 56 | 57 | def parsing(imgs, cp='checkpoint/face_parsing.pth'): 58 | 59 | n_classes = 19 60 | net = BiSeNet(n_classes=n_classes) 61 | net.cuda() 62 | net.load_state_dict(torch.load(cp)) 63 | net.eval() 64 | 65 | to_tensor = transforms.Compose([ 66 | 
transforms.ToTensor(), 67 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 68 | ]) 69 | 70 | with torch.no_grad(): 71 | if not isinstance(imgs, list): 72 | shape = imgs.size 73 | image = imgs.resize((512, 512), Image.BILINEAR) 74 | img = to_tensor(image) 75 | img = torch.unsqueeze(img, 0) 76 | img = img.cuda() 77 | out = net(img)[0] 78 | parsing_maps = out.squeeze(0).cpu().numpy().argmax(0).astype('float32') 79 | parsing_maps = cv2.resize(parsing_maps, shape, interpolation=cv2.INTER_NEAREST) 80 | return parsing_maps 81 | 82 | else: 83 | parsing_list = [] 84 | for img in imgs: 85 | shape = img.size 86 | image = img.resize((512, 512), Image.BILINEAR) 87 | img = to_tensor(image) 88 | img = torch.unsqueeze(img, 0) 89 | img = img.cuda() 90 | out = net(img)[0] 91 | parsing_maps = out.squeeze(0).cpu().numpy().argmax(0).astype('float32') 92 | parsing_maps = cv2.resize(parsing_maps, shape, interpolation=cv2.INTER_NEAREST) 93 | parsing_list.append(parsing_maps) 94 | return parsing_list 95 | 96 | 97 | -------------------------------------------------------------------------------- /parsing/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from .resnet import Resnet18 9 | 10 | 11 | 12 | class ConvBNReLU(nn.Module): 13 | def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): 14 | super(ConvBNReLU, self).__init__() 15 | self.conv = nn.Conv2d(in_chan, 16 | out_chan, 17 | kernel_size = ks, 18 | stride = stride, 19 | padding = padding, 20 | bias = False) 21 | self.bn = nn.BatchNorm2d(out_chan) 22 | self.init_weight() 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = F.relu(self.bn(x)) 27 | return x 28 | 29 | def init_weight(self): 30 | for ly in self.children(): 31 | if isinstance(ly, nn.Conv2d): 32 | nn.init.kaiming_normal_(ly.weight, a=1) 33 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 34 | 35 | class BiSeNetOutput(nn.Module): 36 | def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): 37 | super(BiSeNetOutput, self).__init__() 38 | self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) 39 | self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False) 40 | self.init_weight() 41 | 42 | def forward(self, x): 43 | x = self.conv(x) 44 | x = self.conv_out(x) 45 | return x 46 | 47 | def init_weight(self): 48 | for ly in self.children(): 49 | if isinstance(ly, nn.Conv2d): 50 | nn.init.kaiming_normal_(ly.weight, a=1) 51 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 52 | 53 | def get_params(self): 54 | wd_params, nowd_params = [], [] 55 | for name, module in self.named_modules(): 56 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 57 | wd_params.append(module.weight) 58 | if not module.bias is None: 59 | nowd_params.append(module.bias) 60 | elif isinstance(module, nn.BatchNorm2d): 61 | nowd_params += list(module.parameters()) 62 | return wd_params, nowd_params 63 | 64 | 65 | class AttentionRefinementModule(nn.Module): 66 | def __init__(self, in_chan, out_chan, *args, **kwargs): 67 | super(AttentionRefinementModule, self).__init__() 68 | self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) 69 | self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) 70 | self.bn_atten = nn.BatchNorm2d(out_chan) 71 | self.sigmoid_atten = nn.Sigmoid() 72 | self.init_weight() 73 
| 74 | def forward(self, x): 75 | feat = self.conv(x) 76 | atten = F.avg_pool2d(feat, feat.size()[2:]) 77 | atten = self.conv_atten(atten) 78 | atten = self.bn_atten(atten) 79 | atten = self.sigmoid_atten(atten) 80 | out = torch.mul(feat, atten) 81 | return out 82 | 83 | def init_weight(self): 84 | for ly in self.children(): 85 | if isinstance(ly, nn.Conv2d): 86 | nn.init.kaiming_normal_(ly.weight, a=1) 87 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 88 | 89 | 90 | class ContextPath(nn.Module): 91 | def __init__(self, *args, **kwargs): 92 | super(ContextPath, self).__init__() 93 | self.resnet = Resnet18() 94 | self.arm16 = AttentionRefinementModule(256, 128) 95 | self.arm32 = AttentionRefinementModule(512, 128) 96 | self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) 97 | self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) 98 | self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) 99 | 100 | self.init_weight() 101 | 102 | def forward(self, x): 103 | H0, W0 = x.size()[2:] 104 | feat8, feat16, feat32 = self.resnet(x) 105 | H8, W8 = feat8.size()[2:] 106 | H16, W16 = feat16.size()[2:] 107 | H32, W32 = feat32.size()[2:] 108 | 109 | avg = F.avg_pool2d(feat32, feat32.size()[2:]) 110 | avg = self.conv_avg(avg) 111 | avg_up = F.interpolate(avg, (H32, W32), mode='nearest') 112 | 113 | feat32_arm = self.arm32(feat32) 114 | feat32_sum = feat32_arm + avg_up 115 | feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest') 116 | feat32_up = self.conv_head32(feat32_up) 117 | 118 | feat16_arm = self.arm16(feat16) 119 | feat16_sum = feat16_arm + feat32_up 120 | feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest') 121 | feat16_up = self.conv_head16(feat16_up) 122 | 123 | return feat8, feat16_up, feat32_up # x8, x8, x16 124 | 125 | def init_weight(self): 126 | for ly in self.children(): 127 | if isinstance(ly, nn.Conv2d): 128 | nn.init.kaiming_normal_(ly.weight, a=1) 129 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 130 | 131 | def get_params(self): 132 | wd_params, nowd_params = [], [] 133 | for name, module in self.named_modules(): 134 | if isinstance(module, (nn.Linear, nn.Conv2d)): 135 | wd_params.append(module.weight) 136 | if not module.bias is None: 137 | nowd_params.append(module.bias) 138 | elif isinstance(module, nn.BatchNorm2d): 139 | nowd_params += list(module.parameters()) 140 | return wd_params, nowd_params 141 | 142 | 143 | ### This is not used, since I replace this with the resnet feature with the same size 144 | class SpatialPath(nn.Module): 145 | def __init__(self, *args, **kwargs): 146 | super(SpatialPath, self).__init__() 147 | self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) 148 | self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) 149 | self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) 150 | self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) 151 | self.init_weight() 152 | 153 | def forward(self, x): 154 | feat = self.conv1(x) 155 | feat = self.conv2(feat) 156 | feat = self.conv3(feat) 157 | feat = self.conv_out(feat) 158 | return feat 159 | 160 | def init_weight(self): 161 | for ly in self.children(): 162 | if isinstance(ly, nn.Conv2d): 163 | nn.init.kaiming_normal_(ly.weight, a=1) 164 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 165 | 166 | def get_params(self): 167 | wd_params, nowd_params = [], [] 168 | for name, module in self.named_modules(): 169 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 170 | wd_params.append(module.weight) 171 | 
if not module.bias is None: 172 | nowd_params.append(module.bias) 173 | elif isinstance(module, nn.BatchNorm2d): 174 | nowd_params += list(module.parameters()) 175 | return wd_params, nowd_params 176 | 177 | 178 | class FeatureFusionModule(nn.Module): 179 | def __init__(self, in_chan, out_chan, *args, **kwargs): 180 | super(FeatureFusionModule, self).__init__() 181 | self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) 182 | self.conv1 = nn.Conv2d(out_chan, 183 | out_chan//4, 184 | kernel_size = 1, 185 | stride = 1, 186 | padding = 0, 187 | bias = False) 188 | self.conv2 = nn.Conv2d(out_chan//4, 189 | out_chan, 190 | kernel_size = 1, 191 | stride = 1, 192 | padding = 0, 193 | bias = False) 194 | self.relu = nn.ReLU(inplace=True) 195 | self.sigmoid = nn.Sigmoid() 196 | self.init_weight() 197 | 198 | def forward(self, fsp, fcp): 199 | fcat = torch.cat([fsp, fcp], dim=1) 200 | feat = self.convblk(fcat) 201 | atten = F.avg_pool2d(feat, feat.size()[2:]) 202 | atten = self.conv1(atten) 203 | atten = self.relu(atten) 204 | atten = self.conv2(atten) 205 | atten = self.sigmoid(atten) 206 | feat_atten = torch.mul(feat, atten) 207 | feat_out = feat_atten + feat 208 | return feat_out 209 | 210 | def init_weight(self): 211 | for ly in self.children(): 212 | if isinstance(ly, nn.Conv2d): 213 | nn.init.kaiming_normal_(ly.weight, a=1) 214 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 215 | 216 | def get_params(self): 217 | wd_params, nowd_params = [], [] 218 | for name, module in self.named_modules(): 219 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 220 | wd_params.append(module.weight) 221 | if not module.bias is None: 222 | nowd_params.append(module.bias) 223 | elif isinstance(module, nn.BatchNorm2d): 224 | nowd_params += list(module.parameters()) 225 | return wd_params, nowd_params 226 | 227 | 228 | class BiSeNet(nn.Module): 229 | def __init__(self, n_classes, *args, **kwargs): 230 | super(BiSeNet, self).__init__() 231 | self.cp = ContextPath() 232 | ## here self.sp is deleted 233 | self.ffm = FeatureFusionModule(256, 256) 234 | self.conv_out = BiSeNetOutput(256, 256, n_classes) 235 | self.conv_out16 = BiSeNetOutput(128, 64, n_classes) 236 | self.conv_out32 = BiSeNetOutput(128, 64, n_classes) 237 | self.init_weight() 238 | 239 | def forward(self, x): 240 | H, W = x.size()[2:] 241 | feat_res8, feat_cp8, feat_cp16 = self.cp(x) # here return res3b1 feature 242 | feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature 243 | feat_fuse = self.ffm(feat_sp, feat_cp8) 244 | 245 | feat_out = self.conv_out(feat_fuse) 246 | feat_out16 = self.conv_out16(feat_cp8) 247 | feat_out32 = self.conv_out32(feat_cp16) 248 | 249 | feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True) 250 | feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True) 251 | feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True) 252 | return feat_out, feat_out16, feat_out32 253 | 254 | def init_weight(self): 255 | for ly in self.children(): 256 | if isinstance(ly, nn.Conv2d): 257 | nn.init.kaiming_normal_(ly.weight, a=1) 258 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 259 | 260 | def get_params(self): 261 | wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] 262 | for name, child in self.named_children(): 263 | child_wd_params, child_nowd_params = child.get_params() 264 | if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput): 265 | 
lr_mul_wd_params += child_wd_params 266 | lr_mul_nowd_params += child_nowd_params 267 | else: 268 | wd_params += child_wd_params 269 | nowd_params += child_nowd_params 270 | return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params 271 | 272 | 273 | if __name__ == "__main__": 274 | net = BiSeNet(19) 275 | net.cuda() 276 | net.eval() 277 | in_ten = torch.randn(16, 3, 640, 480).cuda() 278 | out, out16, out32 = net(in_ten) 279 | print(out.shape) 280 | 281 | net.get_params() 282 | -------------------------------------------------------------------------------- /parsing/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.utils.model_zoo as modelzoo 8 | 9 | 10 | resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | """3x3 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | 19 | class BasicBlock(nn.Module): 20 | def __init__(self, in_chan, out_chan, stride=1): 21 | super(BasicBlock, self).__init__() 22 | self.conv1 = conv3x3(in_chan, out_chan, stride) 23 | self.bn1 = nn.BatchNorm2d(out_chan) 24 | self.conv2 = conv3x3(out_chan, out_chan) 25 | self.bn2 = nn.BatchNorm2d(out_chan) 26 | self.relu = nn.ReLU(inplace=True) 27 | self.downsample = None 28 | if in_chan != out_chan or stride != 1: 29 | self.downsample = nn.Sequential( 30 | nn.Conv2d(in_chan, out_chan, 31 | kernel_size=1, stride=stride, bias=False), 32 | nn.BatchNorm2d(out_chan), 33 | ) 34 | 35 | def forward(self, x): 36 | residual = self.conv1(x) 37 | residual = F.relu(self.bn1(residual)) 38 | residual = self.conv2(residual) 39 | residual = self.bn2(residual) 40 | 41 | shortcut = x 42 | if self.downsample is not None: 43 | shortcut = self.downsample(x) 44 | 45 | out = shortcut + residual 46 | out = self.relu(out) 47 | return out 48 | 49 | 50 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 51 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 52 | for i in range(bnum-1): 53 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 54 | return nn.Sequential(*layers) 55 | 56 | 57 | class Resnet18(nn.Module): 58 | def __init__(self): 59 | super(Resnet18, self).__init__() 60 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 61 | bias=False) 62 | self.bn1 = nn.BatchNorm2d(64) 63 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 64 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 65 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 66 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 67 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | x = self.conv1(x) 72 | x = F.relu(self.bn1(x)) 73 | x = self.maxpool(x) 74 | 75 | x = self.layer1(x) 76 | feat8 = self.layer2(x) # 1/8 77 | feat16 = self.layer3(feat8) # 1/16 78 | feat32 = self.layer4(feat16) # 1/32 79 | return feat8, feat16, feat32 80 | 81 | def init_weight(self): 82 | state_dict = modelzoo.load_url(resnet18_url) 83 | self_state_dict = self.state_dict() 84 | for k, v in state_dict.items(): 85 | if 'fc' in k: continue 86 | self_state_dict.update({k: v}) 87 | self.load_state_dict(self_state_dict) 88 | 89 | def get_params(self): 90 | wd_params, nowd_params = [], [] 91 | 
for name, module in self.named_modules(): 92 | if isinstance(module, (nn.Linear, nn.Conv2d)): 93 | wd_params.append(module.weight) 94 | if not module.bias is None: 95 | nowd_params.append(module.bias) 96 | elif isinstance(module, nn.BatchNorm2d): 97 | nowd_params += list(module.parameters()) 98 | return wd_params, nowd_params 99 | 100 | 101 | if __name__ == "__main__": 102 | net = Resnet18() 103 | x = torch.randn(16, 3, 224, 224) 104 | out = net(x) 105 | print(out[0].size()) 106 | print(out[1].size()) 107 | print(out[2].size()) 108 | net.get_params() 109 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /util/extract_feature.py: -------------------------------------------------------------------------------- 1 | # Helper function for extracting features from pre-trained models 2 | import torch 3 | import torchvision.transforms as transforms 4 | import torchvision.datasets as datasets 5 | from PIL import Image 6 | import numpy as np 7 | import os 8 | from .utils import l2_norm, hflip_batch 9 | 10 | 11 | def extract_feature(image, backbone, model_root, input_size=[112, 112], rgb_mean=[0.5, 0.5, 0.5], 12 | rgb_std=[0.5, 0.5, 0.5], device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), tta=True): 13 | 14 | # define transform 15 | transform = transforms.Compose([ 16 | transforms.Resize([int(128 * input_size[0] / 112), int(128 * input_size[0] / 112)]), # smaller side resized 17 | transforms.CenterCrop([input_size[0], input_size[1]]), 18 | transforms.ToTensor(), 19 | transforms.Normalize(mean=rgb_mean, std=rgb_std)]) 20 | 21 | if isinstance(image, list): 22 | image = [transform(i).unsqueeze(0) for i in image] 23 | else: 24 | image = transform(image).unsqueeze(0) 25 | 26 | # load backbone from a checkpoint 27 | # print("Loading Backbone Checkpoint '{}'".format(model_root)) 28 | backbone.load_state_dict(torch.load(model_root)) 29 | backbone.to(device) 30 | 31 | # extract features 32 | backbone.eval() # set to evaluation mode 33 | 34 | with torch.no_grad(): 35 | if isinstance(image, list): 36 | embedding = [] 37 | if tta: 38 | for i in image: 39 | fliped = hflip_batch(i) 40 | embedding.append(backbone(i.to(device)).cpu() + backbone(fliped.to(device)).cpu()) 41 | else: 42 | for i in image: 43 | embedding.append(l2_norm(backbone(i.to(device))).cpu()) 44 | else: 45 | if tta: 46 | fliped = hflip_batch(image) 47 | embedding = backbone(image.to(device)).cpu() + backbone(fliped.to(device)).cpu() 48 | else: 49 | embedding = l2_norm(backbone(image.to(device))).cpu() 50 | 51 | # np.save("features.npy", features) 52 | # features = np.load("features.npy") 53 | 54 | return embedding 55 | 56 | 57 | def extract_feature_folder(data_root, backbone, model_root, input_size=[112, 112], rgb_mean=[0.5, 0.5, 0.5], 58 | rgb_std=[0.5, 0.5, 0.5], embedding_size=512, batch_size=512, 59 | device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), tta=True): 60 | 61 | # define data loader 62 | transform = transforms.Compose([ 63 | transforms.Resize([int(128 * input_size[0] / 112), int(128 * input_size[0] / 112)]), # smaller side resized 64 | transforms.CenterCrop([input_size[0], input_size[1]]), 65 | transforms.ToTensor(), 66 | transforms.Normalize(mean=rgb_mean, std=rgb_std)]) 67 | dataset = datasets.ImageFolder(data_root, transform) 68 | loader = 
torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=0) 69 | 70 | # load backbone from a checkpoint 71 | print("Loading Backbone Checkpoint '{}'".format(model_root)) 72 | backbone.load_state_dict(torch.load(model_root)) 73 | backbone.to(device) 74 | 75 | # extract features 76 | backbone.eval() # set to evaluation mode 77 | idx = 0 78 | features = np.zeros([len(loader.dataset), embedding_size]) 79 | with torch.no_grad(): 80 | iter_loader = iter(loader) 81 | while idx + batch_size <= len(loader.dataset): 82 | batch, _ = iter_loader.next() 83 | if tta: 84 | fliped = hflip_batch(batch) 85 | emb_batch = backbone(batch.to(device)).cpu() + backbone(fliped.to(device)).cpu() 86 | features[idx:idx + batch_size] = l2_norm(emb_batch) 87 | else: 88 | features[idx:idx + batch_size] = l2_norm(backbone(batch.to(device))).cpu() 89 | idx += batch_size 90 | 91 | if idx < len(loader.dataset): 92 | batch, _ = iter_loader.next() 93 | if tta: 94 | fliped = hflip_batch(batch) 95 | emb_batch = backbone(batch.to(device)).cpu() + backbone(fliped.to(device)).cpu() 96 | features[idx:] = l2_norm(emb_batch) 97 | else: 98 | features[idx:] = l2_norm(backbone(batch.to(device)).cpu()) 99 | 100 | # np.save("features.npy", features) 101 | # features = np.load("features.npy") 102 | 103 | return features 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import torch.nn.functional as F 4 | 5 | from .verification import evaluate 6 | 7 | from datetime import datetime 8 | import matplotlib.pyplot as plt 9 | plt.switch_backend('agg') 10 | import numpy as np 11 | from PIL import Image 12 | import bcolz 13 | import io 14 | import os 15 | 16 | 17 | def get_time(): 18 | return (str(datetime.now())[:-10]).replace(' ', '-').replace(':', '-') 19 | 20 | 21 | def l2_norm(input, axis = 1): 22 | norm = torch.norm(input, 2, axis, True) 23 | output = torch.div(input, norm) 24 | 25 | return output 26 | 27 | 28 | def de_preprocess(tensor): 29 | 30 | return tensor * 0.5 + 0.5 31 | 32 | 33 | hflip = transforms.Compose([ 34 | de_preprocess, 35 | transforms.ToPILImage(), 36 | transforms.functional.hflip, 37 | transforms.ToTensor(), 38 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 39 | ]) 40 | 41 | 42 | def hflip_batch(imgs_tensor): 43 | hfliped_imgs = torch.empty_like(imgs_tensor) 44 | for i, img_ten in enumerate(imgs_tensor): 45 | hfliped_imgs[i] = hflip(img_ten) 46 | 47 | return hfliped_imgs 48 | 49 | 50 | ccrop = transforms.Compose([ 51 | de_preprocess, 52 | transforms.ToPILImage(), 53 | transforms.Resize([128, 128]), # smaller side resized 54 | transforms.CenterCrop([112, 112]), 55 | transforms.ToTensor(), 56 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 57 | ]) 58 | 59 | 60 | def ccrop_batch(imgs_tensor): 61 | ccropped_imgs = torch.empty_like(imgs_tensor) 62 | for i, img_ten in enumerate(imgs_tensor): 63 | ccropped_imgs[i] = ccrop(img_ten) 64 | 65 | return ccropped_imgs 66 | 67 | 68 | def gen_plot(fpr, tpr): 69 | """Create a pyplot plot and save to buffer.""" 70 | plt.figure() 71 | plt.xlabel("FPR", fontsize = 14) 72 | plt.ylabel("TPR", fontsize = 14) 73 | plt.title("ROC Curve", fontsize = 14) 74 | plot = plt.plot(fpr, tpr, linewidth = 2) 75 | buf = io.BytesIO() 76 | plt.savefig(buf, format = 'jpeg') 77 | buf.seek(0) 78 | plt.close() 79 | 80 | return buf 81 
| 82 | 83 | def perform_val(multi_gpu, device, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True): 84 | if multi_gpu: 85 | backbone = backbone.module # unpackage model from DataParallel 86 | backbone = backbone.to(device) 87 | else: 88 | backbone = backbone.to(device) 89 | backbone.eval() # switch to evaluation mode 90 | 91 | idx = 0 92 | embeddings = np.zeros([len(carray), embedding_size]) 93 | with torch.no_grad(): 94 | while idx + batch_size <= len(carray): 95 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 96 | if tta: 97 | ccropped = ccrop_batch(batch) 98 | fliped = hflip_batch(ccropped) 99 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 100 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 101 | else: 102 | ccropped = ccrop_batch(batch) 103 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.to(device))).cpu() 104 | idx += batch_size 105 | if idx < len(carray): 106 | batch = torch.tensor(carray[idx:]) 107 | if tta: 108 | ccropped = ccrop_batch(batch) 109 | fliped = hflip_batch(ccropped) 110 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 111 | embeddings[idx:] = l2_norm(emb_batch) 112 | else: 113 | ccropped = ccrop_batch(batch) 114 | embeddings[idx:] = l2_norm(backbone(ccropped.to(device))).cpu() 115 | 116 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 117 | buf = gen_plot(fpr, tpr) 118 | roc_curve = Image.open(buf) 119 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 120 | 121 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 122 | 123 | 124 | class AverageMeter(object): 125 | """Computes and stores the average and current value""" 126 | def __init__(self): 127 | self.reset() 128 | 129 | def reset(self): 130 | self.val = 0 131 | self.avg = 0 132 | self.sum = 0 133 | self.count = 0 134 | 135 | def update(self, val, n = 1): 136 | self.val = val 137 | self.sum += val * n 138 | self.count += n 139 | self.avg = self.sum / self.count 140 | 141 | 142 | def accuracy(output, target, topk=(1,)): 143 | """Computes the precision@k for the specified values of k""" 144 | maxk = max(topk) 145 | batch_size = target.size(0) 146 | 147 | _, pred = output.topk(maxk, 1, True, True) 148 | pred = pred.t() 149 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 150 | 151 | res = [] 152 | for k in topk: 153 | correct_k = correct[:k].view(-1).float().sum(0) 154 | res.append(correct_k.mul_(100.0 / batch_size)) 155 | 156 | return res 157 | -------------------------------------------------------------------------------- /util/verification.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | from sklearn.model_selection import KFold 28 | from sklearn.decomposition import PCA 29 | import sklearn 30 | from scipy import interpolate 31 | from scipy.spatial.distance import pdist 32 | 33 | 34 | # Support: ['calculate_roc', 'calculate_accuracy', 'calculate_val', 'calculate_val_far', 'evaluate'] 35 | 36 | 37 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds = 10, pca = 0): 38 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 39 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 40 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 41 | nrof_thresholds = len(thresholds) 42 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 43 | 44 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 45 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 46 | accuracy = np.zeros((nrof_folds)) 47 | best_thresholds = np.zeros((nrof_folds)) 48 | indices = np.arange(nrof_pairs) 49 | # print('pca', pca) 50 | 51 | if pca == 0: 52 | diff = np.subtract(embeddings1, embeddings2) 53 | dist = np.sum(np.square(diff), 1) 54 | # dist = pdist(np.vstack([embeddings1, embeddings2]), 'cosine') 55 | 56 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 57 | # print('train_set', train_set) 58 | # print('test_set', test_set) 59 | if pca > 0: 60 | print("doing pca on", fold_idx) 61 | embed1_train = embeddings1[train_set] 62 | embed2_train = embeddings2[train_set] 63 | _embed_train = np.concatenate((embed1_train, embed2_train), axis = 0) 64 | # print(_embed_train.shape) 65 | pca_model = PCA(n_components = pca) 66 | pca_model.fit(_embed_train) 67 | embed1 = pca_model.transform(embeddings1) 68 | embed2 = pca_model.transform(embeddings2) 69 | embed1 = sklearn.preprocessing.normalize(embed1) 70 | embed2 = sklearn.preprocessing.normalize(embed2) 71 | # print(embed1.shape, embed2.shape) 72 | diff = np.subtract(embed1, embed2) 73 | dist = np.sum(np.square(diff), 1) 74 | 75 | # Find the best threshold for the fold 76 | acc_train = np.zeros((nrof_thresholds)) 77 | for threshold_idx, threshold in enumerate(thresholds): 78 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 79 | best_threshold_index = np.argmax(acc_train) 80 | # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index]) 81 | best_thresholds[fold_idx] = thresholds[best_threshold_index] 82 | for threshold_idx, threshold in enumerate(thresholds): 83 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 84 | dist[test_set], 85 | actual_issame[ 86 | test_set]) 87 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 88 | 89 | tpr = np.mean(tprs, 0) 90 | fpr = np.mean(fprs, 0) 91 | return tpr, fpr, accuracy, best_thresholds 92 | 93 | 94 | def calculate_accuracy(threshold, dist, actual_issame): 95 | predict_issame = np.less(dist, threshold) 96 | tp = 
np.sum(np.logical_and(predict_issame, actual_issame)) 97 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 98 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 99 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 100 | 101 | tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) 102 | fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) 103 | acc = float(tp + tn) / dist.size 104 | return tpr, fpr, acc 105 | 106 | 107 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds = 10): 108 | ''' 109 | Copy from [insightface](https://github.com/deepinsight/insightface) 110 | :param thresholds: 111 | :param embeddings1: 112 | :param embeddings2: 113 | :param actual_issame: 114 | :param far_target: 115 | :param nrof_folds: 116 | :return: 117 | ''' 118 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 119 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 120 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 121 | nrof_thresholds = len(thresholds) 122 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 123 | 124 | val = np.zeros(nrof_folds) 125 | far = np.zeros(nrof_folds) 126 | 127 | diff = np.subtract(embeddings1, embeddings2) 128 | dist = np.sum(np.square(diff), 1) 129 | indices = np.arange(nrof_pairs) 130 | 131 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 132 | 133 | # Find the threshold that gives FAR = far_target 134 | far_train = np.zeros(nrof_thresholds) 135 | for threshold_idx, threshold in enumerate(thresholds): 136 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 137 | if np.max(far_train) >= far_target: 138 | f = interpolate.interp1d(far_train, thresholds, kind = 'slinear') 139 | threshold = f(far_target) 140 | else: 141 | threshold = 0.0 142 | 143 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 144 | 145 | val_mean = np.mean(val) 146 | far_mean = np.mean(far) 147 | val_std = np.std(val) 148 | return val_mean, val_std, far_mean 149 | 150 | 151 | def calculate_val_far(threshold, dist, actual_issame): 152 | predict_issame = np.less(dist, threshold) 153 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 154 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 155 | n_same = np.sum(actual_issame) 156 | n_diff = np.sum(np.logical_not(actual_issame)) 157 | val = float(true_accept) / float(n_same) 158 | far = float(false_accept) / float(n_diff) 159 | return val, far 160 | 161 | 162 | def evaluate(embeddings, actual_issame, nrof_folds = 10, pca = 0): 163 | # Calculate evaluation metrics 164 | thresholds = np.arange(0, 4, 0.01) 165 | embeddings1 = embeddings[0::2] 166 | embeddings2 = embeddings[1::2] 167 | tpr, fpr, accuracy, best_thresholds = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), nrof_folds = nrof_folds, pca = pca) 168 | # thresholds = np.arange(0, 4, 0.001) 169 | # val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, 170 | # np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) 171 | # return tpr, fpr, accuracy, best_thresholds, val, val_std, far 172 | return tpr, fpr, accuracy, best_thresholds 173 | --------------------------------------------------------------------------------
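The `evaluate` helper in `util/verification.py` above follows the LFW-style protocol: embeddings are passed as interleaved pairs (`embeddings[0::2]` vs `embeddings[1::2]`), squared-L2 distances are thresholded on a 0–4 grid, and a 10-fold split picks the best threshold on each training fold and scores the held-out fold. Below is a minimal sketch of how it can be called; the random embeddings, pair count and labels are placeholder assumptions for illustration only, and in real use the embeddings would come from a face-recognition backbone.

```python
# Minimal sketch (not part of the repo) of driving util/verification.py's
# k-fold verification. The embedding values are random placeholders.
import numpy as np
from util.verification import evaluate

rng = np.random.RandomState(0)
n_pairs = 20                              # hypothetical: 10 "same" + 10 "different" pairs
embeddings = rng.randn(2 * n_pairs, 512)  # pairs stored interleaved:
                                          # embeddings[0::2] vs embeddings[1::2]
# Unit-normalize so the squared-L2 distance lies in [0, 4], matching the
# threshold grid np.arange(0, 4, 0.01) used inside calculate_roc.
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
issame = np.array([True] * 10 + [False] * 10)  # one ground-truth label per pair

# 10-fold protocol: best threshold chosen on each training split,
# accuracy reported on the held-out split.
tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds=10)
print('mean accuracy: %.4f' % accuracy.mean())
print('mean best threshold: %.2f' % best_thresholds.mean())
```

`accuracy` and `best_thresholds` come back as one value per fold, which is why `perform_val` in `util/utils.py` summarizes them with `accuracy.mean()` and `best_thresholds.mean()`.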