├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── align ├── __init__.py ├── align_trans.py ├── box_utils.py ├── detector.py ├── face_align.py ├── face_resize.py ├── first_stage.py ├── get_nets.py ├── matlab_cp2tform.py ├── onet.npy ├── pnet.npy ├── rnet.npy └── visualization_utils.py ├── backbone ├── __init__.py ├── model_irse.py └── model_resnet.py ├── imgs ├── 9.jpg ├── align.jpg ├── detect_landmark.png ├── parsing.jpg ├── parsing_maps.png ├── person_1 │ ├── 17.jpg │ ├── 18.jpg │ ├── 19.jpg │ └── 20.jpg ├── person_2 │ ├── 151.jpg │ ├── 152.jpg │ ├── 153.jpg │ └── 154.jpg └── single.jpg ├── parsing ├── __init__.py ├── face_parsing.py ├── model.py └── resnet.py └── util ├── __init__.py ├── extract_feature.py ├── utils.py └── verification.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # specified 107 | .idea 108 | just_for_test.py 109 | test_imgs/ 110 | result/ 111 | checkpoint/backbone_ir50_ms1m_epoch120.pth 112 | checkpoint/face_parsing.pth -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 zll 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # facetools 2 | Easy-to-use face related tools, including face detection, landmark localization, alignment & recognition, based on **PyTorch**. 3 | 4 | ## Quick start 5 | * **Do face detection and landmark localization using MTCNN** 6 | ```python 7 | from PIL import Image 8 | from align.detector import detect_faces 9 | from align.visualization_utils import show_results 10 | 11 | img = Image.open('imgs/single.jpg') # modify the image path to yours 12 | bounding_boxes, landmarks = detect_faces(img) # detect bboxes and landmarks for all faces in the image 13 | show_results(img, bounding_boxes, landmarks) # visualize the results 14 | ``` 15 | ![](imgs/single.jpg) ![](imgs/detect_landmark.png) 16 | 17 | * **Do alignment** 18 | ```python 19 | from align.face_align import align 20 | res = align('imgs/single.jpg', save_path='./result', vis=False) 21 | res.show() 22 | ``` 23 | ![](imgs/align.jpg) 24 | 25 | * **Do face encoding using IR50 model** ([download pretrained model](https://pan.baidu.com/s/1L8yOF1oZf6JHfeY9iN59Mg#list/path=%2F)) 26 | ```python 27 | from PIL import Image 28 | from util.extract_feature import extract_feature 29 | from backbone.model_irse import IR_50 30 | 31 | image_1 = Image.open('imgs/align.jpg') # modify the image path to yours 32 | 33 | model = IR_50([112, 112]) 34 | model_cp = 'checkpoint/backbone_ir50_ms1m_epoch120.pth' 35 | 36 | features = extract_feature(image_1, model, model_cp) 37 | print(features.size()) # output : torch.Size([1, 512]) 38 | 39 | ``` 40 | 41 | * **Calculate the distance between two images** 42 | ```python 43 | import numpy as np 44 | from PIL import Image 45 | from util.extract_feature import extract_feature 46 | from backbone.model_irse import IR_50 47 | from scipy.spatial.distance import pdist 48 | 49 | 50 | face_1 = Image.open('imgs/person_1/17.jpg') 51 | face_2 = Image.open('imgs/person_1/18.jpg') # face_1 and face_2 belong to the same one 52 | 53 | face_3 = Image.open('imgs/person_2/151.jpg') 54 | face_4 = Image.open('imgs/person_2/152.jpg') # face_3 and face_4 belong to the same one 55 | 56 | model = IR_50([112, 112]) 57 | model_cp = 'checkpoint/backbone_ir50_ms1m_epoch120.pth' 58 | 59 | data = [face_1, face_2, face_3, face_4] 60 | 61 | features = extract_feature(data, model, model_cp) 62 | features = [i.numpy() for i in features] # embeddings for face_1, face_2, face_3 and face_4 63 | 64 | diff = np.subtract(features[0], features[1]) 65 | dist = np.sum(np.square(diff), 1) 66 | print(dist) # output : 1984.6016 67 | 68 | diff = np.subtract(features[2], features[3]) 69 | dist = np.sum(np.square(diff), 1) 70 | print(dist) # output : 1921.2222 71 | 72 | diff = np.subtract(features[0], features[2]) 73 | dist = np.sum(np.square(diff), 1) 74 | print(dist) # output : 16876.32 75 | 76 | diff = np.subtract(features[1], features[3]) 77 | dist = np.sum(np.square(diff), 1) 78 | 
print(dist) # output : 17107.396 79 | 80 | dist = pdist(np.vstack([features[0], features[1]]), 'cosine') 81 | print(dist) # output : 0.12932935 82 | 83 | dist = pdist(np.vstack([features[2], features[3]]), 'cosine') 84 | print(dist) # output : 0.11706942 85 | 86 | dist = pdist(np.vstack([features[0], features[2]]), 'cosine') 87 | print(dist) # output : 1.09022914 88 | 89 | dist = pdist(np.vstack([features[1], features[3]]), 'cosine') 90 | print(dist) # output : 1.07447068 91 | ``` 92 | 93 | * **Do face parsing** 94 | ```python 95 | from PIL import Image 96 | from parsing.face_parsing import parsing, vis_parsing_maps 97 | 98 | image = Image.open('imgs/9.jpg') 99 | 100 | res = parsing(image) 101 | vis_parsing_maps(image, res, show=True, save_im=True) 102 | ``` 103 | ![](imgs/9.jpg) ![](imgs/parsing.jpg) 104 | 105 | ### Using facetools in Your Project 106 | It is easy to use facetools in your project. 107 | ``` 108 | Your project 109 | │ README.md 110 | │ ... 111 | │ foo.py 112 | │ 113 | └───facetools 114 | │ 115 | └───directory1 116 | │ 117 | └───... 118 | ``` 119 | 120 | In `foo.py`, you can easily import facetools by adding: 121 | ```python 122 | from facetools import detect_faces, show_results 123 | from PIL import Image 124 | 125 | def foo(): 126 | img = Image.open('/path/to/your/image') 127 | bounding_boxes, landmarks = detect_faces(img) 128 | show_results(img, bounding_boxes, landmarks) 129 | ``` 130 | 131 | ## Acknowledgement 132 | - This repo is based on [face.evoLVe.PyTorch](https://github.com/ZhaoJ9014/face.evoLVe.PyTorch) and [face-parsing.PyTorch](https://github.com/zllrunning/face-parsing.PyTorch). Many thanks to the excellent repo. 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from util.extract_feature import extract_feature 2 | from backbone.model_irse import IR_50 3 | from align.detector import detect_faces 4 | from align.visualization_utils import show_results 5 | from align.face_align import align 6 | from parsing.face_parsing import parsing, vis_parsing_maps -------------------------------------------------------------------------------- /align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /align/align_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from .matlab_cp2tform import get_similarity_transform_for_cv2 4 | 5 | 6 | # reference facial points, a list of coordinates (x,y) 7 | REFERENCE_FACIAL_POINTS = [ # default reference facial points for crop_size = (112, 112); should adjust REFERENCE_FACIAL_POINTS accordingly for other crop_size 8 | [30.29459953, 51.69630051], 9 | [65.53179932, 51.50139999], 10 | [48.02519989, 71.73660278], 11 | [33.54930115, 92.3655014], 12 | [62.72990036, 92.20410156] 13 | ] 14 | 15 | DEFAULT_CROP_SIZE = (96, 112) 16 | 17 | 18 | class FaceWarpException(Exception): 19 | def __str__(self): 20 | return 'In File {}:{}'.format( 21 | __file__, super.__str__(self)) 22 | 23 | 24 | def get_reference_facial_points(output_size = None, 25 | inner_padding_factor = 0.0, 26 | outer_padding=(0, 0), 27 | default_square = False): 28 | """ 29 | Function: 30 | ---------- 31 | get reference 5 key points according to crop settings: 32 | 0. 
Set default crop_size: 33 | if default_square: 34 | crop_size = (112, 112) 35 | else: 36 | crop_size = (96, 112) 37 | 1. Pad the crop_size by inner_padding_factor in each side; 38 | 2. Resize crop_size into (output_size - outer_padding*2), 39 | pad into output_size with outer_padding; 40 | 3. Output reference_5point; 41 | Parameters: 42 | ---------- 43 | @output_size: (w, h) or None 44 | size of aligned face image 45 | @inner_padding_factor: (w_factor, h_factor) 46 | padding factor for inner (w, h) 47 | @outer_padding: (w_pad, h_pad) 48 | each row is a pair of coordinates (x, y) 49 | @default_square: True or False 50 | if True: 51 | default crop_size = (112, 112) 52 | else: 53 | default crop_size = (96, 112); 54 | !!! make sure, if output_size is not None: 55 | (output_size - outer_padding) 56 | = some_scale * (default crop_size * (1.0 + inner_padding_factor)) 57 | Returns: 58 | ---------- 59 | @reference_5point: 5x2 np.array 60 | each row is a pair of transformed coordinates (x, y) 61 | """ 62 | #print('\n===> get_reference_facial_points():') 63 | 64 | #print('---> Params:') 65 | #print(' output_size: ', output_size) 66 | #print(' inner_padding_factor: ', inner_padding_factor) 67 | #print(' outer_padding:', outer_padding) 68 | #print(' default_square: ', default_square) 69 | 70 | tmp_5pts = np.array(REFERENCE_FACIAL_POINTS) 71 | tmp_crop_size = np.array(DEFAULT_CROP_SIZE) 72 | 73 | # 0) make the inner region a square 74 | if default_square: 75 | size_diff = max(tmp_crop_size) - tmp_crop_size 76 | tmp_5pts += size_diff / 2 77 | tmp_crop_size += size_diff 78 | 79 | #print('---> default:') 80 | #print(' crop_size = ', tmp_crop_size) 81 | #print(' reference_5pts = ', tmp_5pts) 82 | 83 | if (output_size and 84 | output_size[0] == tmp_crop_size[0] and 85 | output_size[1] == tmp_crop_size[1]): 86 | #print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size)) 87 | return tmp_5pts 88 | 89 | if (inner_padding_factor == 0 and 90 | outer_padding == (0, 0)): 91 | if output_size is None: 92 | #print('No paddings to do: return default reference points') 93 | return tmp_5pts 94 | else: 95 | raise FaceWarpException( 96 | 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size)) 97 | 98 | # check output size 99 | if not (0 <= inner_padding_factor <= 1.0): 100 | raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)') 101 | 102 | if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) 103 | and output_size is None): 104 | output_size = tmp_crop_size * \ 105 | (1 + inner_padding_factor * 2).astype(np.int32) 106 | output_size += np.array(outer_padding) 107 | #print(' deduced from paddings, output_size = ', output_size) 108 | 109 | if not (outer_padding[0] < output_size[0] 110 | and outer_padding[1] < output_size[1]): 111 | raise FaceWarpException('Not (outer_padding[0] < output_size[0]' 112 | 'and outer_padding[1] < output_size[1])') 113 | 114 | # 1) pad the inner region according inner_padding_factor 115 | #print('---> STEP1: pad the inner region according inner_padding_factor') 116 | if inner_padding_factor > 0: 117 | size_diff = tmp_crop_size * inner_padding_factor * 2 118 | tmp_5pts += size_diff / 2 119 | tmp_crop_size += np.round(size_diff).astype(np.int32) 120 | 121 | #print(' crop_size = ', tmp_crop_size) 122 | #print(' reference_5pts = ', tmp_5pts) 123 | 124 | # 2) resize the padded inner region 125 | #print('---> STEP2: resize the padded inner region') 126 | size_bf_outer_pad = np.array(output_size) - 
np.array(outer_padding) * 2 127 | #print(' crop_size = ', tmp_crop_size) 128 | #print(' size_bf_outer_pad = ', size_bf_outer_pad) 129 | 130 | if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]: 131 | raise FaceWarpException('Must have (output_size - outer_padding)' 132 | '= some_scale * (crop_size * (1.0 + inner_padding_factor)') 133 | 134 | scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0] 135 | #print(' resize scale_factor = ', scale_factor) 136 | tmp_5pts = tmp_5pts * scale_factor 137 | # size_diff = tmp_crop_size * (scale_factor - min(scale_factor)) 138 | # tmp_5pts = tmp_5pts + size_diff / 2 139 | tmp_crop_size = size_bf_outer_pad 140 | #print(' crop_size = ', tmp_crop_size) 141 | #print(' reference_5pts = ', tmp_5pts) 142 | 143 | # 3) add outer_padding to make output_size 144 | reference_5point = tmp_5pts + np.array(outer_padding) 145 | tmp_crop_size = output_size 146 | #print('---> STEP3: add outer_padding to make output_size') 147 | #print(' crop_size = ', tmp_crop_size) 148 | #print(' reference_5pts = ', tmp_5pts) 149 | 150 | #print('===> end get_reference_facial_points\n') 151 | 152 | return reference_5point 153 | 154 | 155 | def get_affine_transform_matrix(src_pts, dst_pts): 156 | """ 157 | Function: 158 | ---------- 159 | get affine transform matrix 'tfm' from src_pts to dst_pts 160 | Parameters: 161 | ---------- 162 | @src_pts: Kx2 np.array 163 | source points matrix, each row is a pair of coordinates (x, y) 164 | @dst_pts: Kx2 np.array 165 | destination points matrix, each row is a pair of coordinates (x, y) 166 | Returns: 167 | ---------- 168 | @tfm: 2x3 np.array 169 | transform matrix from src_pts to dst_pts 170 | """ 171 | 172 | tfm = np.float32([[1, 0, 0], [0, 1, 0]]) 173 | n_pts = src_pts.shape[0] 174 | ones = np.ones((n_pts, 1), src_pts.dtype) 175 | src_pts_ = np.hstack([src_pts, ones]) 176 | dst_pts_ = np.hstack([dst_pts, ones]) 177 | 178 | # #print(('src_pts_:\n' + str(src_pts_)) 179 | # #print(('dst_pts_:\n' + str(dst_pts_)) 180 | 181 | A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_) 182 | 183 | # #print(('np.linalg.lstsq return A: \n' + str(A)) 184 | # #print(('np.linalg.lstsq return res: \n' + str(res)) 185 | # #print(('np.linalg.lstsq return rank: \n' + str(rank)) 186 | # #print(('np.linalg.lstsq return s: \n' + str(s)) 187 | 188 | if rank == 3: 189 | tfm = np.float32([ 190 | [A[0, 0], A[1, 0], A[2, 0]], 191 | [A[0, 1], A[1, 1], A[2, 1]] 192 | ]) 193 | elif rank == 2: 194 | tfm = np.float32([ 195 | [A[0, 0], A[1, 0], 0], 196 | [A[0, 1], A[1, 1], 0] 197 | ]) 198 | 199 | return tfm 200 | 201 | 202 | def warp_and_crop_face(src_img, 203 | facial_pts, 204 | reference_pts = None, 205 | crop_size=(96, 112), 206 | align_type = 'smilarity'): 207 | """ 208 | Function: 209 | ---------- 210 | apply affine transform 'trans' to uv 211 | Parameters: 212 | ---------- 213 | @src_img: 3x3 np.array 214 | input image 215 | @facial_pts: could be 216 | 1)a list of K coordinates (x,y) 217 | or 218 | 2) Kx2 or 2xK np.array 219 | each row or col is a pair of coordinates (x, y) 220 | @reference_pts: could be 221 | 1) a list of K coordinates (x,y) 222 | or 223 | 2) Kx2 or 2xK np.array 224 | each row or col is a pair of coordinates (x, y) 225 | or 226 | 3) None 227 | if None, use default reference facial points 228 | @crop_size: (w, h) 229 | output face image size 230 | @align_type: transform type, could be one of 231 | 1) 'similarity': use similarity transform 232 | 2) 'cv2_affine': use the first 3 points to do affine 
transform, 233 | by calling cv2.getAffineTransform() 234 | 3) 'affine': use all points to do affine transform 235 | Returns: 236 | ---------- 237 | @face_img: output face image with size (w, h) = @crop_size 238 | """ 239 | 240 | if reference_pts is None: 241 | if crop_size[0] == 96 and crop_size[1] == 112: 242 | reference_pts = REFERENCE_FACIAL_POINTS 243 | else: 244 | default_square = False 245 | inner_padding_factor = 0 246 | outer_padding = (0, 0) 247 | output_size = crop_size 248 | 249 | reference_pts = get_reference_facial_points(output_size, 250 | inner_padding_factor, 251 | outer_padding, 252 | default_square) 253 | 254 | ref_pts = np.float32(reference_pts) 255 | ref_pts_shp = ref_pts.shape 256 | if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2: 257 | raise FaceWarpException( 258 | 'reference_pts.shape must be (K,2) or (2,K) and K>2') 259 | 260 | if ref_pts_shp[0] == 2: 261 | ref_pts = ref_pts.T 262 | 263 | src_pts = np.float32(facial_pts) 264 | src_pts_shp = src_pts.shape 265 | if max(src_pts_shp) < 3 or min(src_pts_shp) != 2: 266 | raise FaceWarpException( 267 | 'facial_pts.shape must be (K,2) or (2,K) and K>2') 268 | 269 | if src_pts_shp[0] == 2: 270 | src_pts = src_pts.T 271 | 272 | # #print('--->src_pts:\n', src_pts 273 | # #print('--->ref_pts\n', ref_pts 274 | 275 | if src_pts.shape != ref_pts.shape: 276 | raise FaceWarpException( 277 | 'facial_pts and reference_pts must have the same shape') 278 | 279 | if align_type is 'cv2_affine': 280 | tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3]) 281 | # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm)) 282 | elif align_type is 'affine': 283 | tfm = get_affine_transform_matrix(src_pts, ref_pts) 284 | # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm)) 285 | else: 286 | tfm = get_similarity_transform_for_cv2(src_pts, ref_pts) 287 | # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm)) 288 | 289 | # #print('--->Transform matrix: ' 290 | # #print(('type(tfm):' + str(type(tfm))) 291 | # #print(('tfm.dtype:' + str(tfm.dtype)) 292 | # #print( tfm 293 | 294 | face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1])) 295 | 296 | return face_img -------------------------------------------------------------------------------- /align/box_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | 5 | def nms(boxes, overlap_threshold = 0.5, mode = 'union'): 6 | """Non-maximum suppression. 7 | 8 | Arguments: 9 | boxes: a float numpy array of shape [n, 5], 10 | where each row is (xmin, ymin, xmax, ymax, score). 11 | overlap_threshold: a float number. 12 | mode: 'union' or 'min'. 
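Note: with mode 'union' the overlap measure is intersection-over-union, inter / (area_i + area_j - inter); with mode 'min' it is inter / min(area_i, area_j), which suppresses boxes nested inside larger ones more aggressively. This matches the computation in the loop below.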
13 | 14 | Returns: 15 | list with indices of the selected boxes 16 | """ 17 | 18 | # if there are no boxes, return the empty list 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | # list of picked indices 23 | pick = [] 24 | 25 | # grab the coordinates of the bounding boxes 26 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] 27 | 28 | area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0) 29 | ids = np.argsort(score) # in increasing order 30 | 31 | while len(ids) > 0: 32 | 33 | # grab index of the largest value 34 | last = len(ids) - 1 35 | i = ids[last] 36 | pick.append(i) 37 | 38 | # compute intersections 39 | # of the box with the largest score 40 | # with the rest of boxes 41 | 42 | # left top corner of intersection boxes 43 | ix1 = np.maximum(x1[i], x1[ids[:last]]) 44 | iy1 = np.maximum(y1[i], y1[ids[:last]]) 45 | 46 | # right bottom corner of intersection boxes 47 | ix2 = np.minimum(x2[i], x2[ids[:last]]) 48 | iy2 = np.minimum(y2[i], y2[ids[:last]]) 49 | 50 | # width and height of intersection boxes 51 | w = np.maximum(0.0, ix2 - ix1 + 1.0) 52 | h = np.maximum(0.0, iy2 - iy1 + 1.0) 53 | 54 | # intersections' areas 55 | inter = w * h 56 | if mode == 'min': 57 | overlap = inter/np.minimum(area[i], area[ids[:last]]) 58 | elif mode == 'union': 59 | # intersection over union (IoU) 60 | overlap = inter/(area[i] + area[ids[:last]] - inter) 61 | 62 | # delete all boxes where overlap is too big 63 | ids = np.delete( 64 | ids, 65 | np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]) 66 | ) 67 | 68 | return pick 69 | 70 | 71 | def convert_to_square(bboxes): 72 | """Convert bounding boxes to a square form. 73 | 74 | Arguments: 75 | bboxes: a float numpy array of shape [n, 5]. 76 | 77 | Returns: 78 | a float numpy array of shape [n, 5], 79 | squared bounding boxes. 80 | """ 81 | 82 | square_bboxes = np.zeros_like(bboxes) 83 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 84 | h = y2 - y1 + 1.0 85 | w = x2 - x1 + 1.0 86 | max_side = np.maximum(h, w) 87 | square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5 88 | square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5 89 | square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 90 | square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 91 | return square_bboxes 92 | 93 | 94 | def calibrate_box(bboxes, offsets): 95 | """Transform bounding boxes to be more like true bounding boxes. 96 | 'offsets' is one of the outputs of the nets. 97 | 98 | Arguments: 99 | bboxes: a float numpy array of shape [n, 5]. 100 | offsets: a float numpy array of shape [n, 4]. 101 | 102 | Returns: 103 | a float numpy array of shape [n, 5]. 104 | """ 105 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 106 | w = x2 - x1 + 1.0 107 | h = y2 - y1 + 1.0 108 | w = np.expand_dims(w, 1) 109 | h = np.expand_dims(h, 1) 110 | 111 | # this is what happening here: 112 | # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)] 113 | # x1_true = x1 + tx1*w 114 | # y1_true = y1 + ty1*h 115 | # x2_true = x2 + tx2*w 116 | # y2_true = y2 + ty2*h 117 | # below is just more compact form of this 118 | 119 | # are offsets always such that 120 | # x1 < x2 and y1 < y2 ? 121 | 122 | translation = np.hstack([w, h, w, h])*offsets 123 | bboxes[:, 0:4] = bboxes[:, 0:4] + translation 124 | return bboxes 125 | 126 | 127 | def get_image_boxes(bounding_boxes, img, size = 24): 128 | """Cut out boxes from the image. 129 | 130 | Arguments: 131 | bounding_boxes: a float numpy array of shape [n, 5]. 132 | img: an instance of PIL.Image. 133 | size: an integer, size of cutouts. 
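Note: boxes are first clipped to the image by correct_bboxes(); any part of a box that falls outside the image is zero-padded, and every cutout is resized to (size, size) with bilinear interpolation and normalized by _preprocess() before being returned.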
134 | 135 | Returns: 136 | a float numpy array of shape [n, 3, size, size]. 137 | """ 138 | 139 | num_boxes = len(bounding_boxes) 140 | width, height = img.size 141 | 142 | [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height) 143 | img_boxes = np.zeros((num_boxes, 3, size, size), 'float32') 144 | 145 | for i in range(num_boxes): 146 | img_box = np.zeros((h[i], w[i], 3), 'uint8') 147 | 148 | img_array = np.asarray(img, 'uint8') 149 | img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\ 150 | img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :] 151 | 152 | # resize 153 | img_box = Image.fromarray(img_box) 154 | img_box = img_box.resize((size, size), Image.BILINEAR) 155 | img_box = np.asarray(img_box, 'float32') 156 | 157 | img_boxes[i, :, :, :] = _preprocess(img_box) 158 | 159 | return img_boxes 160 | 161 | 162 | def correct_bboxes(bboxes, width, height): 163 | """Crop boxes that are too big and get coordinates 164 | with respect to cutouts. 165 | 166 | Arguments: 167 | bboxes: a float numpy array of shape [n, 5], 168 | where each row is (xmin, ymin, xmax, ymax, score). 169 | width: a float number. 170 | height: a float number. 171 | 172 | Returns: 173 | dy, dx, edy, edx: a int numpy arrays of shape [n], 174 | coordinates of the boxes with respect to the cutouts. 175 | y, x, ey, ex: a int numpy arrays of shape [n], 176 | corrected ymin, xmin, ymax, xmax. 177 | h, w: a int numpy arrays of shape [n], 178 | just heights and widths of boxes. 179 | 180 | in the following order: 181 | [dy, edy, dx, edx, y, ey, x, ex, w, h]. 182 | """ 183 | 184 | x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] 185 | w, h = x2 - x1 + 1.0, y2 - y1 + 1.0 186 | num_boxes = bboxes.shape[0] 187 | 188 | # 'e' stands for end 189 | # (x, y) -> (ex, ey) 190 | x, y, ex, ey = x1, y1, x2, y2 191 | 192 | # we need to cut out a box from the image. 193 | # (x, y, ex, ey) are corrected coordinates of the box 194 | # in the image. 195 | # (dx, dy, edx, edy) are coordinates of the box in the cutout 196 | # from the image. 197 | dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,)) 198 | edx, edy = w.copy() - 1.0, h.copy() - 1.0 199 | 200 | # if box's bottom right corner is too far right 201 | ind = np.where(ex > width - 1.0)[0] 202 | edx[ind] = w[ind] + width - 2.0 - ex[ind] 203 | ex[ind] = width - 1.0 204 | 205 | # if box's bottom right corner is too low 206 | ind = np.where(ey > height - 1.0)[0] 207 | edy[ind] = h[ind] + height - 2.0 - ey[ind] 208 | ey[ind] = height - 1.0 209 | 210 | # if box's top left corner is too far left 211 | ind = np.where(x < 0.0)[0] 212 | dx[ind] = 0.0 - x[ind] 213 | x[ind] = 0.0 214 | 215 | # if box's top left corner is too high 216 | ind = np.where(y < 0.0)[0] 217 | dy[ind] = 0.0 - y[ind] 218 | y[ind] = 0.0 219 | 220 | return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h] 221 | return_list = [i.astype('int32') for i in return_list] 222 | 223 | return return_list 224 | 225 | 226 | def _preprocess(img): 227 | """Preprocessing step before feeding the network. 228 | 229 | Arguments: 230 | img: a float numpy array of shape [h, w, c]. 231 | 232 | Returns: 233 | a float numpy array of shape [1, c, h, w]. 
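Note: the array is transposed from HWC to CHW, a batch dimension is added, and pixel values are scaled by (img - 127.5) * 0.0078125, i.e. (img - 127.5) / 128, mapping [0, 255] into roughly [-1, 1].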
234 | """ 235 | img = img.transpose((2, 0, 1)) 236 | img = np.expand_dims(img, 0) 237 | img = (img - 127.5) * 0.0078125 238 | return img 239 | -------------------------------------------------------------------------------- /align/detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import Variable 4 | from .get_nets import PNet, RNet, ONet 5 | from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square 6 | from .first_stage import run_first_stage 7 | 8 | 9 | def detect_faces(image, min_face_size=20.0, 10 | thresholds=[0.6, 0.7, 0.8], 11 | nms_thresholds=[0.7, 0.7, 0.7]): 12 | """ 13 | Arguments: 14 | image: an instance of PIL.Image. 15 | min_face_size: a float number. 16 | thresholds: a list of length 3. 17 | nms_thresholds: a list of length 3. 18 | 19 | Returns: 20 | two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], 21 | bounding boxes and facial landmarks. 22 | """ 23 | 24 | # LOAD MODELS 25 | pnet = PNet() 26 | rnet = RNet() 27 | onet = ONet() 28 | onet.eval() 29 | 30 | # BUILD AN IMAGE PYRAMID 31 | width, height = image.size 32 | min_length = min(height, width) 33 | 34 | min_detection_size = 12 35 | factor = 0.707 # sqrt(0.5) 36 | 37 | # scales for scaling the image 38 | scales = [] 39 | 40 | # scales the image so that 41 | # minimum size that we can detect equals to 42 | # minimum face size that we want to detect 43 | m = min_detection_size/min_face_size 44 | min_length *= m 45 | 46 | factor_count = 0 47 | while min_length > min_detection_size: 48 | scales.append(m*factor**factor_count) 49 | min_length *= factor 50 | factor_count += 1 51 | 52 | # STAGE 1 53 | 54 | # it will be returned 55 | bounding_boxes = [] 56 | 57 | # run P-Net on different scales 58 | for s in scales: 59 | boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0]) 60 | bounding_boxes.append(boxes) 61 | 62 | # collect boxes (and offsets, and scores) from different scales 63 | bounding_boxes = [i for i in bounding_boxes if i is not None] 64 | bounding_boxes = np.vstack(bounding_boxes) 65 | 66 | keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) 67 | bounding_boxes = bounding_boxes[keep] 68 | 69 | # use offsets predicted by pnet to transform bounding boxes 70 | bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) 71 | # shape [n_boxes, 5] 72 | 73 | bounding_boxes = convert_to_square(bounding_boxes) 74 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 75 | 76 | # STAGE 2 77 | with torch.no_grad(): 78 | img_boxes = get_image_boxes(bounding_boxes, image, size = 24) 79 | img_boxes = torch.FloatTensor(img_boxes) 80 | output = rnet(img_boxes) 81 | offsets = output[0].data.numpy() # shape [n_boxes, 4] 82 | probs = output[1].data.numpy() # shape [n_boxes, 2] 83 | 84 | keep = np.where(probs[:, 1] > thresholds[1])[0] 85 | bounding_boxes = bounding_boxes[keep] 86 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 87 | offsets = offsets[keep] 88 | 89 | keep = nms(bounding_boxes, nms_thresholds[1]) 90 | bounding_boxes = bounding_boxes[keep] 91 | bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) 92 | bounding_boxes = convert_to_square(bounding_boxes) 93 | bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) 94 | 95 | # STAGE 3 96 | with torch.no_grad(): 97 | img_boxes = get_image_boxes(bounding_boxes, image, size=48) 98 | if len(img_boxes) == 0: 99 | return [], [] 100 | img_boxes = torch.FloatTensor(img_boxes) 101 | output 
= onet(img_boxes) 102 | landmarks = output[0].data.numpy() # shape [n_boxes, 10] 103 | offsets = output[1].data.numpy() # shape [n_boxes, 4] 104 | probs = output[2].data.numpy() # shape [n_boxes, 2] 105 | 106 | keep = np.where(probs[:, 1] > thresholds[2])[0] 107 | bounding_boxes = bounding_boxes[keep] 108 | bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) 109 | offsets = offsets[keep] 110 | landmarks = landmarks[keep] 111 | 112 | # compute landmark points 113 | width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 114 | height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 115 | xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] 116 | landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] 117 | landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] 118 | 119 | bounding_boxes = calibrate_box(bounding_boxes, offsets) 120 | keep = nms(bounding_boxes, nms_thresholds[2], mode='min') 121 | bounding_boxes = bounding_boxes[keep] 122 | landmarks = landmarks[keep] 123 | 124 | return bounding_boxes, landmarks 125 | 126 | -------------------------------------------------------------------------------- /align/face_align.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | from .detector import detect_faces 3 | from .align_trans import get_reference_facial_points, warp_and_crop_face 4 | import numpy as np 5 | import os 6 | from tqdm import tqdm 7 | 8 | 9 | def align(image_or_folder, crop_size=112, save_path=None, vis=True): # 传入已读的图像 10 | scale = crop_size / 112. 11 | reference = get_reference_facial_points(default_square=True) * scale 12 | 13 | if save_path is not None and not os.path.isdir(save_path) : 14 | os.makedirs(save_path) 15 | 16 | if os.path.isdir(image_or_folder): 17 | for subfolder in tqdm(os.listdir(image_or_folder)): 18 | if not os.path.isdir(os.path.join(save_path, subfolder)): 19 | os.makedirs(os.path.join(save_path, subfolder)) 20 | for image_name in os.listdir(os.path.join(image_or_folder, subfolder)): 21 | print("Processing\t{}".format(os.path.join(image_or_folder, subfolder, image_name))) 22 | img = Image.open(os.path.join(image_or_folder, subfolder, image_name)) 23 | try: # Handle exception 24 | _, landmarks = detect_faces(img) 25 | except Exception: 26 | print("{} is discarded due to exception!".format(os.path.join(image_or_folder, subfolder, image_name))) 27 | continue 28 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 29 | print("{} is discarded due to non-detected landmarks!".format(os.path.join(image_or_folder, subfolder, image_name))) 30 | continue 31 | facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 32 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 33 | img_warped = Image.fromarray(warped_face) 34 | if image_name.split('.')[-1].lower() not in ['jpg', 'jpeg']: # not from jpg 35 | image_name = '.'.join(image_name.split('.')[:-1]) + '.jpg' 36 | img_warped.save(os.path.join(save_path, subfolder, image_name)) 37 | else: 38 | img = Image.open(image_or_folder) 39 | try: # Handle exception 40 | _, landmarks = detect_faces(img) 41 | except Exception: 42 | print("{} is discarded due to exception!".format(image_or_folder)) 43 | if len(landmarks) == 0: # If the landmarks cannot be detected, the img will be discarded 44 | print("{} is discarded due to non-detected landmarks!".format(image_or_folder)) 45 | 
facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)] 46 | warped_face = warp_and_crop_face(np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)) 47 | img_warped = Image.fromarray(warped_face) 48 | if vis: 49 | img_warped.show() 50 | if save_path is not None: 51 | if image_or_folder.split('.')[-1].lower() not in ['jpg', 'jpeg']: # not from jpg 52 | image_or_folder = '.'.join(os.path.basename(image_or_folder).split('.')[:-1]) + '.jpg' 53 | else: 54 | image_or_folder = os.path.basename(image_or_folder) 55 | 56 | img_warped.save(os.path.join(save_path, image_or_folder)) 57 | return img_warped 58 | -------------------------------------------------------------------------------- /align/face_resize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from tqdm import tqdm 4 | 5 | 6 | def mkdir(path): 7 | if not os.path.exists(path): 8 | os.mkdir(path) 9 | 10 | 11 | def process_image(img): 12 | 13 | size = img.shape 14 | h, w = size[0], size[1] 15 | scale = max(w, h) / float(min_side) 16 | new_w, new_h = int(w / scale), int(h / scale) 17 | resize_img = cv2.resize(img, (new_w, new_h)) 18 | if new_w % 2 != 0 and new_h % 2 == 0: 19 | top, bottom, left, right = (min_side - new_h) / 2, (min_side - new_h) / 2, (min_side - new_w) / 2 + 1, ( 20 | min_side - new_w) / 2 21 | elif new_h % 2 != 0 and new_w % 2 == 0: 22 | top, bottom, left, right = (min_side - new_h) / 2 + 1, (min_side - new_h) / 2, (min_side - new_w) / 2, ( 23 | min_side - new_w) / 2 24 | elif new_h % 2 == 0 and new_w % 2 == 0: 25 | top, bottom, left, right = (min_side - new_h) / 2, (min_side - new_h) / 2, (min_side - new_w) / 2, ( 26 | min_side - new_w) / 2 27 | else: 28 | top, bottom, left, right = (min_side - new_h) / 2 + 1, (min_side - new_h) / 2, (min_side - new_w) / 2 + 1, ( 29 | min_side - new_w) / 2 30 | pad_img = cv2.copyMakeBorder(resize_img, int(top), int(bottom), int(left), int(right), cv2.BORDER_CONSTANT, # cast to int: cv2.copyMakeBorder requires integer border widths, and '/' above yields floats in Python 3 31 | value=[0, 0, 0]) 32 | 33 | return pad_img 34 | 35 | 36 | def main(source_root): 37 | 38 | dest_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M_Resized" 39 | mkdir(dest_root) 40 | cwd = os.getcwd() # delete any '.DS_Store' files in the source_root 41 | os.chdir(source_root) 42 | os.system("find .
-name '*.DS_Store' -type f -delete") 43 | os.chdir(cwd) 44 | 45 | if not os.path.isdir(dest_root): 46 | os.mkdir(dest_root) 47 | 48 | for subfolder in tqdm(os.listdir(source_root)): 49 | if not os.path.isdir(os.path.join(dest_root, subfolder)): 50 | os.mkdir(os.path.join(dest_root, subfolder)) 51 | for image_name in os.listdir(os.path.join(source_root, subfolder)): 52 | print("Processing\t{}".format(os.path.join(source_root, subfolder, image_name))) 53 | img = cv2.imread(os.path.join(source_root, subfolder, image_name)) 54 | if type(img) == type(None): 55 | print("damaged image %s, del it" % (img)) 56 | os.remove(img) 57 | continue 58 | size = img.shape 59 | h, w = size[0], size[1] 60 | if max(w, h) > 512: 61 | img_pad = process_image(img) 62 | else: 63 | img_pad = img 64 | cv2.imwrite(os.path.join(dest_root, subfolder, image_name.split('.')[0] + '.jpg'), img_pad) 65 | 66 | 67 | if __name__ == "__main__": 68 | min_side = 512 69 | main(source_root = "/media/pc/6T/jasonjzhao/data/MS-Celeb-1M/database/base") -------------------------------------------------------------------------------- /align/first_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import math 4 | from PIL import Image 5 | import numpy as np 6 | from .box_utils import nms, _preprocess 7 | 8 | 9 | def run_first_stage(image, net, scale, threshold): 10 | """Run P-Net, generate bounding boxes, and do NMS. 11 | 12 | Arguments: 13 | image: an instance of PIL.Image. 14 | net: an instance of pytorch's nn.Module, P-Net. 15 | scale: a float number, 16 | scale width and height of the image by this number. 17 | threshold: a float number, 18 | threshold on the probability of a face when generating 19 | bounding boxes from predictions of the net. 20 | 21 | Returns: 22 | a float numpy array of shape [n_boxes, 9], 23 | bounding boxes with scores and offsets (4 + 1 + 4). 24 | """ 25 | 26 | # scale the image and convert it to a float array 27 | width, height = image.size 28 | sw, sh = math.ceil(width*scale), math.ceil(height*scale) 29 | img = image.resize((sw, sh), Image.BILINEAR) 30 | img = np.asarray(img, 'float32') 31 | with torch.no_grad(): 32 | img = torch.FloatTensor(_preprocess(img)) 33 | output = net(img) 34 | probs = output[1].data.numpy()[0, 1, :, :] 35 | offsets = output[0].data.numpy() 36 | # probs: probability of a face at each sliding window 37 | # offsets: transformations to true bounding boxes 38 | 39 | boxes = _generate_bboxes(probs, offsets, scale, threshold) 40 | if len(boxes) == 0: 41 | return None 42 | 43 | keep = nms(boxes[:, 0:5], overlap_threshold = 0.5) 44 | return boxes[keep] 45 | 46 | 47 | def _generate_bboxes(probs, offsets, scale, threshold): 48 | """Generate bounding boxes at places 49 | where there is probably a face. 50 | 51 | Arguments: 52 | probs: a float numpy array of shape [n, m]. 53 | offsets: a float numpy array of shape [1, 4, n, m]. 54 | scale: a float number, 55 | width and height of the image were scaled by this number. 56 | threshold: a float number. 
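Note: each output cell of P-Net corresponds to a 12x12 window (cell_size) taken with stride 2 in the scaled image, so the returned corners are computed as (stride * index + 1) / scale and (stride * index + 1 + cell_size) / scale to map back to original-image coordinates.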
57 | 58 | Returns: 59 | a float numpy array of shape [n_boxes, 9] 60 | """ 61 | 62 | # applying P-Net is equivalent, in some sense, to 63 | # moving 12x12 window with stride 2 64 | stride = 2 65 | cell_size = 12 66 | 67 | # indices of boxes where there is probably a face 68 | inds = np.where(probs > threshold) 69 | 70 | if inds[0].size == 0: 71 | return np.array([]) 72 | 73 | # transformations of bounding boxes 74 | tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)] 75 | # they are defined as: 76 | # w = x2 - x1 + 1 77 | # h = y2 - y1 + 1 78 | # x1_true = x1 + tx1*w 79 | # x2_true = x2 + tx2*w 80 | # y1_true = y1 + ty1*h 81 | # y2_true = y2 + ty2*h 82 | 83 | offsets = np.array([tx1, ty1, tx2, ty2]) 84 | score = probs[inds[0], inds[1]] 85 | 86 | # P-Net is applied to scaled images 87 | # so we need to rescale bounding boxes back 88 | bounding_boxes = np.vstack([ 89 | np.round((stride*inds[1] + 1.0)/scale), 90 | np.round((stride*inds[0] + 1.0)/scale), 91 | np.round((stride*inds[1] + 1.0 + cell_size)/scale), 92 | np.round((stride*inds[0] + 1.0 + cell_size)/scale), 93 | score, offsets 94 | ]) 95 | # why one is added? 96 | 97 | return bounding_boxes.T -------------------------------------------------------------------------------- /align/get_nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | import numpy as np 6 | 7 | 8 | class Flatten(nn.Module): 9 | 10 | def __init__(self): 11 | super(Flatten, self).__init__() 12 | 13 | def forward(self, x): 14 | """ 15 | Arguments: 16 | x: a float tensor with shape [batch_size, c, h, w]. 17 | Returns: 18 | a float tensor with shape [batch_size, c*h*w]. 19 | """ 20 | 21 | # without this pretrained model isn't working 22 | x = x.transpose(3, 2).contiguous() 23 | 24 | return x.view(x.size(0), -1) 25 | 26 | 27 | class PNet(nn.Module): 28 | 29 | def __init__(self): 30 | 31 | super(PNet, self).__init__() 32 | 33 | # suppose we have input with size HxW, then 34 | # after first layer: H - 2, 35 | # after pool: ceil((H - 2)/2), 36 | # after second conv: ceil((H - 2)/2) - 2, 37 | # after last conv: ceil((H - 2)/2) - 4, 38 | # and the same for W 39 | 40 | self.features = nn.Sequential(OrderedDict([ 41 | ('conv1', nn.Conv2d(3, 10, 3, 1)), 42 | ('prelu1', nn.PReLU(10)), 43 | ('pool1', nn.MaxPool2d(2, 2, ceil_mode = True)), 44 | 45 | ('conv2', nn.Conv2d(10, 16, 3, 1)), 46 | ('prelu2', nn.PReLU(16)), 47 | 48 | ('conv3', nn.Conv2d(16, 32, 3, 1)), 49 | ('prelu3', nn.PReLU(32)) 50 | ])) 51 | 52 | self.conv4_1 = nn.Conv2d(32, 2, 1, 1) 53 | self.conv4_2 = nn.Conv2d(32, 4, 1, 1) 54 | 55 | weights = np.load("./align/pnet.npy", allow_pickle=True)[()] 56 | for n, p in self.named_parameters(): 57 | p.data = torch.FloatTensor(weights[n]) 58 | 59 | def forward(self, x): 60 | """ 61 | Arguments: 62 | x: a float tensor with shape [batch_size, 3, h, w]. 63 | Returns: 64 | b: a float tensor with shape [batch_size, 4, h', w']. 65 | a: a float tensor with shape [batch_size, 2, h', w']. 
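Note: softmax is applied over the channel dimension, so a[:, 1, :, :] is the face-probability map that run_first_stage() thresholds, and b holds the corresponding box-regression offsets.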
66 | """ 67 | x = self.features(x) 68 | a = self.conv4_1(x) 69 | b = self.conv4_2(x) 70 | a = F.softmax(a, dim=1) 71 | return b, a 72 | 73 | 74 | class RNet(nn.Module): 75 | 76 | def __init__(self): 77 | 78 | super(RNet, self).__init__() 79 | 80 | self.features = nn.Sequential(OrderedDict([ 81 | ('conv1', nn.Conv2d(3, 28, 3, 1)), 82 | ('prelu1', nn.PReLU(28)), 83 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 84 | 85 | ('conv2', nn.Conv2d(28, 48, 3, 1)), 86 | ('prelu2', nn.PReLU(48)), 87 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 88 | 89 | ('conv3', nn.Conv2d(48, 64, 2, 1)), 90 | ('prelu3', nn.PReLU(64)), 91 | 92 | ('flatten', Flatten()), 93 | ('conv4', nn.Linear(576, 128)), 94 | ('prelu4', nn.PReLU(128)) 95 | ])) 96 | 97 | self.conv5_1 = nn.Linear(128, 2) 98 | self.conv5_2 = nn.Linear(128, 4) 99 | 100 | weights = np.load("./align/rnet.npy", allow_pickle=True)[()] 101 | for n, p in self.named_parameters(): 102 | p.data = torch.FloatTensor(weights[n]) 103 | 104 | def forward(self, x): 105 | """ 106 | Arguments: 107 | x: a float tensor with shape [batch_size, 3, h, w]. 108 | Returns: 109 | b: a float tensor with shape [batch_size, 4]. 110 | a: a float tensor with shape [batch_size, 2]. 111 | """ 112 | x = self.features(x) 113 | a = self.conv5_1(x) 114 | b = self.conv5_2(x) 115 | a = F.softmax(a, dim=1) 116 | return b, a 117 | 118 | 119 | class ONet(nn.Module): 120 | 121 | def __init__(self): 122 | 123 | super(ONet, self).__init__() 124 | 125 | self.features = nn.Sequential(OrderedDict([ 126 | ('conv1', nn.Conv2d(3, 32, 3, 1)), 127 | ('prelu1', nn.PReLU(32)), 128 | ('pool1', nn.MaxPool2d(3, 2, ceil_mode = True)), 129 | 130 | ('conv2', nn.Conv2d(32, 64, 3, 1)), 131 | ('prelu2', nn.PReLU(64)), 132 | ('pool2', nn.MaxPool2d(3, 2, ceil_mode = True)), 133 | 134 | ('conv3', nn.Conv2d(64, 64, 3, 1)), 135 | ('prelu3', nn.PReLU(64)), 136 | ('pool3', nn.MaxPool2d(2, 2, ceil_mode = True)), 137 | 138 | ('conv4', nn.Conv2d(64, 128, 2, 1)), 139 | ('prelu4', nn.PReLU(128)), 140 | 141 | ('flatten', Flatten()), 142 | ('conv5', nn.Linear(1152, 256)), 143 | ('drop5', nn.Dropout(0.25)), 144 | ('prelu5', nn.PReLU(256)), 145 | ])) 146 | 147 | self.conv6_1 = nn.Linear(256, 2) 148 | self.conv6_2 = nn.Linear(256, 4) 149 | self.conv6_3 = nn.Linear(256, 10) 150 | 151 | weights = np.load("./align/onet.npy", allow_pickle=True)[()] 152 | for n, p in self.named_parameters(): 153 | p.data = torch.FloatTensor(weights[n]) 154 | 155 | def forward(self, x): 156 | """ 157 | Arguments: 158 | x: a float tensor with shape [batch_size, 3, h, w]. 159 | Returns: 160 | c: a float tensor with shape [batch_size, 10]. 161 | b: a float tensor with shape [batch_size, 4]. 162 | a: a float tensor with shape [batch_size, 2]. 
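Note: c holds the 10 landmark values laid out as (x1..x5, y1..y5), normalized to the bounding box; detector.py rescales them with the box width/height and adds the box origin. b are box-regression offsets and a is the softmaxed face probability.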
163 | """ 164 | x = self.features(x) 165 | a = self.conv6_1(x) 166 | b = self.conv6_2(x) 167 | c = self.conv6_3(x) 168 | a = F.softmax(a, dim=1) 169 | return c, b, a -------------------------------------------------------------------------------- /align/matlab_cp2tform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv, norm, lstsq 3 | from numpy.linalg import matrix_rank as rank 4 | 5 | 6 | class MatlabCp2tormException(Exception): 7 | def __str__(self): 8 | return "In File {}:{}".format( 9 | __file__, super.__str__(self)) 10 | 11 | def tformfwd(trans, uv): 12 | """ 13 | Function: 14 | ---------- 15 | apply affine transform 'trans' to uv 16 | 17 | Parameters: 18 | ---------- 19 | @trans: 3x3 np.array 20 | transform matrix 21 | @uv: Kx2 np.array 22 | each row is a pair of coordinates (x, y) 23 | 24 | Returns: 25 | ---------- 26 | @xy: Kx2 np.array 27 | each row is a pair of transformed coordinates (x, y) 28 | """ 29 | uv = np.hstack(( 30 | uv, np.ones((uv.shape[0], 1)) 31 | )) 32 | xy = np.dot(uv, trans) 33 | xy = xy[:, 0:-1] 34 | return xy 35 | 36 | 37 | def tforminv(trans, uv): 38 | """ 39 | Function: 40 | ---------- 41 | apply the inverse of affine transform 'trans' to uv 42 | 43 | Parameters: 44 | ---------- 45 | @trans: 3x3 np.array 46 | transform matrix 47 | @uv: Kx2 np.array 48 | each row is a pair of coordinates (x, y) 49 | 50 | Returns: 51 | ---------- 52 | @xy: Kx2 np.array 53 | each row is a pair of inverse-transformed coordinates (x, y) 54 | """ 55 | Tinv = inv(trans) 56 | xy = tformfwd(Tinv, uv) 57 | return xy 58 | 59 | 60 | def findNonreflectiveSimilarity(uv, xy, options=None): 61 | 62 | options = {'K': 2} 63 | 64 | K = options['K'] 65 | M = xy.shape[0] 66 | x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 67 | y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 68 | # print('--->x, y:\n', x, y 69 | 70 | tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1)))) 71 | tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1)))) 72 | X = np.vstack((tmp1, tmp2)) 73 | # print('--->X.shape: ', X.shape 74 | # print('X:\n', X 75 | 76 | u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector 77 | v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector 78 | U = np.vstack((u, v)) 79 | # print('--->U.shape: ', U.shape 80 | # print('U:\n', U 81 | 82 | # We know that X * r = U 83 | if rank(X) >= 2 * K: 84 | r, _, _, _ = lstsq(X, U) 85 | r = np.squeeze(r) 86 | else: 87 | raise Exception("cp2tform: two Unique Points Req") 88 | 89 | # print('--->r:\n', r 90 | 91 | sc = r[0] 92 | ss = r[1] 93 | tx = r[2] 94 | ty = r[3] 95 | 96 | Tinv = np.array([ 97 | [sc, -ss, 0], 98 | [ss, sc, 0], 99 | [tx, ty, 1] 100 | ]) 101 | 102 | # print('--->Tinv:\n', Tinv 103 | 104 | T = inv(Tinv) 105 | # print('--->T:\n', T 106 | 107 | T[:, 2] = np.array([0, 0, 1]) 108 | 109 | return T, Tinv 110 | 111 | 112 | def findSimilarity(uv, xy, options=None): 113 | 114 | options = {'K': 2} 115 | 116 | # uv = np.array(uv) 117 | # xy = np.array(xy) 118 | 119 | # Solve for trans1 120 | trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options) 121 | 122 | # Solve for trans2 123 | 124 | # manually reflect the xy data across the Y-axis 125 | xyR = xy 126 | xyR[:, 0] = -1 * xyR[:, 0] 127 | 128 | trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options) 129 | 130 | # manually reflect the tform to undo the reflection done on xyR 131 | TreflectY = np.array([ 132 | [-1, 
0, 0], 133 | [0, 1, 0], 134 | [0, 0, 1] 135 | ]) 136 | 137 | trans2 = np.dot(trans2r, TreflectY) 138 | 139 | # Figure out if trans1 or trans2 is better 140 | xy1 = tformfwd(trans1, uv) 141 | norm1 = norm(xy1 - xy) 142 | 143 | xy2 = tformfwd(trans2, uv) 144 | norm2 = norm(xy2 - xy) 145 | 146 | if norm1 <= norm2: 147 | return trans1, trans1_inv 148 | else: 149 | trans2_inv = inv(trans2) 150 | return trans2, trans2_inv 151 | 152 | 153 | def get_similarity_transform(src_pts, dst_pts, reflective = True): 154 | """ 155 | Function: 156 | ---------- 157 | Find Similarity Transform Matrix 'trans': 158 | u = src_pts[:, 0] 159 | v = src_pts[:, 1] 160 | x = dst_pts[:, 0] 161 | y = dst_pts[:, 1] 162 | [x, y, 1] = [u, v, 1] * trans 163 | 164 | Parameters: 165 | ---------- 166 | @src_pts: Kx2 np.array 167 | source points, each row is a pair of coordinates (x, y) 168 | @dst_pts: Kx2 np.array 169 | destination points, each row is a pair of transformed 170 | coordinates (x, y) 171 | @reflective: True or False 172 | if True: 173 | use reflective similarity transform 174 | else: 175 | use non-reflective similarity transform 176 | 177 | Returns: 178 | ---------- 179 | @trans: 3x3 np.array 180 | transform matrix from uv to xy 181 | trans_inv: 3x3 np.array 182 | inverse of trans, transform matrix from xy to uv 183 | """ 184 | 185 | if reflective: 186 | trans, trans_inv = findSimilarity(src_pts, dst_pts) 187 | else: 188 | trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts) 189 | 190 | return trans, trans_inv 191 | 192 | 193 | def cvt_tform_mat_for_cv2(trans): 194 | """ 195 | Function: 196 | ---------- 197 | Convert Transform Matrix 'trans' into 'cv2_trans' which could be 198 | directly used by cv2.warpAffine(): 199 | u = src_pts[:, 0] 200 | v = src_pts[:, 1] 201 | x = dst_pts[:, 0] 202 | y = dst_pts[:, 1] 203 | [x, y].T = cv_trans * [u, v, 1].T 204 | 205 | Parameters: 206 | ---------- 207 | @trans: 3x3 np.array 208 | transform matrix from uv to xy 209 | 210 | Returns: 211 | ---------- 212 | @cv2_trans: 2x3 np.array 213 | transform matrix from src_pts to dst_pts, could be directly used 214 | for cv2.warpAffine() 215 | """ 216 | cv2_trans = trans[:, 0:2].T 217 | 218 | return cv2_trans 219 | 220 | 221 | def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective = True): 222 | """ 223 | Function: 224 | ---------- 225 | Find Similarity Transform Matrix 'cv2_trans' which could be 226 | directly used by cv2.warpAffine(): 227 | u = src_pts[:, 0] 228 | v = src_pts[:, 1] 229 | x = dst_pts[:, 0] 230 | y = dst_pts[:, 1] 231 | [x, y].T = cv_trans * [u, v, 1].T 232 | 233 | Parameters: 234 | ---------- 235 | @src_pts: Kx2 np.array 236 | source points, each row is a pair of coordinates (x, y) 237 | @dst_pts: Kx2 np.array 238 | destination points, each row is a pair of transformed 239 | coordinates (x, y) 240 | reflective: True or False 241 | if True: 242 | use reflective similarity transform 243 | else: 244 | use non-reflective similarity transform 245 | 246 | Returns: 247 | ---------- 248 | @cv2_trans: 2x3 np.array 249 | transform matrix from src_pts to dst_pts, could be directly used 250 | for cv2.warpAffine() 251 | """ 252 | trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective) 253 | cv2_trans = cvt_tform_mat_for_cv2(trans) 254 | 255 | return cv2_trans 256 | 257 | 258 | if __name__ == '__main__': 259 | """ 260 | u = [0, 6, -2] 261 | v = [0, 3, 5] 262 | x = [-1, 0, 4] 263 | y = [-1, -10, 4] 264 | 265 | # In Matlab, run: 266 | # 267 | # uv = [u'; v']; 268 | # xy = [x'; y']; 269 | # 
tform_sim=cp2tform(uv,xy,'similarity'); 270 | # 271 | # trans = tform_sim.tdata.T 272 | # ans = 273 | # -0.0764 -1.6190 0 274 | # 1.6190 -0.0764 0 275 | # -3.2156 0.0290 1.0000 276 | # trans_inv = tform_sim.tdata.Tinv 277 | # ans = 278 | # 279 | # -0.0291 0.6163 0 280 | # -0.6163 -0.0291 0 281 | # -0.0756 1.9826 1.0000 282 | # xy_m=tformfwd(tform_sim, u,v) 283 | # 284 | # xy_m = 285 | # 286 | # -3.2156 0.0290 287 | # 1.1833 -9.9143 288 | # 5.0323 2.8853 289 | # uv_m=tforminv(tform_sim, x,y) 290 | # 291 | # uv_m = 292 | # 293 | # 0.5698 1.3953 294 | # 6.0872 2.2733 295 | # -2.6570 4.3314 296 | """ 297 | u = [0, 6, -2] 298 | v = [0, 3, 5] 299 | x = [-1, 0, 4] 300 | y = [-1, -10, 4] 301 | 302 | uv = np.array((u, v)).T 303 | xy = np.array((x, y)).T 304 | 305 | print("\n--->uv:") 306 | print(uv) 307 | print("\n--->xy:") 308 | print(xy) 309 | 310 | trans, trans_inv = get_similarity_transform(uv, xy) 311 | 312 | print("\n--->trans matrix:") 313 | print(trans) 314 | 315 | print("\n--->trans_inv matrix:") 316 | print(trans_inv) 317 | 318 | print("\n---> apply transform to uv") 319 | print("\nxy_m = uv_augmented * trans") 320 | uv_aug = np.hstack(( 321 | uv, np.ones((uv.shape[0], 1)) 322 | )) 323 | xy_m = np.dot(uv_aug, trans) 324 | print(xy_m) 325 | 326 | print("\nxy_m = tformfwd(trans, uv)") 327 | xy_m = tformfwd(trans, uv) 328 | print(xy_m) 329 | 330 | print("\n---> apply inverse transform to xy") 331 | print("\nuv_m = xy_augmented * trans_inv") 332 | xy_aug = np.hstack(( 333 | xy, np.ones((xy.shape[0], 1)) 334 | )) 335 | uv_m = np.dot(xy_aug, trans_inv) 336 | print(uv_m) 337 | 338 | print("\nuv_m = tformfwd(trans_inv, xy)") 339 | uv_m = tformfwd(trans_inv, xy) 340 | print(uv_m) 341 | 342 | uv_m = tforminv(trans, xy) 343 | print("\nuv_m = tforminv(trans, xy)") 344 | print(uv_m) -------------------------------------------------------------------------------- /align/onet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/onet.npy -------------------------------------------------------------------------------- /align/pnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/pnet.npy -------------------------------------------------------------------------------- /align/rnet.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/align/rnet.npy -------------------------------------------------------------------------------- /align/visualization_utils.py: -------------------------------------------------------------------------------- 1 | from PIL import ImageDraw 2 | 3 | 4 | def show_results(img, bounding_boxes, facial_landmarks = []): 5 | """Draw bounding boxes and facial landmarks. 6 | Arguments: 7 | img: an instance of PIL.Image. 8 | bounding_boxes: a float numpy array of shape [n, 5]. 9 | facial_landmarks: a float numpy array of shape [n, 10]. 10 | Returns: 11 | an instance of PIL.Image. 
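Note: each row of facial_landmarks is laid out as (x1..x5, y1..y5), which is why the drawing loop below pairs p[i] with p[i + 5] when plotting the five landmark points.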
12 | """ 13 | img_copy = img.copy() 14 | draw = ImageDraw.Draw(img_copy) 15 | 16 | for b in bounding_boxes: 17 | draw.rectangle([ 18 | (b[0], b[1]), (b[2], b[3]) 19 | ], outline='blue') 20 | 21 | inx = 0 22 | for p in facial_landmarks: 23 | for i in range(5): 24 | draw.ellipse([ 25 | (p[i] - 1.0, p[i + 5] - 1.0), 26 | (p[i] + 1.0, p[i + 5] + 1.0) 27 | ], fill='red') 28 | 29 | img_copy.show() 30 | 31 | 32 | -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backbone/model_irse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, MaxPool2d, \ 4 | AdaptiveAvgPool2d, Sequential, Module 5 | from collections import namedtuple 6 | 7 | 8 | # Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152'] 9 | 10 | 11 | class Flatten(Module): 12 | def forward(self, input): 13 | return input.view(input.size(0), -1) 14 | 15 | 16 | def l2_norm(input, axis=1): 17 | norm = torch.norm(input, 2, axis, True) 18 | output = torch.div(input, norm) 19 | 20 | return output 21 | 22 | 23 | class SEModule(Module): 24 | def __init__(self, channels, reduction): 25 | super(SEModule, self).__init__() 26 | self.avg_pool = AdaptiveAvgPool2d(1) 27 | self.fc1 = Conv2d( 28 | channels, channels // reduction, kernel_size=1, padding=0, bias=False) 29 | 30 | nn.init.xavier_uniform_(self.fc1.weight.data) 31 | 32 | self.relu = ReLU(inplace=True) 33 | self.fc2 = Conv2d( 34 | channels // reduction, channels, kernel_size=1, padding=0, bias=False) 35 | 36 | self.sigmoid = Sigmoid() 37 | 38 | def forward(self, x): 39 | module_input = x 40 | x = self.avg_pool(x) 41 | x = self.fc1(x) 42 | x = self.relu(x) 43 | x = self.fc2(x) 44 | x = self.sigmoid(x) 45 | 46 | return module_input * x 47 | 48 | 49 | class bottleneck_IR(Module): 50 | def __init__(self, in_channel, depth, stride): 51 | super(bottleneck_IR, self).__init__() 52 | if in_channel == depth: 53 | self.shortcut_layer = MaxPool2d(1, stride) 54 | else: 55 | self.shortcut_layer = Sequential( 56 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth)) 57 | self.res_layer = Sequential( 58 | BatchNorm2d(in_channel), 59 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth), 60 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)) 61 | 62 | def forward(self, x): 63 | shortcut = self.shortcut_layer(x) 64 | res = self.res_layer(x) 65 | 66 | return res + shortcut 67 | 68 | 69 | class bottleneck_IR_SE(Module): 70 | def __init__(self, in_channel, depth, stride): 71 | super(bottleneck_IR_SE, self).__init__() 72 | if in_channel == depth: 73 | self.shortcut_layer = MaxPool2d(1, stride) 74 | else: 75 | self.shortcut_layer = Sequential( 76 | Conv2d(in_channel, depth, (1, 1), stride, bias=False), 77 | BatchNorm2d(depth)) 78 | self.res_layer = Sequential( 79 | BatchNorm2d(in_channel), 80 | Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), 81 | PReLU(depth), 82 | Conv2d(depth, depth, (3, 3), stride, 1, bias=False), 83 | BatchNorm2d(depth), 84 | SEModule(depth, 16) 85 | ) 86 | 87 | def forward(self, x): 88 | shortcut = self.shortcut_layer(x) 89 | res = self.res_layer(x) 90 | 91 | return res + shortcut 92 | 93 | 94 | class 
Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): 95 | '''A named tuple describing a ResNet block.''' 96 | 97 | 98 | def get_block(in_channel, depth, num_units, stride=2): 99 | 100 | return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] 101 | 102 | 103 | def get_blocks(num_layers): 104 | if num_layers == 50: 105 | blocks = [ 106 | get_block(in_channel=64, depth=64, num_units=3), 107 | get_block(in_channel=64, depth=128, num_units=4), 108 | get_block(in_channel=128, depth=256, num_units=14), 109 | get_block(in_channel=256, depth=512, num_units=3) 110 | ] 111 | elif num_layers == 100: 112 | blocks = [ 113 | get_block(in_channel=64, depth=64, num_units=3), 114 | get_block(in_channel=64, depth=128, num_units=13), 115 | get_block(in_channel=128, depth=256, num_units=30), 116 | get_block(in_channel=256, depth=512, num_units=3) 117 | ] 118 | elif num_layers == 152: 119 | blocks = [ 120 | get_block(in_channel=64, depth=64, num_units=3), 121 | get_block(in_channel=64, depth=128, num_units=8), 122 | get_block(in_channel=128, depth=256, num_units=36), 123 | get_block(in_channel=256, depth=512, num_units=3) 124 | ] 125 | 126 | return blocks 127 | 128 | 129 | class Backbone(Module): 130 | def __init__(self, input_size, num_layers, mode='ir'): 131 | super(Backbone, self).__init__() 132 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 133 | assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152" 134 | assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se" 135 | blocks = get_blocks(num_layers) 136 | if mode == 'ir': 137 | unit_module = bottleneck_IR 138 | elif mode == 'ir_se': 139 | unit_module = bottleneck_IR_SE 140 | self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False), 141 | BatchNorm2d(64), 142 | PReLU(64)) 143 | if input_size[0] == 112: 144 | self.output_layer = Sequential(BatchNorm2d(512), 145 | Dropout(), 146 | Flatten(), 147 | Linear(512 * 7 * 7, 512), 148 | BatchNorm1d(512)) 149 | else: 150 | self.output_layer = Sequential(BatchNorm2d(512), 151 | Dropout(), 152 | Flatten(), 153 | Linear(512 * 14 * 14, 512), 154 | BatchNorm1d(512)) 155 | 156 | modules = [] 157 | for block in blocks: 158 | for bottleneck in block: 159 | modules.append( 160 | unit_module(bottleneck.in_channel, 161 | bottleneck.depth, 162 | bottleneck.stride)) 163 | self.body = Sequential(*modules) 164 | 165 | self._initialize_weights() 166 | 167 | def forward(self, x): 168 | x = self.input_layer(x) 169 | x = self.body(x) 170 | x = self.output_layer(x) 171 | 172 | return x 173 | 174 | def _initialize_weights(self): 175 | for m in self.modules(): 176 | if isinstance(m, nn.Conv2d): 177 | nn.init.xavier_uniform_(m.weight.data) 178 | if m.bias is not None: 179 | m.bias.data.zero_() 180 | elif isinstance(m, nn.BatchNorm2d): 181 | m.weight.data.fill_(1) 182 | m.bias.data.zero_() 183 | elif isinstance(m, nn.BatchNorm1d): 184 | m.weight.data.fill_(1) 185 | m.bias.data.zero_() 186 | elif isinstance(m, nn.Linear): 187 | nn.init.xavier_uniform_(m.weight.data) 188 | if m.bias is not None: 189 | m.bias.data.zero_() 190 | 191 | 192 | def IR_50(input_size): 193 | """Constructs a ir-50 model. 194 | """ 195 | model = Backbone(input_size, 50, 'ir') 196 | 197 | return model 198 | 199 | 200 | def IR_101(input_size): 201 | """Constructs a ir-101 model. 
202 | """ 203 | model = Backbone(input_size, 100, 'ir') 204 | 205 | return model 206 | 207 | 208 | def IR_152(input_size): 209 | """Constructs a ir-152 model. 210 | """ 211 | model = Backbone(input_size, 152, 'ir') 212 | 213 | return model 214 | 215 | 216 | def IR_SE_50(input_size): 217 | """Constructs a ir_se-50 model. 218 | """ 219 | model = Backbone(input_size, 50, 'ir_se') 220 | 221 | return model 222 | 223 | 224 | def IR_SE_101(input_size): 225 | """Constructs a ir_se-101 model. 226 | """ 227 | model = Backbone(input_size, 100, 'ir_se') 228 | 229 | return model 230 | 231 | 232 | def IR_SE_152(input_size): 233 | """Constructs a ir_se-152 model. 234 | """ 235 | model = Backbone(input_size, 152, 'ir_se') 236 | 237 | return model 238 | -------------------------------------------------------------------------------- /backbone/model_resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, ReLU, Dropout, MaxPool2d, Sequential, Module 3 | 4 | 5 | # Support: ['ResNet_50', 'ResNet_101', 'ResNet_152'] 6 | 7 | 8 | def conv3x3(in_planes, out_planes, stride = 1): 9 | """3x3 convolution with padding""" 10 | 11 | return Conv2d(in_planes, out_planes, kernel_size = 3, stride = stride, 12 | padding = 1, bias = False) 13 | 14 | 15 | def conv1x1(in_planes, out_planes, stride = 1): 16 | """1x1 convolution""" 17 | 18 | return Conv2d(in_planes, out_planes, kernel_size = 1, stride = stride, bias = False) 19 | 20 | 21 | class BasicBlock(Module): 22 | expansion = 1 23 | 24 | def __init__(self, inplanes, planes, stride = 1, downsample = None): 25 | super(BasicBlock, self).__init__() 26 | self.conv1 = conv3x3(inplanes, planes, stride) 27 | self.bn1 = BatchNorm2d(planes) 28 | self.relu = ReLU(inplace = True) 29 | self.conv2 = conv3x3(planes, planes) 30 | self.bn2 = BatchNorm2d(planes) 31 | self.downsample = downsample 32 | self.stride = stride 33 | 34 | def forward(self, x): 35 | identity = x 36 | 37 | out = self.conv1(x) 38 | out = self.bn1(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv2(out) 42 | out = self.bn2(out) 43 | 44 | if self.downsample is not None: 45 | identity = self.downsample(x) 46 | 47 | out += identity 48 | out = self.relu(out) 49 | 50 | return out 51 | 52 | 53 | class Bottleneck(Module): 54 | expansion = 4 55 | 56 | def __init__(self, inplanes, planes, stride = 1, downsample = None): 57 | super(Bottleneck, self).__init__() 58 | self.conv1 = conv1x1(inplanes, planes) 59 | self.bn1 = BatchNorm2d(planes) 60 | self.conv2 = conv3x3(planes, planes, stride) 61 | self.bn2 = BatchNorm2d(planes) 62 | self.conv3 = conv1x1(planes, planes * self.expansion) 63 | self.bn3 = BatchNorm2d(planes * self.expansion) 64 | self.relu = ReLU(inplace = True) 65 | self.downsample = downsample 66 | self.stride = stride 67 | 68 | def forward(self, x): 69 | identity = x 70 | 71 | out = self.conv1(x) 72 | out = self.bn1(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv2(out) 76 | out = self.bn2(out) 77 | out = self.relu(out) 78 | 79 | out = self.conv3(out) 80 | out = self.bn3(out) 81 | 82 | if self.downsample is not None: 83 | identity = self.downsample(x) 84 | 85 | out += identity 86 | out = self.relu(out) 87 | 88 | return out 89 | 90 | 91 | class ResNet(Module): 92 | 93 | def __init__(self, input_size, block, layers, zero_init_residual = True): 94 | super(ResNet, self).__init__() 95 | assert input_size[0] in [112, 224], "input_size should be [112, 112] or [224, 224]" 96 | 
self.inplanes = 64 97 | self.conv1 = Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3, bias = False) 98 | self.bn1 = BatchNorm2d(64) 99 | self.relu = ReLU(inplace = True) 100 | self.maxpool = MaxPool2d(kernel_size = 3, stride = 2, padding = 1) 101 | self.layer1 = self._make_layer(block, 64, layers[0]) 102 | self.layer2 = self._make_layer(block, 128, layers[1], stride = 2) 103 | self.layer3 = self._make_layer(block, 256, layers[2], stride = 2) 104 | self.layer4 = self._make_layer(block, 512, layers[3], stride = 2) 105 | 106 | self.bn_o1 = BatchNorm2d(2048) 107 | self.dropout = Dropout() 108 | if input_size[0] == 112: 109 | self.fc = Linear(2048 * 4 * 4, 512) 110 | else: 111 | self.fc = Linear(2048 * 8 * 8, 512) 112 | self.bn_o2 = BatchNorm1d(512) 113 | 114 | for m in self.modules(): 115 | if isinstance(m, Conv2d): 116 | nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu') 117 | elif isinstance(m, BatchNorm2d): 118 | nn.init.constant_(m.weight, 1) 119 | nn.init.constant_(m.bias, 0) 120 | 121 | # Zero-initialize the last BN in each residual branch, 122 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 123 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 124 | if zero_init_residual: 125 | for m in self.modules(): 126 | if isinstance(m, Bottleneck): 127 | nn.init.constant_(m.bn3.weight, 0) 128 | elif isinstance(m, BasicBlock): 129 | nn.init.constant_(m.bn2.weight, 0) 130 | 131 | def _make_layer(self, block, planes, blocks, stride = 1): 132 | downsample = None 133 | if stride != 1 or self.inplanes != planes * block.expansion: 134 | downsample = Sequential( 135 | conv1x1(self.inplanes, planes * block.expansion, stride), 136 | BatchNorm2d(planes * block.expansion), 137 | ) 138 | 139 | layers = [] 140 | layers.append(block(self.inplanes, planes, stride, downsample)) 141 | self.inplanes = planes * block.expansion 142 | for _ in range(1, blocks): 143 | layers.append(block(self.inplanes, planes)) 144 | 145 | return Sequential(*layers) 146 | 147 | def forward(self, x): 148 | x = self.conv1(x) 149 | x = self.bn1(x) 150 | x = self.relu(x) 151 | x = self.maxpool(x) 152 | 153 | x = self.layer1(x) 154 | x = self.layer2(x) 155 | x = self.layer3(x) 156 | x = self.layer4(x) 157 | 158 | x = self.bn_o1(x) 159 | x = self.dropout(x) 160 | x = x.view(x.size(0), -1) 161 | x = self.fc(x) 162 | x = self.bn_o2(x) 163 | 164 | return x 165 | 166 | 167 | def ResNet_50(input_size, **kwargs): 168 | """Constructs a ResNet-50 model. 169 | """ 170 | model = ResNet(input_size, Bottleneck, [3, 4, 6, 3], **kwargs) 171 | 172 | return model 173 | 174 | 175 | def ResNet_101(input_size, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | """ 178 | model = ResNet(input_size, Bottleneck, [3, 4, 23, 3], **kwargs) 179 | 180 | return model 181 | 182 | 183 | def ResNet_152(input_size, **kwargs): 184 | """Constructs a ResNet-152 model. 
185 | """ 186 | model = ResNet(input_size, Bottleneck, [3, 8, 36, 3], **kwargs) 187 | 188 | return model 189 | -------------------------------------------------------------------------------- /imgs/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/9.jpg -------------------------------------------------------------------------------- /imgs/align.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/align.jpg -------------------------------------------------------------------------------- /imgs/detect_landmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/detect_landmark.png -------------------------------------------------------------------------------- /imgs/parsing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/parsing.jpg -------------------------------------------------------------------------------- /imgs/parsing_maps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/parsing_maps.png -------------------------------------------------------------------------------- /imgs/person_1/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/17.jpg -------------------------------------------------------------------------------- /imgs/person_1/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/18.jpg -------------------------------------------------------------------------------- /imgs/person_1/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/19.jpg -------------------------------------------------------------------------------- /imgs/person_1/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_1/20.jpg -------------------------------------------------------------------------------- /imgs/person_2/151.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/151.jpg -------------------------------------------------------------------------------- /imgs/person_2/152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/152.jpg -------------------------------------------------------------------------------- /imgs/person_2/153.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/153.jpg -------------------------------------------------------------------------------- /imgs/person_2/154.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/person_2/154.jpg -------------------------------------------------------------------------------- /imgs/single.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/imgs/single.jpg -------------------------------------------------------------------------------- /parsing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zllrunning/facetools/d3d79c14c1baa1e2371b31a1908abdbc7b2a2008/parsing/__init__.py -------------------------------------------------------------------------------- /parsing/face_parsing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import os 5 | import cv2 6 | import torch 7 | import os.path as osp 8 | import numpy as np 9 | from PIL import Image 10 | import torchvision.transforms as transforms 11 | from .model import BiSeNet 12 | 13 | 14 | def vis_parsing_maps(im, parsing_anno, stride=1, show=False, save_im=False, save_path='imgs/'): 15 | 16 | part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], 17 | [255, 0, 85], [255, 0, 170], 18 | [0, 255, 0], [85, 255, 0], [170, 255, 0], 19 | [0, 255, 85], [0, 255, 170], 20 | [0, 0, 255], [85, 0, 255], [170, 0, 255], 21 | [0, 85, 255], [0, 170, 255], 22 | [255, 255, 0], [255, 255, 85], [255, 255, 170], 23 | [255, 0, 255], [255, 85, 255], [255, 170, 255], 24 | [0, 255, 255], [85, 255, 255], [170, 255, 255]] 25 | 26 | im = np.array(im) 27 | vis_im = im.copy().astype(np.uint8) 28 | vis_parsing_anno = parsing_anno.copy().astype(np.uint8) 29 | vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST) 30 | vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255 31 | 32 | num_of_class = np.max(vis_parsing_anno) 33 | 34 | for pi in range(1, num_of_class + 1): 35 | index = np.where(vis_parsing_anno == pi) 36 | vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi] 37 | 38 | vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8) 39 | # print(vis_parsing_anno_color.shape, vis_im.shape) 40 | vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0) 41 | 42 | if show: 43 | cv2.imshow('parsing res', vis_im) 44 | cv2.waitKey(0) 45 | cv2.destroyAllWindows() 46 | 47 | # Save result or not 48 | if save_im: 49 | if not os.path.exists(save_path): 50 | os.makedirs(save_path) 51 | cv2.imwrite(osp.join(save_path, 'parsing_maps.png'), vis_parsing_anno) 52 | cv2.imwrite(osp.join(save_path, 'parsing.jpg'), vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 53 | 54 | # return vis_im 55 | 56 | 57 | def parsing(imgs, cp='checkpoint/face_parsing.pth'): 58 | 59 | n_classes = 19 60 | net = BiSeNet(n_classes=n_classes) 61 | net.cuda() 62 | net.load_state_dict(torch.load(cp)) 63 | net.eval() 64 | 65 | to_tensor = transforms.Compose([ 66 | 
transforms.ToTensor(), 67 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 68 | ]) 69 | 70 | with torch.no_grad(): 71 | if not isinstance(imgs, list): 72 | shape = imgs.size 73 | image = imgs.resize((512, 512), Image.BILINEAR) 74 | img = to_tensor(image) 75 | img = torch.unsqueeze(img, 0) 76 | img = img.cuda() 77 | out = net(img)[0] 78 | parsing_maps = out.squeeze(0).cpu().numpy().argmax(0).astype('float32') 79 | parsing_maps = cv2.resize(parsing_maps, shape, interpolation=cv2.INTER_NEAREST) 80 | return parsing_maps 81 | 82 | else: 83 | parsing_list = [] 84 | for img in imgs: 85 | shape = img.size 86 | image = img.resize((512, 512), Image.BILINEAR) 87 | img = to_tensor(image) 88 | img = torch.unsqueeze(img, 0) 89 | img = img.cuda() 90 | out = net(img)[0] 91 | parsing_maps = out.squeeze(0).cpu().numpy().argmax(0).astype('float32') 92 | parsing_maps = cv2.resize(parsing_maps, shape, interpolation=cv2.INTER_NEAREST) 93 | parsing_list.append(parsing_maps) 94 | return parsing_list 95 | 96 | 97 | -------------------------------------------------------------------------------- /parsing/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from .resnet import Resnet18 9 | 10 | 11 | 12 | class ConvBNReLU(nn.Module): 13 | def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs): 14 | super(ConvBNReLU, self).__init__() 15 | self.conv = nn.Conv2d(in_chan, 16 | out_chan, 17 | kernel_size = ks, 18 | stride = stride, 19 | padding = padding, 20 | bias = False) 21 | self.bn = nn.BatchNorm2d(out_chan) 22 | self.init_weight() 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = F.relu(self.bn(x)) 27 | return x 28 | 29 | def init_weight(self): 30 | for ly in self.children(): 31 | if isinstance(ly, nn.Conv2d): 32 | nn.init.kaiming_normal_(ly.weight, a=1) 33 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 34 | 35 | class BiSeNetOutput(nn.Module): 36 | def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): 37 | super(BiSeNetOutput, self).__init__() 38 | self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) 39 | self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False) 40 | self.init_weight() 41 | 42 | def forward(self, x): 43 | x = self.conv(x) 44 | x = self.conv_out(x) 45 | return x 46 | 47 | def init_weight(self): 48 | for ly in self.children(): 49 | if isinstance(ly, nn.Conv2d): 50 | nn.init.kaiming_normal_(ly.weight, a=1) 51 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 52 | 53 | def get_params(self): 54 | wd_params, nowd_params = [], [] 55 | for name, module in self.named_modules(): 56 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 57 | wd_params.append(module.weight) 58 | if not module.bias is None: 59 | nowd_params.append(module.bias) 60 | elif isinstance(module, nn.BatchNorm2d): 61 | nowd_params += list(module.parameters()) 62 | return wd_params, nowd_params 63 | 64 | 65 | class AttentionRefinementModule(nn.Module): 66 | def __init__(self, in_chan, out_chan, *args, **kwargs): 67 | super(AttentionRefinementModule, self).__init__() 68 | self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) 69 | self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size= 1, bias=False) 70 | self.bn_atten = nn.BatchNorm2d(out_chan) 71 | self.sigmoid_atten = nn.Sigmoid() 72 | self.init_weight() 73 
| 74 | def forward(self, x): 75 | feat = self.conv(x) 76 | atten = F.avg_pool2d(feat, feat.size()[2:]) 77 | atten = self.conv_atten(atten) 78 | atten = self.bn_atten(atten) 79 | atten = self.sigmoid_atten(atten) 80 | out = torch.mul(feat, atten) 81 | return out 82 | 83 | def init_weight(self): 84 | for ly in self.children(): 85 | if isinstance(ly, nn.Conv2d): 86 | nn.init.kaiming_normal_(ly.weight, a=1) 87 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 88 | 89 | 90 | class ContextPath(nn.Module): 91 | def __init__(self, *args, **kwargs): 92 | super(ContextPath, self).__init__() 93 | self.resnet = Resnet18() 94 | self.arm16 = AttentionRefinementModule(256, 128) 95 | self.arm32 = AttentionRefinementModule(512, 128) 96 | self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) 97 | self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) 98 | self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) 99 | 100 | self.init_weight() 101 | 102 | def forward(self, x): 103 | H0, W0 = x.size()[2:] 104 | feat8, feat16, feat32 = self.resnet(x) 105 | H8, W8 = feat8.size()[2:] 106 | H16, W16 = feat16.size()[2:] 107 | H32, W32 = feat32.size()[2:] 108 | 109 | avg = F.avg_pool2d(feat32, feat32.size()[2:]) 110 | avg = self.conv_avg(avg) 111 | avg_up = F.interpolate(avg, (H32, W32), mode='nearest') 112 | 113 | feat32_arm = self.arm32(feat32) 114 | feat32_sum = feat32_arm + avg_up 115 | feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest') 116 | feat32_up = self.conv_head32(feat32_up) 117 | 118 | feat16_arm = self.arm16(feat16) 119 | feat16_sum = feat16_arm + feat32_up 120 | feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest') 121 | feat16_up = self.conv_head16(feat16_up) 122 | 123 | return feat8, feat16_up, feat32_up # x8, x8, x16 124 | 125 | def init_weight(self): 126 | for ly in self.children(): 127 | if isinstance(ly, nn.Conv2d): 128 | nn.init.kaiming_normal_(ly.weight, a=1) 129 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 130 | 131 | def get_params(self): 132 | wd_params, nowd_params = [], [] 133 | for name, module in self.named_modules(): 134 | if isinstance(module, (nn.Linear, nn.Conv2d)): 135 | wd_params.append(module.weight) 136 | if not module.bias is None: 137 | nowd_params.append(module.bias) 138 | elif isinstance(module, nn.BatchNorm2d): 139 | nowd_params += list(module.parameters()) 140 | return wd_params, nowd_params 141 | 142 | 143 | ### This is not used, since I replace this with the resnet feature with the same size 144 | class SpatialPath(nn.Module): 145 | def __init__(self, *args, **kwargs): 146 | super(SpatialPath, self).__init__() 147 | self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) 148 | self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) 149 | self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) 150 | self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) 151 | self.init_weight() 152 | 153 | def forward(self, x): 154 | feat = self.conv1(x) 155 | feat = self.conv2(feat) 156 | feat = self.conv3(feat) 157 | feat = self.conv_out(feat) 158 | return feat 159 | 160 | def init_weight(self): 161 | for ly in self.children(): 162 | if isinstance(ly, nn.Conv2d): 163 | nn.init.kaiming_normal_(ly.weight, a=1) 164 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 165 | 166 | def get_params(self): 167 | wd_params, nowd_params = [], [] 168 | for name, module in self.named_modules(): 169 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 170 | wd_params.append(module.weight) 171 | 
if not module.bias is None: 172 | nowd_params.append(module.bias) 173 | elif isinstance(module, nn.BatchNorm2d): 174 | nowd_params += list(module.parameters()) 175 | return wd_params, nowd_params 176 | 177 | 178 | class FeatureFusionModule(nn.Module): 179 | def __init__(self, in_chan, out_chan, *args, **kwargs): 180 | super(FeatureFusionModule, self).__init__() 181 | self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0) 182 | self.conv1 = nn.Conv2d(out_chan, 183 | out_chan//4, 184 | kernel_size = 1, 185 | stride = 1, 186 | padding = 0, 187 | bias = False) 188 | self.conv2 = nn.Conv2d(out_chan//4, 189 | out_chan, 190 | kernel_size = 1, 191 | stride = 1, 192 | padding = 0, 193 | bias = False) 194 | self.relu = nn.ReLU(inplace=True) 195 | self.sigmoid = nn.Sigmoid() 196 | self.init_weight() 197 | 198 | def forward(self, fsp, fcp): 199 | fcat = torch.cat([fsp, fcp], dim=1) 200 | feat = self.convblk(fcat) 201 | atten = F.avg_pool2d(feat, feat.size()[2:]) 202 | atten = self.conv1(atten) 203 | atten = self.relu(atten) 204 | atten = self.conv2(atten) 205 | atten = self.sigmoid(atten) 206 | feat_atten = torch.mul(feat, atten) 207 | feat_out = feat_atten + feat 208 | return feat_out 209 | 210 | def init_weight(self): 211 | for ly in self.children(): 212 | if isinstance(ly, nn.Conv2d): 213 | nn.init.kaiming_normal_(ly.weight, a=1) 214 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 215 | 216 | def get_params(self): 217 | wd_params, nowd_params = [], [] 218 | for name, module in self.named_modules(): 219 | if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d): 220 | wd_params.append(module.weight) 221 | if not module.bias is None: 222 | nowd_params.append(module.bias) 223 | elif isinstance(module, nn.BatchNorm2d): 224 | nowd_params += list(module.parameters()) 225 | return wd_params, nowd_params 226 | 227 | 228 | class BiSeNet(nn.Module): 229 | def __init__(self, n_classes, *args, **kwargs): 230 | super(BiSeNet, self).__init__() 231 | self.cp = ContextPath() 232 | ## here self.sp is deleted 233 | self.ffm = FeatureFusionModule(256, 256) 234 | self.conv_out = BiSeNetOutput(256, 256, n_classes) 235 | self.conv_out16 = BiSeNetOutput(128, 64, n_classes) 236 | self.conv_out32 = BiSeNetOutput(128, 64, n_classes) 237 | self.init_weight() 238 | 239 | def forward(self, x): 240 | H, W = x.size()[2:] 241 | feat_res8, feat_cp8, feat_cp16 = self.cp(x) # here return res3b1 feature 242 | feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature 243 | feat_fuse = self.ffm(feat_sp, feat_cp8) 244 | 245 | feat_out = self.conv_out(feat_fuse) 246 | feat_out16 = self.conv_out16(feat_cp8) 247 | feat_out32 = self.conv_out32(feat_cp16) 248 | 249 | feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True) 250 | feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True) 251 | feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True) 252 | return feat_out, feat_out16, feat_out32 253 | 254 | def init_weight(self): 255 | for ly in self.children(): 256 | if isinstance(ly, nn.Conv2d): 257 | nn.init.kaiming_normal_(ly.weight, a=1) 258 | if not ly.bias is None: nn.init.constant_(ly.bias, 0) 259 | 260 | def get_params(self): 261 | wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], [] 262 | for name, child in self.named_children(): 263 | child_wd_params, child_nowd_params = child.get_params() 264 | if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput): 265 | 
lr_mul_wd_params += child_wd_params 266 | lr_mul_nowd_params += child_nowd_params 267 | else: 268 | wd_params += child_wd_params 269 | nowd_params += child_nowd_params 270 | return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params 271 | 272 | 273 | if __name__ == "__main__": 274 | net = BiSeNet(19) 275 | net.cuda() 276 | net.eval() 277 | in_ten = torch.randn(16, 3, 640, 480).cuda() 278 | out, out16, out32 = net(in_ten) 279 | print(out.shape) 280 | 281 | net.get_params() 282 | -------------------------------------------------------------------------------- /parsing/resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.utils.model_zoo as modelzoo 8 | 9 | 10 | resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth' 11 | 12 | 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | """3x3 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | 19 | class BasicBlock(nn.Module): 20 | def __init__(self, in_chan, out_chan, stride=1): 21 | super(BasicBlock, self).__init__() 22 | self.conv1 = conv3x3(in_chan, out_chan, stride) 23 | self.bn1 = nn.BatchNorm2d(out_chan) 24 | self.conv2 = conv3x3(out_chan, out_chan) 25 | self.bn2 = nn.BatchNorm2d(out_chan) 26 | self.relu = nn.ReLU(inplace=True) 27 | self.downsample = None 28 | if in_chan != out_chan or stride != 1: 29 | self.downsample = nn.Sequential( 30 | nn.Conv2d(in_chan, out_chan, 31 | kernel_size=1, stride=stride, bias=False), 32 | nn.BatchNorm2d(out_chan), 33 | ) 34 | 35 | def forward(self, x): 36 | residual = self.conv1(x) 37 | residual = F.relu(self.bn1(residual)) 38 | residual = self.conv2(residual) 39 | residual = self.bn2(residual) 40 | 41 | shortcut = x 42 | if self.downsample is not None: 43 | shortcut = self.downsample(x) 44 | 45 | out = shortcut + residual 46 | out = self.relu(out) 47 | return out 48 | 49 | 50 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 51 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 52 | for i in range(bnum-1): 53 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 54 | return nn.Sequential(*layers) 55 | 56 | 57 | class Resnet18(nn.Module): 58 | def __init__(self): 59 | super(Resnet18, self).__init__() 60 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 61 | bias=False) 62 | self.bn1 = nn.BatchNorm2d(64) 63 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 64 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 65 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 66 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 67 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 68 | self.init_weight() 69 | 70 | def forward(self, x): 71 | x = self.conv1(x) 72 | x = F.relu(self.bn1(x)) 73 | x = self.maxpool(x) 74 | 75 | x = self.layer1(x) 76 | feat8 = self.layer2(x) # 1/8 77 | feat16 = self.layer3(feat8) # 1/16 78 | feat32 = self.layer4(feat16) # 1/32 79 | return feat8, feat16, feat32 80 | 81 | def init_weight(self): 82 | state_dict = modelzoo.load_url(resnet18_url) 83 | self_state_dict = self.state_dict() 84 | for k, v in state_dict.items(): 85 | if 'fc' in k: continue 86 | self_state_dict.update({k: v}) 87 | self.load_state_dict(self_state_dict) 88 | 89 | def get_params(self): 90 | wd_params, nowd_params = [], [] 91 | 
for name, module in self.named_modules(): 92 | if isinstance(module, (nn.Linear, nn.Conv2d)): 93 | wd_params.append(module.weight) 94 | if not module.bias is None: 95 | nowd_params.append(module.bias) 96 | elif isinstance(module, nn.BatchNorm2d): 97 | nowd_params += list(module.parameters()) 98 | return wd_params, nowd_params 99 | 100 | 101 | if __name__ == "__main__": 102 | net = Resnet18() 103 | x = torch.randn(16, 3, 224, 224) 104 | out = net(x) 105 | print(out[0].size()) 106 | print(out[1].size()) 107 | print(out[2].size()) 108 | net.get_params() 109 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /util/extract_feature.py: -------------------------------------------------------------------------------- 1 | # Helper function for extracting features from pre-trained models 2 | import torch 3 | import torchvision.transforms as transforms 4 | import torchvision.datasets as datasets 5 | from PIL import Image 6 | import numpy as np 7 | import os 8 | from .utils import l2_norm, hflip_batch 9 | 10 | 11 | def extract_feature(image, backbone, model_root, input_size=[112, 112], rgb_mean=[0.5, 0.5, 0.5], 12 | rgb_std=[0.5, 0.5, 0.5], device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), tta=True): 13 | 14 | # define transform 15 | transform = transforms.Compose([ 16 | transforms.Resize([int(128 * input_size[0] / 112), int(128 * input_size[0] / 112)]), # smaller side resized 17 | transforms.CenterCrop([input_size[0], input_size[1]]), 18 | transforms.ToTensor(), 19 | transforms.Normalize(mean=rgb_mean, std=rgb_std)]) 20 | 21 | if isinstance(image, list): 22 | image = [transform(i).unsqueeze(0) for i in image] 23 | else: 24 | image = transform(image).unsqueeze(0) 25 | 26 | # load backbone from a checkpoint 27 | # print("Loading Backbone Checkpoint '{}'".format(model_root)) 28 | backbone.load_state_dict(torch.load(model_root)) 29 | backbone.to(device) 30 | 31 | # extract features 32 | backbone.eval() # set to evaluation mode 33 | 34 | with torch.no_grad(): 35 | if isinstance(image, list): 36 | embedding = [] 37 | if tta: 38 | for i in image: 39 | fliped = hflip_batch(i) 40 | embedding.append(backbone(i.to(device)).cpu() + backbone(fliped.to(device)).cpu()) 41 | else: 42 | for i in image: 43 | embedding.append(l2_norm(backbone(i.to(device))).cpu()) 44 | else: 45 | if tta: 46 | fliped = hflip_batch(image) 47 | embedding = backbone(image.to(device)).cpu() + backbone(fliped.to(device)).cpu() 48 | else: 49 | embedding = l2_norm(backbone(image.to(device))).cpu() 50 | 51 | # np.save("features.npy", features) 52 | # features = np.load("features.npy") 53 | 54 | return embedding 55 | 56 | 57 | def extract_feature_folder(data_root, backbone, model_root, input_size=[112, 112], rgb_mean=[0.5, 0.5, 0.5], 58 | rgb_std=[0.5, 0.5, 0.5], embedding_size=512, batch_size=512, 59 | device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), tta=True): 60 | 61 | # define data loader 62 | transform = transforms.Compose([ 63 | transforms.Resize([int(128 * input_size[0] / 112), int(128 * input_size[0] / 112)]), # smaller side resized 64 | transforms.CenterCrop([input_size[0], input_size[1]]), 65 | transforms.ToTensor(), 66 | transforms.Normalize(mean=rgb_mean, std=rgb_std)]) 67 | dataset = datasets.ImageFolder(data_root, transform) 68 | loader = 
torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=0) 69 | 70 | # load backbone from a checkpoint 71 | print("Loading Backbone Checkpoint '{}'".format(model_root)) 72 | backbone.load_state_dict(torch.load(model_root)) 73 | backbone.to(device) 74 | 75 | # extract features 76 | backbone.eval() # set to evaluation mode 77 | idx = 0 78 | features = np.zeros([len(loader.dataset), embedding_size]) 79 | with torch.no_grad(): 80 | iter_loader = iter(loader) 81 | while idx + batch_size <= len(loader.dataset): 82 | batch, _ = iter_loader.next() 83 | if tta: 84 | fliped = hflip_batch(batch) 85 | emb_batch = backbone(batch.to(device)).cpu() + backbone(fliped.to(device)).cpu() 86 | features[idx:idx + batch_size] = l2_norm(emb_batch) 87 | else: 88 | features[idx:idx + batch_size] = l2_norm(backbone(batch.to(device))).cpu() 89 | idx += batch_size 90 | 91 | if idx < len(loader.dataset): 92 | batch, _ = iter_loader.next() 93 | if tta: 94 | fliped = hflip_batch(batch) 95 | emb_batch = backbone(batch.to(device)).cpu() + backbone(fliped.to(device)).cpu() 96 | features[idx:] = l2_norm(emb_batch) 97 | else: 98 | features[idx:] = l2_norm(backbone(batch.to(device)).cpu()) 99 | 100 | # np.save("features.npy", features) 101 | # features = np.load("features.npy") 102 | 103 | return features 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /util/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import torch.nn.functional as F 4 | 5 | from .verification import evaluate 6 | 7 | from datetime import datetime 8 | import matplotlib.pyplot as plt 9 | plt.switch_backend('agg') 10 | import numpy as np 11 | from PIL import Image 12 | import bcolz 13 | import io 14 | import os 15 | 16 | 17 | def get_time(): 18 | return (str(datetime.now())[:-10]).replace(' ', '-').replace(':', '-') 19 | 20 | 21 | def l2_norm(input, axis = 1): 22 | norm = torch.norm(input, 2, axis, True) 23 | output = torch.div(input, norm) 24 | 25 | return output 26 | 27 | 28 | def de_preprocess(tensor): 29 | 30 | return tensor * 0.5 + 0.5 31 | 32 | 33 | hflip = transforms.Compose([ 34 | de_preprocess, 35 | transforms.ToPILImage(), 36 | transforms.functional.hflip, 37 | transforms.ToTensor(), 38 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 39 | ]) 40 | 41 | 42 | def hflip_batch(imgs_tensor): 43 | hfliped_imgs = torch.empty_like(imgs_tensor) 44 | for i, img_ten in enumerate(imgs_tensor): 45 | hfliped_imgs[i] = hflip(img_ten) 46 | 47 | return hfliped_imgs 48 | 49 | 50 | ccrop = transforms.Compose([ 51 | de_preprocess, 52 | transforms.ToPILImage(), 53 | transforms.Resize([128, 128]), # smaller side resized 54 | transforms.CenterCrop([112, 112]), 55 | transforms.ToTensor(), 56 | transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 57 | ]) 58 | 59 | 60 | def ccrop_batch(imgs_tensor): 61 | ccropped_imgs = torch.empty_like(imgs_tensor) 62 | for i, img_ten in enumerate(imgs_tensor): 63 | ccropped_imgs[i] = ccrop(img_ten) 64 | 65 | return ccropped_imgs 66 | 67 | 68 | def gen_plot(fpr, tpr): 69 | """Create a pyplot plot and save to buffer.""" 70 | plt.figure() 71 | plt.xlabel("FPR", fontsize = 14) 72 | plt.ylabel("TPR", fontsize = 14) 73 | plt.title("ROC Curve", fontsize = 14) 74 | plot = plt.plot(fpr, tpr, linewidth = 2) 75 | buf = io.BytesIO() 76 | plt.savefig(buf, format = 'jpeg') 77 | buf.seek(0) 78 | plt.close() 79 | 80 | return buf 81 
| 82 | 83 | def perform_val(multi_gpu, device, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True): 84 | if multi_gpu: 85 | backbone = backbone.module # unpackage model from DataParallel 86 | backbone = backbone.to(device) 87 | else: 88 | backbone = backbone.to(device) 89 | backbone.eval() # switch to evaluation mode 90 | 91 | idx = 0 92 | embeddings = np.zeros([len(carray), embedding_size]) 93 | with torch.no_grad(): 94 | while idx + batch_size <= len(carray): 95 | batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :]) 96 | if tta: 97 | ccropped = ccrop_batch(batch) 98 | fliped = hflip_batch(ccropped) 99 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 100 | embeddings[idx:idx + batch_size] = l2_norm(emb_batch) 101 | else: 102 | ccropped = ccrop_batch(batch) 103 | embeddings[idx:idx + batch_size] = l2_norm(backbone(ccropped.to(device))).cpu() 104 | idx += batch_size 105 | if idx < len(carray): 106 | batch = torch.tensor(carray[idx:]) 107 | if tta: 108 | ccropped = ccrop_batch(batch) 109 | fliped = hflip_batch(ccropped) 110 | emb_batch = backbone(ccropped.to(device)).cpu() + backbone(fliped.to(device)).cpu() 111 | embeddings[idx:] = l2_norm(emb_batch) 112 | else: 113 | ccropped = ccrop_batch(batch) 114 | embeddings[idx:] = l2_norm(backbone(ccropped.to(device))).cpu() 115 | 116 | tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds) 117 | buf = gen_plot(fpr, tpr) 118 | roc_curve = Image.open(buf) 119 | roc_curve_tensor = transforms.ToTensor()(roc_curve) 120 | 121 | return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor 122 | 123 | 124 | class AverageMeter(object): 125 | """Computes and stores the average and current value""" 126 | def __init__(self): 127 | self.reset() 128 | 129 | def reset(self): 130 | self.val = 0 131 | self.avg = 0 132 | self.sum = 0 133 | self.count = 0 134 | 135 | def update(self, val, n = 1): 136 | self.val = val 137 | self.sum += val * n 138 | self.count += n 139 | self.avg = self.sum / self.count 140 | 141 | 142 | def accuracy(output, target, topk=(1,)): 143 | """Computes the precision@k for the specified values of k""" 144 | maxk = max(topk) 145 | batch_size = target.size(0) 146 | 147 | _, pred = output.topk(maxk, 1, True, True) 148 | pred = pred.t() 149 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 150 | 151 | res = [] 152 | for k in topk: 153 | correct_k = correct[:k].view(-1).float().sum(0) 154 | res.append(correct_k.mul_(100.0 / batch_size)) 155 | 156 | return res 157 | -------------------------------------------------------------------------------- /util/verification.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | import numpy as np 27 | from sklearn.model_selection import KFold 28 | from sklearn.decomposition import PCA 29 | import sklearn 30 | from scipy import interpolate 31 | from scipy.spatial.distance import pdist 32 | 33 | 34 | # Support: ['calculate_roc', 'calculate_accuracy', 'calculate_val', 'calculate_val_far', 'evaluate'] 35 | 36 | 37 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds = 10, pca = 0): 38 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 39 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 40 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 41 | nrof_thresholds = len(thresholds) 42 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 43 | 44 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 45 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 46 | accuracy = np.zeros((nrof_folds)) 47 | best_thresholds = np.zeros((nrof_folds)) 48 | indices = np.arange(nrof_pairs) 49 | # print('pca', pca) 50 | 51 | if pca == 0: 52 | diff = np.subtract(embeddings1, embeddings2) 53 | dist = np.sum(np.square(diff), 1) 54 | # dist = pdist(np.vstack([embeddings1, embeddings2]), 'cosine') 55 | 56 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 57 | # print('train_set', train_set) 58 | # print('test_set', test_set) 59 | if pca > 0: 60 | print("doing pca on", fold_idx) 61 | embed1_train = embeddings1[train_set] 62 | embed2_train = embeddings2[train_set] 63 | _embed_train = np.concatenate((embed1_train, embed2_train), axis = 0) 64 | # print(_embed_train.shape) 65 | pca_model = PCA(n_components = pca) 66 | pca_model.fit(_embed_train) 67 | embed1 = pca_model.transform(embeddings1) 68 | embed2 = pca_model.transform(embeddings2) 69 | embed1 = sklearn.preprocessing.normalize(embed1) 70 | embed2 = sklearn.preprocessing.normalize(embed2) 71 | # print(embed1.shape, embed2.shape) 72 | diff = np.subtract(embed1, embed2) 73 | dist = np.sum(np.square(diff), 1) 74 | 75 | # Find the best threshold for the fold 76 | acc_train = np.zeros((nrof_thresholds)) 77 | for threshold_idx, threshold in enumerate(thresholds): 78 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 79 | best_threshold_index = np.argmax(acc_train) 80 | # print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index]) 81 | best_thresholds[fold_idx] = thresholds[best_threshold_index] 82 | for threshold_idx, threshold in enumerate(thresholds): 83 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 84 | dist[test_set], 85 | actual_issame[ 86 | test_set]) 87 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 88 | 89 | tpr = np.mean(tprs, 0) 90 | fpr = np.mean(fprs, 0) 91 | return tpr, fpr, accuracy, best_thresholds 92 | 93 | 94 | def calculate_accuracy(threshold, dist, actual_issame): 95 | predict_issame = np.less(dist, threshold) 96 | tp = 
np.sum(np.logical_and(predict_issame, actual_issame)) 97 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 98 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 99 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 100 | 101 | tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn) 102 | fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn) 103 | acc = float(tp + tn) / dist.size 104 | return tpr, fpr, acc 105 | 106 | 107 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds = 10): 108 | ''' 109 | Copy from [insightface](https://github.com/deepinsight/insightface) 110 | :param thresholds: 111 | :param embeddings1: 112 | :param embeddings2: 113 | :param actual_issame: 114 | :param far_target: 115 | :param nrof_folds: 116 | :return: 117 | ''' 118 | assert (embeddings1.shape[0] == embeddings2.shape[0]) 119 | assert (embeddings1.shape[1] == embeddings2.shape[1]) 120 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 121 | nrof_thresholds = len(thresholds) 122 | k_fold = KFold(n_splits = nrof_folds, shuffle = False) 123 | 124 | val = np.zeros(nrof_folds) 125 | far = np.zeros(nrof_folds) 126 | 127 | diff = np.subtract(embeddings1, embeddings2) 128 | dist = np.sum(np.square(diff), 1) 129 | indices = np.arange(nrof_pairs) 130 | 131 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 132 | 133 | # Find the threshold that gives FAR = far_target 134 | far_train = np.zeros(nrof_thresholds) 135 | for threshold_idx, threshold in enumerate(thresholds): 136 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 137 | if np.max(far_train) >= far_target: 138 | f = interpolate.interp1d(far_train, thresholds, kind = 'slinear') 139 | threshold = f(far_target) 140 | else: 141 | threshold = 0.0 142 | 143 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 144 | 145 | val_mean = np.mean(val) 146 | far_mean = np.mean(far) 147 | val_std = np.std(val) 148 | return val_mean, val_std, far_mean 149 | 150 | 151 | def calculate_val_far(threshold, dist, actual_issame): 152 | predict_issame = np.less(dist, threshold) 153 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 154 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 155 | n_same = np.sum(actual_issame) 156 | n_diff = np.sum(np.logical_not(actual_issame)) 157 | val = float(true_accept) / float(n_same) 158 | far = float(false_accept) / float(n_diff) 159 | return val, far 160 | 161 | 162 | def evaluate(embeddings, actual_issame, nrof_folds = 10, pca = 0): 163 | # Calculate evaluation metrics 164 | thresholds = np.arange(0, 4, 0.01) 165 | embeddings1 = embeddings[0::2] 166 | embeddings2 = embeddings[1::2] 167 | tpr, fpr, accuracy, best_thresholds = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), nrof_folds = nrof_folds, pca = pca) 168 | # thresholds = np.arange(0, 4, 0.001) 169 | # val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2, 170 | # np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) 171 | # return tpr, fpr, accuracy, best_thresholds, val, val_std, far 172 | return tpr, fpr, accuracy, best_thresholds 173 | --------------------------------------------------------------------------------
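The `evaluate` helper in `util/verification.py` above follows the LFW-style protocol: embeddings are passed as interleaved pairs (`embeddings[0::2]` vs `embeddings[1::2]`), squared-L2 distances are thresholded on a 0–4 grid, and a 10-fold split picks the best threshold on each training fold and scores the held-out fold. Below is a minimal sketch of how it can be called; the random embeddings, pair count and labels are placeholder assumptions for illustration only, and in real use the embeddings would come from a face-recognition backbone.

```python
# Minimal sketch (not part of the repo) of driving util/verification.py's
# k-fold verification. The embedding values are random placeholders.
import numpy as np
from util.verification import evaluate

rng = np.random.RandomState(0)
n_pairs = 20                              # hypothetical: 10 "same" + 10 "different" pairs
embeddings = rng.randn(2 * n_pairs, 512)  # pairs stored interleaved:
                                          # embeddings[0::2] vs embeddings[1::2]
# Unit-normalize so the squared-L2 distance lies in [0, 4], matching the
# threshold grid np.arange(0, 4, 0.01) used inside calculate_roc.
embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
issame = np.array([True] * 10 + [False] * 10)  # one ground-truth label per pair

# 10-fold protocol: best threshold chosen on each training split,
# accuracy reported on the held-out split.
tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds=10)
print('mean accuracy: %.4f' % accuracy.mean())
print('mean best threshold: %.2f' % best_thresholds.mean())
```

`accuracy` and `best_thresholds` come back as one value per fold, which is why `perform_val` in `util/utils.py` summarizes them with `accuracy.mean()` and `best_thresholds.mean()`.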