├── 1_001.jpg ├── 1_002.jpg ├── 9.jpg ├── FaceRecognition.py ├── README.md ├── RetinafaceConvertTest.py ├── data ├── FDDB │ └── img_list.txt ├── __init__.py ├── config.py ├── data_augment.py ├── wider_face.py └── widerface ├── dataset.txt ├── dataset_retinaface.txt ├── encoding.py ├── face.caffemodel ├── face.prototxt ├── face_dataset ├── obama_1.jpg ├── 刘德华1_1.jpeg ├── 刘德华1_2.jpeg ├── 张学友2_1.jpg ├── 张学友3_1.jpg ├── 张学友_1.jpg ├── 郭富城2_2.jpg ├── 郭富城_1.jpg ├── 黎明1_1.jpg └── 黎明2_2.jpg ├── img ├── 4.jpeg ├── 4_2.jpg ├── 4_3.jpg ├── 4_4.jpg ├── 5.jpg ├── 6.jpeg ├── ldh.jpg ├── lm.jpg ├── obama.jpg ├── zhangxueyou.jpg └── zhangxueyou4.jpg ├── layers ├── __init__.py ├── functions │ └── prior_box.py └── modules │ ├── __init__.py │ └── multibox_loss.py ├── main.py ├── mobilefacenetConvert.py ├── model_data ├── face_encoding.npy └── names.npy ├── predict.py ├── resizeImgTest.py ├── retinaface.rknn ├── retinaface_quant.rknn ├── rtspPredict.py ├── rtspdec.py ├── test.py ├── utils ├── __init__.py ├── box_utils.py ├── nms │ ├── __init__.py │ └── py_cpu_nms.py └── timer.py └── weights ├── ArcFace.onnx ├── facenet_mobilenet_all.h5 ├── mobilefacenet.caffemodel ├── mobilefacenet.onnx ├── mobilefacenet.prototxt ├── mobilefacenet2.caffemodel ├── mobilefacenet2.onnx ├── mobilefacenet2.prototxt └── retinaface.onnx /1_001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/1_001.jpg -------------------------------------------------------------------------------- /1_002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/1_002.jpg -------------------------------------------------------------------------------- /9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/9.jpg -------------------------------------------------------------------------------- /FaceRecognition.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | # import urllib.request 5 | # from matplotlib import gridspec 6 | # from matplotlib import pyplot as plt 7 | # from PIL import Image 8 | # from tensorflow.python.platform import gfile 9 | from rknn.api import RKNN 10 | from layers.functions.prior_box import PriorBox 11 | from utils.nms.py_cpu_nms import py_cpu_nms 12 | import cv2 13 | from utils.box_utils import decode, decode_landm 14 | import time 15 | import argparse 16 | import torch 17 | from tqdm import tqdm 18 | import matplotlib.pyplot as plt 19 | import numpy as np 20 | from PIL import Image, ImageDraw, ImageFont 21 | from sklearn import preprocessing 22 | from scipy.spatial.distance import pdist, squareform 23 | 24 | def show_config(**kwargs): 25 | print('Configurations:') 26 | print('-' * 70) 27 | print('|%25s | %40s|' % ('keys', 'values')) 28 | print('-' * 70) 29 | for key, value in kwargs.items(): 30 | print('|%25s | %40s|' % (str(key), str(value))) 31 | print('-' * 70) 32 | #---------------------------------------------------# 33 | # 对输入图像进行resize 34 | #---------------------------------------------------# 35 | def letterbox_image(image, size): 36 | ih, iw, _ = np.shape(image) 37 | w, h = size 38 | scale = min(w/iw, h/ih) 39 | 
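    # Uniform-scale resize: compute a single scale factor, resize, then paste the
    # resized image centred on a 255-filled canvas of the target size, so the
    # aspect ratio is preserved and the border is padded.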
nw = int(iw*scale) 40 | nh = int(ih*scale) 41 | 42 | image = cv2.resize(image, (nw, nh)) 43 | new_image = np.ones([size[1], size[0], 3],np.uint8) * 255 44 | new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image 45 | return new_image 46 | 47 | 48 | # -----------------------------------------------------------------# 49 | # 将输出调整为相对于原图的大小 50 | # -----------------------------------------------------------------# 51 | def retinaface_correct_boxes(result, input_shape, image_shape): 52 | new_shape = image_shape * np.min(input_shape / image_shape) 53 | 54 | offset = (input_shape - new_shape) / 2. / input_shape 55 | scale = input_shape / new_shape 56 | 57 | scale_for_boxs = [scale[1], scale[0], scale[1], scale[0]] 58 | scale_for_landmarks = [scale[1], scale[0], scale[1], scale[0], scale[1], scale[0], scale[1], scale[0], scale[1], 59 | scale[0]] 60 | 61 | offset_for_boxs = [offset[1], offset[0], offset[1], offset[0]] 62 | offset_for_landmarks = [offset[1], offset[0], offset[1], offset[0], offset[1], offset[0], offset[1], offset[0], 63 | offset[1], offset[0]] 64 | 65 | result[:, :4] = (result[:, :4] - np.array(offset_for_boxs)) * np.array(scale_for_boxs) 66 | result[:, 5:] = (result[:, 5:] - np.array(offset_for_landmarks)) * np.array(scale_for_landmarks) 67 | 68 | return result 69 | 70 | 71 | #---------------------------------# 72 | # 计算人脸距离 73 | #---------------------------------# 74 | def face_distance(face_encodings, face_to_compare): 75 | if len(face_encodings) == 0: 76 | return np.empty((0)) 77 | # 已知所有人脸的特征向量和当前人脸的特征向量的欧氏距离 78 | cosSim = [] 79 | for i , face_encode in enumerate(face_encodings): 80 | cosSim.append(pdist(np.vstack([face_encode, face_to_compare]), 'cosine')[0]) 81 | # i+=1 82 | cosSim = np.array(cosSim) 83 | 84 | return cosSim 85 | # return np.linalg.norm(face_encodings - face_to_compare, axis=1) 86 | 87 | #---------------------------------# 88 | # 比较人脸 89 | #---------------------------------# 90 | def compare_faces(known_face_encodings, face_encoding_to_check, tolerance=1): 91 | # (n) 92 | dis = face_distance(known_face_encodings, face_encoding_to_check) 93 | # for i in range(dis.size): 94 | # print("dis "+dis[i]) 95 | print("dis ",format(dis)) 96 | 97 | return list(dis <= tolerance), dis 98 | 99 | 100 | # --------------------------------------# 101 | # 写中文需要转成PIL来写。 102 | # --------------------------------------# 103 | def cv2ImgAddText(img, label, left, top, textColor=(255, 255, 255)): 104 | img = Image.fromarray(np.uint8(img)) 105 | # ---------------# 106 | # 设置字体 107 | # ---------------# 108 | font = ImageFont.truetype(font='model_data/simhei.ttf', size=20) 109 | 110 | draw = ImageDraw.Draw(img) 111 | label = label.encode('utf-8') 112 | draw.text((left, top), str(label, 'UTF-8'), fill=textColor, font=font) 113 | return np.asarray(img) 114 | 115 | 116 | 117 | class Facenet(object): 118 | _defaults = { 119 | 'name': 'mobilenet0.25', 120 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 121 | 'steps': [8, 16, 32], 122 | 'variance': [0.1, 0.2], 123 | 'clip': False, 124 | 'loc_weight': 2.0, 125 | 'gpu_train': True, 126 | 'batch_size': 32, 127 | 'ngpu': 1, 128 | 'epoch': 250, 129 | 'decay1': 190, 130 | 'decay2': 220, 131 | 'image_size': 640, 132 | 'pretrain': True, 133 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 134 | 'in_channel': 32, 135 | 'out_channel': 64, 136 | 137 | 'confidence_threshold' : 0.2,#预先框的阈值 138 | 'nms_threshold' : 0.4,#nms 阈值 139 | 'vis_thres' : 0.8,#人脸置信值 140 | 'retinaface_rknn_model_path' : './retinaface_quant.rknn', 141 | 
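        # These *_quant.rknn model paths point at the quantized models produced by the
        # conversion scripts; the unquantized .rknn paths are kept commented out below
        # for accuracy comparison.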
'mobilefacenet_rknn_model_path': './mobilefacenet_quant.rknn', 142 | # 'retinaface_rknn_model_path': './retinaface.rknn', 143 | # 'mobilefacenet_rknn_model_path': './mobilefacenet.rknn', 144 | 'target' : 'rv1126', 145 | 'device_id' : 'd81352278dd4de31', 146 | # ----------------------------------------------------------------------# 147 | # 是否需要进行图像大小限制。 148 | # 输入图像大小会大幅度地影响FPS,想加快检测速度可以减少input_shape。 149 | # 开启后,会将输入图像的大小限制为input_shape。否则使用原图进行预测。 150 | # keras代码中主干为mobilenet时存在小bug,当输入图像的宽高不为32的倍数 151 | # 会导致检测结果偏差,主干为resnet50不存在此问题。 152 | # 可根据输入图像的大小自行调整input_shape,注意为32的倍数,如[640, 640, 3] 153 | # ----------------------------------------------------------------------# 154 | "retinaface_input_shape": [640, 640, 3], 155 | # ----------------------------------------------------------------------# 156 | # facenet所使用到的输入图片大小 157 | # ----------------------------------------------------------------------# 158 | "facenet_input_shape": [112, 112, 3], 159 | "letterbox_image": False, 160 | "facenet_l2_norm": False, 161 | # ----------------------------------------------------------------------# 162 | # facenet所使用的人脸距离门限 163 | # ----------------------------------------------------------------------# 164 | "facenet_threhold": 1.0 165 | } 166 | 167 | @classmethod 168 | def get_defaults(cls, n): 169 | if n in cls._defaults: 170 | return cls._defaults[n] 171 | else: 172 | return "Unrecognized attribute name '" + n + "'" 173 | 174 | # ---------------------------------------------------# 175 | # 初始化Retinaface+facenet 176 | # ---------------------------------------------------# 177 | def __init__(self, encoding=0,**kwargs): 178 | #更新参数 179 | self.__dict__.update(self._defaults) 180 | for name, value in kwargs.items(): 181 | setattr(self, name, value) 182 | self.generate() 183 | try: 184 | self.known_face_encodings = np.load("model_data/face_encoding.npy".format()) 185 | self.known_face_names = np.load("model_data/names.npy".format()) 186 | except: 187 | if not encoding: 188 | print("载入已有人脸特征失败,请检查model_data下面是否生成了相关的人脸特征文件。") 189 | pass 190 | show_config(**self._defaults) 191 | 192 | def generate(self): 193 | # Create retinaface RKNN object 194 | self.retinaface_rknn = RKNN() 195 | self.mobilefacenet_rknn = RKNN() 196 | print('Loading retinaface_rknn model') 197 | ret = self.retinaface_rknn.load_rknn(self.retinaface_rknn_model_path) 198 | if ret != 0: 199 | print('load retinaface_rknn model failed.') 200 | exit(ret) 201 | print('done') 202 | 203 | print('--> Init retinaface_rknn runtime environment') 204 | ret = self.retinaface_rknn.init_runtime(target=self.target, device_id=self.device_id, rknn2precompile=False) 205 | if ret != 0: 206 | print('Init retinaface_rknn runtime environment failed') 207 | exit(ret) 208 | print('done') 209 | 210 | ret = self.mobilefacenet_rknn.load_rknn(self.mobilefacenet_rknn_model_path) 211 | if ret != 0: 212 | print('load mobilefacenet_rknn model failed.') 213 | exit(ret) 214 | print('done') 215 | 216 | print('--> Init mobilefacenet_rknn runtime environment') 217 | ret = self.mobilefacenet_rknn.init_runtime(target=self.target, device_id=self.device_id, rknn2precompile=False) 218 | if ret != 0: 219 | print('Init mobilefacenet_rknn runtime environment failed') 220 | exit(ret) 221 | print('done') 222 | 223 | #TODO : need to change result with true img size 224 | #现在是都缩放在640x640上实现的,后面需要映射到原始图片中 225 | def detect_one_image(self,image): 226 | 227 | # cv2.imshow("src", image) 228 | # cv2.waitKey() 229 | img = np.asarray(image) 230 | # img = np.array(img, np.uint8) 231 | 232 | 
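        # Detection pipeline (summary): run the 640x640 RGB image through the
        # RetinaFace RKNN model to get loc/conf/landms, decode them against the
        # PriorBox anchors, drop boxes below confidence_threshold, apply NMS with
        # nms_threshold, and return rows of
        # [x1, y1, x2, y2, score, landm_x1, landm_y1, ..., landm_x5, landm_y5].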
print('--> inter') 233 | 234 | # ---------------------------------------------------# 235 | # 计算输入图片的高和宽 236 | # ---------------------------------------------------# 237 | resize = 1 238 | device = torch.device("cpu") 239 | im_height, im_width, _ = np.shape(image) 240 | scale = torch.Tensor([image.shape[1], image.shape[0], image.shape[1], image.shape[0]]) 241 | scale = scale.to(device) 242 | 243 | # ---------------------------------------------------# 244 | # 图片预处理,归一化 在RKNN模型里实现,只需要输入RGB 245 | # ---------------------------------------------------# 246 | resize = 1 247 | # ---------------------------------------------------# 248 | # 将处理完的图片传入Retinaface网络当中进行预测 249 | # ---------------------------------------------------# 250 | loc, conf, landms = self.retinaface_rknn.inference(inputs=[img]) 251 | 252 | img = image.transpose(2, 0, 1) 253 | new_shape = [1, img.shape[0], img.shape[1], img.shape[2]] 254 | img = img.reshape(new_shape) 255 | # ---------------------------------------------------# 256 | # Retinaface网络的解码,最终我们会获得预测框 257 | # 将预测结果进行解码和非极大抑制 258 | # ---------------------------------------------------# 259 | loc = torch.tensor(loc) 260 | conf = torch.tensor(conf) 261 | landms = torch.tensor(landms) 262 | loc = loc.view(loc.shape[1], -1, 4) 263 | conf = conf.view(conf.shape[1], -1, 2) 264 | landms = landms.view(landms.shape[1], -1, 10) 265 | cfg_mnet = { 266 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 267 | 'steps': [8, 16, 32], 268 | 'variance': [0.1, 0.2], 269 | 'clip': False, 270 | 'loc_weight': 2.0, 271 | 'image_size': 640, 272 | } 273 | cfg = cfg_mnet 274 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 275 | priors = priorbox.forward() 276 | prior_data = priors.data 277 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 278 | boxes = boxes * scale / resize 279 | boxes = boxes.cpu().numpy() 280 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 281 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 282 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 283 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 284 | img.shape[3], img.shape[2]]) 285 | scale1 = scale1.to(device) 286 | landms = landms * scale1 / resize 287 | landms = landms.cpu().numpy() 288 | 289 | # ignore low scores 290 | inds = np.where(scores > self.confidence_threshold)[0] 291 | boxes = boxes[inds] 292 | landms = landms[inds] 293 | scores = scores[inds] 294 | 295 | # keep top-K before NMS 296 | # anchor 最大5000个 297 | top_k = 5000 298 | order = scores.argsort()[::-1][:top_k] 299 | boxes = boxes[order] 300 | landms = landms[order] 301 | scores = scores[order] 302 | 303 | # do NMS 304 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 305 | keep = py_cpu_nms(dets, self.nms_threshold) 306 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 307 | dets = dets[keep, :] 308 | landms = landms[keep] 309 | 310 | # keep top-K faster NMS 最大分析750个 311 | keep_top_k = 750 312 | dets = dets[:keep_top_k, :] 313 | landms = landms[:keep_top_k, :] 314 | # x1,y1,x2,y2,score,landm x1,y1,x2,y2...x5,y5 315 | results = np.concatenate((dets, landms), axis=1) 316 | return results 317 | 318 | def encode_face_dataset(self, image_paths, names): 319 | face_encodings = [] 320 | for index, path in enumerate(tqdm(image_paths)): 321 | # ---------------------------------------------------# 322 | # 打开人脸图片 323 | # ---------------------------------------------------# 324 | image = 
np.array(Image.open(path).convert('RGB'), np.uint8) # if not use .convert(‘RGB’) it will be RGBA 325 | # image = cv2.resize(image, (self.retinaface_input_shape[1], self.retinaface_input_shape[0])) 326 | image = letterbox_image(image, (self.retinaface_input_shape[1], self.retinaface_input_shape[0])) 327 | 328 | # ---------------------------------------------------# 329 | # 对输入图像进行一个备份 330 | # ---------------------------------------------------# 331 | old_image = image.copy() # old_image is rgb 332 | # ---------------------------------------------------# 333 | # 计算输入图片的高和宽 334 | # ---------------------------------------------------# 335 | im_height, im_width, _ = np.shape(image) 336 | # ---------------------------------------------------# 337 | # 计算scale,用于将获得的预测框转换成原图的高宽 338 | # ---------------------------------------------------# 339 | scale = [ 340 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0] 341 | ] 342 | scale_for_landmarks = [ 343 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0], 344 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0], 345 | np.shape(image)[1], np.shape(image)[0] 346 | ] 347 | 348 | # image = np.array(Image.open(path), np.float32) 349 | 350 | # image = cv2.imread(path)#这个打不开中文路径或者名字的图片 351 | if self.letterbox_image: 352 | image = letterbox_image(image, [self.retinaface_input_shape[1], self.retinaface_input_shape[0]]) 353 | 354 | # image = cv2.resize(image, (self.retinaface_input_shape[0], self.retinaface_input_shape[1])) 355 | 356 | # image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR) # rknn need to be RGB in and handled means and std in rknn model 357 | 358 | 359 | results = self.detect_one_image(image) 360 | if len(results) <= 0: 361 | print(names[index], ":未检测到人脸") 362 | continue 363 | # ---------------------------------------------------# 364 | # 4人脸框置信度 365 | # :4是框的坐标 366 | # 5:是人脸关键点的坐标 367 | # ---------------------------------------------------# 368 | # 将结果映射到原来图像大小 369 | # results[:, :4] = results[:, :4] / scale_in * scale 370 | # results[:, 5:] = results[:, 5:] / scale_for_landmarks_in * scale_for_landmarks 371 | # ---------------------------------------------------------# 372 | # 如果使用了letterbox_image的话,要把灰条的部分去除掉。 373 | # ---------------------------------------------------------# 374 | # if self.letterbox_image: 375 | # results = retinaface_correct_boxes(results, np.array( 376 | # (self.retinaface_input_shape[0], self.retinaface_input_shape[1])), 377 | # np.array([im_height, im_width])) 378 | 379 | 380 | 381 | faces = 0 382 | # show image 383 | # ---------------------------------------------------# 384 | # 选取最大的人脸框。 385 | # ---------------------------------------------------# 386 | 387 | best_face_location = None 388 | biggest_area = 0 389 | if True: 390 | for b in results: 391 | if b[4] < self.vis_thres: 392 | continue 393 | text = "{:.4f}".format(b[4]) 394 | faces += 1 395 | b = list(map(int, b)) 396 | print(b) 397 | if True:#测试显示结果 398 | tmpImage = old_image.copy() 399 | tmpImage = cv2.cvtColor(tmpImage,cv2.COLOR_RGB2BGR) 400 | cv2.rectangle(tmpImage, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 401 | cx = b[0] 402 | cy = b[1] + 12 403 | cv2.putText(tmpImage, text, (cx, cy), 404 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 405 | 406 | # landms 407 | cv2.circle(tmpImage, (b[5], b[6]), 1, (0, 0, 255), 4) 408 | cv2.circle(tmpImage, (b[7], b[8]), 1, (0, 255, 255), 4) 409 | cv2.circle(tmpImage, (b[9], b[10]), 1, (255, 0, 255), 4) 410 | cv2.circle(tmpImage, (b[11], 
b[12]), 1, (0, 255, 0), 4) 411 | cv2.circle(tmpImage, (b[13], b[14]), 1, (255, 0, 0), 4) 412 | # cv2.imshow("Detect", tmpImage) 413 | # cv2.waitKey(0) 414 | 415 | # ---------------------------------------------------# 416 | # 选取最大的人脸框。 417 | # ---------------------------------------------------# 418 | left, top, right, bottom = b[0:4] 419 | 420 | w = right - left 421 | h = bottom - top 422 | if w * h > biggest_area: 423 | biggest_area = w * h 424 | best_face_location = b 425 | 426 | 427 | if faces == 0: 428 | print(names[index], ":未检测到人脸") 429 | continue 430 | # results = np.array(results) 431 | 432 | 433 | # ---------------------------------------------------# 434 | # 截取图像 old_image (RGB)[h1:h2,w1:w2]=old_image[y1:y2,x1:x2] 435 | # ---------------------------------------------------# 436 | crop_img = old_image[int(best_face_location[1]):int(best_face_location[3]), 437 | int(best_face_location[0]):int(best_face_location[2])] 438 | 439 | landmark = np.reshape(best_face_location[5:], (5, 2)) - np.array( 440 | [int(best_face_location[0]), int(best_face_location[1])]) 441 | # crop_img, _ = Alignment_1(crop_img, landmark) 442 | if True:#self.letterbox_image: 443 | crop_img = np.array( 444 | letterbox_image(np.uint8(crop_img), (self.facenet_input_shape[1], self.facenet_input_shape[0])),np.uint8) 445 | else: 446 | crop_img = cv2.resize(crop_img,(self.facenet_input_shape[1], self.facenet_input_shape[0])) 447 | # cv2.imshow("encFaceView", crop_img) 448 | # cv2.waitKey(0) 449 | # crop_img = np.expand_dims(crop_img, 0) 450 | # ---------------------------------------------------# 451 | # 利用图像算取长度为128的特征向量 452 | # ---------------------------------------------------# 453 | print(crop_img.shape) 454 | if self.facenet_l2_norm == False: 455 | face_encoding = self.mobilefacenet_rknn.inference(data_format='nhwc',inputs=[crop_img])[0][0] 456 | print(face_encoding) 457 | face_encodings.append(face_encoding) 458 | else: 459 | face_encoding = self.mobilefacenet_rknn.inference(data_format='nhwc', inputs=[crop_img]) 460 | face_encoding = preprocessing.normalize(face_encoding[0], norm='l2') 461 | print(face_encoding) 462 | face_encodings.append(face_encoding[0]) 463 | 464 | np.save("model_data/face_encoding.npy".format(), face_encodings) 465 | np.save("model_data/names.npy".format(), names) 466 | 467 | # 检测图片 输入image是RGB格式 468 | # ---------------------------------------------------# 469 | def detect_image(self, image): 470 | # ---------------------------------------------------# 471 | # 对输入图像进行一个备份,后面用于绘图 472 | # ---------------------------------------------------# 473 | # cv2.imshow("Src",image) 474 | 475 | # image = np.asarray(image, np.uint8) 476 | old_image = image.copy() 477 | # ---------------------------------------------------# 478 | # 计算输入图片的高和宽 479 | # ---------------------------------------------------# 480 | im_height, im_width, _ = np.shape(image) 481 | # ---------------------------------------------------# 482 | # 计算scale,用于将获得的预测框转换成原图的高宽 483 | # ---------------------------------------------------# 484 | scale = [ 485 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0] 486 | ] 487 | scale_for_landmarks = [ 488 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0], 489 | np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0], 490 | np.shape(image)[1], np.shape(image)[0] 491 | ] 492 | # ---------------------------------------------------# 493 | # 把图像转换成numpy的形式 494 | # ---------------------------------------------------# 
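        # Note: "letterbox_image" defaults to False in _defaults, so the input here is
        # expected to already be a 640x640 RGB array; if letterboxing is enabled, the
        # gray-bar offset would still need to be removed (see the commented-out
        # retinaface_correct_boxes call further down).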
495 | if self.letterbox_image: 496 | image = letterbox_image(image, [self.retinaface_input_shape[1], self.retinaface_input_shape[0]]) 497 | 498 | # image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR) # rknn need to be RGB in and handled means and std in rknn model 499 | 500 | # ---------------------------------------------------# 501 | # Retinaface检测部分-开始 502 | # ---------------------------------------------------# 503 | results = self.detect_one_image(image) 504 | # ---------------------------------------------------# 505 | # 4人脸框置信度 506 | # :4是框的坐标 507 | # 5:是人脸关键点的坐标 508 | # ---------------------------------------------------# 509 | # 将结果映射到原来图像大小 510 | # results[:, :4] = results[:, :4] / scale_in * scale 511 | # results[:, 5:] = results[:, 5:] / scale_for_landmarks_in * scale_for_landmarks 512 | 513 | # ---------------------------------------------------# 514 | # 如果没有预测框则返回原图 515 | # ---------------------------------------------------# 516 | if len(results) <= 0: 517 | print("未检测到人脸") 518 | return old_image 519 | # ---------------------------------------------------------# 520 | # 如果使用了letterbox_image的话,要把灰条的部分去除掉。 521 | # ---------------------------------------------------------# 522 | # if self.letterbox_image: 523 | # results = retinaface_correct_boxes(results, np.array( 524 | # (self.retinaface_input_shape[0], self.retinaface_input_shape[1])), 525 | # np.array([im_height, im_width])) 526 | 527 | # # ---------------------------------------------------# 528 | # # 4人脸框置信度 529 | # # :4是框的坐标 530 | # # 5:是人脸关键点的坐标 531 | # # ---------------------------------------------------# 532 | # results[:, :4] = results[:, :4] * scale 533 | # results[:, 5:] = results[:, 5:] * scale_for_landmarks 534 | 535 | #results = np.array(results) 536 | 537 | # ---------------------------------------------------# 538 | # Retinaface检测部分-结束 539 | # ---------------------------------------------------# 540 | 541 | # -----------------------------------------------# 542 | # Facenet编码部分-开始 543 | # -----------------------------------------------# 544 | face_encodings = [] 545 | idxCount = 0 546 | plt.figure() 547 | detectResult = [] 548 | 549 | tmpImage = old_image.copy() 550 | for result in results: 551 | # ----------------------# 552 | # 图像截取,人脸矫正 553 | # ----------------------# 554 | if result[4] < self.vis_thres: 555 | continue 556 | detectResult.append(result) 557 | result = np.maximum(result, 0) 558 | 559 | crop_img = np.array(old_image)[int(result[1]):int(result[3]), int(result[0]):int(result[2])] 560 | landmark = np.reshape(result[5:], (5, 2)) - np.array([int(result[0]), int(result[1])]) 561 | # crop_img, _ = Alignment_1(crop_img, landmark) 562 | # cv2.imwrite("out.jpg", crop_img) 563 | idxCount = idxCount + 1 564 | pltShowNp = np.array(crop_img, np.uint8(Image.BILINEAR)) 565 | pltShowImg = Image.fromarray(pltShowNp) 566 | 567 | plt.subplot(len(results), len(results), idxCount) 568 | # pltShowImg = cv2.cvtColor(pltShowImg, cv2.COLOR_BGR2RGB) # 改变显示的颜色 569 | # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 570 | plt.imshow(pltShowImg) 571 | 572 | text = "{:.4f}".format(result[4]) 573 | b = list(map(int, result)) 574 | print(b) 575 | if False: # 测试显示结果 576 | tmpImage = cv2.cvtColor(tmpImage, cv2.COLOR_RGB2BGR) 577 | cv2.rectangle(tmpImage, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 578 | cx = b[0] 579 | cy = b[1] + 12 580 | cv2.putText(tmpImage, text, (cx, cy), 581 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 582 | 583 | # landms 584 | cv2.circle(tmpImage, (b[5], b[6]), 1, (0, 0, 255), 4) 585 | cv2.circle(tmpImage, 
(b[7], b[8]), 1, (0, 255, 255), 4) 586 | cv2.circle(tmpImage, (b[9], b[10]), 1, (255, 0, 255), 4) 587 | cv2.circle(tmpImage, (b[11], b[12]), 1, (0, 255, 0), 4) 588 | cv2.circle(tmpImage, (b[13], b[14]), 1, (255, 0, 0), 4) 589 | 590 | # ----------------------# 591 | # 人脸编码 592 | # ----------------------# 593 | 594 | # -----------------------------------------------# 595 | # 不失真的resize,然后进行归一化 596 | # -----------------------------------------------# 597 | crop_img = np.array( 598 | letterbox_image(np.uint8(crop_img), (self.facenet_input_shape[1], self.facenet_input_shape[0])),np.uint8) 599 | # crop_img = np.expand_dims(crop_img, 0) 600 | # cv2.imshow("faceEnc", crop_img) 601 | # cv2.waitKey(0) 602 | print(crop_img.shape) 603 | # -----------------------------------------------# 604 | # 利用图像算取长度为128的特征向量 605 | # -----------------------------------------------# 606 | if self.facenet_l2_norm == False: 607 | face_encoding = self.mobilefacenet_rknn.inference(data_format='nhwc', inputs=[crop_img])[0][0] 608 | print(face_encoding) 609 | face_encodings.append(face_encoding) 610 | else: 611 | face_encoding = self.mobilefacenet_rknn.inference(data_format='nhwc', inputs=[crop_img]) 612 | face_encoding = preprocessing.normalize(face_encoding[0], norm='l2') 613 | print(face_encoding) 614 | face_encodings.append(face_encoding[0]) 615 | # -----------------------------------------------# 616 | # Facenet编码部分-结束 617 | # -----------------------------------------------# 618 | # plt.show() 619 | # cv2.imshow("faceDetect",tmpImage) 620 | # cv2.waitKey() 621 | # -----------------------------------------------# 622 | # 人脸特征比对-开始 623 | # -----------------------------------------------# 624 | face_names = [] 625 | face_dist = [] 626 | for face_encoding in face_encodings: 627 | # -----------------------------------------------------# 628 | # 取出一张脸并与数据库中所有的人脸进行对比,计算得分 629 | # -----------------------------------------------------# 630 | matches, face_distances = compare_faces(self.known_face_encodings, face_encoding, 631 | tolerance=self.facenet_threhold) 632 | name = "Unknown" 633 | 634 | # -----------------------------------------------------# 635 | # 找到已知最贴近当前人脸的人脸序号 636 | # -----------------------------------------------------# 637 | best_match_index = np.argmin(face_distances) 638 | if matches[best_match_index]: 639 | name = self.known_face_names[best_match_index] 640 | face_dist.append(face_distances[best_match_index]) 641 | else: 642 | face_dist.append(0) 643 | face_names.append(name) 644 | # -----------------------------------------------# 645 | # 人脸特征比对-结束 646 | # -----------------------------------------------# 647 | 648 | for i, b in enumerate(detectResult): 649 | text_retinaface = "{:.4f}".format(b[4]) 650 | text = "{:.4f}".format(face_dist[i]) 651 | b = list(map(int, b)) 652 | # ---------------------------------------------------# 653 | # b[0]-b[3]为人脸框的坐标,b[4]为得分 654 | # ---------------------------------------------------# 655 | 656 | old_image = cv2.cvtColor(np.asarray(old_image), cv2.COLOR_RGB2BGR) 657 | cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 658 | cx = b[0] 659 | cy = b[1] + 12 660 | cv2.putText(old_image, text, (cx, cy), 661 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 662 | cx = b[0] 663 | cy = b[1] + 32 664 | cv2.putText(old_image, text_retinaface, (cx, cy), 665 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 666 | 667 | # ---------------------------------------------------# 668 | # b[5]-b[14]为人脸关键点的坐标 669 | # ---------------------------------------------------# 670 | 
cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4) 671 | cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4) 672 | cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4) 673 | cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4) 674 | cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4) 675 | 676 | name = face_names[i] 677 | # font = cv2.FONT_HERSHEY_SIMPLEX 678 | # cv2.putText(old_image, name, (b[0] , b[3] - 15), font, 0.75, (255, 255, 255), 2) 679 | # --------------------------------------------------------------# 680 | # cv2 cannot draw Chinese text; converting to PIL here makes it possible, but detection speed drops somewhat. 681 | # If Chinese labels are not required, switch back to cv2 and display English only. 682 | # --------------------------------------------------------------# 683 | old_image = cv2ImgAddText(old_image, name, b[0] + 5, b[3] - 25) 684 | return old_image 685 | 686 | 687 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RV1126 Face Detection Project 2 | 3 | First, copy the corresponding trained retinaface and mobilefaceNet models into weights/. 4 | Make sure NTP is working properly; the USB ADB connection mode is used: 5 | (rknn) python -m rknn.bin.list_devices 6 | 7 | # Quantizing Retinaface 8 | 9 | python RetinafaceConvertTest.py 10 | 11 | Remember to adjust the model path and the input/output layers. 12 | The code uses 13 | ret = rknn.load_onnx(model='./weights/retinaface.onnx', 14 | inputs='input0', 15 | input_size_list=[[3,640,640]], 16 | outputs=['output0','590','589']) 17 | The input/output layers can be inspected with netron; output0, 590 and 589 must correspond to loc, conf and landms respectively. If they are wrong, running the model will report an error. 18 | 19 | First run directly without quantization and check the result, simply to verify that face detection works: 20 | BUILD_QUANT = False 21 | NEED_BUILD_MODEL = True 22 | The quantized build prints the quantization accuracy of every layer; then check again whether face detection still works: 23 | BUILD_QUANT = True 24 | NEED_BUILD_MODEL = True 25 | 26 | # Quantizing MobilefaceNet 27 | 28 | Modify 29 | 30 | cfg = cfg_facenet_mxnet 31 | 32 | to select the corresponding model configuration. 33 | 34 | First run without quantization and compare the result with the output of the pytorch project: 35 | BUILD_QUANT = False 36 | NEED_BUILD_MODEL = True 37 | The quantized build prints the quantization accuracy of every layer; compare the result with the output of the pytorch project: 38 | BUILD_QUANT = True 39 | NEED_BUILD_MODEL = True 40 | 41 | Testing: 42 | Encode the face database first: 43 | 44 | python encoding.py 45 | 46 | Image prediction: 47 | 48 | python predict.py 49 | 50 | RTSP prediction: 51 | 52 | python rtspPredict.py 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
-------------------------------------------------------------------------------- /RetinafaceConvertTest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | # import urllib.request 7 | # from matplotlib import gridspec 8 | # from matplotlib import pyplot as plt 9 | # from PIL import Image 10 | # from tensorflow.python.platform import gfile 11 | from rknn.api import RKNN 12 | from layers.functions.prior_box import PriorBox 13 | from utils.nms.py_cpu_nms import py_cpu_nms 14 | import cv2 15 | from utils.box_utils import decode, decode_landm 16 | import time 17 | import argparse 18 | import torch 19 | 20 | # os.environ['RKNN_DRAW_DATA_DISTRIBUTE'] = "1" 21 | #os.environ['NN_LAYER_DUMP'] = "1" 22 | 23 | def compute_cos_dis(x, y): 24 | cos_dist = (x * y) / (np.linalg.norm(x) * (np.linalg.norm(y))) 25 | return cos_dist.sum() 26 | 27 | cfg_mnet = { 28 | 'name': 'mobilenet0.25', 29 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 30 | 'steps': [8, 16, 32], 31 | 'variance': [0.1, 0.2], 32 | 'clip': False, 33 | 'loc_weight': 2.0, 34 | 'gpu_train': True, 35 | 'batch_size': 32, 36 | 'ngpu': 1, 37 | 'epoch': 250, 38 | 'decay1': 190, 39 | 'decay2': 220, 40 | 'image_size': 640, 41 | 'pretrain': True, 42 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 43 | 'in_channel': 32, 44 |
'out_channel': 64 45 | } 46 | 47 | #---------------------------------------------------# 48 | # 对输入图像进行resize 49 | #---------------------------------------------------# 50 | def letterbox_image(image, size): 51 | ih, iw, _ = np.shape(image) 52 | w, h = size 53 | scale = min(w/iw, h/ih) 54 | nw = int(iw*scale) 55 | nh = int(ih*scale) 56 | 57 | image = cv2.resize(image, (nw, nh)) 58 | new_image = np.ones([size[1], size[0], 3],np.uint8) * 255 59 | new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image 60 | return new_image 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser(description='Retinaface') 65 | 66 | parser.add_argument('-m', '--trained_model', default='/opt/deeplearning/ONNXToCaffe/model/face.caffemodel', 67 | type=str, help='Trained caffemodel path') 68 | parser.add_argument('--deploy', default='/opt/deeplearning/ONNXToCaffe/model/face.prototxt', 69 | help='Path of deploy file') 70 | parser.add_argument('--img_path', default='../curve/t1.jpg', help='Path of test image') 71 | parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') 72 | parser.add_argument('--confidence_threshold', default=0.2, type=float, help='confidence_threshold') 73 | parser.add_argument('--top_k', default=5000, type=int, help='top_k') 74 | parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold') 75 | parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k') 76 | parser.add_argument('-s', '--save_image', action="store_true", default=True, help='save detection results') 77 | parser.add_argument('--show_image', action="store_true", default=True, help='show detection results') 78 | parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold') 79 | args = parser.parse_args() 80 | 81 | BUILD_QUANT = True 82 | RKNN_MODEL_PATH = './retinaface.rknn' 83 | if BUILD_QUANT: 84 | RKNN_MODEL_PATH='./retinaface_quant.rknn' 85 | im_file = './img/4.jpeg' 86 | 87 | # Create RKNN object 88 | rknn = RKNN() 89 | 90 | NEED_BUILD_MODEL = True 91 | if NEED_BUILD_MODEL: 92 | print('--> config model') 93 | rknn.config(reorder_channel='2 1 0', mean_values=[[104, 117, 123]], std_values=[[1, 1, 1]], 94 | target_platform=['rv1126'], batch_size=1) 95 | print('done') 96 | 97 | # Load tensorflow model 98 | # print('--> Loading model') 99 | # ret = rknn.load_caffe(model='./face.prototxt',proto='caffe',blobs='./face.caffemodel') 100 | # if ret != 0: 101 | # print('Load model failed! 
Ret = {}'.format(ret)) 102 | # exit(ret) 103 | # print('done') 104 | 105 | # # Load mxnet model 106 | # symbol = './mobilefacenet-symbol.json' 107 | # params = './mobilefacenet-0000.params' 108 | # input_size_list = [[3, 112, 112]] 109 | # print('--> Loading model') 110 | # ret = rknn.load_mxnet(symbol, params, input_size_list) 111 | # if ret != 0: 112 | # print('Load mxnet model failed!') 113 | # exit(ret) 114 | # print('done') 115 | 116 | # Load keras model 117 | # print('--> Loading model') 118 | # ret = rknn.load_keras(model='./facenet_mobilenet_all.h5') 119 | # if ret != 0: 120 | # print('Load keras model failed!') 121 | # exit(ret) 122 | # print('done') 123 | # print('--> Loading model') 124 | ret = rknn.load_onnx(model='./weights/retinaface.onnx', 125 | inputs='input0', 126 | input_size_list=[[3,640,640]], 127 | outputs=['output0','590','589']) 128 | if ret != 0: 129 | print('Load retinaface failed!') 130 | exit(ret) 131 | print('done') 132 | 133 | # Build model 134 | print('--> Building model') 135 | ret = rknn.build(do_quantization=BUILD_QUANT, dataset='./dataset_retinaface.txt') 136 | if ret != 0: 137 | print('Build model failed!') 138 | exit(ret) 139 | print('done') 140 | 141 | if BUILD_QUANT: 142 | print('--> Accuracy analysis') 143 | rknn.accuracy_analysis(inputs='./dataset_retinaface.txt', output_dir="./retinaface_result", target='rv1126') 144 | print('done') 145 | 146 | # Export rknn model 147 | print('--> Export RKNN model') 148 | ret = rknn.export_rknn(RKNN_MODEL_PATH) 149 | if ret != 0: 150 | print('Export rknn failed!') 151 | exit(ret) 152 | print('done') 153 | else: 154 | # Direct load rknn model 155 | print('Loading RKNN model') 156 | ret = rknn.load_rknn(RKNN_MODEL_PATH) 157 | if ret != 0: 158 | print('load rknn model failed.') 159 | exit(ret) 160 | print('done') 161 | 162 | print('--> Init runtime environment') 163 | ret = rknn.init_runtime(target='rv1126', device_id='d81352278dd4de31', rknn2precompile=False) 164 | # ret = rknn.init_runtime(target='rv1126') 165 | if ret != 0: 166 | print('Init runtime environment failed') 167 | exit(ret) 168 | print('done') 169 | 170 | # Set inputs 171 | cfg = cfg_mnet 172 | device = torch.device("cpu") 173 | img_raw = cv2.imread(im_file) 174 | # img_raw = cv2.resize(img_raw, (640, 640)) 175 | img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB) 176 | img_raw = letterbox_image(img_raw, [640, 640]) 177 | 178 | cv2.imshow("Src", img_raw) 179 | img_raw = np.asarray(img_raw) 180 | # img = np.array(img, np.uint8) 181 | 182 | print('--> inter') 183 | # print(x2) 184 | # 增加一个维度 185 | # img = img[:, :, :, np.newaxis] 186 | # 转换为模型需要的输入维度(640, 640 ,3) 187 | #opencv读的就是hwc格式,如果转换成其他img.transpose([3, 2, 0, 1]) nchw,推理时间会加长 188 | # img = img.transpose([3, 2, 0, 1]) 189 | # img = img.transpose([2, 0, 1]) 190 | print(img_raw.shape) 191 | 192 | 193 | im_height, im_width, _ = img_raw.shape 194 | scale = torch.Tensor([img_raw.shape[1], img_raw.shape[0], img_raw.shape[1], img_raw.shape[0]]) 195 | scale = scale.to(device) 196 | 197 | 198 | # inference 199 | for i in range(1): 200 | print('--> inference') 201 | # loc, conf, landms = rknn.inference(data_format='nchw',inputs=[img]) 202 | loc, conf, landms = rknn.inference(inputs=[img_raw]) 203 | print('done') 204 | rknn.eval_perf() 205 | # rknn.accuracy_analysis() 206 | # rknn.eval_memory() 207 | 208 | 209 | img = img_raw.transpose(2, 0, 1) 210 | new_shape = [1, img.shape[0], img.shape[1], img.shape[2]] 211 | img = img.reshape(new_shape) 212 | 213 | # print(outputs) 214 | resize = 1 215 | loc = 
torch.tensor(loc) 216 | conf = torch.tensor(conf) 217 | landms = torch.tensor(landms) 218 | loc = loc.view(loc.shape[1], -1, 4) 219 | conf = conf.view(conf.shape[1], -1, 2) 220 | landms = landms.view(landms.shape[1], -1, 10) 221 | 222 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 223 | priors = priorbox.forward() 224 | priors = priors.to(device) 225 | prior_data = priors.data 226 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 227 | boxes = boxes * scale / resize 228 | boxes = boxes.cpu().numpy() 229 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 230 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 231 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 232 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 233 | img.shape[3], img.shape[2]]) 234 | scale1 = scale1.to(device) 235 | landms = landms * scale1 / resize 236 | landms = landms.cpu().numpy() 237 | 238 | # ignore low scores 239 | inds = np.where(scores > args.confidence_threshold)[0] 240 | boxes = boxes[inds] 241 | landms = landms[inds] 242 | scores = scores[inds] 243 | 244 | # keep top-K before NMS 245 | order = scores.argsort()[::-1][:args.top_k] 246 | boxes = boxes[order] 247 | landms = landms[order] 248 | scores = scores[order] 249 | 250 | # do NMS 251 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 252 | keep = py_cpu_nms(dets, args.nms_threshold) 253 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 254 | dets = dets[keep, :] 255 | landms = landms[keep] 256 | 257 | # keep top-K faster NMS 258 | dets = dets[:args.keep_top_k, :] 259 | landms = landms[:args.keep_top_k, :] 260 | 261 | dets = np.concatenate((dets, landms), axis=1) 262 | faces = 0 263 | # show image 264 | if True: 265 | for b in dets: 266 | if b[4] < args.vis_thres: 267 | continue 268 | text = "{:.4f}".format(b[4]) 269 | b = list(map(int, b)) 270 | print(b) 271 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 272 | cx = b[0] 273 | cy = b[1] + 12 274 | cv2.putText(img_raw, text, (cx, cy), 275 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 276 | 277 | # landms 278 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) 279 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) 280 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) 281 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) 282 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) 283 | # save image 284 | 285 | name = "test.jpg" 286 | cv2.imwrite(name, img_raw) 287 | if args.show_image: 288 | cv2.imshow("Demo", img_raw) 289 | cv2.waitKey(0) 290 | rknn.release() 291 | 292 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .wider_face import WiderFaceDetection, detection_collate 2 | from .data_augment import * 3 | from .config import * 4 | -------------------------------------------------------------------------------- /data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'name': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 640, 17 | 'pretrain': 
True, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 20 | 'out_channel': 64 21 | } 22 | 23 | cfg_re50 = { 24 | 'name': 'Resnet50', 25 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 26 | 'steps': [8, 16, 32], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'gpu_train': True, 31 | 'batch_size': 24, 32 | 'ngpu': 4, 33 | 'epoch': 100, 34 | 'decay1': 70, 35 | 'decay2': 90, 36 | 'image_size': 840, 37 | 'pretrain': True, 38 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 39 | 'in_channel': 256, 40 | 'out_channel': 256 41 | } 42 | 43 | -------------------------------------------------------------------------------- /data/data_augment.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | from utils.box_utils import matrix_iof 5 | 6 | 7 | def _crop(image, boxes, labels, landm, img_dim): 8 | height, width, _ = image.shape 9 | pad_image_flag = True 10 | 11 | for _ in range(250): 12 | """ 13 | if random.uniform(0, 1) <= 0.2: 14 | scale = 1.0 15 | else: 16 | scale = random.uniform(0.3, 1.0) 17 | """ 18 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] 19 | scale = random.choice(PRE_SCALES) 20 | short_side = min(width, height) 21 | w = int(scale * short_side) 22 | h = w 23 | 24 | if width == w: 25 | l = 0 26 | else: 27 | l = random.randrange(width - w) 28 | if height == h: 29 | t = 0 30 | else: 31 | t = random.randrange(height - h) 32 | roi = np.array((l, t, l + w, t + h)) 33 | 34 | value = matrix_iof(boxes, roi[np.newaxis]) 35 | flag = (value >= 1) 36 | if not flag.any(): 37 | continue 38 | 39 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 40 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 41 | boxes_t = boxes[mask_a].copy() 42 | labels_t = labels[mask_a].copy() 43 | landms_t = landm[mask_a].copy() 44 | landms_t = landms_t.reshape([-1, 5, 2]) 45 | 46 | if boxes_t.shape[0] == 0: 47 | continue 48 | 49 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 50 | 51 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 52 | boxes_t[:, :2] -= roi[:2] 53 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 54 | boxes_t[:, 2:] -= roi[:2] 55 | 56 | # landm 57 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] 58 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) 59 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) 60 | landms_t = landms_t.reshape([-1, 10]) 61 | 62 | 63 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale 64 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim 65 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim 66 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0 67 | boxes_t = boxes_t[mask_b] 68 | labels_t = labels_t[mask_b] 69 | landms_t = landms_t[mask_b] 70 | 71 | if boxes_t.shape[0] == 0: 72 | continue 73 | 74 | pad_image_flag = False 75 | 76 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag 77 | return image, boxes, labels, landm, pad_image_flag 78 | 79 | 80 | def _distort(image): 81 | 82 | def _convert(image, alpha=1, beta=0): 83 | tmp = image.astype(float) * alpha + beta 84 | tmp[tmp < 0] = 0 85 | tmp[tmp > 255] = 255 86 | image[:] = tmp 87 | 88 | image = image.copy() 89 | 90 | if random.randrange(2): 91 | 92 | #brightness distortion 93 | if random.randrange(2): 94 | _convert(image, beta=random.uniform(-32, 32)) 95 | 96 | #contrast distortion 97 | if random.randrange(2): 98 | _convert(image, 
alpha=random.uniform(0.5, 1.5)) 99 | 100 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 101 | 102 | #saturation distortion 103 | if random.randrange(2): 104 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 105 | 106 | #hue distortion 107 | if random.randrange(2): 108 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 109 | tmp %= 180 110 | image[:, :, 0] = tmp 111 | 112 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 113 | 114 | else: 115 | 116 | #brightness distortion 117 | if random.randrange(2): 118 | _convert(image, beta=random.uniform(-32, 32)) 119 | 120 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 121 | 122 | #saturation distortion 123 | if random.randrange(2): 124 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 125 | 126 | #hue distortion 127 | if random.randrange(2): 128 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 129 | tmp %= 180 130 | image[:, :, 0] = tmp 131 | 132 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 133 | 134 | #contrast distortion 135 | if random.randrange(2): 136 | _convert(image, alpha=random.uniform(0.5, 1.5)) 137 | 138 | return image 139 | 140 | 141 | def _expand(image, boxes, fill, p): 142 | if random.randrange(2): 143 | return image, boxes 144 | 145 | height, width, depth = image.shape 146 | 147 | scale = random.uniform(1, p) 148 | w = int(scale * width) 149 | h = int(scale * height) 150 | 151 | left = random.randint(0, w - width) 152 | top = random.randint(0, h - height) 153 | 154 | boxes_t = boxes.copy() 155 | boxes_t[:, :2] += (left, top) 156 | boxes_t[:, 2:] += (left, top) 157 | expand_image = np.empty( 158 | (h, w, depth), 159 | dtype=image.dtype) 160 | expand_image[:, :] = fill 161 | expand_image[top:top + height, left:left + width] = image 162 | image = expand_image 163 | 164 | return image, boxes_t 165 | 166 | 167 | def _mirror(image, boxes, landms): 168 | _, width, _ = image.shape 169 | if random.randrange(2): 170 | image = image[:, ::-1] 171 | boxes = boxes.copy() 172 | boxes[:, 0::2] = width - boxes[:, 2::-2] 173 | 174 | # landm 175 | landms = landms.copy() 176 | landms = landms.reshape([-1, 5, 2]) 177 | landms[:, :, 0] = width - landms[:, :, 0] 178 | tmp = landms[:, 1, :].copy() 179 | landms[:, 1, :] = landms[:, 0, :] 180 | landms[:, 0, :] = tmp 181 | tmp1 = landms[:, 4, :].copy() 182 | landms[:, 4, :] = landms[:, 3, :] 183 | landms[:, 3, :] = tmp1 184 | landms = landms.reshape([-1, 10]) 185 | 186 | return image, boxes, landms 187 | 188 | 189 | def _pad_to_square(image, rgb_mean, pad_image_flag): 190 | if not pad_image_flag: 191 | return image 192 | height, width, _ = image.shape 193 | long_side = max(width, height) 194 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) 195 | image_t[:, :] = rgb_mean 196 | image_t[0:0 + height, 0:0 + width] = image 197 | return image_t 198 | 199 | 200 | def _resize_subtract_mean(image, insize, rgb_mean): 201 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 202 | interp_method = interp_methods[random.randrange(5)] 203 | image = cv2.resize(image, (insize, insize), interpolation=interp_method) 204 | image = image.astype(np.float32) 205 | image -= rgb_mean 206 | return image.transpose(2, 0, 1) 207 | 208 | 209 | class preproc(object): 210 | 211 | def __init__(self, img_dim, rgb_means): 212 | self.img_dim = img_dim 213 | self.rgb_means = rgb_means 214 | 215 | def __call__(self, image, targets): 216 | assert targets.shape[0] > 0, "this image does not have gt" 217 | 218 | boxes = targets[:, 
:4].copy() 219 | labels = targets[:, -1].copy() 220 | landm = targets[:, 4:-1].copy() 221 | 222 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim) 223 | image_t = _distort(image_t) 224 | image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag) 225 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) 226 | height, width, _ = image_t.shape 227 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) 228 | boxes_t[:, 0::2] /= width 229 | boxes_t[:, 1::2] /= height 230 | 231 | landm_t[:, 0::2] /= width 232 | landm_t[:, 1::2] /= height 233 | 234 | labels_t = np.expand_dims(labels_t, 1) 235 | targets_t = np.hstack((boxes_t, landm_t, labels_t)) 236 | 237 | return image_t, targets_t 238 | -------------------------------------------------------------------------------- /data/wider_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import cv2 7 | import numpy as np 8 | 9 | class WiderFaceDetection(data.Dataset): 10 | def __init__(self, txt_path, preproc=None): 11 | self.preproc = preproc 12 | self.imgs_path = [] 13 | self.words = [] 14 | f = open(txt_path,'r') 15 | lines = f.readlines() 16 | isFirst = True 17 | labels = [] 18 | for line in lines: 19 | line = line.rstrip() 20 | if line.startswith('#'): 21 | if isFirst is True: 22 | isFirst = False 23 | else: 24 | labels_copy = labels.copy() 25 | self.words.append(labels_copy) 26 | labels.clear() 27 | path = line[2:] 28 | path = txt_path.replace('label.txt','images/') + path 29 | self.imgs_path.append(path) 30 | else: 31 | line = line.split(' ') 32 | label = [float(x) for x in line] 33 | labels.append(label) 34 | 35 | self.words.append(labels) 36 | 37 | def __len__(self): 38 | return len(self.imgs_path) 39 | 40 | def __getitem__(self, index): 41 | img = cv2.imread(self.imgs_path[index]) 42 | height, width, _ = img.shape 43 | 44 | labels = self.words[index] 45 | annotations = np.zeros((0, 15)) 46 | if len(labels) == 0: 47 | return annotations 48 | for idx, label in enumerate(labels): 49 | annotation = np.zeros((1, 15)) 50 | # bbox 51 | annotation[0, 0] = label[0] # x1 52 | annotation[0, 1] = label[1] # y1 53 | annotation[0, 2] = label[0] + label[2] # x2 54 | annotation[0, 3] = label[1] + label[3] # y2 55 | 56 | # landmarks 57 | annotation[0, 4] = label[4] # l0_x 58 | annotation[0, 5] = label[5] # l0_y 59 | annotation[0, 6] = label[7] # l1_x 60 | annotation[0, 7] = label[8] # l1_y 61 | annotation[0, 8] = label[10] # l2_x 62 | annotation[0, 9] = label[11] # l2_y 63 | annotation[0, 10] = label[13] # l3_x 64 | annotation[0, 11] = label[14] # l3_y 65 | annotation[0, 12] = label[16] # l4_x 66 | annotation[0, 13] = label[17] # l4_y 67 | if (annotation[0, 4]<0): 68 | annotation[0, 14] = -1 69 | else: 70 | annotation[0, 14] = 1 71 | 72 | annotations = np.append(annotations, annotation, axis=0) 73 | target = np.array(annotations) 74 | if self.preproc is not None: 75 | img, target = self.preproc(img, target) 76 | 77 | return torch.from_numpy(img), target 78 | 79 | def detection_collate(batch): 80 | """Custom collate fn for dealing with batches of images that have a different 81 | number of associated object annotations (bounding boxes). 
82 | 83 | Arguments: 84 | batch: (tuple) A tuple of tensor images and lists of annotations 85 | 86 | Return: 87 | A tuple containing: 88 | 1) (tensor) batch of images stacked on their 0 dim 89 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 90 | """ 91 | targets = [] 92 | imgs = [] 93 | for _, sample in enumerate(batch): 94 | for _, tup in enumerate(sample): 95 | if torch.is_tensor(tup): 96 | imgs.append(tup) 97 | elif isinstance(tup, type(np.empty(0))): 98 | annos = torch.from_numpy(tup).float() 99 | targets.append(annos) 100 | 101 | return (torch.stack(imgs, 0), targets) 102 | -------------------------------------------------------------------------------- /data/widerface: -------------------------------------------------------------------------------- 1 | IntxLNK/opt/deeplearning/insightface/detection/retinaface/data/retinaface/ -------------------------------------------------------------------------------- /dataset.txt: -------------------------------------------------------------------------------- 1 | ./9.jpg 2 | -------------------------------------------------------------------------------- /dataset_retinaface.txt: -------------------------------------------------------------------------------- 1 | ./t1.jpg 2 | -------------------------------------------------------------------------------- /encoding.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from FaceRecognition import Facenet 4 | 5 | 6 | ''' 7 | 在更换facenet网络后一定要重新进行人脸编码,运行encoding.py。 8 | ''' 9 | retinaface = Facenet(1) 10 | 11 | list_dir = os.listdir("face_dataset") 12 | image_paths = [] 13 | names = [] 14 | for name in list_dir: 15 | image_paths.append("face_dataset/"+name) 16 | names.append(name.split("_")[0]) 17 | 18 | retinaface.encode_face_dataset(image_paths,names) 19 | -------------------------------------------------------------------------------- /face.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face.caffemodel -------------------------------------------------------------------------------- /face.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "input0" 3 | type: "Input" 4 | top: "input0" 5 | input_param { 6 | shape { 7 | dim: 1 8 | dim: 3 9 | dim: 640 10 | dim: 640 11 | } 12 | } 13 | } 14 | layer { 15 | name: "Conv_13" 16 | type: "Convolution" 17 | bottom: "input0" 18 | top: "input.4" 19 | convolution_param { 20 | num_output: 8 21 | bias_term: true 22 | group: 1 23 | pad_h: 1 24 | pad_w: 1 25 | kernel_h: 3 26 | kernel_w: 3 27 | stride_h: 2 28 | stride_w: 2 29 | dilation: 1 30 | } 31 | } 32 | layer { 33 | name: "LeakyRelu_14" 34 | type: "ReLU" 35 | bottom: "input.4" 36 | top: "onnx::Conv_303" 37 | relu_param { 38 | negative_slope: 0.10000000149011612 39 | } 40 | } 41 | layer { 42 | name: "Conv_15" 43 | type: "Convolution" 44 | bottom: "onnx::Conv_303" 45 | top: "input.12" 46 | convolution_param { 47 | num_output: 8 48 | bias_term: true 49 | group: 8 50 | pad_h: 1 51 | pad_w: 1 52 | kernel_h: 3 53 | kernel_w: 3 54 | stride_h: 1 55 | stride_w: 1 56 | dilation: 1 57 | } 58 | } 59 | layer { 60 | name: "LeakyRelu_16" 61 | type: "ReLU" 62 | bottom: "input.12" 63 | top: "onnx::Conv_306" 64 | relu_param { 65 | negative_slope: 0.10000000149011612 66 | } 67 | } 68 | layer { 69 | name: "Conv_17" 70 | type: "Convolution" 71 
| bottom: "onnx::Conv_306" 72 | top: "input.20" 73 | convolution_param { 74 | num_output: 16 75 | bias_term: true 76 | group: 1 77 | pad_h: 0 78 | pad_w: 0 79 | kernel_h: 1 80 | kernel_w: 1 81 | stride_h: 1 82 | stride_w: 1 83 | dilation: 1 84 | } 85 | } 86 | layer { 87 | name: "LeakyRelu_18" 88 | type: "ReLU" 89 | bottom: "input.20" 90 | top: "onnx::Conv_309" 91 | relu_param { 92 | negative_slope: 0.10000000149011612 93 | } 94 | } 95 | layer { 96 | name: "Conv_19" 97 | type: "Convolution" 98 | bottom: "onnx::Conv_309" 99 | top: "input.28" 100 | convolution_param { 101 | num_output: 16 102 | bias_term: true 103 | group: 16 104 | pad_h: 1 105 | pad_w: 1 106 | kernel_h: 3 107 | kernel_w: 3 108 | stride_h: 2 109 | stride_w: 2 110 | dilation: 1 111 | } 112 | } 113 | layer { 114 | name: "LeakyRelu_20" 115 | type: "ReLU" 116 | bottom: "input.28" 117 | top: "onnx::Conv_312" 118 | relu_param { 119 | negative_slope: 0.10000000149011612 120 | } 121 | } 122 | layer { 123 | name: "Conv_21" 124 | type: "Convolution" 125 | bottom: "onnx::Conv_312" 126 | top: "input.36" 127 | convolution_param { 128 | num_output: 32 129 | bias_term: true 130 | group: 1 131 | pad_h: 0 132 | pad_w: 0 133 | kernel_h: 1 134 | kernel_w: 1 135 | stride_h: 1 136 | stride_w: 1 137 | dilation: 1 138 | } 139 | } 140 | layer { 141 | name: "LeakyRelu_22" 142 | type: "ReLU" 143 | bottom: "input.36" 144 | top: "onnx::Conv_315" 145 | relu_param { 146 | negative_slope: 0.10000000149011612 147 | } 148 | } 149 | layer { 150 | name: "Conv_23" 151 | type: "Convolution" 152 | bottom: "onnx::Conv_315" 153 | top: "input.44" 154 | convolution_param { 155 | num_output: 32 156 | bias_term: true 157 | group: 32 158 | pad_h: 1 159 | pad_w: 1 160 | kernel_h: 3 161 | kernel_w: 3 162 | stride_h: 1 163 | stride_w: 1 164 | dilation: 1 165 | } 166 | } 167 | layer { 168 | name: "LeakyRelu_24" 169 | type: "ReLU" 170 | bottom: "input.44" 171 | top: "onnx::Conv_318" 172 | relu_param { 173 | negative_slope: 0.10000000149011612 174 | } 175 | } 176 | layer { 177 | name: "Conv_25" 178 | type: "Convolution" 179 | bottom: "onnx::Conv_318" 180 | top: "input.52" 181 | convolution_param { 182 | num_output: 32 183 | bias_term: true 184 | group: 1 185 | pad_h: 0 186 | pad_w: 0 187 | kernel_h: 1 188 | kernel_w: 1 189 | stride_h: 1 190 | stride_w: 1 191 | dilation: 1 192 | } 193 | } 194 | layer { 195 | name: "LeakyRelu_26" 196 | type: "ReLU" 197 | bottom: "input.52" 198 | top: "onnx::Conv_321" 199 | relu_param { 200 | negative_slope: 0.10000000149011612 201 | } 202 | } 203 | layer { 204 | name: "Conv_27" 205 | type: "Convolution" 206 | bottom: "onnx::Conv_321" 207 | top: "input.60" 208 | convolution_param { 209 | num_output: 32 210 | bias_term: true 211 | group: 32 212 | pad_h: 1 213 | pad_w: 1 214 | kernel_h: 3 215 | kernel_w: 3 216 | stride_h: 2 217 | stride_w: 2 218 | dilation: 1 219 | } 220 | } 221 | layer { 222 | name: "LeakyRelu_28" 223 | type: "ReLU" 224 | bottom: "input.60" 225 | top: "onnx::Conv_324" 226 | relu_param { 227 | negative_slope: 0.10000000149011612 228 | } 229 | } 230 | layer { 231 | name: "Conv_29" 232 | type: "Convolution" 233 | bottom: "onnx::Conv_324" 234 | top: "input.68" 235 | convolution_param { 236 | num_output: 64 237 | bias_term: true 238 | group: 1 239 | pad_h: 0 240 | pad_w: 0 241 | kernel_h: 1 242 | kernel_w: 1 243 | stride_h: 1 244 | stride_w: 1 245 | dilation: 1 246 | } 247 | } 248 | layer { 249 | name: "LeakyRelu_30" 250 | type: "ReLU" 251 | bottom: "input.68" 252 | top: "onnx::Conv_327" 253 | relu_param { 254 | negative_slope: 
0.10000000149011612 255 | } 256 | } 257 | layer { 258 | name: "Conv_31" 259 | type: "Convolution" 260 | bottom: "onnx::Conv_327" 261 | top: "input.76" 262 | convolution_param { 263 | num_output: 64 264 | bias_term: true 265 | group: 64 266 | pad_h: 1 267 | pad_w: 1 268 | kernel_h: 3 269 | kernel_w: 3 270 | stride_h: 1 271 | stride_w: 1 272 | dilation: 1 273 | } 274 | } 275 | layer { 276 | name: "LeakyRelu_32" 277 | type: "ReLU" 278 | bottom: "input.76" 279 | top: "onnx::Conv_330" 280 | relu_param { 281 | negative_slope: 0.10000000149011612 282 | } 283 | } 284 | layer { 285 | name: "Conv_33" 286 | type: "Convolution" 287 | bottom: "onnx::Conv_330" 288 | top: "input.84" 289 | convolution_param { 290 | num_output: 64 291 | bias_term: true 292 | group: 1 293 | pad_h: 0 294 | pad_w: 0 295 | kernel_h: 1 296 | kernel_w: 1 297 | stride_h: 1 298 | stride_w: 1 299 | dilation: 1 300 | } 301 | } 302 | layer { 303 | name: "LeakyRelu_34" 304 | type: "ReLU" 305 | bottom: "input.84" 306 | top: "onnx::Conv_333" 307 | relu_param { 308 | negative_slope: 0.10000000149011612 309 | } 310 | } 311 | layer { 312 | name: "Conv_35" 313 | type: "Convolution" 314 | bottom: "onnx::Conv_333" 315 | top: "input.92" 316 | convolution_param { 317 | num_output: 64 318 | bias_term: true 319 | group: 64 320 | pad_h: 1 321 | pad_w: 1 322 | kernel_h: 3 323 | kernel_w: 3 324 | stride_h: 2 325 | stride_w: 2 326 | dilation: 1 327 | } 328 | } 329 | layer { 330 | name: "LeakyRelu_36" 331 | type: "ReLU" 332 | bottom: "input.92" 333 | top: "onnx::Conv_336" 334 | relu_param { 335 | negative_slope: 0.10000000149011612 336 | } 337 | } 338 | layer { 339 | name: "Conv_37" 340 | type: "Convolution" 341 | bottom: "onnx::Conv_336" 342 | top: "input.100" 343 | convolution_param { 344 | num_output: 128 345 | bias_term: true 346 | group: 1 347 | pad_h: 0 348 | pad_w: 0 349 | kernel_h: 1 350 | kernel_w: 1 351 | stride_h: 1 352 | stride_w: 1 353 | dilation: 1 354 | } 355 | } 356 | layer { 357 | name: "LeakyRelu_38" 358 | type: "ReLU" 359 | bottom: "input.100" 360 | top: "onnx::Conv_339" 361 | relu_param { 362 | negative_slope: 0.10000000149011612 363 | } 364 | } 365 | layer { 366 | name: "Conv_39" 367 | type: "Convolution" 368 | bottom: "onnx::Conv_339" 369 | top: "input.108" 370 | convolution_param { 371 | num_output: 128 372 | bias_term: true 373 | group: 128 374 | pad_h: 1 375 | pad_w: 1 376 | kernel_h: 3 377 | kernel_w: 3 378 | stride_h: 1 379 | stride_w: 1 380 | dilation: 1 381 | } 382 | } 383 | layer { 384 | name: "LeakyRelu_40" 385 | type: "ReLU" 386 | bottom: "input.108" 387 | top: "onnx::Conv_342" 388 | relu_param { 389 | negative_slope: 0.10000000149011612 390 | } 391 | } 392 | layer { 393 | name: "Conv_41" 394 | type: "Convolution" 395 | bottom: "onnx::Conv_342" 396 | top: "input.116" 397 | convolution_param { 398 | num_output: 128 399 | bias_term: true 400 | group: 1 401 | pad_h: 0 402 | pad_w: 0 403 | kernel_h: 1 404 | kernel_w: 1 405 | stride_h: 1 406 | stride_w: 1 407 | dilation: 1 408 | } 409 | } 410 | layer { 411 | name: "LeakyRelu_42" 412 | type: "ReLU" 413 | bottom: "input.116" 414 | top: "onnx::Conv_345" 415 | relu_param { 416 | negative_slope: 0.10000000149011612 417 | } 418 | } 419 | layer { 420 | name: "Conv_43" 421 | type: "Convolution" 422 | bottom: "onnx::Conv_345" 423 | top: "input.124" 424 | convolution_param { 425 | num_output: 128 426 | bias_term: true 427 | group: 128 428 | pad_h: 1 429 | pad_w: 1 430 | kernel_h: 3 431 | kernel_w: 3 432 | stride_h: 1 433 | stride_w: 1 434 | dilation: 1 435 | } 436 | } 437 | layer { 
438 | name: "LeakyRelu_44" 439 | type: "ReLU" 440 | bottom: "input.124" 441 | top: "onnx::Conv_348" 442 | relu_param { 443 | negative_slope: 0.10000000149011612 444 | } 445 | } 446 | layer { 447 | name: "Conv_45" 448 | type: "Convolution" 449 | bottom: "onnx::Conv_348" 450 | top: "input.132" 451 | convolution_param { 452 | num_output: 128 453 | bias_term: true 454 | group: 1 455 | pad_h: 0 456 | pad_w: 0 457 | kernel_h: 1 458 | kernel_w: 1 459 | stride_h: 1 460 | stride_w: 1 461 | dilation: 1 462 | } 463 | } 464 | layer { 465 | name: "LeakyRelu_46" 466 | type: "ReLU" 467 | bottom: "input.132" 468 | top: "onnx::Conv_351" 469 | relu_param { 470 | negative_slope: 0.10000000149011612 471 | } 472 | } 473 | layer { 474 | name: "Conv_47" 475 | type: "Convolution" 476 | bottom: "onnx::Conv_351" 477 | top: "input.140" 478 | convolution_param { 479 | num_output: 128 480 | bias_term: true 481 | group: 128 482 | pad_h: 1 483 | pad_w: 1 484 | kernel_h: 3 485 | kernel_w: 3 486 | stride_h: 1 487 | stride_w: 1 488 | dilation: 1 489 | } 490 | } 491 | layer { 492 | name: "LeakyRelu_48" 493 | type: "ReLU" 494 | bottom: "input.140" 495 | top: "onnx::Conv_354" 496 | relu_param { 497 | negative_slope: 0.10000000149011612 498 | } 499 | } 500 | layer { 501 | name: "Conv_49" 502 | type: "Convolution" 503 | bottom: "onnx::Conv_354" 504 | top: "input.148" 505 | convolution_param { 506 | num_output: 128 507 | bias_term: true 508 | group: 1 509 | pad_h: 0 510 | pad_w: 0 511 | kernel_h: 1 512 | kernel_w: 1 513 | stride_h: 1 514 | stride_w: 1 515 | dilation: 1 516 | } 517 | } 518 | layer { 519 | name: "LeakyRelu_50" 520 | type: "ReLU" 521 | bottom: "input.148" 522 | top: "onnx::Conv_357" 523 | relu_param { 524 | negative_slope: 0.10000000149011612 525 | } 526 | } 527 | layer { 528 | name: "Conv_51" 529 | type: "Convolution" 530 | bottom: "onnx::Conv_357" 531 | top: "input.156" 532 | convolution_param { 533 | num_output: 128 534 | bias_term: true 535 | group: 128 536 | pad_h: 1 537 | pad_w: 1 538 | kernel_h: 3 539 | kernel_w: 3 540 | stride_h: 1 541 | stride_w: 1 542 | dilation: 1 543 | } 544 | } 545 | layer { 546 | name: "LeakyRelu_52" 547 | type: "ReLU" 548 | bottom: "input.156" 549 | top: "onnx::Conv_360" 550 | relu_param { 551 | negative_slope: 0.10000000149011612 552 | } 553 | } 554 | layer { 555 | name: "Conv_53" 556 | type: "Convolution" 557 | bottom: "onnx::Conv_360" 558 | top: "input.164" 559 | convolution_param { 560 | num_output: 128 561 | bias_term: true 562 | group: 1 563 | pad_h: 0 564 | pad_w: 0 565 | kernel_h: 1 566 | kernel_w: 1 567 | stride_h: 1 568 | stride_w: 1 569 | dilation: 1 570 | } 571 | } 572 | layer { 573 | name: "LeakyRelu_54" 574 | type: "ReLU" 575 | bottom: "input.164" 576 | top: "onnx::Conv_363" 577 | relu_param { 578 | negative_slope: 0.10000000149011612 579 | } 580 | } 581 | layer { 582 | name: "Conv_55" 583 | type: "Convolution" 584 | bottom: "onnx::Conv_363" 585 | top: "input.172" 586 | convolution_param { 587 | num_output: 128 588 | bias_term: true 589 | group: 128 590 | pad_h: 1 591 | pad_w: 1 592 | kernel_h: 3 593 | kernel_w: 3 594 | stride_h: 1 595 | stride_w: 1 596 | dilation: 1 597 | } 598 | } 599 | layer { 600 | name: "LeakyRelu_56" 601 | type: "ReLU" 602 | bottom: "input.172" 603 | top: "onnx::Conv_366" 604 | relu_param { 605 | negative_slope: 0.10000000149011612 606 | } 607 | } 608 | layer { 609 | name: "Conv_57" 610 | type: "Convolution" 611 | bottom: "onnx::Conv_366" 612 | top: "input.180" 613 | convolution_param { 614 | num_output: 128 615 | bias_term: true 616 | group: 1 
617 | pad_h: 0 618 | pad_w: 0 619 | kernel_h: 1 620 | kernel_w: 1 621 | stride_h: 1 622 | stride_w: 1 623 | dilation: 1 624 | } 625 | } 626 | layer { 627 | name: "LeakyRelu_58" 628 | type: "ReLU" 629 | bottom: "input.180" 630 | top: "onnx::Conv_369" 631 | relu_param { 632 | negative_slope: 0.10000000149011612 633 | } 634 | } 635 | layer { 636 | name: "Conv_59" 637 | type: "Convolution" 638 | bottom: "onnx::Conv_369" 639 | top: "input.188" 640 | convolution_param { 641 | num_output: 128 642 | bias_term: true 643 | group: 128 644 | pad_h: 1 645 | pad_w: 1 646 | kernel_h: 3 647 | kernel_w: 3 648 | stride_h: 2 649 | stride_w: 2 650 | dilation: 1 651 | } 652 | } 653 | layer { 654 | name: "LeakyRelu_60" 655 | type: "ReLU" 656 | bottom: "input.188" 657 | top: "onnx::Conv_372" 658 | relu_param { 659 | negative_slope: 0.10000000149011612 660 | } 661 | } 662 | layer { 663 | name: "Conv_61" 664 | type: "Convolution" 665 | bottom: "onnx::Conv_372" 666 | top: "input.196" 667 | convolution_param { 668 | num_output: 256 669 | bias_term: true 670 | group: 1 671 | pad_h: 0 672 | pad_w: 0 673 | kernel_h: 1 674 | kernel_w: 1 675 | stride_h: 1 676 | stride_w: 1 677 | dilation: 1 678 | } 679 | } 680 | layer { 681 | name: "LeakyRelu_62" 682 | type: "ReLU" 683 | bottom: "input.196" 684 | top: "onnx::Conv_375" 685 | relu_param { 686 | negative_slope: 0.10000000149011612 687 | } 688 | } 689 | layer { 690 | name: "Conv_63" 691 | type: "Convolution" 692 | bottom: "onnx::Conv_375" 693 | top: "input.204" 694 | convolution_param { 695 | num_output: 256 696 | bias_term: true 697 | group: 256 698 | pad_h: 1 699 | pad_w: 1 700 | kernel_h: 3 701 | kernel_w: 3 702 | stride_h: 1 703 | stride_w: 1 704 | dilation: 1 705 | } 706 | } 707 | layer { 708 | name: "LeakyRelu_64" 709 | type: "ReLU" 710 | bottom: "input.204" 711 | top: "onnx::Conv_378" 712 | relu_param { 713 | negative_slope: 0.10000000149011612 714 | } 715 | } 716 | layer { 717 | name: "Conv_65" 718 | type: "Convolution" 719 | bottom: "onnx::Conv_378" 720 | top: "input.212" 721 | convolution_param { 722 | num_output: 256 723 | bias_term: true 724 | group: 1 725 | pad_h: 0 726 | pad_w: 0 727 | kernel_h: 1 728 | kernel_w: 1 729 | stride_h: 1 730 | stride_w: 1 731 | dilation: 1 732 | } 733 | } 734 | layer { 735 | name: "LeakyRelu_66" 736 | type: "ReLU" 737 | bottom: "input.212" 738 | top: "onnx::Conv_381" 739 | relu_param { 740 | negative_slope: 0.10000000149011612 741 | } 742 | } 743 | layer { 744 | name: "Conv_67" 745 | type: "Convolution" 746 | bottom: "onnx::Conv_333" 747 | top: "input.220" 748 | convolution_param { 749 | num_output: 64 750 | bias_term: true 751 | group: 1 752 | pad_h: 0 753 | pad_w: 0 754 | kernel_h: 1 755 | kernel_w: 1 756 | stride_h: 1 757 | stride_w: 1 758 | dilation: 1 759 | } 760 | } 761 | layer { 762 | name: "LeakyRelu_68" 763 | type: "ReLU" 764 | bottom: "input.220" 765 | top: "onnx::Add_384" 766 | relu_param { 767 | negative_slope: 0.10000000149011612 768 | } 769 | } 770 | layer { 771 | name: "Conv_69" 772 | type: "Convolution" 773 | bottom: "onnx::Conv_369" 774 | top: "input.228" 775 | convolution_param { 776 | num_output: 64 777 | bias_term: true 778 | group: 1 779 | pad_h: 0 780 | pad_w: 0 781 | kernel_h: 1 782 | kernel_w: 1 783 | stride_h: 1 784 | stride_w: 1 785 | dilation: 1 786 | } 787 | } 788 | layer { 789 | name: "LeakyRelu_70" 790 | type: "ReLU" 791 | bottom: "input.228" 792 | top: "onnx::Add_387" 793 | relu_param { 794 | negative_slope: 0.10000000149011612 795 | } 796 | } 797 | layer { 798 | name: "Conv_71" 799 | type: 
"Convolution" 800 | bottom: "onnx::Conv_381" 801 | top: "input.236" 802 | convolution_param { 803 | num_output: 64 804 | bias_term: true 805 | group: 1 806 | pad_h: 0 807 | pad_w: 0 808 | kernel_h: 1 809 | kernel_w: 1 810 | stride_h: 1 811 | stride_w: 1 812 | dilation: 1 813 | } 814 | } 815 | layer { 816 | name: "LeakyRelu_72" 817 | type: "ReLU" 818 | bottom: "input.236" 819 | top: "onnx::Shape_390" 820 | relu_param { 821 | negative_slope: 0.10000000149011612 822 | } 823 | } 824 | layer { 825 | name: "Upsample_79" 826 | type: "Deconvolution" 827 | bottom: "onnx::Shape_390" 828 | top: "onnx::Add_403" 829 | param { 830 | lr_mult: 0.0 831 | decay_mult: 0.0 832 | } 833 | convolution_param { 834 | num_output: 64 835 | bias_term: false 836 | group: 64 837 | weight_filler { 838 | type: "bilinear" 839 | } 840 | pad_h: 1 841 | pad_w: 1 842 | kernel_h: 4 843 | kernel_w: 4 844 | stride_h: 2 845 | stride_w: 2 846 | } 847 | } 848 | layer { 849 | name: "Add_80" 850 | type: "Eltwise" 851 | bottom: "onnx::Add_387" 852 | bottom: "onnx::Add_403" 853 | top: "input.240" 854 | eltwise_param { 855 | operation: SUM 856 | } 857 | } 858 | layer { 859 | name: "Conv_81" 860 | type: "Convolution" 861 | bottom: "input.240" 862 | top: "input.248" 863 | convolution_param { 864 | num_output: 64 865 | bias_term: true 866 | group: 1 867 | pad_h: 1 868 | pad_w: 1 869 | kernel_h: 3 870 | kernel_w: 3 871 | stride_h: 1 872 | stride_w: 1 873 | dilation: 1 874 | } 875 | } 876 | layer { 877 | name: "LeakyRelu_82" 878 | type: "ReLU" 879 | bottom: "input.248" 880 | top: "onnx::Shape_407" 881 | relu_param { 882 | negative_slope: 0.10000000149011612 883 | } 884 | } 885 | layer { 886 | name: "Upsample_89" 887 | type: "Deconvolution" 888 | bottom: "onnx::Shape_407" 889 | top: "onnx::Add_420" 890 | param { 891 | lr_mult: 0.0 892 | decay_mult: 0.0 893 | } 894 | convolution_param { 895 | num_output: 64 896 | bias_term: false 897 | group: 64 898 | weight_filler { 899 | type: "bilinear" 900 | } 901 | pad_h: 1 902 | pad_w: 1 903 | kernel_h: 4 904 | kernel_w: 4 905 | stride_h: 2 906 | stride_w: 2 907 | } 908 | } 909 | layer { 910 | name: "Add_90" 911 | type: "Eltwise" 912 | bottom: "onnx::Add_384" 913 | bottom: "onnx::Add_420" 914 | top: "input.252" 915 | eltwise_param { 916 | operation: SUM 917 | } 918 | } 919 | layer { 920 | name: "Conv_91" 921 | type: "Convolution" 922 | bottom: "input.252" 923 | top: "input.260" 924 | convolution_param { 925 | num_output: 64 926 | bias_term: true 927 | group: 1 928 | pad_h: 1 929 | pad_w: 1 930 | kernel_h: 3 931 | kernel_w: 3 932 | stride_h: 1 933 | stride_w: 1 934 | dilation: 1 935 | } 936 | } 937 | layer { 938 | name: "LeakyRelu_92" 939 | type: "ReLU" 940 | bottom: "input.260" 941 | top: "onnx::Conv_424" 942 | relu_param { 943 | negative_slope: 0.10000000149011612 944 | } 945 | } 946 | layer { 947 | name: "Conv_93" 948 | type: "Convolution" 949 | bottom: "onnx::Conv_424" 950 | top: "onnx::Concat_687" 951 | convolution_param { 952 | num_output: 32 953 | bias_term: true 954 | group: 1 955 | pad_h: 1 956 | pad_w: 1 957 | kernel_h: 3 958 | kernel_w: 3 959 | stride_h: 1 960 | stride_w: 1 961 | dilation: 1 962 | } 963 | } 964 | layer { 965 | name: "Conv_94" 966 | type: "Convolution" 967 | bottom: "onnx::Conv_424" 968 | top: "input.272" 969 | convolution_param { 970 | num_output: 16 971 | bias_term: true 972 | group: 1 973 | pad_h: 1 974 | pad_w: 1 975 | kernel_h: 3 976 | kernel_w: 3 977 | stride_h: 1 978 | stride_w: 1 979 | dilation: 1 980 | } 981 | } 982 | layer { 983 | name: "LeakyRelu_95" 984 | type: 
"ReLU" 985 | bottom: "input.272" 986 | top: "onnx::Conv_429" 987 | relu_param { 988 | negative_slope: 0.10000000149011612 989 | } 990 | } 991 | layer { 992 | name: "Conv_96" 993 | type: "Convolution" 994 | bottom: "onnx::Conv_429" 995 | top: "onnx::Concat_693" 996 | convolution_param { 997 | num_output: 16 998 | bias_term: true 999 | group: 1 1000 | pad_h: 1 1001 | pad_w: 1 1002 | kernel_h: 3 1003 | kernel_w: 3 1004 | stride_h: 1 1005 | stride_w: 1 1006 | dilation: 1 1007 | } 1008 | } 1009 | layer { 1010 | name: "Conv_97" 1011 | type: "Convolution" 1012 | bottom: "onnx::Conv_429" 1013 | top: "input.284" 1014 | convolution_param { 1015 | num_output: 16 1016 | bias_term: true 1017 | group: 1 1018 | pad_h: 1 1019 | pad_w: 1 1020 | kernel_h: 3 1021 | kernel_w: 3 1022 | stride_h: 1 1023 | stride_w: 1 1024 | dilation: 1 1025 | } 1026 | } 1027 | layer { 1028 | name: "LeakyRelu_98" 1029 | type: "ReLU" 1030 | bottom: "input.284" 1031 | top: "onnx::Conv_434" 1032 | relu_param { 1033 | negative_slope: 0.10000000149011612 1034 | } 1035 | } 1036 | layer { 1037 | name: "Conv_99" 1038 | type: "Convolution" 1039 | bottom: "onnx::Conv_434" 1040 | top: "onnx::Concat_699" 1041 | convolution_param { 1042 | num_output: 16 1043 | bias_term: true 1044 | group: 1 1045 | pad_h: 1 1046 | pad_w: 1 1047 | kernel_h: 3 1048 | kernel_w: 3 1049 | stride_h: 1 1050 | stride_w: 1 1051 | dilation: 1 1052 | } 1053 | } 1054 | layer { 1055 | name: "Concat_100" 1056 | type: "Concat" 1057 | bottom: "onnx::Concat_687" 1058 | bottom: "onnx::Concat_693" 1059 | bottom: "onnx::Concat_699" 1060 | top: "out" 1061 | concat_param { 1062 | axis: 1 1063 | } 1064 | } 1065 | layer { 1066 | name: "Relu_101" 1067 | type: "ReLU" 1068 | bottom: "out" 1069 | top: "input.292" 1070 | } 1071 | layer { 1072 | name: "Conv_102" 1073 | type: "Convolution" 1074 | bottom: "onnx::Shape_407" 1075 | top: "onnx::Concat_702" 1076 | convolution_param { 1077 | num_output: 32 1078 | bias_term: true 1079 | group: 1 1080 | pad_h: 1 1081 | pad_w: 1 1082 | kernel_h: 3 1083 | kernel_w: 3 1084 | stride_h: 1 1085 | stride_w: 1 1086 | dilation: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "Conv_103" 1091 | type: "Convolution" 1092 | bottom: "onnx::Shape_407" 1093 | top: "input.304" 1094 | convolution_param { 1095 | num_output: 16 1096 | bias_term: true 1097 | group: 1 1098 | pad_h: 1 1099 | pad_w: 1 1100 | kernel_h: 3 1101 | kernel_w: 3 1102 | stride_h: 1 1103 | stride_w: 1 1104 | dilation: 1 1105 | } 1106 | } 1107 | layer { 1108 | name: "LeakyRelu_104" 1109 | type: "ReLU" 1110 | bottom: "input.304" 1111 | top: "onnx::Conv_443" 1112 | relu_param { 1113 | negative_slope: 0.10000000149011612 1114 | } 1115 | } 1116 | layer { 1117 | name: "Conv_105" 1118 | type: "Convolution" 1119 | bottom: "onnx::Conv_443" 1120 | top: "onnx::Concat_708" 1121 | convolution_param { 1122 | num_output: 16 1123 | bias_term: true 1124 | group: 1 1125 | pad_h: 1 1126 | pad_w: 1 1127 | kernel_h: 3 1128 | kernel_w: 3 1129 | stride_h: 1 1130 | stride_w: 1 1131 | dilation: 1 1132 | } 1133 | } 1134 | layer { 1135 | name: "Conv_106" 1136 | type: "Convolution" 1137 | bottom: "onnx::Conv_443" 1138 | top: "input.316" 1139 | convolution_param { 1140 | num_output: 16 1141 | bias_term: true 1142 | group: 1 1143 | pad_h: 1 1144 | pad_w: 1 1145 | kernel_h: 3 1146 | kernel_w: 3 1147 | stride_h: 1 1148 | stride_w: 1 1149 | dilation: 1 1150 | } 1151 | } 1152 | layer { 1153 | name: "LeakyRelu_107" 1154 | type: "ReLU" 1155 | bottom: "input.316" 1156 | top: "onnx::Conv_448" 1157 | relu_param { 1158 | 
negative_slope: 0.10000000149011612 1159 | } 1160 | } 1161 | layer { 1162 | name: "Conv_108" 1163 | type: "Convolution" 1164 | bottom: "onnx::Conv_448" 1165 | top: "onnx::Concat_714" 1166 | convolution_param { 1167 | num_output: 16 1168 | bias_term: true 1169 | group: 1 1170 | pad_h: 1 1171 | pad_w: 1 1172 | kernel_h: 3 1173 | kernel_w: 3 1174 | stride_h: 1 1175 | stride_w: 1 1176 | dilation: 1 1177 | } 1178 | } 1179 | layer { 1180 | name: "Concat_109" 1181 | type: "Concat" 1182 | bottom: "onnx::Concat_702" 1183 | bottom: "onnx::Concat_708" 1184 | bottom: "onnx::Concat_714" 1185 | top: "out.3" 1186 | concat_param { 1187 | axis: 1 1188 | } 1189 | } 1190 | layer { 1191 | name: "Relu_110" 1192 | type: "ReLU" 1193 | bottom: "out.3" 1194 | top: "input.324" 1195 | } 1196 | layer { 1197 | name: "Conv_111" 1198 | type: "Convolution" 1199 | bottom: "onnx::Shape_390" 1200 | top: "onnx::Concat_717" 1201 | convolution_param { 1202 | num_output: 32 1203 | bias_term: true 1204 | group: 1 1205 | pad_h: 1 1206 | pad_w: 1 1207 | kernel_h: 3 1208 | kernel_w: 3 1209 | stride_h: 1 1210 | stride_w: 1 1211 | dilation: 1 1212 | } 1213 | } 1214 | layer { 1215 | name: "Conv_112" 1216 | type: "Convolution" 1217 | bottom: "onnx::Shape_390" 1218 | top: "input.336" 1219 | convolution_param { 1220 | num_output: 16 1221 | bias_term: true 1222 | group: 1 1223 | pad_h: 1 1224 | pad_w: 1 1225 | kernel_h: 3 1226 | kernel_w: 3 1227 | stride_h: 1 1228 | stride_w: 1 1229 | dilation: 1 1230 | } 1231 | } 1232 | layer { 1233 | name: "LeakyRelu_113" 1234 | type: "ReLU" 1235 | bottom: "input.336" 1236 | top: "onnx::Conv_457" 1237 | relu_param { 1238 | negative_slope: 0.10000000149011612 1239 | } 1240 | } 1241 | layer { 1242 | name: "Conv_114" 1243 | type: "Convolution" 1244 | bottom: "onnx::Conv_457" 1245 | top: "onnx::Concat_723" 1246 | convolution_param { 1247 | num_output: 16 1248 | bias_term: true 1249 | group: 1 1250 | pad_h: 1 1251 | pad_w: 1 1252 | kernel_h: 3 1253 | kernel_w: 3 1254 | stride_h: 1 1255 | stride_w: 1 1256 | dilation: 1 1257 | } 1258 | } 1259 | layer { 1260 | name: "Conv_115" 1261 | type: "Convolution" 1262 | bottom: "onnx::Conv_457" 1263 | top: "input.348" 1264 | convolution_param { 1265 | num_output: 16 1266 | bias_term: true 1267 | group: 1 1268 | pad_h: 1 1269 | pad_w: 1 1270 | kernel_h: 3 1271 | kernel_w: 3 1272 | stride_h: 1 1273 | stride_w: 1 1274 | dilation: 1 1275 | } 1276 | } 1277 | layer { 1278 | name: "LeakyRelu_116" 1279 | type: "ReLU" 1280 | bottom: "input.348" 1281 | top: "onnx::Conv_462" 1282 | relu_param { 1283 | negative_slope: 0.10000000149011612 1284 | } 1285 | } 1286 | layer { 1287 | name: "Conv_117" 1288 | type: "Convolution" 1289 | bottom: "onnx::Conv_462" 1290 | top: "onnx::Concat_729" 1291 | convolution_param { 1292 | num_output: 16 1293 | bias_term: true 1294 | group: 1 1295 | pad_h: 1 1296 | pad_w: 1 1297 | kernel_h: 3 1298 | kernel_w: 3 1299 | stride_h: 1 1300 | stride_w: 1 1301 | dilation: 1 1302 | } 1303 | } 1304 | layer { 1305 | name: "Concat_118" 1306 | type: "Concat" 1307 | bottom: "onnx::Concat_717" 1308 | bottom: "onnx::Concat_723" 1309 | bottom: "onnx::Concat_729" 1310 | top: "out.7" 1311 | concat_param { 1312 | axis: 1 1313 | } 1314 | } 1315 | layer { 1316 | name: "Relu_119" 1317 | type: "ReLU" 1318 | bottom: "out.7" 1319 | top: "input.356" 1320 | } 1321 | layer { 1322 | name: "Conv_120" 1323 | type: "Convolution" 1324 | bottom: "input.292" 1325 | top: "onnx::Transpose_467" 1326 | convolution_param { 1327 | num_output: 8 1328 | bias_term: true 1329 | group: 1 1330 | pad_h: 
0 1331 | pad_w: 0 1332 | kernel_h: 1 1333 | kernel_w: 1 1334 | stride_h: 1 1335 | stride_w: 1 1336 | dilation: 1 1337 | } 1338 | } 1339 | layer { 1340 | name: "Transpose_121" 1341 | type: "Permute" 1342 | bottom: "onnx::Transpose_467" 1343 | top: "onnx::Shape_468" 1344 | permute_param { 1345 | order: 0 1346 | order: 2 1347 | order: 3 1348 | order: 1 1349 | } 1350 | } 1351 | layer { 1352 | name: "Reshape_127" 1353 | type: "Reshape" 1354 | bottom: "onnx::Shape_468" 1355 | top: "onnx::Concat_480" 1356 | reshape_param { 1357 | shape { 1358 | dim: 0 1359 | dim: 1 1360 | dim: -1 1361 | dim: 4 1362 | } 1363 | } 1364 | } 1365 | layer { 1366 | name: "Conv_128" 1367 | type: "Convolution" 1368 | bottom: "input.324" 1369 | top: "onnx::Transpose_481" 1370 | convolution_param { 1371 | num_output: 8 1372 | bias_term: true 1373 | group: 1 1374 | pad_h: 0 1375 | pad_w: 0 1376 | kernel_h: 1 1377 | kernel_w: 1 1378 | stride_h: 1 1379 | stride_w: 1 1380 | dilation: 1 1381 | } 1382 | } 1383 | layer { 1384 | name: "Transpose_129" 1385 | type: "Permute" 1386 | bottom: "onnx::Transpose_481" 1387 | top: "onnx::Shape_482" 1388 | permute_param { 1389 | order: 0 1390 | order: 2 1391 | order: 3 1392 | order: 1 1393 | } 1394 | } 1395 | layer { 1396 | name: "Reshape_135" 1397 | type: "Reshape" 1398 | bottom: "onnx::Shape_482" 1399 | top: "onnx::Concat_494" 1400 | reshape_param { 1401 | shape { 1402 | dim: 0 1403 | dim: 1 1404 | dim: -1 1405 | dim: 4 1406 | } 1407 | } 1408 | } 1409 | layer { 1410 | name: "Conv_136" 1411 | type: "Convolution" 1412 | bottom: "input.356" 1413 | top: "onnx::Transpose_495" 1414 | convolution_param { 1415 | num_output: 8 1416 | bias_term: true 1417 | group: 1 1418 | pad_h: 0 1419 | pad_w: 0 1420 | kernel_h: 1 1421 | kernel_w: 1 1422 | stride_h: 1 1423 | stride_w: 1 1424 | dilation: 1 1425 | } 1426 | } 1427 | layer { 1428 | name: "Transpose_137" 1429 | type: "Permute" 1430 | bottom: "onnx::Transpose_495" 1431 | top: "onnx::Reshape_496" 1432 | permute_param { 1433 | order: 0 1434 | order: 2 1435 | order: 3 1436 | order: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "Reshape_138" 1441 | type: "Reshape" 1442 | bottom: "onnx::Reshape_496" 1443 | top: "onnx::Concat_506" 1444 | reshape_param { 1445 | shape { 1446 | dim: 0 1447 | dim: 1 1448 | dim: -1 1449 | dim: 4 1450 | } 1451 | } 1452 | } 1453 | layer { 1454 | name: "Concat_139" 1455 | type: "Concat" 1456 | bottom: "onnx::Concat_480" 1457 | bottom: "onnx::Concat_494" 1458 | bottom: "onnx::Concat_506" 1459 | top: "output0" 1460 | concat_param { 1461 | axis: 2 1462 | } 1463 | } 1464 | layer { 1465 | name: "Conv_140" 1466 | type: "Convolution" 1467 | bottom: "input.292" 1468 | top: "onnx::Transpose_508" 1469 | convolution_param { 1470 | num_output: 4 1471 | bias_term: true 1472 | group: 1 1473 | pad_h: 0 1474 | pad_w: 0 1475 | kernel_h: 1 1476 | kernel_w: 1 1477 | stride_h: 1 1478 | stride_w: 1 1479 | dilation: 1 1480 | } 1481 | } 1482 | layer { 1483 | name: "Transpose_141" 1484 | type: "Permute" 1485 | bottom: "onnx::Transpose_508" 1486 | top: "onnx::Shape_509" 1487 | permute_param { 1488 | order: 0 1489 | order: 2 1490 | order: 3 1491 | order: 1 1492 | } 1493 | } 1494 | layer { 1495 | name: "Reshape_147" 1496 | type: "Reshape" 1497 | bottom: "onnx::Shape_509" 1498 | top: "onnx::Concat_521" 1499 | reshape_param { 1500 | shape { 1501 | dim: 0 1502 | dim: 1 1503 | dim: -1 1504 | dim: 2 1505 | } 1506 | } 1507 | } 1508 | layer { 1509 | name: "Conv_148" 1510 | type: "Convolution" 1511 | bottom: "input.324" 1512 | top: "onnx::Transpose_522" 1513 | 
convolution_param { 1514 | num_output: 4 1515 | bias_term: true 1516 | group: 1 1517 | pad_h: 0 1518 | pad_w: 0 1519 | kernel_h: 1 1520 | kernel_w: 1 1521 | stride_h: 1 1522 | stride_w: 1 1523 | dilation: 1 1524 | } 1525 | } 1526 | layer { 1527 | name: "Transpose_149" 1528 | type: "Permute" 1529 | bottom: "onnx::Transpose_522" 1530 | top: "onnx::Shape_523" 1531 | permute_param { 1532 | order: 0 1533 | order: 2 1534 | order: 3 1535 | order: 1 1536 | } 1537 | } 1538 | layer { 1539 | name: "Reshape_155" 1540 | type: "Reshape" 1541 | bottom: "onnx::Shape_523" 1542 | top: "onnx::Concat_535" 1543 | reshape_param { 1544 | shape { 1545 | dim: 0 1546 | dim: 1 1547 | dim: -1 1548 | dim: 2 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "Conv_156" 1554 | type: "Convolution" 1555 | bottom: "input.356" 1556 | top: "onnx::Transpose_536" 1557 | convolution_param { 1558 | num_output: 4 1559 | bias_term: true 1560 | group: 1 1561 | pad_h: 0 1562 | pad_w: 0 1563 | kernel_h: 1 1564 | kernel_w: 1 1565 | stride_h: 1 1566 | stride_w: 1 1567 | dilation: 1 1568 | } 1569 | } 1570 | layer { 1571 | name: "Transpose_157" 1572 | type: "Permute" 1573 | bottom: "onnx::Transpose_536" 1574 | top: "onnx::Reshape_537" 1575 | permute_param { 1576 | order: 0 1577 | order: 2 1578 | order: 3 1579 | order: 1 1580 | } 1581 | } 1582 | layer { 1583 | name: "Reshape_158" 1584 | type: "Reshape" 1585 | bottom: "onnx::Reshape_537" 1586 | top: "onnx::Concat_547" 1587 | reshape_param { 1588 | shape { 1589 | dim: 0 1590 | dim: 1 1591 | dim: -1 1592 | dim: 2 1593 | } 1594 | } 1595 | } 1596 | layer { 1597 | name: "Concat_159" 1598 | type: "Concat" 1599 | bottom: "onnx::Concat_521" 1600 | bottom: "onnx::Concat_535" 1601 | bottom: "onnx::Concat_547" 1602 | top: "classifications" 1603 | concat_param { 1604 | axis: 2 1605 | } 1606 | } 1607 | layer { 1608 | name: "Conv_160" 1609 | type: "Convolution" 1610 | bottom: "input.292" 1611 | top: "onnx::Transpose_549" 1612 | convolution_param { 1613 | num_output: 20 1614 | bias_term: true 1615 | group: 1 1616 | pad_h: 0 1617 | pad_w: 0 1618 | kernel_h: 1 1619 | kernel_w: 1 1620 | stride_h: 1 1621 | stride_w: 1 1622 | dilation: 1 1623 | } 1624 | } 1625 | layer { 1626 | name: "Transpose_161" 1627 | type: "Permute" 1628 | bottom: "onnx::Transpose_549" 1629 | top: "onnx::Shape_550" 1630 | permute_param { 1631 | order: 0 1632 | order: 2 1633 | order: 3 1634 | order: 1 1635 | } 1636 | } 1637 | layer { 1638 | name: "Reshape_167" 1639 | type: "Reshape" 1640 | bottom: "onnx::Shape_550" 1641 | top: "onnx::Concat_562" 1642 | reshape_param { 1643 | shape { 1644 | dim: 0 1645 | dim: 1 1646 | dim: -1 1647 | dim: 10 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "Conv_168" 1653 | type: "Convolution" 1654 | bottom: "input.324" 1655 | top: "onnx::Transpose_563" 1656 | convolution_param { 1657 | num_output: 20 1658 | bias_term: true 1659 | group: 1 1660 | pad_h: 0 1661 | pad_w: 0 1662 | kernel_h: 1 1663 | kernel_w: 1 1664 | stride_h: 1 1665 | stride_w: 1 1666 | dilation: 1 1667 | } 1668 | } 1669 | layer { 1670 | name: "Transpose_169" 1671 | type: "Permute" 1672 | bottom: "onnx::Transpose_563" 1673 | top: "onnx::Shape_564" 1674 | permute_param { 1675 | order: 0 1676 | order: 2 1677 | order: 3 1678 | order: 1 1679 | } 1680 | } 1681 | layer { 1682 | name: "Reshape_175" 1683 | type: "Reshape" 1684 | bottom: "onnx::Shape_564" 1685 | top: "onnx::Concat_576" 1686 | reshape_param { 1687 | shape { 1688 | dim: 0 1689 | dim: 1 1690 | dim: -1 1691 | dim: 10 1692 | } 1693 | } 1694 | } 1695 | layer { 1696 | name: 
"Conv_176" 1697 | type: "Convolution" 1698 | bottom: "input.356" 1699 | top: "onnx::Transpose_577" 1700 | convolution_param { 1701 | num_output: 20 1702 | bias_term: true 1703 | group: 1 1704 | pad_h: 0 1705 | pad_w: 0 1706 | kernel_h: 1 1707 | kernel_w: 1 1708 | stride_h: 1 1709 | stride_w: 1 1710 | dilation: 1 1711 | } 1712 | } 1713 | layer { 1714 | name: "Transpose_177" 1715 | type: "Permute" 1716 | bottom: "onnx::Transpose_577" 1717 | top: "onnx::Reshape_578" 1718 | permute_param { 1719 | order: 0 1720 | order: 2 1721 | order: 3 1722 | order: 1 1723 | } 1724 | } 1725 | layer { 1726 | name: "Reshape_178" 1727 | type: "Reshape" 1728 | bottom: "onnx::Reshape_578" 1729 | top: "onnx::Concat_588" 1730 | reshape_param { 1731 | shape { 1732 | dim: 0 1733 | dim: 1 1734 | dim: -1 1735 | dim: 10 1736 | } 1737 | } 1738 | } 1739 | layer { 1740 | name: "Concat_179" 1741 | type: "Concat" 1742 | bottom: "onnx::Concat_562" 1743 | bottom: "onnx::Concat_576" 1744 | bottom: "onnx::Concat_588" 1745 | top: "589" 1746 | concat_param { 1747 | axis: 2 1748 | } 1749 | } 1750 | layer { 1751 | name: "Softmax_180" 1752 | type: "Softmax" 1753 | bottom: "classifications" 1754 | top: "590" 1755 | softmax_param { 1756 | axis: 3 1757 | } 1758 | } 1759 | 1760 | -------------------------------------------------------------------------------- /face_dataset/obama_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/obama_1.jpg -------------------------------------------------------------------------------- /face_dataset/刘德华1_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/刘德华1_1.jpeg -------------------------------------------------------------------------------- /face_dataset/刘德华1_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/刘德华1_2.jpeg -------------------------------------------------------------------------------- /face_dataset/张学友2_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/张学友2_1.jpg -------------------------------------------------------------------------------- /face_dataset/张学友3_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/张学友3_1.jpg -------------------------------------------------------------------------------- /face_dataset/张学友_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/张学友_1.jpg -------------------------------------------------------------------------------- /face_dataset/郭富城2_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/郭富城2_2.jpg 
-------------------------------------------------------------------------------- /face_dataset/郭富城_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/郭富城_1.jpg -------------------------------------------------------------------------------- /face_dataset/黎明1_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/黎明1_1.jpg -------------------------------------------------------------------------------- /face_dataset/黎明2_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/face_dataset/黎明2_2.jpg -------------------------------------------------------------------------------- /img/4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/4.jpeg -------------------------------------------------------------------------------- /img/4_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/4_2.jpg -------------------------------------------------------------------------------- /img/4_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/4_3.jpg -------------------------------------------------------------------------------- /img/4_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/4_4.jpg -------------------------------------------------------------------------------- /img/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/5.jpg -------------------------------------------------------------------------------- /img/6.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/6.jpeg -------------------------------------------------------------------------------- /img/ldh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/ldh.jpg -------------------------------------------------------------------------------- /img/lm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/lm.jpg -------------------------------------------------------------------------------- /img/obama.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/obama.jpg -------------------------------------------------------------------------------- /img/zhangxueyou.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/zhangxueyou.jpg -------------------------------------------------------------------------------- /img/zhangxueyou4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/img/zhangxueyou4.jpg -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | import numpy as np 4 | from math import ceil 5 | 6 | 7 | class PriorBox(object): 8 | def __init__(self, cfg, image_size=None, phase='train'): 9 | super(PriorBox, self).__init__() 10 | self.min_sizes = cfg['min_sizes'] 11 | self.steps = cfg['steps'] 12 | self.clip = cfg['clip'] 13 | self.image_size = image_size 14 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 15 | self.name = "s" 16 | 17 | def forward(self): 18 | anchors = [] 19 | for k, f in enumerate(self.feature_maps): 20 | min_sizes = self.min_sizes[k] 21 | for i, j in product(range(f[0]), range(f[1])): 22 | for min_size in min_sizes: 23 | s_kx = min_size / self.image_size[1] 24 | s_ky = min_size / self.image_size[0] 25 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 26 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 27 | for cy, cx in product(dense_cy, dense_cx): 28 | anchors += [cx, cy, s_kx, s_ky] 29 | 30 | # back to torch land 31 | output = torch.Tensor(anchors).view(-1, 4) 32 | if self.clip: 33 | output.clamp_(max=1, min=0) 34 | return output 35 | -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | from data import cfg_mnet 7 | GPU = cfg_mnet['gpu_train'] 8 | 9 | class MultiBoxLoss(nn.Module): 10 | """SSD Weighted Loss Function 11 | Compute Targets: 12 | 1) Produce Confidence Target Indices by matching ground truth boxes 13 | with (default) 'priorboxes' that have jaccard index > threshold parameter 14 | (default threshold: 0.5). 15 | 2) Produce localization target by 'encoding' variance into offsets of ground 16 | truth boxes and their matched 'priorboxes'. 
17 | 3) Hard negative mining to filter the excessive number of negative examples 18 | that comes with using a large number of default bounding boxes. 19 | (default negative:positive ratio 3:1) 20 | Objective Loss: 21 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 22 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 23 | weighted by α which is set to 1 by cross val. 24 | Args: 25 | c: class confidences, 26 | l: predicted boxes, 27 | g: ground truth boxes 28 | N: number of matched default boxes 29 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 30 | """ 31 | 32 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target): 33 | super(MultiBoxLoss, self).__init__() 34 | self.num_classes = num_classes 35 | self.threshold = overlap_thresh 36 | self.background_label = bkg_label 37 | self.encode_target = encode_target 38 | self.use_prior_for_matching = prior_for_matching 39 | self.do_neg_mining = neg_mining 40 | self.negpos_ratio = neg_pos 41 | self.neg_overlap = neg_overlap 42 | self.variance = [0.1, 0.2] 43 | 44 | def forward(self, predictions, priors, targets): 45 | """Multibox Loss 46 | Args: 47 | predictions (tuple): A tuple containing loc preds, conf preds, 48 | and prior boxes from SSD net. 49 | conf shape: torch.size(batch_size,num_priors,num_classes) 50 | loc shape: torch.size(batch_size,num_priors,4) 51 | priors shape: torch.size(num_priors,4) 52 | 53 | ground_truth (tensor): Ground truth boxes and labels for a batch, 54 | shape: [batch_size,num_objs,5] (last idx is the label). 55 | """ 56 | 57 | loc_data, conf_data, landm_data = predictions 58 | priors = priors 59 | num = loc_data.size(0) 60 | num_priors = (priors.size(0)) 61 | 62 | # match priors (default boxes) and ground truth boxes 63 | loc_t = torch.Tensor(num, num_priors, 4) 64 | landm_t = torch.Tensor(num, num_priors, 10) 65 | conf_t = torch.LongTensor(num, num_priors) 66 | for idx in range(num): 67 | truths = targets[idx][:, :4].data 68 | labels = targets[idx][:, -1].data 69 | landms = targets[idx][:, 4:14].data 70 | defaults = priors.data 71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) 72 | if GPU: 73 | loc_t = loc_t.cuda() 74 | conf_t = conf_t.cuda() 75 | landm_t = landm_t.cuda() 76 | 77 | zeros = torch.tensor(0).cuda() 78 | # landm Loss (Smooth L1) 79 | # Shape: [batch,num_priors,10] 80 | pos1 = conf_t > zeros 81 | num_pos_landm = pos1.long().sum(1, keepdim=True) 82 | N1 = max(num_pos_landm.data.sum().float(), 1) 83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 84 | landm_p = landm_data[pos_idx1].view(-1, 10) 85 | landm_t = landm_t[pos_idx1].view(-1, 10) 86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') 87 | 88 | 89 | pos = conf_t != zeros 90 | conf_t[pos] = 1 91 | 92 | # Localization Loss (Smooth L1) 93 | # Shape: [batch,num_priors,4] 94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 95 | loc_p = loc_data[pos_idx].view(-1, 4) 96 | loc_t = loc_t[pos_idx].view(-1, 4) 97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 98 | 99 | # Compute max conf across batch for hard negative mining 100 | batch_conf = conf_data.view(-1, self.num_classes) 101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 102 | 103 | # Hard Negative Mining 104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 105 | loss_c = loss_c.view(num, -1) 106 | _, loss_idx = loss_c.sort(1, descending=True) 107 | _, 
idx_rank = loss_idx.sort(1) 108 | num_pos = pos.long().sum(1, keepdim=True) 109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 110 | neg = idx_rank < num_neg.expand_as(idx_rank) 111 | 112 | # Confidence Loss Including Positive and Negative Examples 113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 116 | targets_weighted = conf_t[(pos+neg).gt(0)] 117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 118 | 119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 120 | N = max(num_pos.data.sum().float(), 1) 121 | loss_l /= N 122 | loss_c /= N 123 | loss_landm /= N1 124 | 125 | return loss_l, loss_c, loss_landm 126 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | # sys.path.insert(0, "/opt/caffe-1.0/python") 4 | # sys.path.insert(0, "/opt/caffe_plus/python") 5 | # sys.path.insert(1, "../") 6 | import caffe 7 | import argparse 8 | # import torch 9 | import numpy as np 10 | import cv2 11 | import time 12 | 13 | 14 | class CaffeInference(caffe.Net): 15 | """docstring for ClassName""" 16 | 17 | def __init__(self, model_file, pretrained_file, mean=None, use_gpu=False, device_id=0): 18 | self.__mean = mean 19 | if use_gpu: 20 | caffe.set_mode_gpu() 21 | caffe.set_device(device_id) 22 | else: 23 | caffe.set_mode_cpu() 24 | 25 | self.__net = caffe.Net(model_file, pretrained_file, caffe.TEST) 26 | 27 | def predict(self, img, input_name="data", output_name=["BboxHead_Concat", "ClassHead_Softmax", "LandmarkHead_Concat"]): 28 | img -= (self.__mean) 29 | # img *= 0.0078125 30 | if 3 == len(self.__mean): 31 | img = img.transpose(2, 0, 1)#hwc > chw 32 | new_shape = [1, img.shape[0], img.shape[1], img.shape[2]] 33 | else: 34 | new_shape = [1, 1, img.shape[0], img.shape[1]] 35 | 36 | img = img.reshape(new_shape) 37 | self.__net.blobs[input_name].reshape(*new_shape) 38 | self.__net.blobs[input_name].data[...] 
= img 39 | 40 | self.__net.forward() 41 | 42 | res = [] 43 | 44 | res.append(self.__net.blobs[output_name].data) 45 | 46 | return (*res, img) 47 | 48 | 49 | def demo(args): 50 | 51 | net = CaffeInference(args.deploy, args.trained_model, mean=(0, 0, 0), use_gpu=not args.cpu, device_id=0) 52 | print('Finished loading model!') 53 | 54 | # device = torch.device("cpu" if args.cpu else "cuda") 55 | data_layer = 'data' 56 | resize = 1 57 | 58 | # testing begin 59 | img_raw = cv2.imread(args.img_path, cv2.IMREAD_COLOR) 60 | img_raw = cv2.resize(img_raw,(112,112)) 61 | 62 | img = np.float32(img_raw) 63 | 64 | 65 | 66 | im_height, im_width, _ = img.shape 67 | 68 | # scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 69 | # scale = scale.to(device) 70 | 71 | tic = time.time() 72 | # loc, conf, landms, img = net.predict(img) # forward pass 73 | #for gyl test 74 | # output_name = ["BboxHead_Concat", "ClassHead_Softmax", "LandmarkHead_Concat"] 75 | result, img = net.predict(img,input_name="data", output_name="fc1") # forward pass 76 | print('net forward time: {:.4f}'.format(time.time() - tic)) 77 | 78 | print(result) 79 | 80 | 81 | 82 | if __name__ == '__main__': 83 | 84 | parser = argparse.ArgumentParser(description='Retinaface') 85 | 86 | parser.add_argument('-m', '--trained_model', default='./mobilefacenet.caffemodel', 87 | type=str, help='Trained caffemodel path') 88 | parser.add_argument('--deploy', default='./mobilefacenet.prototxt', help='Path of deploy file') 89 | parser.add_argument('--img_path', default='./9.jpg', help='Path of test image') 90 | parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') 91 | parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold') 92 | parser.add_argument('--top_k', default=5000, type=int, help='top_k') 93 | parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold') 94 | parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k') 95 | parser.add_argument('-s', '--save_image', action="store_true", default=True, help='save detection results') 96 | parser.add_argument('--show_image', action="store_true", default=True, help='show detection results') 97 | parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold') 98 | args = parser.parse_args() 99 | 100 | 101 | demo(args) 102 | -------------------------------------------------------------------------------- /mobilefacenetConvert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | # import matplotlib 5 | # matplotlib.use('Agg') 6 | # import urllib.request 7 | # from matplotlib import gridspec 8 | # from matplotlib import pyplot as plt 9 | # from PIL import Image 10 | # from tensorflow.python.platform import gfile 11 | from rknn.api import RKNN 12 | from PIL import Image 13 | from sklearn import preprocessing 14 | from scipy.spatial.distance import pdist, squareform 15 | 16 | os.environ['RKNN_DRAW_DATA_DISTRIBUTE']="1" 17 | def compute_cos_dis(x, y): 18 | cos_dist= (x* y)/(np.linalg.norm(x)*(np.linalg.norm(y))) 19 | return cos_dist.sum() 20 | 21 | 22 | cfg_facenet_pytorch_onnx = { 23 | 'modelType':"onnx", 24 | 'model': './weights/mobilefacenet2.onnx', 25 | 'inputs' : "input0", 26 | 'input_size_list':[[3, 160, 160]], 27 | 'outputs':['output0'], 28 | 'reorder_channel':'0 1 2', 29 | 'mean_values':[[0, 0, 0]], 30 | 'std_values':[[255, 255, 255]], 31 | 
'input_img_size':(160,160) 32 | } 33 | 34 | cfg_facenet_mxnet_caffe = { 35 | 'modelType':"caffe", 36 | 'model': './weights/mobilefacenet.prototxt', 37 | 'blob' : './weights/mobilefacenet.caffemodel', 38 | 'inputs' : "input0", 39 | 'input_size_list':[[3, 112, 112]], 40 | 'outputs':['output0'], 41 | 'reorder_channel':'2 1 0', 42 | 'mean_values':[[127.5, 127.5, 127.5]], 43 | 'std_values':[[128, 128, 128]], 44 | 'input_img_size':(112,112) 45 | } 46 | cfg_facenet_mxnet = { 47 | 'modelType':"mxnet", 48 | 'inputs' : "input0", 49 | 'input_size_list':[[3, 112, 112]], 50 | 'outputs':['output0'], 51 | 'reorder_channel':'2 1 0', 52 | 'mean_values':[[0, 0, 0]], 53 | 'std_values':[[1, 1, 1]], 54 | 'symbol' : './model-symbol.json', 55 | 'params' : './model-0000.params', 56 | 'input_img_size':(112,112) 57 | } 58 | 59 | if __name__ == '__main__': 60 | 61 | cfg = cfg_facenet_mxnet 62 | im_file = './9.jpg' 63 | BUILD_QUANT = False 64 | RKNN_MODEL_PATH = './mobilefacenet.rknn' 65 | if BUILD_QUANT: 66 | RKNN_MODEL_PATH = './mobilefacenet_quant.rknn' 67 | 68 | # Create RKNN object 69 | rknn = RKNN() 70 | 71 | NEED_BUILD_MODEL = True 72 | if NEED_BUILD_MODEL: 73 | print('--> config model') 74 | 75 | rknn.config(reorder_channel=cfg['reorder_channel'], mean_values=cfg['mean_values'], std_values=cfg['std_values'],target_platform=['rv1126'],batch_size=1,quantized_dtype='dynamic_fixed_point-i16') 76 | 77 | print('done') 78 | print('--> Loading model') 79 | if cfg['modelType'] == "caffe": 80 | # Load caffe model 81 | print("load caffe model proto[%s] weights[%s]"%(cfg['model'],cfg['blob'])) 82 | ret = rknn.load_caffe(model=cfg['model'],proto='caffe',blobs=cfg['blob']) 83 | if ret != 0: 84 | print('Load model failed! Ret = {}'.format(ret)) 85 | exit(ret) 86 | elif cfg['modelType'] == "onnx": 87 | print("load onnx model model[%s] inputs[%s] input_size_list[%s] outputs[%s]" 88 | % (cfg['model'],cfg['inputs'],cfg['input_size_list'],cfg['outputs'])) 89 | ret = rknn.load_onnx(model=cfg['model'], 90 | inputs=cfg['inputs'], 91 | input_size_list=cfg['input_size_list'], 92 | outputs=cfg['outputs']) 93 | if ret != 0: 94 | print('Load retinaface failed!') 95 | exit(ret) 96 | elif cfg['modelType'] == "mxnet":# # Load mxnet model 97 | print("load mxnet model symbol[%s] params[%s] input_size_list[%s]" % (cfg['symbol'], cfg['params'], cfg['input_size_list'])) 98 | ret = rknn.load_mxnet(cfg['symbol'], cfg['params'], cfg['input_size_list']) 99 | if ret != 0: 100 | print('Load mxnet model failed!') 101 | exit(ret) 102 | print('done') 103 | elif cfg['modelType'] == "keras":# # Load mxnet model 104 | ret = rknn.load_keras(model=cfg['model']) 105 | if ret != 0: 106 | print('Load keras model failed!') 107 | exit(ret) 108 | print('done') 109 | else: 110 | print('Load mxnet failed!') 111 | exit(-1) 112 | print('done') 113 | 114 | # Build model 115 | print('--> Building model') 116 | ret = rknn.build(do_quantization=BUILD_QUANT, dataset='./dataset.txt') 117 | if ret != 0: 118 | print('Build model failed!') 119 | exit(ret) 120 | print('done') 121 | 122 | if BUILD_QUANT: 123 | print('--> Accuracy analysis') 124 | rknn.accuracy_analysis(inputs='./dataset.txt',output_dir="./result",target='rv1126') 125 | print('done') 126 | 127 | # Export rknn model 128 | 129 | if False:#BUILD_QUANT: 130 | print('--> Export RKNN precompile model') 131 | ret = rknn.export_rknn_precompile_model(RKNN_MODEL_PATH) 132 | else: 133 | print('--> Export RKNN model') 134 | ret = rknn.export_rknn(RKNN_MODEL_PATH) 135 | 136 | if ret != 0: 137 | print('Export rknn failed!') 
138 | exit(ret) 139 | print('done') 140 | else: 141 | # Direct load rknn model 142 | print('Loading RKNN model') 143 | ret = rknn.load_rknn(RKNN_MODEL_PATH) 144 | if ret != 0: 145 | print('load rknn model failed.') 146 | exit(ret) 147 | print('done') 148 | 149 | 150 | print('--> Init runtime environment') 151 | ret = rknn.init_runtime(target='rv1126', device_id='d81352278dd4de31',rknn2precompile=False) 152 | # ret = rknn.init_runtime(target='rv1126') 153 | if ret != 0: 154 | print('Init runtime environment failed') 155 | exit(ret) 156 | print('done') 157 | 158 | # Set inputs 159 | img = cv2.imread(im_file) 160 | img = cv2.resize(img, cfg['input_img_size']) 161 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 162 | # img = np.random.randint(low=0, high=255, size=(112,112,3), dtype=np.uint8) 163 | 164 | 165 | image_1 = Image.open(im_file) 166 | image_1 = image_1.resize(cfg['input_img_size'], Image.BICUBIC) 167 | img = np.asarray(image_1, np.uint8) 168 | # img = np.float32(img) 169 | # img -= (128.0, 128.0, 128.0) 170 | print(img.shape) 171 | 172 | # img *= 0.0078125 173 | 174 | 175 | # inference 176 | 177 | print('--> inference') 178 | # outputs = rknn.inference(inputs=[img]) 179 | outputs = rknn.inference(data_format='nhwc',inputs=[img]) 180 | print('done') 181 | # outputs = np.expand_dims(outputs,axis=1) 182 | 183 | # outputs = preprocessing.normalize(outputs[0], norm='l2') 184 | 185 | 186 | print(outputs) 187 | image_1 = Image.open("1_001.jpg") 188 | image_1 = image_1.resize(cfg['input_img_size'], Image.BICUBIC) 189 | img = np.asarray(image_1, np.uint8) 190 | outputs0 = np.array(rknn.inference(data_format='nhwc', inputs=[img])[0]) 191 | 192 | 193 | image_1 = Image.open("1_002.jpg") 194 | image_1 = image_1.resize(cfg['input_img_size'], Image.BICUBIC) 195 | img = np.asarray(image_1, np.uint8) 196 | outputs1 = np.array(rknn.inference(data_format='nhwc', inputs=[img])[0]) 197 | 198 | 199 | l1 = np.linalg.norm(outputs1 - outputs0, axis=1) 200 | print("l1 %f"%l1) 201 | cosSim = 1 - pdist(np.vstack([outputs1, outputs0]), 'cosine') 202 | print("pdist %f"%cosSim) 203 | outputs1 = preprocessing.normalize(outputs1, norm='l2') 204 | outputs0 = preprocessing.normalize(outputs0, norm='l2') 205 | l1 = np.linalg.norm(outputs1 - outputs0, axis=1) 206 | print("after l2 l1 %f" % l1) 207 | 208 | rknn.eval_perf() 209 | rknn.release() 210 | 211 | -------------------------------------------------------------------------------- /model_data/face_encoding.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/model_data/face_encoding.npy -------------------------------------------------------------------------------- /model_data/names.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/model_data/names.npy -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from PIL import Image 3 | import cv2 4 | from FaceRecognition import Facenet 5 | import os 6 | import numpy as np 7 | #---------------------------------------------------# 8 | # 对输入图像进行resize 9 | #---------------------------------------------------# 10 | def letterbox_image(image, size): 11 | ih, iw, _ = np.shape(image) 12 | w, h = 
size 13 | scale = min(w/iw, h/ih) 14 | nw = int(iw*scale) 15 | nh = int(ih*scale) 16 | 17 | image = cv2.resize(image, (nw, nh)) 18 | new_image = np.ones([size[1], size[0], 3],np.uint8) * 255 19 | new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image 20 | return new_image 21 | 22 | 23 | if __name__ == "__main__": 24 | model = Facenet(facenet_threhold=12) 25 | 26 | exit 27 | totalImg = os.listdir("./img") 28 | for i in range(len(totalImg)): 29 | 30 | # image_1 = input('Input image_1 filename:') 31 | image_path = "img/"+totalImg[i] 32 | try: 33 | image_1 = Image.open(image_path).convert('RGB') 34 | except: 35 | print('Image_1 Open Error! Try again!') 36 | continue 37 | 38 | # image_2 = input('Input image_2 filename:') 39 | # try: 40 | # image_2 = Image.open(image_2) 41 | # except: 42 | # print('Image_2 Open Error! Try again!') 43 | # continue 44 | img_raw = cv2.imread(image_path) 45 | # img_raw = cv2.resize(img_raw, (640, 640)) 46 | img_raw = letterbox_image(img_raw, [640, 640]) 47 | img = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB) 48 | 49 | r_image = model.detect_image(img) 50 | r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR) 51 | cv2.imshow("after", r_image) 52 | cv2.waitKey(0) 53 | # print(probability) 54 | -------------------------------------------------------------------------------- /resizeImgTest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import cv2 5 | from PIL import Image 6 | 7 | #---------------------------------------------------# 8 | # 对输入图像进行resize 9 | #---------------------------------------------------# 10 | def letterbox_image(image, size): 11 | ih, iw, _ = np.shape(image) 12 | w, h = size 13 | scale = min(w/iw, h/ih) 14 | nw = int(iw*scale) 15 | nh = int(ih*scale) 16 | 17 | image = cv2.resize(image, (nw, nh)) 18 | new_image = np.ones([size[1], size[0], 3],np.uint8) * 255 19 | new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image 20 | return new_image 21 | 22 | srcPath = "img/" 23 | dstPath = "img2/" 24 | list_dir = os.listdir(srcPath) 25 | image_paths = [] 26 | names = [] 27 | idx = 0 28 | 29 | for name in list_dir: 30 | image_paths.append(srcPath+name) 31 | img_raw = Image.open(srcPath+name).convert('RGB') 32 | img_raw = cv2.cvtColor(np.asarray(img_raw),cv2.COLOR_RGB2BGR) 33 | img_raw = letterbox_image(img_raw, [640, 640]) 34 | if os.path.exists(dstPath) == False: 35 | os.mkdir(dstPath) 36 | cv2.imwrite(dstPath+str(idx)+".jpg",img_raw) 37 | idx += 1 38 | names.append(name.split("_")[0]) 39 | 40 | 41 | -------------------------------------------------------------------------------- /retinaface.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/retinaface.rknn -------------------------------------------------------------------------------- /retinaface_quant.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/retinaface_quant.rknn -------------------------------------------------------------------------------- /rtspPredict.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from PIL import Image 3 | import cv2 4 | from FaceRecognition import Facenet 5 | import os 6 | import numpy as np 7 | from timeit import 
default_timer as timer 8 | import argparse 9 | import os 10 | import glob 11 | import random 12 | import time 13 | from rtspdec import RTSCapture 14 | 15 | #---------------------------------------------------# 16 | # 对输入图像进行resize 17 | #---------------------------------------------------# 18 | def letterbox_image(image, size): 19 | ih, iw, _ = np.shape(image) 20 | w, h = size 21 | scale = min(w/iw, h/ih) 22 | nw = int(iw*scale) 23 | nh = int(ih*scale) 24 | 25 | image = cv2.resize(image, (nw, nh)) 26 | new_image = np.ones([size[1], size[0], 3],np.uint8) * 255 27 | new_image[(h-nh)//2:nh+(h-nh)//2, (w-nw)//2:nw+(w-nw)//2] = image 28 | return new_image 29 | 30 | ENABLE_CACHE_IMG = False 31 | def detect_rtsp(model): 32 | 33 | #rtscap = RTSCapture.create("rtsp://172.16.3.44:10554/analyse_full") 34 | rtscap = RTSCapture.create("rtsp://172.16.3.194:554/live/av0") 35 | rtscap.start_read() 36 | accum_time = 0 37 | curr_fps = 0 38 | prev_time = timer() 39 | while rtscap.isStarted(): 40 | ok, frame = rtscap.read_latest_frame() 41 | 42 | if cv2.waitKey(100) & 0xFF == ord('q'): 43 | break 44 | if not ok: 45 | continue 46 | 47 | 48 | # inhere 49 | test = Image.fromarray(frame)#test is BGR 50 | img = np.asarray(test, np.uint8) 51 | if ENABLE_CACHE_IMG: 52 | cv2.imshow("test",img) 53 | r = cv2.waitKey() 54 | if r == ord('s'): 55 | cv2.imwrite("face_dataset/gyl_1.jpg",img) 56 | print("save img") 57 | continue 58 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 59 | img = letterbox_image(img, [640, 640]) 60 | # r_image = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB) 61 | 62 | r_image = model.detect_image(img) 63 | # r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR) 64 | 65 | 66 | 67 | # result = np.asarray(r_image) 68 | # 69 | curr_time = timer() 70 | exec_time = curr_time - prev_time 71 | prev_time = curr_time 72 | accum_time = accum_time + exec_time 73 | curr_fps = curr_fps + 1 74 | if accum_time > 1: 75 | accum_time = accum_time - 1 76 | fps = "FPS: " + str(curr_fps) 77 | curr_fps = 0 78 | print("fps %s"%fps) 79 | # cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, 80 | # fontScale=0.50, color=(255, 0, 0), thickness=2) 81 | # cv2.namedWindow("result", cv2.WINDOW_NORMAL) 82 | # cv2.imshow("result", result) 83 | 84 | cv2.imshow("after", r_image) 85 | # cv2.waitKey(0) 86 | #r_image.show() 87 | 88 | #cv2.imshow("cam", r_image) 89 | #cv2.destroyAllWindows() 90 | 91 | 92 | rtscap.stop_read() 93 | rtscap.release() 94 | cv2.destroyAllWindows() 95 | 96 | if __name__ == "__main__": 97 | 98 | # text_retinaface = "{:.4f}".format(0.88777) 99 | # text = "{:.4f}".format(0.98456) 100 | # img_raw = cv2.imread("img/4.jpeg") 101 | # # img_raw = cv2.resize(img_raw, (640, 640)) 102 | # old_image = letterbox_image(img_raw, [640, 640]) 103 | # 104 | # b = [0,0] 105 | # cx = b[0] 106 | # cy = b[1] + 12 107 | # 108 | # cv2.putText(old_image, text, (cx, cy), 109 | # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 110 | # cx = b[0] + 60 111 | # cy = b[1] + 32 112 | # cv2.putText(old_image, text_retinaface, (cx, cy), 113 | # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 114 | # cv2.imshow("test",old_image) 115 | # cv2.waitKey() 116 | if ENABLE_CACHE_IMG != True: 117 | model = Facenet(facenet_threhold=0.6) 118 | else: 119 | model = [] 120 | detect_rtsp(model) 121 | -------------------------------------------------------------------------------- /rtspdec.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import threading 3 | import sys 4 | 5 | 6 | class 
RTSCapture(cv2.VideoCapture): 7 | _cur_frame = None 8 | _reading = False 9 | schemes = ["rtsp://","rtmp://"] 10 | @staticmethod 11 | def create(url, *schemes): 12 | rtscap = RTSCapture(url) 13 | rtscap.frame_receiver = threading.Thread(target=rtscap.recv_frame, daemon=True) 14 | rtscap.schemes.extend(schemes) 15 | if isinstance(url, str) and url.startswith(tuple(rtscap.schemes)): 16 | rtscap._reading = True 17 | elif isinstance(url, int): 18 | pass 19 | return rtscap 20 | 21 | def isStarted(self): 22 | ok = self.isOpened() 23 | if ok and self._reading: 24 | ok = self.frame_receiver.is_alive() 25 | return ok 26 | 27 | def recv_frame(self): 28 | while self._reading and self.isOpened(): 29 | ok, frame = self.read() 30 | if not ok: break 31 | self._cur_frame = frame 32 | self._reading = False 33 | 34 | def read2(self): 35 | frame = self._cur_frame 36 | self._cur_frame = None 37 | return frame is not None, frame 38 | 39 | def start_read(self): 40 | self.frame_receiver.start() 41 | self.read_latest_frame = self.read2 if self._reading else self.read 42 | 43 | def stop_read(self): 44 | self._reading = False 45 | if self.frame_receiver.is_alive(): self.frame_receiver.join() 46 | 47 | 48 | # if __name__ == '__main__': 49 | # if len(sys.argv) < 2: 50 | # print("usage:") 51 | # print("need rtsp://xxx") 52 | # sys.exit() 53 | # 54 | # rtscap = RTSCapture.create(sys.argv[1]) 55 | # rtscap.start_read() 56 | # 57 | # while rtscap.isStarted(): 58 | # ok, frame = rtscap.read_latest_frame() 59 | # if cv2.waitKey(100) & 0xFF == ord('q'): 60 | # break 61 | # if not ok: 62 | # continue 63 | # 64 | # 65 | # # inhere 66 | # cv2.imshow("cam", frame) 67 | # 68 | # 69 | # rtscap.stop_read() 70 | # rtscap.release() 71 | # cv2.destroyAllWindows() 72 | 73 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from PIL import Image 3 | import numpy as np 4 | 5 | def getCoordinate(img): 6 | rectangle = [] 7 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # 灰度图 8 | ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) # 二值化 9 | 10 | element3 = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8)) # 设置膨胀和腐蚀操作 11 | dilation = cv2.dilate(binary, element3, iterations=1) # 膨胀一次,让轮廓突出 12 | contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1) # 检测轮廓 13 | cv2.drawContours(img, contours, -1, (0, 0, 255), 3) # 参数值为1, 给contours[1]绘制轮廓。 -1: 给所有的contours绘制轮廓 14 | cv2.imshow("img", img) 15 | cv2.waitKey() 16 | 17 | for contour in contours: 18 | x, y, w, h = cv2.boundingRect(contour) 19 | rectangle.append((x, y, x + w, y + h)) 20 | print(f'rectangle: {rectangle}') 21 | return rectangle 22 | 23 | 24 | def savePic(rectangle): 25 | for i in range(len(rectangle)): 26 | imgPath = "D:\\PythonWork\\Contour\\Photos\\" + str(i + 1) + ".PNG" # notes: 图片的扩展名要一致 27 | im = Image.open(defaultImgPath) 28 | im = im.crop(rectangle[i]) # 对图片进行切割 im.crop(top_x, top_y, bottom_x, bottom_y) 29 | im.save(imgPath) 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | # 创建一个长度为 14 的 Python 列表 list_data 35 | list_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 36 | 37 | # 使用 array 函数将 list_data 转换为形状为 (14) 的 NumPy 数组 38 | arr = np.array(list_data) 39 | print (arr.ndim) 40 | # 打印 arr 的形状,输出 (14,) 41 | print(arr.shape) 42 | exit(0) 43 | defaultImgPath = './t1.jpg' 44 | img = cv2.imread(defaultImgPath) 45 | img_crop = img[0:100,100:200] 46 | cv2.imshow("img", 
img_crop) 47 | cv2.waitKey() 48 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/utils/__init__.py -------------------------------------------------------------------------------- /utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers. 10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 12 | """ 13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 15 | 16 | 17 | def center_size(boxes): 18 | """ Convert prior_boxes to (cx, cy, w, h) 19 | representation for comparison to center-size form ground truth data. 20 | Args: 21 | boxes: (tensor) point_form boxes 22 | Return: 23 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 24 | """ 25 | return torch.cat((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 26 | boxes[:, 2:] - boxes[:, :2], 1) # w, h 27 | 28 | 29 | def intersect(box_a, box_b): 30 | """ We resize both tensors to [A,B,2] without new malloc: 31 | [A,2] -> [A,1,2] -> [A,B,2] 32 | [B,2] -> [1,B,2] -> [A,B,2] 33 | Then we compute the area of intersect between box_a and box_b. 34 | Args: 35 | box_a: (tensor) bounding boxes, Shape: [A,4]. 36 | box_b: (tensor) bounding boxes, Shape: [B,4]. 37 | Return: 38 | (tensor) intersection area, Shape: [A,B]. 39 | """ 40 | A = box_a.size(0) 41 | B = box_b.size(0) 42 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 43 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 44 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 45 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 46 | inter = torch.clamp((max_xy - min_xy), min=0) 47 | return inter[:, :, 0] * inter[:, :, 1] 48 | 49 | 50 | def jaccard(box_a, box_b): 51 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 52 | is simply the intersection over union of two boxes. Here we operate on 53 | ground truth boxes and default boxes. 
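As a quick numeric check of the formula below: two unit squares that share half their area give 0.5 / (1 + 1 - 0.5) = 1/3.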
54 | E.g.: 55 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 56 | Args: 57 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 58 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 59 | Return: 60 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 61 | """ 62 | inter = intersect(box_a, box_b) 63 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 64 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 65 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 66 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 67 | union = area_a + area_b - inter 68 | return inter / union # [A,B] 69 | 70 | 71 | def matrix_iou(a, b): 72 | """ 73 | return iou of a and b, numpy version for data augenmentation 74 | """ 75 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 76 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 77 | 78 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 79 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 80 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 81 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 82 | 83 | 84 | def matrix_iof(a, b): 85 | """ 86 | return iof of a and b, numpy version for data augenmentation 87 | """ 88 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 89 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 90 | 91 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 92 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 93 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 94 | 95 | 96 | def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx): 97 | """Match each prior box with the ground truth box of the highest jaccard 98 | overlap, encode the bounding boxes, then return the matched indices 99 | corresponding to both confidence and location preds. 100 | Args: 101 | threshold: (float) The overlap threshold used when mathing boxes. 102 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 103 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 104 | variances: (tensor) Variances corresponding to each prior coord, 105 | Shape: [num_priors, 4]. 106 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 107 | landms: (tensor) Ground truth landms, Shape [num_obj, 10]. 108 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 109 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 110 | landm_t: (tensor) Tensor to be filled w/ endcoded landm targets. 111 | idx: (int) current batch index 112 | Return: 113 | The matched indices corresponding to 1)location 2)confidence 3)landm preds. 
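Note (summary of the target encoding performed by encode()/encode_landm() further down in this file):
    t_cxcy = ((gt_center - prior_center) / prior_wh) / variances[0]
    t_wh   = log(gt_wh / prior_wh) / variances[1]
Landmark targets are the five point offsets from the prior center, scaled the same way as t_cxcy;
decode() and decode_landm() invert these encodings at inference time.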
114 | """ 115 | # jaccard index 116 | overlaps = jaccard( 117 | truths, 118 | point_form(priors) 119 | ) 120 | # (Bipartite Matching) 121 | # [1,num_objects] best prior for each ground truth 122 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 123 | 124 | # ignore hard gt 125 | valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 126 | best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] 127 | if best_prior_idx_filter.shape[0] <= 0: 128 | loc_t[idx] = 0 129 | conf_t[idx] = 0 130 | return 131 | 132 | # [1,num_priors] best ground truth for each prior 133 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 134 | best_truth_idx.squeeze_(0) 135 | best_truth_overlap.squeeze_(0) 136 | best_prior_idx.squeeze_(1) 137 | best_prior_idx_filter.squeeze_(1) 138 | best_prior_overlap.squeeze_(1) 139 | best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior 140 | # TODO refactor: index best_prior_idx with long tensor 141 | # ensure every gt matches with its prior of max overlap 142 | for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes 143 | best_truth_idx[best_prior_idx[j]] = j 144 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来 145 | conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来 146 | conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本 147 | loc = encode(matches, priors, variances) 148 | 149 | matches_landm = landms[best_truth_idx] 150 | landm = encode_landm(matches_landm, priors, variances) 151 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 152 | conf_t[idx] = conf # [num_priors] top class label for each prior 153 | landm_t[idx] = landm 154 | 155 | 156 | def encode(matched, priors, variances): 157 | """Encode the variances from the priorbox layers into the ground truth boxes 158 | we have matched (based on jaccard overlap) with the prior boxes. 159 | Args: 160 | matched: (tensor) Coords of ground truth for each prior in point-form 161 | Shape: [num_priors, 4]. 162 | priors: (tensor) Prior boxes in center-offset form 163 | Shape: [num_priors,4]. 164 | variances: (list[float]) Variances of priorboxes 165 | Return: 166 | encoded boxes (tensor), Shape: [num_priors, 4] 167 | """ 168 | 169 | # dist b/t match center and prior's center 170 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 171 | # encode variance 172 | g_cxcy /= (variances[0] * priors[:, 2:]) 173 | # match wh / prior wh 174 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 175 | g_wh = torch.log(g_wh) / variances[1] 176 | # return target for smooth_l1_loss 177 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 178 | 179 | def encode_landm(matched, priors, variances): 180 | """Encode the variances from the priorbox layers into the ground truth boxes 181 | we have matched (based on jaccard overlap) with the prior boxes. 182 | Args: 183 | matched: (tensor) Coords of ground truth for each prior in point-form 184 | Shape: [num_priors, 10]. 185 | priors: (tensor) Prior boxes in center-offset form 186 | Shape: [num_priors,4]. 
187 | variances: (list[float]) Variances of priorboxes 188 | Return: 189 | encoded landm (tensor), Shape: [num_priors, 10] 190 | """ 191 | 192 | # dist b/t match center and prior's center 193 | matched = torch.reshape(matched, (matched.size(0), 5, 2)) 194 | priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 195 | priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 196 | priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 197 | priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 198 | priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2) 199 | g_cxcy = matched[:, :, :2] - priors[:, :, :2] 200 | # encode variance 201 | g_cxcy /= (variances[0] * priors[:, :, 2:]) 202 | # g_cxcy /= priors[:, :, 2:] 203 | g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1) 204 | # return target for smooth_l1_loss 205 | return g_cxcy 206 | 207 | 208 | # Adapted from https://github.com/Hakuyume/chainer-ssd 209 | def decode(loc, priors, variances): 210 | """Decode locations from predictions using priors to undo 211 | the encoding we did for offset regression at train time. 212 | Args: 213 | loc (tensor): location predictions for loc layers, 214 | Shape: [num_priors,4] 215 | priors (tensor): Prior boxes in center-offset form. 216 | Shape: [num_priors,4]. 217 | variances: (list[float]) Variances of priorboxes 218 | Return: 219 | decoded bounding box predictions 220 | """ 221 | 222 | boxes = torch.cat(( 223 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 224 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 225 | boxes[:, :2] -= boxes[:, 2:] / 2 226 | boxes[:, 2:] += boxes[:, :2] 227 | return boxes 228 | 229 | def decode_landm(pre, priors, variances): 230 | """Decode landm from predictions using priors to undo 231 | the encoding we did for offset regression at train time. 232 | Args: 233 | pre (tensor): landm predictions for loc layers, 234 | Shape: [num_priors,10] 235 | priors (tensor): Prior boxes in center-offset form. 236 | Shape: [num_priors,4]. 237 | variances: (list[float]) Variances of priorboxes 238 | Return: 239 | decoded landm predictions 240 | """ 241 | landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:], 242 | priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:], 243 | priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:], 244 | priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:], 245 | priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:], 246 | ), dim=1) 247 | return landms 248 | 249 | 250 | def log_sum_exp(x): 251 | """Utility function for computing log_sum_exp while determining 252 | This will be used to determine unaveraged confidence loss across 253 | all examples in a batch. 254 | Args: 255 | x (Variable(tensor)): conf_preds from conf layers 256 | """ 257 | x_max = x.data.max() 258 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 259 | 260 | 261 | # Original author: Francisco Massa: 262 | # https://github.com/fmassa/object-detection.torch 263 | # Ported to PyTorch by Max deGroot (02/01/2017) 264 | def nms(boxes, scores, overlap=0.5, top_k=200): 265 | """Apply non-maximum suppression at test time to avoid detecting too many 266 | overlapping bounding boxes for a given object. 267 | Args: 268 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 269 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 
270 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 271 | top_k: (int) The Maximum number of box preds to consider. 272 | Return: 273 | The indices of the kept boxes with respect to num_priors. 274 | """ 275 | 276 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 277 | if boxes.numel() == 0: 278 | return keep 279 | x1 = boxes[:, 0] 280 | y1 = boxes[:, 1] 281 | x2 = boxes[:, 2] 282 | y2 = boxes[:, 3] 283 | area = torch.mul(x2 - x1, y2 - y1) 284 | v, idx = scores.sort(0) # sort in ascending order 285 | # I = I[v >= 0.01] 286 | idx = idx[-top_k:] # indices of the top-k largest vals 287 | xx1 = boxes.new() 288 | yy1 = boxes.new() 289 | xx2 = boxes.new() 290 | yy2 = boxes.new() 291 | w = boxes.new() 292 | h = boxes.new() 293 | 294 | # keep = torch.Tensor() 295 | count = 0 296 | while idx.numel() > 0: 297 | i = idx[-1] # index of current largest val 298 | # keep.append(i) 299 | keep[count] = i 300 | count += 1 301 | if idx.size(0) == 1: 302 | break 303 | idx = idx[:-1] # remove kept element from view 304 | # load bboxes of next highest vals 305 | torch.index_select(x1, 0, idx, out=xx1) 306 | torch.index_select(y1, 0, idx, out=yy1) 307 | torch.index_select(x2, 0, idx, out=xx2) 308 | torch.index_select(y2, 0, idx, out=yy2) 309 | # store element-wise max with next highest score 310 | xx1 = torch.clamp(xx1, min=x1[i]) 311 | yy1 = torch.clamp(yy1, min=y1[i]) 312 | xx2 = torch.clamp(xx2, max=x2[i]) 313 | yy2 = torch.clamp(yy2, max=y2[i]) 314 | w.resize_as_(xx2) 315 | h.resize_as_(yy2) 316 | w = xx2 - xx1 317 | h = yy2 - yy1 318 | # check sizes of xx1 and xx2.. after each iteration 319 | w = torch.clamp(w, min=0.0) 320 | h = torch.clamp(h, min=0.0) 321 | inter = w*h 322 | # IoU = i / (area(a) + area(b) - i) 323 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 324 | union = (rem_areas - inter) + area[i] 325 | IoU = inter/union # store result in iou 326 | # keep only elements with an IoU <= overlap 327 | idx = idx[IoU.le(overlap)] 328 | return keep, count 329 | 330 | 331 | -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/utils/nms/__init__.py -------------------------------------------------------------------------------- /utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds 
= np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /weights/ArcFace.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/ArcFace.onnx -------------------------------------------------------------------------------- /weights/facenet_mobilenet_all.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/facenet_mobilenet_all.h5 -------------------------------------------------------------------------------- /weights/mobilefacenet.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/mobilefacenet.caffemodel -------------------------------------------------------------------------------- /weights/mobilefacenet.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/mobilefacenet.onnx -------------------------------------------------------------------------------- /weights/mobilefacenet2.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/mobilefacenet2.caffemodel -------------------------------------------------------------------------------- /weights/mobilefacenet2.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/mobilefacenet2.onnx -------------------------------------------------------------------------------- /weights/mobilefacenet2.prototxt: 
-------------------------------------------------------------------------------- 1 | layer { 2 | name: "input0" 3 | type: "Input" 4 | top: "input0" 5 | input_param { 6 | shape { 7 | dim: 1 8 | dim: 3 9 | dim: 160 10 | dim: 160 11 | } 12 | } 13 | } 14 | layer { 15 | name: "Conv_0" 16 | type: "Convolution" 17 | bottom: "input0" 18 | top: "input.4" 19 | convolution_param { 20 | num_output: 32 21 | bias_term: true 22 | group: 1 23 | pad_h: 1 24 | pad_w: 1 25 | kernel_h: 3 26 | kernel_w: 3 27 | stride_h: 2 28 | stride_w: 2 29 | dilation: 1 30 | } 31 | } 32 | layer { 33 | name: "Clip_1_relu6_relu" 34 | type: "ReLU" 35 | bottom: "input.4" 36 | top: "Clip_1_relu6_relu_out" 37 | } 38 | layer { 39 | name: "Clip_1_relu6_thre" 40 | type: "Threshold" 41 | bottom: "Clip_1_relu6_relu_out" 42 | top: "Clip_1_relu6_thre_out" 43 | threshold_param { 44 | threshold: 6.0 45 | } 46 | } 47 | layer { 48 | name: "Clip_1_relu6_thre_left_power" 49 | type: "Power" 50 | bottom: "Clip_1_relu6_thre_out" 51 | top: "Clip_1_relu6_thre_left_power_out" 52 | power_param { 53 | power: 1.0 54 | scale: -1.0 55 | shift: 1.0 56 | } 57 | } 58 | layer { 59 | name: "Clip_1_relu6_x_mul_thre_out" 60 | type: "Eltwise" 61 | bottom: "Clip_1_relu6_relu_out" 62 | bottom: "Clip_1_relu6_thre_left_power_out" 63 | top: "Clip_1_relu6_x_mul_thre_out_out" 64 | eltwise_param { 65 | operation: PROD 66 | } 67 | } 68 | layer { 69 | name: "Clip_1_relu6_thre_right_power" 70 | type: "Power" 71 | bottom: "Clip_1_relu6_thre_out" 72 | top: "Clip_1_relu6_thre_right_power_out" 73 | power_param { 74 | power: 1.0 75 | scale: 6.0 76 | shift: 0.0 77 | } 78 | } 79 | layer { 80 | name: "Clip_1_relu6_add" 81 | type: "Eltwise" 82 | bottom: "Clip_1_relu6_x_mul_thre_out_out" 83 | bottom: "Clip_1_relu6_thre_right_power_out" 84 | top: "input.8" 85 | eltwise_param { 86 | operation: SUM 87 | } 88 | } 89 | layer { 90 | name: "Conv_2" 91 | type: "Convolution" 92 | bottom: "input.8" 93 | top: "input.16" 94 | convolution_param { 95 | num_output: 32 96 | bias_term: true 97 | group: 32 98 | pad_h: 1 99 | pad_w: 1 100 | kernel_h: 3 101 | kernel_w: 3 102 | stride_h: 1 103 | stride_w: 1 104 | dilation: 1 105 | } 106 | } 107 | layer { 108 | name: "Clip_3_relu6_relu" 109 | type: "ReLU" 110 | bottom: "input.16" 111 | top: "Clip_3_relu6_relu_out" 112 | } 113 | layer { 114 | name: "Clip_3_relu6_thre" 115 | type: "Threshold" 116 | bottom: "Clip_3_relu6_relu_out" 117 | top: "Clip_3_relu6_thre_out" 118 | threshold_param { 119 | threshold: 6.0 120 | } 121 | } 122 | layer { 123 | name: "Clip_3_relu6_thre_left_power" 124 | type: "Power" 125 | bottom: "Clip_3_relu6_thre_out" 126 | top: "Clip_3_relu6_thre_left_power_out" 127 | power_param { 128 | power: 1.0 129 | scale: -1.0 130 | shift: 1.0 131 | } 132 | } 133 | layer { 134 | name: "Clip_3_relu6_x_mul_thre_out" 135 | type: "Eltwise" 136 | bottom: "Clip_3_relu6_relu_out" 137 | bottom: "Clip_3_relu6_thre_left_power_out" 138 | top: "Clip_3_relu6_x_mul_thre_out_out" 139 | eltwise_param { 140 | operation: PROD 141 | } 142 | } 143 | layer { 144 | name: "Clip_3_relu6_thre_right_power" 145 | type: "Power" 146 | bottom: "Clip_3_relu6_thre_out" 147 | top: "Clip_3_relu6_thre_right_power_out" 148 | power_param { 149 | power: 1.0 150 | scale: 6.0 151 | shift: 0.0 152 | } 153 | } 154 | layer { 155 | name: "Clip_3_relu6_add" 156 | type: "Eltwise" 157 | bottom: "Clip_3_relu6_x_mul_thre_out_out" 158 | bottom: "Clip_3_relu6_thre_right_power_out" 159 | top: "input.20" 160 | eltwise_param { 161 | operation: SUM 162 | } 163 | } 164 | layer { 165 | name: "Conv_4" 
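# Note on the Clip_*_relu6_* blocks that repeat throughout this prototxt: each one appears to be
# an exported ReLU6 (min(max(x, 0), 6)) expressed with plain Caffe layers, since Caffe has no
# native clip op. ReLU gives r = max(x, 0); Threshold gives t = 1 if r > 6 else 0; the two Power
# layers form (1 - t) and 6 * t; the Eltwise PROD and SUM then compute r * (1 - t) + 6 * t,
# i.e. r when r <= 6 and 6 otherwise.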
166 | type: "Convolution" 167 | bottom: "input.20" 168 | top: "input.28" 169 | convolution_param { 170 | num_output: 64 171 | bias_term: true 172 | group: 1 173 | pad_h: 0 174 | pad_w: 0 175 | kernel_h: 1 176 | kernel_w: 1 177 | stride_h: 1 178 | stride_w: 1 179 | dilation: 1 180 | } 181 | } 182 | layer { 183 | name: "Clip_5_relu6_relu" 184 | type: "ReLU" 185 | bottom: "input.28" 186 | top: "Clip_5_relu6_relu_out" 187 | } 188 | layer { 189 | name: "Clip_5_relu6_thre" 190 | type: "Threshold" 191 | bottom: "Clip_5_relu6_relu_out" 192 | top: "Clip_5_relu6_thre_out" 193 | threshold_param { 194 | threshold: 6.0 195 | } 196 | } 197 | layer { 198 | name: "Clip_5_relu6_thre_left_power" 199 | type: "Power" 200 | bottom: "Clip_5_relu6_thre_out" 201 | top: "Clip_5_relu6_thre_left_power_out" 202 | power_param { 203 | power: 1.0 204 | scale: -1.0 205 | shift: 1.0 206 | } 207 | } 208 | layer { 209 | name: "Clip_5_relu6_x_mul_thre_out" 210 | type: "Eltwise" 211 | bottom: "Clip_5_relu6_relu_out" 212 | bottom: "Clip_5_relu6_thre_left_power_out" 213 | top: "Clip_5_relu6_x_mul_thre_out_out" 214 | eltwise_param { 215 | operation: PROD 216 | } 217 | } 218 | layer { 219 | name: "Clip_5_relu6_thre_right_power" 220 | type: "Power" 221 | bottom: "Clip_5_relu6_thre_out" 222 | top: "Clip_5_relu6_thre_right_power_out" 223 | power_param { 224 | power: 1.0 225 | scale: 6.0 226 | shift: 0.0 227 | } 228 | } 229 | layer { 230 | name: "Clip_5_relu6_add" 231 | type: "Eltwise" 232 | bottom: "Clip_5_relu6_x_mul_thre_out_out" 233 | bottom: "Clip_5_relu6_thre_right_power_out" 234 | top: "input.32" 235 | eltwise_param { 236 | operation: SUM 237 | } 238 | } 239 | layer { 240 | name: "Conv_6" 241 | type: "Convolution" 242 | bottom: "input.32" 243 | top: "input.40" 244 | convolution_param { 245 | num_output: 64 246 | bias_term: true 247 | group: 64 248 | pad_h: 1 249 | pad_w: 1 250 | kernel_h: 3 251 | kernel_w: 3 252 | stride_h: 2 253 | stride_w: 2 254 | dilation: 1 255 | } 256 | } 257 | layer { 258 | name: "Clip_7_relu6_relu" 259 | type: "ReLU" 260 | bottom: "input.40" 261 | top: "Clip_7_relu6_relu_out" 262 | } 263 | layer { 264 | name: "Clip_7_relu6_thre" 265 | type: "Threshold" 266 | bottom: "Clip_7_relu6_relu_out" 267 | top: "Clip_7_relu6_thre_out" 268 | threshold_param { 269 | threshold: 6.0 270 | } 271 | } 272 | layer { 273 | name: "Clip_7_relu6_thre_left_power" 274 | type: "Power" 275 | bottom: "Clip_7_relu6_thre_out" 276 | top: "Clip_7_relu6_thre_left_power_out" 277 | power_param { 278 | power: 1.0 279 | scale: -1.0 280 | shift: 1.0 281 | } 282 | } 283 | layer { 284 | name: "Clip_7_relu6_x_mul_thre_out" 285 | type: "Eltwise" 286 | bottom: "Clip_7_relu6_relu_out" 287 | bottom: "Clip_7_relu6_thre_left_power_out" 288 | top: "Clip_7_relu6_x_mul_thre_out_out" 289 | eltwise_param { 290 | operation: PROD 291 | } 292 | } 293 | layer { 294 | name: "Clip_7_relu6_thre_right_power" 295 | type: "Power" 296 | bottom: "Clip_7_relu6_thre_out" 297 | top: "Clip_7_relu6_thre_right_power_out" 298 | power_param { 299 | power: 1.0 300 | scale: 6.0 301 | shift: 0.0 302 | } 303 | } 304 | layer { 305 | name: "Clip_7_relu6_add" 306 | type: "Eltwise" 307 | bottom: "Clip_7_relu6_x_mul_thre_out_out" 308 | bottom: "Clip_7_relu6_thre_right_power_out" 309 | top: "input.44" 310 | eltwise_param { 311 | operation: SUM 312 | } 313 | } 314 | layer { 315 | name: "Conv_8" 316 | type: "Convolution" 317 | bottom: "input.44" 318 | top: "input.52" 319 | convolution_param { 320 | num_output: 128 321 | bias_term: true 322 | group: 1 323 | pad_h: 0 324 | pad_w: 0 325 | 
kernel_h: 1 326 | kernel_w: 1 327 | stride_h: 1 328 | stride_w: 1 329 | dilation: 1 330 | } 331 | } 332 | layer { 333 | name: "Clip_9_relu6_relu" 334 | type: "ReLU" 335 | bottom: "input.52" 336 | top: "Clip_9_relu6_relu_out" 337 | } 338 | layer { 339 | name: "Clip_9_relu6_thre" 340 | type: "Threshold" 341 | bottom: "Clip_9_relu6_relu_out" 342 | top: "Clip_9_relu6_thre_out" 343 | threshold_param { 344 | threshold: 6.0 345 | } 346 | } 347 | layer { 348 | name: "Clip_9_relu6_thre_left_power" 349 | type: "Power" 350 | bottom: "Clip_9_relu6_thre_out" 351 | top: "Clip_9_relu6_thre_left_power_out" 352 | power_param { 353 | power: 1.0 354 | scale: -1.0 355 | shift: 1.0 356 | } 357 | } 358 | layer { 359 | name: "Clip_9_relu6_x_mul_thre_out" 360 | type: "Eltwise" 361 | bottom: "Clip_9_relu6_relu_out" 362 | bottom: "Clip_9_relu6_thre_left_power_out" 363 | top: "Clip_9_relu6_x_mul_thre_out_out" 364 | eltwise_param { 365 | operation: PROD 366 | } 367 | } 368 | layer { 369 | name: "Clip_9_relu6_thre_right_power" 370 | type: "Power" 371 | bottom: "Clip_9_relu6_thre_out" 372 | top: "Clip_9_relu6_thre_right_power_out" 373 | power_param { 374 | power: 1.0 375 | scale: 6.0 376 | shift: 0.0 377 | } 378 | } 379 | layer { 380 | name: "Clip_9_relu6_add" 381 | type: "Eltwise" 382 | bottom: "Clip_9_relu6_x_mul_thre_out_out" 383 | bottom: "Clip_9_relu6_thre_right_power_out" 384 | top: "input.56" 385 | eltwise_param { 386 | operation: SUM 387 | } 388 | } 389 | layer { 390 | name: "Conv_10" 391 | type: "Convolution" 392 | bottom: "input.56" 393 | top: "input.64" 394 | convolution_param { 395 | num_output: 128 396 | bias_term: true 397 | group: 128 398 | pad_h: 1 399 | pad_w: 1 400 | kernel_h: 3 401 | kernel_w: 3 402 | stride_h: 1 403 | stride_w: 1 404 | dilation: 1 405 | } 406 | } 407 | layer { 408 | name: "Clip_11_relu6_relu" 409 | type: "ReLU" 410 | bottom: "input.64" 411 | top: "Clip_11_relu6_relu_out" 412 | } 413 | layer { 414 | name: "Clip_11_relu6_thre" 415 | type: "Threshold" 416 | bottom: "Clip_11_relu6_relu_out" 417 | top: "Clip_11_relu6_thre_out" 418 | threshold_param { 419 | threshold: 6.0 420 | } 421 | } 422 | layer { 423 | name: "Clip_11_relu6_thre_left_power" 424 | type: "Power" 425 | bottom: "Clip_11_relu6_thre_out" 426 | top: "Clip_11_relu6_thre_left_power_out" 427 | power_param { 428 | power: 1.0 429 | scale: -1.0 430 | shift: 1.0 431 | } 432 | } 433 | layer { 434 | name: "Clip_11_relu6_x_mul_thre_out" 435 | type: "Eltwise" 436 | bottom: "Clip_11_relu6_relu_out" 437 | bottom: "Clip_11_relu6_thre_left_power_out" 438 | top: "Clip_11_relu6_x_mul_thre_out_out" 439 | eltwise_param { 440 | operation: PROD 441 | } 442 | } 443 | layer { 444 | name: "Clip_11_relu6_thre_right_power" 445 | type: "Power" 446 | bottom: "Clip_11_relu6_thre_out" 447 | top: "Clip_11_relu6_thre_right_power_out" 448 | power_param { 449 | power: 1.0 450 | scale: 6.0 451 | shift: 0.0 452 | } 453 | } 454 | layer { 455 | name: "Clip_11_relu6_add" 456 | type: "Eltwise" 457 | bottom: "Clip_11_relu6_x_mul_thre_out_out" 458 | bottom: "Clip_11_relu6_thre_right_power_out" 459 | top: "input.68" 460 | eltwise_param { 461 | operation: SUM 462 | } 463 | } 464 | layer { 465 | name: "Conv_12" 466 | type: "Convolution" 467 | bottom: "input.68" 468 | top: "input.76" 469 | convolution_param { 470 | num_output: 128 471 | bias_term: true 472 | group: 1 473 | pad_h: 0 474 | pad_w: 0 475 | kernel_h: 1 476 | kernel_w: 1 477 | stride_h: 1 478 | stride_w: 1 479 | dilation: 1 480 | } 481 | } 482 | layer { 483 | name: "Clip_13_relu6_relu" 484 | type: "ReLU" 485 
| bottom: "input.76" 486 | top: "Clip_13_relu6_relu_out" 487 | } 488 | layer { 489 | name: "Clip_13_relu6_thre" 490 | type: "Threshold" 491 | bottom: "Clip_13_relu6_relu_out" 492 | top: "Clip_13_relu6_thre_out" 493 | threshold_param { 494 | threshold: 6.0 495 | } 496 | } 497 | layer { 498 | name: "Clip_13_relu6_thre_left_power" 499 | type: "Power" 500 | bottom: "Clip_13_relu6_thre_out" 501 | top: "Clip_13_relu6_thre_left_power_out" 502 | power_param { 503 | power: 1.0 504 | scale: -1.0 505 | shift: 1.0 506 | } 507 | } 508 | layer { 509 | name: "Clip_13_relu6_x_mul_thre_out" 510 | type: "Eltwise" 511 | bottom: "Clip_13_relu6_relu_out" 512 | bottom: "Clip_13_relu6_thre_left_power_out" 513 | top: "Clip_13_relu6_x_mul_thre_out_out" 514 | eltwise_param { 515 | operation: PROD 516 | } 517 | } 518 | layer { 519 | name: "Clip_13_relu6_thre_right_power" 520 | type: "Power" 521 | bottom: "Clip_13_relu6_thre_out" 522 | top: "Clip_13_relu6_thre_right_power_out" 523 | power_param { 524 | power: 1.0 525 | scale: 6.0 526 | shift: 0.0 527 | } 528 | } 529 | layer { 530 | name: "Clip_13_relu6_add" 531 | type: "Eltwise" 532 | bottom: "Clip_13_relu6_x_mul_thre_out_out" 533 | bottom: "Clip_13_relu6_thre_right_power_out" 534 | top: "input.80" 535 | eltwise_param { 536 | operation: SUM 537 | } 538 | } 539 | layer { 540 | name: "Conv_14" 541 | type: "Convolution" 542 | bottom: "input.80" 543 | top: "input.88" 544 | convolution_param { 545 | num_output: 128 546 | bias_term: true 547 | group: 128 548 | pad_h: 1 549 | pad_w: 1 550 | kernel_h: 3 551 | kernel_w: 3 552 | stride_h: 2 553 | stride_w: 2 554 | dilation: 1 555 | } 556 | } 557 | layer { 558 | name: "Clip_15_relu6_relu" 559 | type: "ReLU" 560 | bottom: "input.88" 561 | top: "Clip_15_relu6_relu_out" 562 | } 563 | layer { 564 | name: "Clip_15_relu6_thre" 565 | type: "Threshold" 566 | bottom: "Clip_15_relu6_relu_out" 567 | top: "Clip_15_relu6_thre_out" 568 | threshold_param { 569 | threshold: 6.0 570 | } 571 | } 572 | layer { 573 | name: "Clip_15_relu6_thre_left_power" 574 | type: "Power" 575 | bottom: "Clip_15_relu6_thre_out" 576 | top: "Clip_15_relu6_thre_left_power_out" 577 | power_param { 578 | power: 1.0 579 | scale: -1.0 580 | shift: 1.0 581 | } 582 | } 583 | layer { 584 | name: "Clip_15_relu6_x_mul_thre_out" 585 | type: "Eltwise" 586 | bottom: "Clip_15_relu6_relu_out" 587 | bottom: "Clip_15_relu6_thre_left_power_out" 588 | top: "Clip_15_relu6_x_mul_thre_out_out" 589 | eltwise_param { 590 | operation: PROD 591 | } 592 | } 593 | layer { 594 | name: "Clip_15_relu6_thre_right_power" 595 | type: "Power" 596 | bottom: "Clip_15_relu6_thre_out" 597 | top: "Clip_15_relu6_thre_right_power_out" 598 | power_param { 599 | power: 1.0 600 | scale: 6.0 601 | shift: 0.0 602 | } 603 | } 604 | layer { 605 | name: "Clip_15_relu6_add" 606 | type: "Eltwise" 607 | bottom: "Clip_15_relu6_x_mul_thre_out_out" 608 | bottom: "Clip_15_relu6_thre_right_power_out" 609 | top: "input.92" 610 | eltwise_param { 611 | operation: SUM 612 | } 613 | } 614 | layer { 615 | name: "Conv_16" 616 | type: "Convolution" 617 | bottom: "input.92" 618 | top: "input.100" 619 | convolution_param { 620 | num_output: 256 621 | bias_term: true 622 | group: 1 623 | pad_h: 0 624 | pad_w: 0 625 | kernel_h: 1 626 | kernel_w: 1 627 | stride_h: 1 628 | stride_w: 1 629 | dilation: 1 630 | } 631 | } 632 | layer { 633 | name: "Clip_17_relu6_relu" 634 | type: "ReLU" 635 | bottom: "input.100" 636 | top: "Clip_17_relu6_relu_out" 637 | } 638 | layer { 639 | name: "Clip_17_relu6_thre" 640 | type: "Threshold" 641 | bottom: 
"Clip_17_relu6_relu_out" 642 | top: "Clip_17_relu6_thre_out" 643 | threshold_param { 644 | threshold: 6.0 645 | } 646 | } 647 | layer { 648 | name: "Clip_17_relu6_thre_left_power" 649 | type: "Power" 650 | bottom: "Clip_17_relu6_thre_out" 651 | top: "Clip_17_relu6_thre_left_power_out" 652 | power_param { 653 | power: 1.0 654 | scale: -1.0 655 | shift: 1.0 656 | } 657 | } 658 | layer { 659 | name: "Clip_17_relu6_x_mul_thre_out" 660 | type: "Eltwise" 661 | bottom: "Clip_17_relu6_relu_out" 662 | bottom: "Clip_17_relu6_thre_left_power_out" 663 | top: "Clip_17_relu6_x_mul_thre_out_out" 664 | eltwise_param { 665 | operation: PROD 666 | } 667 | } 668 | layer { 669 | name: "Clip_17_relu6_thre_right_power" 670 | type: "Power" 671 | bottom: "Clip_17_relu6_thre_out" 672 | top: "Clip_17_relu6_thre_right_power_out" 673 | power_param { 674 | power: 1.0 675 | scale: 6.0 676 | shift: 0.0 677 | } 678 | } 679 | layer { 680 | name: "Clip_17_relu6_add" 681 | type: "Eltwise" 682 | bottom: "Clip_17_relu6_x_mul_thre_out_out" 683 | bottom: "Clip_17_relu6_thre_right_power_out" 684 | top: "input.104" 685 | eltwise_param { 686 | operation: SUM 687 | } 688 | } 689 | layer { 690 | name: "Conv_18" 691 | type: "Convolution" 692 | bottom: "input.104" 693 | top: "input.112" 694 | convolution_param { 695 | num_output: 256 696 | bias_term: true 697 | group: 256 698 | pad_h: 1 699 | pad_w: 1 700 | kernel_h: 3 701 | kernel_w: 3 702 | stride_h: 1 703 | stride_w: 1 704 | dilation: 1 705 | } 706 | } 707 | layer { 708 | name: "Clip_19_relu6_relu" 709 | type: "ReLU" 710 | bottom: "input.112" 711 | top: "Clip_19_relu6_relu_out" 712 | } 713 | layer { 714 | name: "Clip_19_relu6_thre" 715 | type: "Threshold" 716 | bottom: "Clip_19_relu6_relu_out" 717 | top: "Clip_19_relu6_thre_out" 718 | threshold_param { 719 | threshold: 6.0 720 | } 721 | } 722 | layer { 723 | name: "Clip_19_relu6_thre_left_power" 724 | type: "Power" 725 | bottom: "Clip_19_relu6_thre_out" 726 | top: "Clip_19_relu6_thre_left_power_out" 727 | power_param { 728 | power: 1.0 729 | scale: -1.0 730 | shift: 1.0 731 | } 732 | } 733 | layer { 734 | name: "Clip_19_relu6_x_mul_thre_out" 735 | type: "Eltwise" 736 | bottom: "Clip_19_relu6_relu_out" 737 | bottom: "Clip_19_relu6_thre_left_power_out" 738 | top: "Clip_19_relu6_x_mul_thre_out_out" 739 | eltwise_param { 740 | operation: PROD 741 | } 742 | } 743 | layer { 744 | name: "Clip_19_relu6_thre_right_power" 745 | type: "Power" 746 | bottom: "Clip_19_relu6_thre_out" 747 | top: "Clip_19_relu6_thre_right_power_out" 748 | power_param { 749 | power: 1.0 750 | scale: 6.0 751 | shift: 0.0 752 | } 753 | } 754 | layer { 755 | name: "Clip_19_relu6_add" 756 | type: "Eltwise" 757 | bottom: "Clip_19_relu6_x_mul_thre_out_out" 758 | bottom: "Clip_19_relu6_thre_right_power_out" 759 | top: "input.116" 760 | eltwise_param { 761 | operation: SUM 762 | } 763 | } 764 | layer { 765 | name: "Conv_20" 766 | type: "Convolution" 767 | bottom: "input.116" 768 | top: "input.124" 769 | convolution_param { 770 | num_output: 256 771 | bias_term: true 772 | group: 1 773 | pad_h: 0 774 | pad_w: 0 775 | kernel_h: 1 776 | kernel_w: 1 777 | stride_h: 1 778 | stride_w: 1 779 | dilation: 1 780 | } 781 | } 782 | layer { 783 | name: "Clip_21_relu6_relu" 784 | type: "ReLU" 785 | bottom: "input.124" 786 | top: "Clip_21_relu6_relu_out" 787 | } 788 | layer { 789 | name: "Clip_21_relu6_thre" 790 | type: "Threshold" 791 | bottom: "Clip_21_relu6_relu_out" 792 | top: "Clip_21_relu6_thre_out" 793 | threshold_param { 794 | threshold: 6.0 795 | } 796 | } 797 | layer { 798 | 
name: "Clip_21_relu6_thre_left_power" 799 | type: "Power" 800 | bottom: "Clip_21_relu6_thre_out" 801 | top: "Clip_21_relu6_thre_left_power_out" 802 | power_param { 803 | power: 1.0 804 | scale: -1.0 805 | shift: 1.0 806 | } 807 | } 808 | layer { 809 | name: "Clip_21_relu6_x_mul_thre_out" 810 | type: "Eltwise" 811 | bottom: "Clip_21_relu6_relu_out" 812 | bottom: "Clip_21_relu6_thre_left_power_out" 813 | top: "Clip_21_relu6_x_mul_thre_out_out" 814 | eltwise_param { 815 | operation: PROD 816 | } 817 | } 818 | layer { 819 | name: "Clip_21_relu6_thre_right_power" 820 | type: "Power" 821 | bottom: "Clip_21_relu6_thre_out" 822 | top: "Clip_21_relu6_thre_right_power_out" 823 | power_param { 824 | power: 1.0 825 | scale: 6.0 826 | shift: 0.0 827 | } 828 | } 829 | layer { 830 | name: "Clip_21_relu6_add" 831 | type: "Eltwise" 832 | bottom: "Clip_21_relu6_x_mul_thre_out_out" 833 | bottom: "Clip_21_relu6_thre_right_power_out" 834 | top: "input.128" 835 | eltwise_param { 836 | operation: SUM 837 | } 838 | } 839 | layer { 840 | name: "Conv_22" 841 | type: "Convolution" 842 | bottom: "input.128" 843 | top: "input.136" 844 | convolution_param { 845 | num_output: 256 846 | bias_term: true 847 | group: 256 848 | pad_h: 1 849 | pad_w: 1 850 | kernel_h: 3 851 | kernel_w: 3 852 | stride_h: 2 853 | stride_w: 2 854 | dilation: 1 855 | } 856 | } 857 | layer { 858 | name: "Clip_23_relu6_relu" 859 | type: "ReLU" 860 | bottom: "input.136" 861 | top: "Clip_23_relu6_relu_out" 862 | } 863 | layer { 864 | name: "Clip_23_relu6_thre" 865 | type: "Threshold" 866 | bottom: "Clip_23_relu6_relu_out" 867 | top: "Clip_23_relu6_thre_out" 868 | threshold_param { 869 | threshold: 6.0 870 | } 871 | } 872 | layer { 873 | name: "Clip_23_relu6_thre_left_power" 874 | type: "Power" 875 | bottom: "Clip_23_relu6_thre_out" 876 | top: "Clip_23_relu6_thre_left_power_out" 877 | power_param { 878 | power: 1.0 879 | scale: -1.0 880 | shift: 1.0 881 | } 882 | } 883 | layer { 884 | name: "Clip_23_relu6_x_mul_thre_out" 885 | type: "Eltwise" 886 | bottom: "Clip_23_relu6_relu_out" 887 | bottom: "Clip_23_relu6_thre_left_power_out" 888 | top: "Clip_23_relu6_x_mul_thre_out_out" 889 | eltwise_param { 890 | operation: PROD 891 | } 892 | } 893 | layer { 894 | name: "Clip_23_relu6_thre_right_power" 895 | type: "Power" 896 | bottom: "Clip_23_relu6_thre_out" 897 | top: "Clip_23_relu6_thre_right_power_out" 898 | power_param { 899 | power: 1.0 900 | scale: 6.0 901 | shift: 0.0 902 | } 903 | } 904 | layer { 905 | name: "Clip_23_relu6_add" 906 | type: "Eltwise" 907 | bottom: "Clip_23_relu6_x_mul_thre_out_out" 908 | bottom: "Clip_23_relu6_thre_right_power_out" 909 | top: "input.140" 910 | eltwise_param { 911 | operation: SUM 912 | } 913 | } 914 | layer { 915 | name: "Conv_24" 916 | type: "Convolution" 917 | bottom: "input.140" 918 | top: "input.148" 919 | convolution_param { 920 | num_output: 512 921 | bias_term: true 922 | group: 1 923 | pad_h: 0 924 | pad_w: 0 925 | kernel_h: 1 926 | kernel_w: 1 927 | stride_h: 1 928 | stride_w: 1 929 | dilation: 1 930 | } 931 | } 932 | layer { 933 | name: "Clip_25_relu6_relu" 934 | type: "ReLU" 935 | bottom: "input.148" 936 | top: "Clip_25_relu6_relu_out" 937 | } 938 | layer { 939 | name: "Clip_25_relu6_thre" 940 | type: "Threshold" 941 | bottom: "Clip_25_relu6_relu_out" 942 | top: "Clip_25_relu6_thre_out" 943 | threshold_param { 944 | threshold: 6.0 945 | } 946 | } 947 | layer { 948 | name: "Clip_25_relu6_thre_left_power" 949 | type: "Power" 950 | bottom: "Clip_25_relu6_thre_out" 951 | top: "Clip_25_relu6_thre_left_power_out" 
952 | power_param { 953 | power: 1.0 954 | scale: -1.0 955 | shift: 1.0 956 | } 957 | } 958 | layer { 959 | name: "Clip_25_relu6_x_mul_thre_out" 960 | type: "Eltwise" 961 | bottom: "Clip_25_relu6_relu_out" 962 | bottom: "Clip_25_relu6_thre_left_power_out" 963 | top: "Clip_25_relu6_x_mul_thre_out_out" 964 | eltwise_param { 965 | operation: PROD 966 | } 967 | } 968 | layer { 969 | name: "Clip_25_relu6_thre_right_power" 970 | type: "Power" 971 | bottom: "Clip_25_relu6_thre_out" 972 | top: "Clip_25_relu6_thre_right_power_out" 973 | power_param { 974 | power: 1.0 975 | scale: 6.0 976 | shift: 0.0 977 | } 978 | } 979 | layer { 980 | name: "Clip_25_relu6_add" 981 | type: "Eltwise" 982 | bottom: "Clip_25_relu6_x_mul_thre_out_out" 983 | bottom: "Clip_25_relu6_thre_right_power_out" 984 | top: "input.152" 985 | eltwise_param { 986 | operation: SUM 987 | } 988 | } 989 | layer { 990 | name: "Conv_26" 991 | type: "Convolution" 992 | bottom: "input.152" 993 | top: "input.160" 994 | convolution_param { 995 | num_output: 512 996 | bias_term: true 997 | group: 512 998 | pad_h: 1 999 | pad_w: 1 1000 | kernel_h: 3 1001 | kernel_w: 3 1002 | stride_h: 1 1003 | stride_w: 1 1004 | dilation: 1 1005 | } 1006 | } 1007 | layer { 1008 | name: "Clip_27_relu6_relu" 1009 | type: "ReLU" 1010 | bottom: "input.160" 1011 | top: "Clip_27_relu6_relu_out" 1012 | } 1013 | layer { 1014 | name: "Clip_27_relu6_thre" 1015 | type: "Threshold" 1016 | bottom: "Clip_27_relu6_relu_out" 1017 | top: "Clip_27_relu6_thre_out" 1018 | threshold_param { 1019 | threshold: 6.0 1020 | } 1021 | } 1022 | layer { 1023 | name: "Clip_27_relu6_thre_left_power" 1024 | type: "Power" 1025 | bottom: "Clip_27_relu6_thre_out" 1026 | top: "Clip_27_relu6_thre_left_power_out" 1027 | power_param { 1028 | power: 1.0 1029 | scale: -1.0 1030 | shift: 1.0 1031 | } 1032 | } 1033 | layer { 1034 | name: "Clip_27_relu6_x_mul_thre_out" 1035 | type: "Eltwise" 1036 | bottom: "Clip_27_relu6_relu_out" 1037 | bottom: "Clip_27_relu6_thre_left_power_out" 1038 | top: "Clip_27_relu6_x_mul_thre_out_out" 1039 | eltwise_param { 1040 | operation: PROD 1041 | } 1042 | } 1043 | layer { 1044 | name: "Clip_27_relu6_thre_right_power" 1045 | type: "Power" 1046 | bottom: "Clip_27_relu6_thre_out" 1047 | top: "Clip_27_relu6_thre_right_power_out" 1048 | power_param { 1049 | power: 1.0 1050 | scale: 6.0 1051 | shift: 0.0 1052 | } 1053 | } 1054 | layer { 1055 | name: "Clip_27_relu6_add" 1056 | type: "Eltwise" 1057 | bottom: "Clip_27_relu6_x_mul_thre_out_out" 1058 | bottom: "Clip_27_relu6_thre_right_power_out" 1059 | top: "input.164" 1060 | eltwise_param { 1061 | operation: SUM 1062 | } 1063 | } 1064 | layer { 1065 | name: "Conv_28" 1066 | type: "Convolution" 1067 | bottom: "input.164" 1068 | top: "input.172" 1069 | convolution_param { 1070 | num_output: 512 1071 | bias_term: true 1072 | group: 1 1073 | pad_h: 0 1074 | pad_w: 0 1075 | kernel_h: 1 1076 | kernel_w: 1 1077 | stride_h: 1 1078 | stride_w: 1 1079 | dilation: 1 1080 | } 1081 | } 1082 | layer { 1083 | name: "Clip_29_relu6_relu" 1084 | type: "ReLU" 1085 | bottom: "input.172" 1086 | top: "Clip_29_relu6_relu_out" 1087 | } 1088 | layer { 1089 | name: "Clip_29_relu6_thre" 1090 | type: "Threshold" 1091 | bottom: "Clip_29_relu6_relu_out" 1092 | top: "Clip_29_relu6_thre_out" 1093 | threshold_param { 1094 | threshold: 6.0 1095 | } 1096 | } 1097 | layer { 1098 | name: "Clip_29_relu6_thre_left_power" 1099 | type: "Power" 1100 | bottom: "Clip_29_relu6_thre_out" 1101 | top: "Clip_29_relu6_thre_left_power_out" 1102 | power_param { 1103 | power: 1.0 
1104 | scale: -1.0 1105 | shift: 1.0 1106 | } 1107 | } 1108 | layer { 1109 | name: "Clip_29_relu6_x_mul_thre_out" 1110 | type: "Eltwise" 1111 | bottom: "Clip_29_relu6_relu_out" 1112 | bottom: "Clip_29_relu6_thre_left_power_out" 1113 | top: "Clip_29_relu6_x_mul_thre_out_out" 1114 | eltwise_param { 1115 | operation: PROD 1116 | } 1117 | } 1118 | layer { 1119 | name: "Clip_29_relu6_thre_right_power" 1120 | type: "Power" 1121 | bottom: "Clip_29_relu6_thre_out" 1122 | top: "Clip_29_relu6_thre_right_power_out" 1123 | power_param { 1124 | power: 1.0 1125 | scale: 6.0 1126 | shift: 0.0 1127 | } 1128 | } 1129 | layer { 1130 | name: "Clip_29_relu6_add" 1131 | type: "Eltwise" 1132 | bottom: "Clip_29_relu6_x_mul_thre_out_out" 1133 | bottom: "Clip_29_relu6_thre_right_power_out" 1134 | top: "input.176" 1135 | eltwise_param { 1136 | operation: SUM 1137 | } 1138 | } 1139 | layer { 1140 | name: "Conv_30" 1141 | type: "Convolution" 1142 | bottom: "input.176" 1143 | top: "input.184" 1144 | convolution_param { 1145 | num_output: 512 1146 | bias_term: true 1147 | group: 512 1148 | pad_h: 1 1149 | pad_w: 1 1150 | kernel_h: 3 1151 | kernel_w: 3 1152 | stride_h: 1 1153 | stride_w: 1 1154 | dilation: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "Clip_31_relu6_relu" 1159 | type: "ReLU" 1160 | bottom: "input.184" 1161 | top: "Clip_31_relu6_relu_out" 1162 | } 1163 | layer { 1164 | name: "Clip_31_relu6_thre" 1165 | type: "Threshold" 1166 | bottom: "Clip_31_relu6_relu_out" 1167 | top: "Clip_31_relu6_thre_out" 1168 | threshold_param { 1169 | threshold: 6.0 1170 | } 1171 | } 1172 | layer { 1173 | name: "Clip_31_relu6_thre_left_power" 1174 | type: "Power" 1175 | bottom: "Clip_31_relu6_thre_out" 1176 | top: "Clip_31_relu6_thre_left_power_out" 1177 | power_param { 1178 | power: 1.0 1179 | scale: -1.0 1180 | shift: 1.0 1181 | } 1182 | } 1183 | layer { 1184 | name: "Clip_31_relu6_x_mul_thre_out" 1185 | type: "Eltwise" 1186 | bottom: "Clip_31_relu6_relu_out" 1187 | bottom: "Clip_31_relu6_thre_left_power_out" 1188 | top: "Clip_31_relu6_x_mul_thre_out_out" 1189 | eltwise_param { 1190 | operation: PROD 1191 | } 1192 | } 1193 | layer { 1194 | name: "Clip_31_relu6_thre_right_power" 1195 | type: "Power" 1196 | bottom: "Clip_31_relu6_thre_out" 1197 | top: "Clip_31_relu6_thre_right_power_out" 1198 | power_param { 1199 | power: 1.0 1200 | scale: 6.0 1201 | shift: 0.0 1202 | } 1203 | } 1204 | layer { 1205 | name: "Clip_31_relu6_add" 1206 | type: "Eltwise" 1207 | bottom: "Clip_31_relu6_x_mul_thre_out_out" 1208 | bottom: "Clip_31_relu6_thre_right_power_out" 1209 | top: "input.188" 1210 | eltwise_param { 1211 | operation: SUM 1212 | } 1213 | } 1214 | layer { 1215 | name: "Conv_32" 1216 | type: "Convolution" 1217 | bottom: "input.188" 1218 | top: "input.196" 1219 | convolution_param { 1220 | num_output: 512 1221 | bias_term: true 1222 | group: 1 1223 | pad_h: 0 1224 | pad_w: 0 1225 | kernel_h: 1 1226 | kernel_w: 1 1227 | stride_h: 1 1228 | stride_w: 1 1229 | dilation: 1 1230 | } 1231 | } 1232 | layer { 1233 | name: "Clip_33_relu6_relu" 1234 | type: "ReLU" 1235 | bottom: "input.196" 1236 | top: "Clip_33_relu6_relu_out" 1237 | } 1238 | layer { 1239 | name: "Clip_33_relu6_thre" 1240 | type: "Threshold" 1241 | bottom: "Clip_33_relu6_relu_out" 1242 | top: "Clip_33_relu6_thre_out" 1243 | threshold_param { 1244 | threshold: 6.0 1245 | } 1246 | } 1247 | layer { 1248 | name: "Clip_33_relu6_thre_left_power" 1249 | type: "Power" 1250 | bottom: "Clip_33_relu6_thre_out" 1251 | top: "Clip_33_relu6_thre_left_power_out" 1252 | power_param { 1253 | 
power: 1.0 1254 | scale: -1.0 1255 | shift: 1.0 1256 | } 1257 | } 1258 | layer { 1259 | name: "Clip_33_relu6_x_mul_thre_out" 1260 | type: "Eltwise" 1261 | bottom: "Clip_33_relu6_relu_out" 1262 | bottom: "Clip_33_relu6_thre_left_power_out" 1263 | top: "Clip_33_relu6_x_mul_thre_out_out" 1264 | eltwise_param { 1265 | operation: PROD 1266 | } 1267 | } 1268 | layer { 1269 | name: "Clip_33_relu6_thre_right_power" 1270 | type: "Power" 1271 | bottom: "Clip_33_relu6_thre_out" 1272 | top: "Clip_33_relu6_thre_right_power_out" 1273 | power_param { 1274 | power: 1.0 1275 | scale: 6.0 1276 | shift: 0.0 1277 | } 1278 | } 1279 | layer { 1280 | name: "Clip_33_relu6_add" 1281 | type: "Eltwise" 1282 | bottom: "Clip_33_relu6_x_mul_thre_out_out" 1283 | bottom: "Clip_33_relu6_thre_right_power_out" 1284 | top: "input.200" 1285 | eltwise_param { 1286 | operation: SUM 1287 | } 1288 | } 1289 | layer { 1290 | name: "Conv_34" 1291 | type: "Convolution" 1292 | bottom: "input.200" 1293 | top: "input.208" 1294 | convolution_param { 1295 | num_output: 512 1296 | bias_term: true 1297 | group: 512 1298 | pad_h: 1 1299 | pad_w: 1 1300 | kernel_h: 3 1301 | kernel_w: 3 1302 | stride_h: 1 1303 | stride_w: 1 1304 | dilation: 1 1305 | } 1306 | } 1307 | layer { 1308 | name: "Clip_35_relu6_relu" 1309 | type: "ReLU" 1310 | bottom: "input.208" 1311 | top: "Clip_35_relu6_relu_out" 1312 | } 1313 | layer { 1314 | name: "Clip_35_relu6_thre" 1315 | type: "Threshold" 1316 | bottom: "Clip_35_relu6_relu_out" 1317 | top: "Clip_35_relu6_thre_out" 1318 | threshold_param { 1319 | threshold: 6.0 1320 | } 1321 | } 1322 | layer { 1323 | name: "Clip_35_relu6_thre_left_power" 1324 | type: "Power" 1325 | bottom: "Clip_35_relu6_thre_out" 1326 | top: "Clip_35_relu6_thre_left_power_out" 1327 | power_param { 1328 | power: 1.0 1329 | scale: -1.0 1330 | shift: 1.0 1331 | } 1332 | } 1333 | layer { 1334 | name: "Clip_35_relu6_x_mul_thre_out" 1335 | type: "Eltwise" 1336 | bottom: "Clip_35_relu6_relu_out" 1337 | bottom: "Clip_35_relu6_thre_left_power_out" 1338 | top: "Clip_35_relu6_x_mul_thre_out_out" 1339 | eltwise_param { 1340 | operation: PROD 1341 | } 1342 | } 1343 | layer { 1344 | name: "Clip_35_relu6_thre_right_power" 1345 | type: "Power" 1346 | bottom: "Clip_35_relu6_thre_out" 1347 | top: "Clip_35_relu6_thre_right_power_out" 1348 | power_param { 1349 | power: 1.0 1350 | scale: 6.0 1351 | shift: 0.0 1352 | } 1353 | } 1354 | layer { 1355 | name: "Clip_35_relu6_add" 1356 | type: "Eltwise" 1357 | bottom: "Clip_35_relu6_x_mul_thre_out_out" 1358 | bottom: "Clip_35_relu6_thre_right_power_out" 1359 | top: "input.212" 1360 | eltwise_param { 1361 | operation: SUM 1362 | } 1363 | } 1364 | layer { 1365 | name: "Conv_36" 1366 | type: "Convolution" 1367 | bottom: "input.212" 1368 | top: "input.220" 1369 | convolution_param { 1370 | num_output: 512 1371 | bias_term: true 1372 | group: 1 1373 | pad_h: 0 1374 | pad_w: 0 1375 | kernel_h: 1 1376 | kernel_w: 1 1377 | stride_h: 1 1378 | stride_w: 1 1379 | dilation: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "Clip_37_relu6_relu" 1384 | type: "ReLU" 1385 | bottom: "input.220" 1386 | top: "Clip_37_relu6_relu_out" 1387 | } 1388 | layer { 1389 | name: "Clip_37_relu6_thre" 1390 | type: "Threshold" 1391 | bottom: "Clip_37_relu6_relu_out" 1392 | top: "Clip_37_relu6_thre_out" 1393 | threshold_param { 1394 | threshold: 6.0 1395 | } 1396 | } 1397 | layer { 1398 | name: "Clip_37_relu6_thre_left_power" 1399 | type: "Power" 1400 | bottom: "Clip_37_relu6_thre_out" 1401 | top: "Clip_37_relu6_thre_left_power_out" 1402 | power_param { 
1403 | power: 1.0 1404 | scale: -1.0 1405 | shift: 1.0 1406 | } 1407 | } 1408 | layer { 1409 | name: "Clip_37_relu6_x_mul_thre_out" 1410 | type: "Eltwise" 1411 | bottom: "Clip_37_relu6_relu_out" 1412 | bottom: "Clip_37_relu6_thre_left_power_out" 1413 | top: "Clip_37_relu6_x_mul_thre_out_out" 1414 | eltwise_param { 1415 | operation: PROD 1416 | } 1417 | } 1418 | layer { 1419 | name: "Clip_37_relu6_thre_right_power" 1420 | type: "Power" 1421 | bottom: "Clip_37_relu6_thre_out" 1422 | top: "Clip_37_relu6_thre_right_power_out" 1423 | power_param { 1424 | power: 1.0 1425 | scale: 6.0 1426 | shift: 0.0 1427 | } 1428 | } 1429 | layer { 1430 | name: "Clip_37_relu6_add" 1431 | type: "Eltwise" 1432 | bottom: "Clip_37_relu6_x_mul_thre_out_out" 1433 | bottom: "Clip_37_relu6_thre_right_power_out" 1434 | top: "input.224" 1435 | eltwise_param { 1436 | operation: SUM 1437 | } 1438 | } 1439 | layer { 1440 | name: "Conv_38" 1441 | type: "Convolution" 1442 | bottom: "input.224" 1443 | top: "input.232" 1444 | convolution_param { 1445 | num_output: 512 1446 | bias_term: true 1447 | group: 512 1448 | pad_h: 1 1449 | pad_w: 1 1450 | kernel_h: 3 1451 | kernel_w: 3 1452 | stride_h: 1 1453 | stride_w: 1 1454 | dilation: 1 1455 | } 1456 | } 1457 | layer { 1458 | name: "Clip_39_relu6_relu" 1459 | type: "ReLU" 1460 | bottom: "input.232" 1461 | top: "Clip_39_relu6_relu_out" 1462 | } 1463 | layer { 1464 | name: "Clip_39_relu6_thre" 1465 | type: "Threshold" 1466 | bottom: "Clip_39_relu6_relu_out" 1467 | top: "Clip_39_relu6_thre_out" 1468 | threshold_param { 1469 | threshold: 6.0 1470 | } 1471 | } 1472 | layer { 1473 | name: "Clip_39_relu6_thre_left_power" 1474 | type: "Power" 1475 | bottom: "Clip_39_relu6_thre_out" 1476 | top: "Clip_39_relu6_thre_left_power_out" 1477 | power_param { 1478 | power: 1.0 1479 | scale: -1.0 1480 | shift: 1.0 1481 | } 1482 | } 1483 | layer { 1484 | name: "Clip_39_relu6_x_mul_thre_out" 1485 | type: "Eltwise" 1486 | bottom: "Clip_39_relu6_relu_out" 1487 | bottom: "Clip_39_relu6_thre_left_power_out" 1488 | top: "Clip_39_relu6_x_mul_thre_out_out" 1489 | eltwise_param { 1490 | operation: PROD 1491 | } 1492 | } 1493 | layer { 1494 | name: "Clip_39_relu6_thre_right_power" 1495 | type: "Power" 1496 | bottom: "Clip_39_relu6_thre_out" 1497 | top: "Clip_39_relu6_thre_right_power_out" 1498 | power_param { 1499 | power: 1.0 1500 | scale: 6.0 1501 | shift: 0.0 1502 | } 1503 | } 1504 | layer { 1505 | name: "Clip_39_relu6_add" 1506 | type: "Eltwise" 1507 | bottom: "Clip_39_relu6_x_mul_thre_out_out" 1508 | bottom: "Clip_39_relu6_thre_right_power_out" 1509 | top: "input.236" 1510 | eltwise_param { 1511 | operation: SUM 1512 | } 1513 | } 1514 | layer { 1515 | name: "Conv_40" 1516 | type: "Convolution" 1517 | bottom: "input.236" 1518 | top: "input.244" 1519 | convolution_param { 1520 | num_output: 512 1521 | bias_term: true 1522 | group: 1 1523 | pad_h: 0 1524 | pad_w: 0 1525 | kernel_h: 1 1526 | kernel_w: 1 1527 | stride_h: 1 1528 | stride_w: 1 1529 | dilation: 1 1530 | } 1531 | } 1532 | layer { 1533 | name: "Clip_41_relu6_relu" 1534 | type: "ReLU" 1535 | bottom: "input.244" 1536 | top: "Clip_41_relu6_relu_out" 1537 | } 1538 | layer { 1539 | name: "Clip_41_relu6_thre" 1540 | type: "Threshold" 1541 | bottom: "Clip_41_relu6_relu_out" 1542 | top: "Clip_41_relu6_thre_out" 1543 | threshold_param { 1544 | threshold: 6.0 1545 | } 1546 | } 1547 | layer { 1548 | name: "Clip_41_relu6_thre_left_power" 1549 | type: "Power" 1550 | bottom: "Clip_41_relu6_thre_out" 1551 | top: "Clip_41_relu6_thre_left_power_out" 1552 | 
power_param { 1553 | power: 1.0 1554 | scale: -1.0 1555 | shift: 1.0 1556 | } 1557 | } 1558 | layer { 1559 | name: "Clip_41_relu6_x_mul_thre_out" 1560 | type: "Eltwise" 1561 | bottom: "Clip_41_relu6_relu_out" 1562 | bottom: "Clip_41_relu6_thre_left_power_out" 1563 | top: "Clip_41_relu6_x_mul_thre_out_out" 1564 | eltwise_param { 1565 | operation: PROD 1566 | } 1567 | } 1568 | layer { 1569 | name: "Clip_41_relu6_thre_right_power" 1570 | type: "Power" 1571 | bottom: "Clip_41_relu6_thre_out" 1572 | top: "Clip_41_relu6_thre_right_power_out" 1573 | power_param { 1574 | power: 1.0 1575 | scale: 6.0 1576 | shift: 0.0 1577 | } 1578 | } 1579 | layer { 1580 | name: "Clip_41_relu6_add" 1581 | type: "Eltwise" 1582 | bottom: "Clip_41_relu6_x_mul_thre_out_out" 1583 | bottom: "Clip_41_relu6_thre_right_power_out" 1584 | top: "input.248" 1585 | eltwise_param { 1586 | operation: SUM 1587 | } 1588 | } 1589 | layer { 1590 | name: "Conv_42" 1591 | type: "Convolution" 1592 | bottom: "input.248" 1593 | top: "input.256" 1594 | convolution_param { 1595 | num_output: 512 1596 | bias_term: true 1597 | group: 512 1598 | pad_h: 1 1599 | pad_w: 1 1600 | kernel_h: 3 1601 | kernel_w: 3 1602 | stride_h: 1 1603 | stride_w: 1 1604 | dilation: 1 1605 | } 1606 | } 1607 | layer { 1608 | name: "Clip_43_relu6_relu" 1609 | type: "ReLU" 1610 | bottom: "input.256" 1611 | top: "Clip_43_relu6_relu_out" 1612 | } 1613 | layer { 1614 | name: "Clip_43_relu6_thre" 1615 | type: "Threshold" 1616 | bottom: "Clip_43_relu6_relu_out" 1617 | top: "Clip_43_relu6_thre_out" 1618 | threshold_param { 1619 | threshold: 6.0 1620 | } 1621 | } 1622 | layer { 1623 | name: "Clip_43_relu6_thre_left_power" 1624 | type: "Power" 1625 | bottom: "Clip_43_relu6_thre_out" 1626 | top: "Clip_43_relu6_thre_left_power_out" 1627 | power_param { 1628 | power: 1.0 1629 | scale: -1.0 1630 | shift: 1.0 1631 | } 1632 | } 1633 | layer { 1634 | name: "Clip_43_relu6_x_mul_thre_out" 1635 | type: "Eltwise" 1636 | bottom: "Clip_43_relu6_relu_out" 1637 | bottom: "Clip_43_relu6_thre_left_power_out" 1638 | top: "Clip_43_relu6_x_mul_thre_out_out" 1639 | eltwise_param { 1640 | operation: PROD 1641 | } 1642 | } 1643 | layer { 1644 | name: "Clip_43_relu6_thre_right_power" 1645 | type: "Power" 1646 | bottom: "Clip_43_relu6_thre_out" 1647 | top: "Clip_43_relu6_thre_right_power_out" 1648 | power_param { 1649 | power: 1.0 1650 | scale: 6.0 1651 | shift: 0.0 1652 | } 1653 | } 1654 | layer { 1655 | name: "Clip_43_relu6_add" 1656 | type: "Eltwise" 1657 | bottom: "Clip_43_relu6_x_mul_thre_out_out" 1658 | bottom: "Clip_43_relu6_thre_right_power_out" 1659 | top: "input.260" 1660 | eltwise_param { 1661 | operation: SUM 1662 | } 1663 | } 1664 | layer { 1665 | name: "Conv_44" 1666 | type: "Convolution" 1667 | bottom: "input.260" 1668 | top: "input.268" 1669 | convolution_param { 1670 | num_output: 512 1671 | bias_term: true 1672 | group: 1 1673 | pad_h: 0 1674 | pad_w: 0 1675 | kernel_h: 1 1676 | kernel_w: 1 1677 | stride_h: 1 1678 | stride_w: 1 1679 | dilation: 1 1680 | } 1681 | } 1682 | layer { 1683 | name: "Clip_45_relu6_relu" 1684 | type: "ReLU" 1685 | bottom: "input.268" 1686 | top: "Clip_45_relu6_relu_out" 1687 | } 1688 | layer { 1689 | name: "Clip_45_relu6_thre" 1690 | type: "Threshold" 1691 | bottom: "Clip_45_relu6_relu_out" 1692 | top: "Clip_45_relu6_thre_out" 1693 | threshold_param { 1694 | threshold: 6.0 1695 | } 1696 | } 1697 | layer { 1698 | name: "Clip_45_relu6_thre_left_power" 1699 | type: "Power" 1700 | bottom: "Clip_45_relu6_thre_out" 1701 | top: "Clip_45_relu6_thre_left_power_out" 
1702 | power_param { 1703 | power: 1.0 1704 | scale: -1.0 1705 | shift: 1.0 1706 | } 1707 | } 1708 | layer { 1709 | name: "Clip_45_relu6_x_mul_thre_out" 1710 | type: "Eltwise" 1711 | bottom: "Clip_45_relu6_relu_out" 1712 | bottom: "Clip_45_relu6_thre_left_power_out" 1713 | top: "Clip_45_relu6_x_mul_thre_out_out" 1714 | eltwise_param { 1715 | operation: PROD 1716 | } 1717 | } 1718 | layer { 1719 | name: "Clip_45_relu6_thre_right_power" 1720 | type: "Power" 1721 | bottom: "Clip_45_relu6_thre_out" 1722 | top: "Clip_45_relu6_thre_right_power_out" 1723 | power_param { 1724 | power: 1.0 1725 | scale: 6.0 1726 | shift: 0.0 1727 | } 1728 | } 1729 | layer { 1730 | name: "Clip_45_relu6_add" 1731 | type: "Eltwise" 1732 | bottom: "Clip_45_relu6_x_mul_thre_out_out" 1733 | bottom: "Clip_45_relu6_thre_right_power_out" 1734 | top: "input.272" 1735 | eltwise_param { 1736 | operation: SUM 1737 | } 1738 | } 1739 | layer { 1740 | name: "Conv_46" 1741 | type: "Convolution" 1742 | bottom: "input.272" 1743 | top: "input.280" 1744 | convolution_param { 1745 | num_output: 512 1746 | bias_term: true 1747 | group: 512 1748 | pad_h: 1 1749 | pad_w: 1 1750 | kernel_h: 3 1751 | kernel_w: 3 1752 | stride_h: 2 1753 | stride_w: 2 1754 | dilation: 1 1755 | } 1756 | } 1757 | layer { 1758 | name: "Clip_47_relu6_relu" 1759 | type: "ReLU" 1760 | bottom: "input.280" 1761 | top: "Clip_47_relu6_relu_out" 1762 | } 1763 | layer { 1764 | name: "Clip_47_relu6_thre" 1765 | type: "Threshold" 1766 | bottom: "Clip_47_relu6_relu_out" 1767 | top: "Clip_47_relu6_thre_out" 1768 | threshold_param { 1769 | threshold: 6.0 1770 | } 1771 | } 1772 | layer { 1773 | name: "Clip_47_relu6_thre_left_power" 1774 | type: "Power" 1775 | bottom: "Clip_47_relu6_thre_out" 1776 | top: "Clip_47_relu6_thre_left_power_out" 1777 | power_param { 1778 | power: 1.0 1779 | scale: -1.0 1780 | shift: 1.0 1781 | } 1782 | } 1783 | layer { 1784 | name: "Clip_47_relu6_x_mul_thre_out" 1785 | type: "Eltwise" 1786 | bottom: "Clip_47_relu6_relu_out" 1787 | bottom: "Clip_47_relu6_thre_left_power_out" 1788 | top: "Clip_47_relu6_x_mul_thre_out_out" 1789 | eltwise_param { 1790 | operation: PROD 1791 | } 1792 | } 1793 | layer { 1794 | name: "Clip_47_relu6_thre_right_power" 1795 | type: "Power" 1796 | bottom: "Clip_47_relu6_thre_out" 1797 | top: "Clip_47_relu6_thre_right_power_out" 1798 | power_param { 1799 | power: 1.0 1800 | scale: 6.0 1801 | shift: 0.0 1802 | } 1803 | } 1804 | layer { 1805 | name: "Clip_47_relu6_add" 1806 | type: "Eltwise" 1807 | bottom: "Clip_47_relu6_x_mul_thre_out_out" 1808 | bottom: "Clip_47_relu6_thre_right_power_out" 1809 | top: "input.284" 1810 | eltwise_param { 1811 | operation: SUM 1812 | } 1813 | } 1814 | layer { 1815 | name: "Conv_48" 1816 | type: "Convolution" 1817 | bottom: "input.284" 1818 | top: "input.292" 1819 | convolution_param { 1820 | num_output: 1024 1821 | bias_term: true 1822 | group: 1 1823 | pad_h: 0 1824 | pad_w: 0 1825 | kernel_h: 1 1826 | kernel_w: 1 1827 | stride_h: 1 1828 | stride_w: 1 1829 | dilation: 1 1830 | } 1831 | } 1832 | layer { 1833 | name: "Clip_49_relu6_relu" 1834 | type: "ReLU" 1835 | bottom: "input.292" 1836 | top: "Clip_49_relu6_relu_out" 1837 | } 1838 | layer { 1839 | name: "Clip_49_relu6_thre" 1840 | type: "Threshold" 1841 | bottom: "Clip_49_relu6_relu_out" 1842 | top: "Clip_49_relu6_thre_out" 1843 | threshold_param { 1844 | threshold: 6.0 1845 | } 1846 | } 1847 | layer { 1848 | name: "Clip_49_relu6_thre_left_power" 1849 | type: "Power" 1850 | bottom: "Clip_49_relu6_thre_out" 1851 | top: 
"Clip_49_relu6_thre_left_power_out" 1852 | power_param { 1853 | power: 1.0 1854 | scale: -1.0 1855 | shift: 1.0 1856 | } 1857 | } 1858 | layer { 1859 | name: "Clip_49_relu6_x_mul_thre_out" 1860 | type: "Eltwise" 1861 | bottom: "Clip_49_relu6_relu_out" 1862 | bottom: "Clip_49_relu6_thre_left_power_out" 1863 | top: "Clip_49_relu6_x_mul_thre_out_out" 1864 | eltwise_param { 1865 | operation: PROD 1866 | } 1867 | } 1868 | layer { 1869 | name: "Clip_49_relu6_thre_right_power" 1870 | type: "Power" 1871 | bottom: "Clip_49_relu6_thre_out" 1872 | top: "Clip_49_relu6_thre_right_power_out" 1873 | power_param { 1874 | power: 1.0 1875 | scale: 6.0 1876 | shift: 0.0 1877 | } 1878 | } 1879 | layer { 1880 | name: "Clip_49_relu6_add" 1881 | type: "Eltwise" 1882 | bottom: "Clip_49_relu6_x_mul_thre_out_out" 1883 | bottom: "Clip_49_relu6_thre_right_power_out" 1884 | top: "input.296" 1885 | eltwise_param { 1886 | operation: SUM 1887 | } 1888 | } 1889 | layer { 1890 | name: "Conv_50" 1891 | type: "Convolution" 1892 | bottom: "input.296" 1893 | top: "input.304" 1894 | convolution_param { 1895 | num_output: 1024 1896 | bias_term: true 1897 | group: 1024 1898 | pad_h: 1 1899 | pad_w: 1 1900 | kernel_h: 3 1901 | kernel_w: 3 1902 | stride_h: 1 1903 | stride_w: 1 1904 | dilation: 1 1905 | } 1906 | } 1907 | layer { 1908 | name: "Clip_51_relu6_relu" 1909 | type: "ReLU" 1910 | bottom: "input.304" 1911 | top: "Clip_51_relu6_relu_out" 1912 | } 1913 | layer { 1914 | name: "Clip_51_relu6_thre" 1915 | type: "Threshold" 1916 | bottom: "Clip_51_relu6_relu_out" 1917 | top: "Clip_51_relu6_thre_out" 1918 | threshold_param { 1919 | threshold: 6.0 1920 | } 1921 | } 1922 | layer { 1923 | name: "Clip_51_relu6_thre_left_power" 1924 | type: "Power" 1925 | bottom: "Clip_51_relu6_thre_out" 1926 | top: "Clip_51_relu6_thre_left_power_out" 1927 | power_param { 1928 | power: 1.0 1929 | scale: -1.0 1930 | shift: 1.0 1931 | } 1932 | } 1933 | layer { 1934 | name: "Clip_51_relu6_x_mul_thre_out" 1935 | type: "Eltwise" 1936 | bottom: "Clip_51_relu6_relu_out" 1937 | bottom: "Clip_51_relu6_thre_left_power_out" 1938 | top: "Clip_51_relu6_x_mul_thre_out_out" 1939 | eltwise_param { 1940 | operation: PROD 1941 | } 1942 | } 1943 | layer { 1944 | name: "Clip_51_relu6_thre_right_power" 1945 | type: "Power" 1946 | bottom: "Clip_51_relu6_thre_out" 1947 | top: "Clip_51_relu6_thre_right_power_out" 1948 | power_param { 1949 | power: 1.0 1950 | scale: 6.0 1951 | shift: 0.0 1952 | } 1953 | } 1954 | layer { 1955 | name: "Clip_51_relu6_add" 1956 | type: "Eltwise" 1957 | bottom: "Clip_51_relu6_x_mul_thre_out_out" 1958 | bottom: "Clip_51_relu6_thre_right_power_out" 1959 | top: "input.308" 1960 | eltwise_param { 1961 | operation: SUM 1962 | } 1963 | } 1964 | layer { 1965 | name: "Conv_52" 1966 | type: "Convolution" 1967 | bottom: "input.308" 1968 | top: "input.316" 1969 | convolution_param { 1970 | num_output: 1024 1971 | bias_term: true 1972 | group: 1 1973 | pad_h: 0 1974 | pad_w: 0 1975 | kernel_h: 1 1976 | kernel_w: 1 1977 | stride_h: 1 1978 | stride_w: 1 1979 | dilation: 1 1980 | } 1981 | } 1982 | layer { 1983 | name: "Clip_53_relu6_relu" 1984 | type: "ReLU" 1985 | bottom: "input.316" 1986 | top: "Clip_53_relu6_relu_out" 1987 | } 1988 | layer { 1989 | name: "Clip_53_relu6_thre" 1990 | type: "Threshold" 1991 | bottom: "Clip_53_relu6_relu_out" 1992 | top: "Clip_53_relu6_thre_out" 1993 | threshold_param { 1994 | threshold: 6.0 1995 | } 1996 | } 1997 | layer { 1998 | name: "Clip_53_relu6_thre_left_power" 1999 | type: "Power" 2000 | bottom: "Clip_53_relu6_thre_out" 
2001 | top: "Clip_53_relu6_thre_left_power_out" 2002 | power_param { 2003 | power: 1.0 2004 | scale: -1.0 2005 | shift: 1.0 2006 | } 2007 | } 2008 | layer { 2009 | name: "Clip_53_relu6_x_mul_thre_out" 2010 | type: "Eltwise" 2011 | bottom: "Clip_53_relu6_relu_out" 2012 | bottom: "Clip_53_relu6_thre_left_power_out" 2013 | top: "Clip_53_relu6_x_mul_thre_out_out" 2014 | eltwise_param { 2015 | operation: PROD 2016 | } 2017 | } 2018 | layer { 2019 | name: "Clip_53_relu6_thre_right_power" 2020 | type: "Power" 2021 | bottom: "Clip_53_relu6_thre_out" 2022 | top: "Clip_53_relu6_thre_right_power_out" 2023 | power_param { 2024 | power: 1.0 2025 | scale: 6.0 2026 | shift: 0.0 2027 | } 2028 | } 2029 | layer { 2030 | name: "Clip_53_relu6_add" 2031 | type: "Eltwise" 2032 | bottom: "Clip_53_relu6_x_mul_thre_out_out" 2033 | bottom: "Clip_53_relu6_thre_right_power_out" 2034 | top: "input.320" 2035 | eltwise_param { 2036 | operation: SUM 2037 | } 2038 | } 2039 | layer { 2040 | name: "GlobalAveragePool_54" 2041 | type: "Pooling" 2042 | bottom: "input.320" 2043 | top: "onnx::Reshape_250" 2044 | pooling_param { 2045 | pool: AVE 2046 | kernel_h: 5 2047 | kernel_w: 5 2048 | stride_h: 1 2049 | stride_w: 1 2050 | pad_h: 0 2051 | pad_w: 0 2052 | } 2053 | } 2054 | layer { 2055 | name: "Reshape_55" 2056 | type: "Flatten" 2057 | bottom: "onnx::Reshape_250" 2058 | top: "input.324" 2059 | } 2060 | layer { 2061 | name: "MatMul_56" 2062 | type: "InnerProduct" 2063 | bottom: "input.324" 2064 | top: "input.328" 2065 | inner_product_param { 2066 | num_output: 128 2067 | bias_term: false 2068 | } 2069 | } 2070 | layer { 2071 | name: "BatchNormalization_57_bn" 2072 | type: "BatchNorm" 2073 | bottom: "input.328" 2074 | top: "output0" 2075 | batch_norm_param { 2076 | use_global_stats: true 2077 | eps: 0.0010000000474974513 2078 | } 2079 | } 2080 | layer { 2081 | name: "BatchNormalization_57" 2082 | type: "Scale" 2083 | bottom: "output0" 2084 | top: "output0" 2085 | scale_param { 2086 | bias_term: true 2087 | } 2088 | } 2089 | 2090 | -------------------------------------------------------------------------------- /weights/retinaface.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guolele1990/rknn_FaceRecognization/5877c20fe75b4052e044941dff2c44c2952d8e24/weights/retinaface.onnx --------------------------------------------------------------------------------
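A note on the repeated `Clip_*_relu6_*` blocks in the prototxt above: because plain Caffe has no ReLU6/Clip layer, each ONNX `Clip(x, 0, 6)` has been lowered into five primitive layers (ReLU, Threshold at 6, two Power layers, and Eltwise PROD/SUM). The sketch below is not part of the repository; it is a minimal numpy reconstruction of that layer stack, with my own function name, included only to make the pattern easier to read. The layer-name comments refer to the prototxt entries above.

```python
import numpy as np

def relu6_via_caffe_primitives(x, clip_value=6.0):
    """Reproduce one Clip_*_relu6_* block from the prototxt:
    ReLU -> Threshold -> two Power layers -> Eltwise PROD + SUM."""
    relu_out = np.maximum(x, 0.0)                        # "Clip_*_relu6_relu"            (ReLU)
    thre_out = (relu_out > clip_value).astype(x.dtype)   # "Clip_*_relu6_thre"            (Threshold, threshold: 6.0)
    left     = -1.0 * thre_out + 1.0                     # "Clip_*_relu6_thre_left_power" (Power: scale -1, shift 1)
    right    = clip_value * thre_out                     # "Clip_*_relu6_thre_right_power"(Power: scale 6, shift 0)
    # "Clip_*_relu6_x_mul_thre_out" (Eltwise PROD) followed by "Clip_*_relu6_add" (Eltwise SUM)
    return relu_out * left + right                       # equals min(max(x, 0), 6)

# quick sanity check against the reference ReLU6 definition
x = np.linspace(-3.0, 9.0, 7)
assert np.allclose(relu6_via_caffe_primitives(x), np.clip(x, 0.0, 6.0))
```

In other words, below the threshold the Eltwise PROD passes `relu_out` through unchanged (mask = 1) and the SUM adds 0, while above it the PROD zeroes the activation and the SUM substitutes the constant 6, which is exactly `min(max(x, 0), 6)`.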