├── common ├── __init__.py ├── __pycache__ │ ├── nms.cpython-36.pyc │ └── __init__.cpython-36.pyc └── nms.py ├── utils ├── __init__.py ├── __pycache__ │ ├── image.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ ├── download.cpython-36.pyc │ ├── getter.cpython-36.pyc │ ├── helpers.cpython-36.pyc │ └── model_store.cpython-36.pyc ├── helpers.py ├── image.py ├── model_store.py ├── download.py └── getter.py ├── converters ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── insight2onnx.cpython-36.pyc │ ├── onnx_to_trt.cpython-36.pyc │ ├── reshape_onnx.cpython-36.pyc │ └── remove_initializer_from_input.cpython-36.pyc ├── mx2onnx_conv │ └── onnx │ │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ │ ├── mx2onnx │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── export_model.cpython-36.pyc │ │ │ ├── export_onnx.cpython-36.pyc │ │ │ ├── _export_helper.cpython-36.pyc │ │ │ └── _op_translations.cpython-36.pyc │ │ ├── __init__.py │ │ ├── _export_helper.py │ │ ├── LICENSE │ │ ├── export_model.py │ │ └── export_onnx.py │ │ └── __init__.py ├── remove_initializer_from_input.py ├── reshape_onnx.py ├── onnx_to_trt.py └── insight2onnx.py ├── detectors ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── retinaface.cpython-36.pyc └── retinaface.py ├── exec_backends ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── trt_loader.cpython-36.pyc │ ├── trt_backend.cpython-36.pyc │ ├── onnxrt_backend.cpython-36.pyc │ └── triton_backend.cpython-36.pyc ├── onnxrt_backend.py ├── trt_backend.py ├── trt_loader.py └── triton_backend.py ├── test_images ├── TH.png ├── TH1.jpg ├── crop.jpg ├── lumia.jpg └── Stallone.jpg ├── retinaface_r50_v1.jpg ├── __pycache__ ├── configs.cpython-36.pyc ├── face_detectors.cpython-36.pyc └── face_processors.cpython-36.pyc ├── face_detectors.py ├── face_processors.py ├── README.md ├── configs.py ├── sample_retinaface_to_trt.py └── sample_retinaface_to_onnx.py /common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /converters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /detectors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /exec_backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_images/TH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/test_images/TH.png -------------------------------------------------------------------------------- /retinaface_r50_v1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/retinaface_r50_v1.jpg -------------------------------------------------------------------------------- /test_images/TH1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/test_images/TH1.jpg -------------------------------------------------------------------------------- /test_images/crop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/test_images/crop.jpg -------------------------------------------------------------------------------- /test_images/lumia.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/test_images/lumia.jpg -------------------------------------------------------------------------------- /test_images/Stallone.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/test_images/Stallone.jpg -------------------------------------------------------------------------------- /__pycache__/configs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/__pycache__/configs.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/common/__pycache__/nms.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/image.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/image.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/face_detectors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/__pycache__/face_detectors.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/download.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/download.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/getter.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/getter.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/helpers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/helpers.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/face_processors.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/__pycache__/face_processors.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/common/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /converters/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /detectors/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/detectors/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/model_store.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/utils/__pycache__/model_store.cpython-36.pyc -------------------------------------------------------------------------------- /detectors/__pycache__/retinaface.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/detectors/__pycache__/retinaface.cpython-36.pyc -------------------------------------------------------------------------------- /converters/__pycache__/insight2onnx.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/__pycache__/insight2onnx.cpython-36.pyc -------------------------------------------------------------------------------- /converters/__pycache__/onnx_to_trt.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/__pycache__/onnx_to_trt.cpython-36.pyc -------------------------------------------------------------------------------- /converters/__pycache__/reshape_onnx.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/__pycache__/reshape_onnx.cpython-36.pyc -------------------------------------------------------------------------------- /exec_backends/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/exec_backends/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /exec_backends/__pycache__/trt_loader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/exec_backends/__pycache__/trt_loader.cpython-36.pyc -------------------------------------------------------------------------------- 
/exec_backends/__pycache__/trt_backend.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/exec_backends/__pycache__/trt_backend.cpython-36.pyc -------------------------------------------------------------------------------- /exec_backends/__pycache__/onnxrt_backend.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/exec_backends/__pycache__/onnxrt_backend.cpython-36.pyc -------------------------------------------------------------------------------- /exec_backends/__pycache__/triton_backend.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/exec_backends/__pycache__/triton_backend.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /converters/__pycache__/remove_initializer_from_input.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/__pycache__/remove_initializer_from_input.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/export_model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/export_model.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/export_onnx.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/export_onnx.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/_export_helper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/_export_helper.cpython-36.pyc -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/_op_translations.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NNDam/Retinaface-TensorRT/HEAD/converters/mx2onnx_conv/onnx/mx2onnx/__pycache__/_op_translations.cpython-36.pyc -------------------------------------------------------------------------------- /utils/helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | from itertools import chain, islice 3 | from distutils import util 4 | 5 | def prepare_folders(paths): 6 | for path in paths: 7 | os.makedirs(path, exist_ok=True) 8 | 9 | def to_chunks(iterable, size=10): 10 | iterator = iter(iterable) 11 | for first in iterator: 12 | yield chain([first], islice(iterator, size - 1)) 13 | 14 | def tobool(input): 15 | try: 16 | return bool(util.strtobool(input)) 17 | except: 18 | return False 19 | 20 | def parse_size(size=None, def_size='640,480'): 21 | if size is None: 22 | size = def_size 23 | size_lst = list(map(int, size.split(','))) 24 | return size_lst 25 | 26 | -------------------------------------------------------------------------------- /converters/remove_initializer_from_input.py: -------------------------------------------------------------------------------- 1 | import onnx 2 | import argparse 3 | 4 | def remove_initializer_from_input(input, output): 5 | 6 | model = onnx.load(input) 7 | if model.ir_version < 4: 8 | print( 9 | 'Model with ir_version below 4 requires to include initilizer in graph input' 10 | ) 11 | return 12 | 13 | inputs = model.graph.input 14 | name_to_input = {} 15 | for input in inputs: 16 | name_to_input[input.name] = input 17 | 18 | for initializer in model.graph.initializer: 19 | if initializer.name in name_to_input: 20 | inputs.remove(name_to_input[initializer.name]) 21 | 22 | onnx.save(model, output) 23 | -------------------------------------------------------------------------------- /face_detectors.py: -------------------------------------------------------------------------------- 1 | from detectors.retinaface import RetinaFace 2 | 3 | def get_retinaface(model_path, backend, outputs, rac, masks=False): 4 | 5 | inference_backend = backend.DetectorInfer(model=model_path, output_order=outputs) 6 | model = RetinaFace(inference_backend=inference_backend, rac=rac, masks=masks) 7 | return model 8 | 9 | 10 | def retinaface_r50_v1(model_path, backend, outputs): 11 | model = get_retinaface(model_path, backend, outputs, rac="net3") 12 | return model 13 | 14 | 15 | def retinaface_mnet025_v1(model_path, backend, outputs): 16 | model = get_retinaface(model_path, backend, outputs, rac="net3") 17 | return model 18 | 19 | 20 | def retinaface_mnet025_v2(model_path, backend, outputs): 21 | model = get_retinaface(model_path, backend, outputs, rac="net3l") 22 | return model 23 | -------------------------------------------------------------------------------- /face_processors.py: -------------------------------------------------------------------------------- 1 | from detectors.retinaface import RetinaFace 2 | 3 | 4 | def arcface_r100_v1(model_path, backend, outputs): 5 | model = backend.Arcface(rec_name=model_path) 6 | return model 7 | 8 | 9 | def r50_arcface_msfdrop75(model_path, backend, outputs): 10 | model = backend.Arcface(rec_name=model_path) 11 | return model 12 | 13 | 14 | def r100_arcface_msfdrop75(model_path, backend, outputs): 15 | model = backend.Arcface(rec_name=model_path) 16 | return model 17 | 18 | 19 | def glint360k_r100FC_1_0(model_path, backend, outputs): 20 | model = backend.Arcface(rec_name=model_path) 21 | return model 22 | 23 | 24 | def glint360k_r100FC_0_1(model_path, 
backend, outputs): 25 | model = backend.Arcface(rec_name=model_path) 26 | return model 27 | 28 | 29 | def genderage_v1(model_path, backend, outputs): 30 | model = backend.FaceGenderage(rec_name=model_path) 31 | return model 32 | -------------------------------------------------------------------------------- /common/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numba import njit 3 | 4 | @njit() 5 | def nms(dets, thresh = 0.4): 6 | x1 = dets[:, 0] 7 | y1 = dets[:, 1] 8 | x2 = dets[:, 2] 9 | y2 = dets[:, 3] 10 | scores = dets[:, 4] 11 | 12 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 13 | order = scores.argsort()[::-1] 14 | 15 | keep = [] 16 | while order.size > 0: 17 | i = order[0] 18 | keep.append(i) 19 | xx1 = np.maximum(x1[i], x1[order[1:]]) 20 | yy1 = np.maximum(y1[i], y1[order[1:]]) 21 | xx2 = np.minimum(x2[i], x2[order[1:]]) 22 | yy2 = np.minimum(y2[i], y2[order[1:]]) 23 | 24 | w = np.maximum(0.0, xx2 - xx1 + 1) 25 | h = np.maximum(0.0, yy2 - yy1 + 1) 26 | inter = w * h 27 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 28 | 29 | inds = np.where(ovr <= thresh)[0] 30 | order = order[inds + 1] 31 | 32 | return keep -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | """Module for ONNX model format support for Apache MXNet.""" 18 | 19 | from .mx2onnx.export_model import export_model 20 | -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # coding: utf-8 19 | """ONNX Export module""" 20 | 21 | from . 
import export_model 22 | from . import export_onnx 23 | from . import _op_translations 24 | -------------------------------------------------------------------------------- /utils/image.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import cv2 3 | 4 | class ImageData: 5 | def __init__(self, image, max_size: List[int] = None): 6 | 7 | if max_size is None: 8 | max_size = [640, 480] 9 | 10 | if len(max_size) == 1: 11 | max_size = [max_size[0]] * 2 12 | 13 | self.orig_image = image 14 | self.transformed_image = self.orig_image 15 | self.const_width = max_size[0] 16 | self.const_height = max_size[1] 17 | self.resize_times = 0 18 | self.scale_factor = 1.0 19 | 20 | def resize_image(self, pad: bool = True, mode: str = 'pad'): 21 | self.resize_times += 1 22 | cw = int(self.const_width / self.resize_times) 23 | ch = int(self.const_height / self.resize_times) 24 | h, w, _ = self.transformed_image.shape 25 | if mode == 'stretch': 26 | self.transformed_image = cv2.resize(self.transformed_image, dsize=(self.const_width, self.const_height)) 27 | else: 28 | self.scale_factor = min(cw / w, ch / h) 29 | # If image is too small, it may contain only single face, which leads to decreased detection accuracy, 30 | # so we reduce scale factor by some factor 31 | if self.scale_factor > 5: 32 | self.scale_factor = self.scale_factor * 0.7 33 | 34 | self.transformed_image = cv2.resize(self.transformed_image, (0, 0), fx=self.scale_factor, fy=self.scale_factor, 35 | interpolation=cv2.INTER_LINEAR) 36 | if pad: 37 | # # Pad right and bottom with black border for fixed image proportions 38 | h, w, _ = self.transformed_image.shape 39 | if w < cw: 40 | self.transformed_image = cv2.copyMakeBorder(self.transformed_image, 0, 0, 0, cw - w, 41 | cv2.BORDER_CONSTANT) 42 | self.left_border = cw - w 43 | if h < ch: 44 | self.transformed_image = cv2.copyMakeBorder(self.transformed_image, 0, ch - h, 0, 0, 45 | cv2.BORDER_CONSTANT) 46 | self.bottom_border = ch - h 47 | -------------------------------------------------------------------------------- /converters/reshape_onnx.py: -------------------------------------------------------------------------------- 1 | import onnx 2 | import math 3 | import os 4 | from typing import List 5 | 6 | 7 | def reshape(model, n: int = 1, h: int = 480, w: int = 640, mode='auto'): 8 | ''' 9 | :param model: Input ONNX model object 10 | :param n: Batch size dimension 11 | :param h: Height dimension 12 | :param w: Width dimension 13 | :param mode: Set `retinaface` to reshape RetinaFace model, otherwise reshape Centerface 14 | :return: ONNX model with reshaped input and outputs 15 | ''' 16 | if mode == 'auto': 17 | # Assert that retinaface models have outputs containing word 'stride' in their names 18 | 19 | out_name = model.graph.output[0].name 20 | if 'stride' in out_name.lower(): 21 | mode = 'retinaface' 22 | elif out_name.lower() == 'fc1': 23 | mode = 'arcface' 24 | else: 25 | mode = 'centerface' 26 | 27 | d = model.graph.input[0].type.tensor_type.shape.dim 28 | d[0].dim_value = n 29 | if mode != 'arcface': 30 | d[2].dim_value = h 31 | d[3].dim_value = w 32 | divisor = 4 33 | for output in model.graph.output: 34 | if mode == 'retinaface': 35 | divisor = int(output.name.split('stride')[-1]) 36 | d = output.type.tensor_type.shape.dim 37 | d[0].dim_value = n 38 | if mode != 'arcface': 39 | d[2].dim_value = math.ceil(h / divisor) 40 | d[3].dim_value = math.ceil(w / divisor) 41 | return model 42 | 43 | 44 | def 
reshape_onnx_input(onnx_path: str, out_path: str, im_size: List[int] = None, batch_size: int = 1, 45 | mode: str = 'auto'): 46 | ''' 47 | Reshape ONNX file input and output for different image sizes. Only applicable for MXNet Retinaface models 48 | and official Centerface models. 49 | 50 | :param onnx_path: Path to input ONNX file 51 | :param out_path: Path to output ONNX file 52 | :param im_size: Desired output image size in W, H format. Default: [640, 480] 53 | :param mode: Available modes: retinaface, centerface, auto (try to detect if input model is retina- or centerface) 54 | :return: 55 | ''' 56 | 57 | if im_size is None: 58 | im_size = [640, 480] 59 | 60 | model = onnx.load(onnx_path) 61 | reshaped = reshape(model, n=batch_size, h=im_size[1], w=im_size[0], mode=mode) 62 | 63 | with open(out_path, "wb") as file_handle: 64 | serialized = reshaped.SerializeToString() 65 | file_handle.write(serialized) 66 | -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/_export_helper.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | """export helper functions""" 18 | # coding: utf-8 19 | import os 20 | import logging 21 | import mxnet as mx 22 | 23 | 24 | def load_module(sym_filepath, params_filepath): 25 | """Loads the MXNet model file and 26 | returns MXNet symbol and params (weights). 27 | 28 | Parameters 29 | ---------- 30 | json_path : str 31 | Path to the json file 32 | params_path : str 33 | Path to the params file 34 | 35 | Returns 36 | ------- 37 | sym : MXNet symbol 38 | Model symbol object 39 | 40 | params : params object 41 | Model weights including both arg and aux params. 
42 | """ 43 | if not (os.path.isfile(sym_filepath) and os.path.isfile(params_filepath)): 44 | raise ValueError("Symbol and params files provided are invalid") 45 | 46 | try: 47 | # reads symbol.json file from given path and 48 | # retrieves model prefix and number of epochs 49 | model_name = sym_filepath.rsplit('.', 1)[0].rsplit('-', 1)[0] 50 | params_file_list = params_filepath.rsplit('.', 1)[0].rsplit('-', 1) 51 | # Setting num_epochs to 0 if not present in filename 52 | num_epochs = 0 if len(params_file_list) == 1 else int(params_file_list[1]) 53 | except IndexError: 54 | logging.info("Model and params name should be in format: " 55 | "prefix-symbol.json, prefix-epoch.params") 56 | raise 57 | 58 | sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, num_epochs) 59 | 60 | # Merging arg and aux parameters 61 | params = {} 62 | params.update(arg_params) 63 | params.update(aux_params) 64 | 65 | return sym, params 66 | -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/LICENSE: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | # Based on 19 | # https://github.com/NVIDIA/mxnet_to_onnx/blob/master/mx2onnx_converter/# 20 | # Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 21 | # 22 | # Redistribution and use in source and binary forms, with or without 23 | # modification, are permitted provided that the following conditions 24 | # are met: 25 | # * Redistributions of source code must retain the above copyright 26 | # notice, this list of conditions and the following disclaimer. 27 | # * Redistributions in binary form must reproduce the above copyright 28 | # notice, this list of conditions and the following disclaimer in the 29 | # documentation and/or other materials provided with the distribution. 30 | # * Neither the name of NVIDIA CORPORATION nor the names of its 31 | # contributors may be used to endorse or promote products derived 32 | # from this software without specific prior written permission. 33 | # 34 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 35 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 37 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 38 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 39 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 40 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 41 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 42 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Retinaface with TensorRT support 2 | ![alt text](retinaface_r50_v1.jpg) 3 | ## Prerequisites 4 | * TensorRT 5 | * CUDA 6 | * PyCUDA 7 | * MXNet with TensorRT support 8 | ## Installation 9 | * Install PyCUDA 10 | ``` 11 | sudo apt-get install build-essential python-dev python-setuptools libboost-python-dev libboost-thread-dev -y 12 | sudo apt-get install python-numpy python3-numpy -y 13 | sudo apt-get install libboost-all-dev 14 | git clone --recursive --branch v2020.1 https://github.com/inducer/pycuda.git 15 | cd pycuda 16 | python configure.py --cuda-root=/usr/local/cuda-10.2 17 | pip install -e . 18 | ``` 19 | * Install the required Python packages 20 | ``` 21 | pip install opencv-python insightface nvidia-pyindex geventhttpclient tritonclient onnx onnxruntime 22 | pip install tensorrt-7.2.2.3-cp36-none-linux_x86_64.whl 23 | ``` 24 | ## Build Retinaface-TensorRT with single-image inference 25 | * Without FP16 26 | ``` 27 | python3 build_detectors.py --im_width 1600 --im_height 898 28 | ``` 29 | * With FP16 30 | ``` 31 | python3 build_detectors.py --im_width 1600 --im_height 898 --fp16 32 | ``` 33 | ## Build Retinaface-TensorRT with batch inference 34 | * Step 1: Convert to ONNX (download the model weights from https://github.com/deepinsight/insightface) 35 | ``` 36 | python3 sample_retinaface_to_onnx.py \ 37 | --model_symbol R50-symbol.json \ 38 | --model_params R50-0000.params \ 39 | --batch_size 4 \ 40 | --im_width 640 \ 41 | --im_height 640 \ 42 | --onnx_path retinaface_r50_v1-640x640-batchsize_4.onnx 43 | ``` 44 | * Step 2: Convert ONNX to TensorRT with FP16 45 | ``` 46 | /usr/src/tensorrt/bin/trtexec \ 47 | --onnx=retinaface_r50_v1-640x640-batchsize_4.onnx \ 48 | --saveEngine=retinaface_r50_v1-640x640-batchsize_4.trt \ 49 | --verbose \ 50 | --fp16 51 | ``` 52 | * Step 3: Test the converted model 53 | ``` 54 | python3 sample_retinaface_to_trt.py \ 55 | --model_path retinaface_r50_v1-640x640-batchsize_4.trt \ 56 | --batch_size 4 \ 57 | --im_width 640 \ 58 | --im_height 640 59 | ``` 60 | Note: Batch size and image size are fixed for the TensorRT engine. 61 | ## Performance 62 | Resolution | Baseline (FPS) | TensorRT-FP32 (batchsize=1) | TensorRT-FP16 (batchsize=1) | Speed Up 63 | --- | --- | --- | --- | --- 64 | 1280x1280 | 18.7 | 22.2 | 28.1 | 1.50x 65 | 640x640 | 57.1 | 71.1 | 131.1 | 2.30x 66 | 512x512 | 75.4 | 109.2 | 191.8 | 2.54x 67 | 512x256 | 80.0 | 127.6 | 212.6 | 2.66x 68 | ## Reference 69 | [1] https://github.com/deepinsight/insightface
70 | [2] https://github.com/SthPhoenix/InsightFace-REST 71 | -------------------------------------------------------------------------------- /exec_backends/onnxrt_backend.py: -------------------------------------------------------------------------------- 1 | import onnxruntime 2 | import cv2 3 | import numpy as np 4 | import logging 5 | 6 | class Arcface: 7 | def __init__(self, rec_name='/models/onnx/arcface_r100_v1/arcface_r100_v1.onnx'): 8 | self.rec_model = onnxruntime.InferenceSession(rec_name) 9 | self.outputs = [e.name for e in self.rec_model.get_outputs()] 10 | 11 | # warmup 12 | def prepare(self, **kwargs): 13 | logging.info("Warming up ArcFace ONNX Runtime engine...") 14 | self.rec_model.run(self.outputs, {self.rec_model.get_inputs()[0].name: [np.zeros((3, 112, 112), np.float32)]}) 15 | 16 | def get_embedding(self, face_img): 17 | if not isinstance(face_img, list): 18 | face_img = [face_img] 19 | 20 | for i, img in enumerate(face_img): 21 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 22 | img = np.transpose(img, (2, 0, 1)) 23 | face_img[i] = img.astype(np.float32) 24 | 25 | face_img = np.stack(face_img) 26 | net_out = self.rec_model.run(self.outputs, {self.rec_model.get_inputs()[0].name: face_img}) 27 | return net_out[0] 28 | 29 | 30 | class FaceGenderage: 31 | 32 | def __init__(self, rec_name='/models/onnx/genderage_v1/genderage_v1.onnx', outputs=None): 33 | self.rec_model = onnxruntime.InferenceSession(rec_name) 34 | self.input = self.rec_model.get_inputs()[0] 35 | if outputs is None: 36 | outputs = [e.name for e in self.rec_model.get_outputs()] 37 | self.outputs = outputs 38 | 39 | # warmup 40 | def prepare(self, **kwargs): 41 | logging.info("Warming up GenderAge ONNX Runtime engine...") 42 | self.rec_model.run(self.outputs, 43 | {self.rec_model.get_inputs()[0].name: [np.zeros(tuple(self.input.shape[1:]), np.float32)]}) 44 | 45 | def get(self, face_img): 46 | face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) 47 | face_img = np.transpose(face_img, (2, 0, 1)) 48 | face_img = np.expand_dims(face_img, axis=0) 49 | face_img = face_img.astype(np.float32) 50 | 51 | ret = self.rec_model.run(self.outputs, {self.input.name: face_img})[0] 52 | g = ret[:, 0:2].flatten() 53 | gender = np.argmax(g) 54 | a = ret[:, 2:202].reshape((100, 2)) 55 | a = np.argmax(a, axis=1) 56 | age = int(sum(a)) 57 | return gender, age 58 | 59 | 60 | class DetectorInfer: 61 | 62 | def __init__(self, model='/models/onnx/centerface/centerface.onnx', 63 | output_order=None): 64 | 65 | self.rec_model = onnxruntime.InferenceSession(model) 66 | self.input = self.rec_model.get_inputs()[0] 67 | 68 | if output_order is None: 69 | output_order = [e.name for e in self.rec_model.get_outputs()] 70 | self.output_order = output_order 71 | 72 | self.input_shape = tuple(self.input.shape) 73 | print(self.input_shape) 74 | 75 | # warmup 76 | def prepare(self, ctx=0): 77 | logging.info("Warming up face detection ONNX Runtime engine...") 78 | self.rec_model.run(self.output_order, 79 | {self.rec_model.get_inputs()[0].name: [np.zeros(tuple(self.input.shape[1:]), np.float32)]}) 80 | 81 | def run(self, input): 82 | net_out = self.rec_model.run(self.output_order, {self.input.name: input}) 83 | return net_out -------------------------------------------------------------------------------- /utils/model_store.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_store.py 4 | """ 5 | from 
__future__ import print_function 6 | 7 | __all__ = ['get_model_file'] 8 | import os 9 | import zipfile 10 | import glob 11 | 12 | from .download import download, check_sha1 13 | 14 | _model_sha1 = {name: checksum for checksum, name in [ 15 | ('95be21b58e29e9c1237f229dae534bd854009ce0', 'arcface_r100_v1'), 16 | ('', 'arcface_mfn_v1'), 17 | ('39fd1e087a2a2ed70a154ac01fecaa86c315d01b', 'retinaface_r50_v1'), 18 | ('2c9de8116d1f448fd1d4661f90308faae34c990a', 'retinaface_mnet025_v1'), 19 | ('0db1d07921d005e6c9a5b38e059452fc5645e5a4', 'retinaface_mnet025_v2'), 20 | ('7dd8111652b7aac2490c5dcddeb268e53ac643e6', 'genderage_v1'), 21 | ]} 22 | 23 | base_repo_url = 'http://insightface.ai/files/' 24 | _url_format = '{repo_url}models/{file_name}.zip' 25 | 26 | 27 | def short_hash(name): 28 | if name not in _model_sha1: 29 | raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) 30 | return _model_sha1[name][:8] 31 | 32 | 33 | def find_params_file(dir_path): 34 | if not os.path.exists(dir_path): 35 | return None 36 | paths = glob.glob("%s/*.params"%dir_path) 37 | if len(paths)==0: 38 | return None 39 | paths = sorted(paths) 40 | return paths[-1] 41 | 42 | def get_model_file(name, root=os.path.join('~', '.insightface', 'models')): 43 | r"""Return location for the pretrained on local file system. 44 | 45 | This function will download from online model zoo when model cannot be found or has mismatch. 46 | The root directory will be created if it doesn't exist. 47 | 48 | Parameters 49 | ---------- 50 | name : str 51 | Name of the model. 52 | root : str, default '~/.mxnet/models' 53 | Location for keeping the model parameters. 54 | 55 | Returns 56 | ------- 57 | file_path 58 | Path to the requested pretrained model file. 59 | """ 60 | 61 | file_name = name 62 | root = os.path.expanduser(root) 63 | dir_path = os.path.join(root, name) 64 | file_path = find_params_file(dir_path) 65 | #file_path = os.path.join(root, file_name + '.params') 66 | sha1_hash = _model_sha1[name] 67 | if file_path is not None: 68 | if check_sha1(file_path, sha1_hash): 69 | return file_path 70 | else: 71 | print('Mismatch in the content of model file detected. Downloading again.') 72 | else: 73 | print('Model file is not found. Downloading.') 74 | 75 | if not os.path.exists(root): 76 | os.makedirs(root) 77 | if not os.path.exists(dir_path): 78 | os.makedirs(dir_path) 79 | 80 | zip_file_path = os.path.join(root, file_name + '.zip') 81 | repo_url = base_repo_url 82 | if repo_url[-1] != '/': 83 | repo_url = repo_url + '/' 84 | download(_url_format.format(repo_url=repo_url, file_name=file_name), 85 | path=zip_file_path, 86 | overwrite=True) 87 | with zipfile.ZipFile(zip_file_path) as zf: 88 | zf.extractall(dir_path) 89 | os.remove(zip_file_path) 90 | file_path = find_params_file(dir_path) 91 | 92 | if check_sha1(file_path, sha1_hash): 93 | return file_path 94 | else: 95 | raise ValueError('Downloaded file has different hash. Please try again.') 96 | 97 | 98 | -------------------------------------------------------------------------------- /utils/download.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/download.py 3 | """ 4 | import os 5 | import hashlib 6 | import requests 7 | from tqdm import tqdm 8 | 9 | def check_sha1(filename, sha1_hash): 10 | """Check whether the sha1 hash of the file content matches the expected hash. 
11 | Parameters 12 | ---------- 13 | filename : str 14 | Path to the file. 15 | sha1_hash : str 16 | Expected sha1 hash in hexadecimal digits. 17 | Returns 18 | ------- 19 | bool 20 | Whether the file content matches the expected hash. 21 | """ 22 | sha1 = hashlib.sha1() 23 | with open(filename, 'rb') as f: 24 | while True: 25 | data = f.read(1048576) 26 | if not data: 27 | break 28 | sha1.update(data) 29 | 30 | sha1_file = sha1.hexdigest() 31 | l = min(len(sha1_file), len(sha1_hash)) 32 | return sha1.hexdigest()[0:l] == sha1_hash[0:l] 33 | 34 | def download(url, path=None, overwrite=False, sha1_hash=None): 35 | """Download an given URL 36 | Parameters 37 | ---------- 38 | url : str 39 | URL to download 40 | path : str, optional 41 | Destination path to store downloaded file. By default stores to the 42 | current directory with same name as in url. 43 | overwrite : bool, optional 44 | Whether to overwrite destination file if already exists. 45 | sha1_hash : str, optional 46 | Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified 47 | but doesn't match. 48 | Returns 49 | ------- 50 | str 51 | The file path of the downloaded file. 52 | """ 53 | if path is None: 54 | fname = url.split('/')[-1] 55 | else: 56 | path = os.path.expanduser(path) 57 | if os.path.isdir(path): 58 | fname = os.path.join(path, url.split('/')[-1]) 59 | else: 60 | fname = path 61 | 62 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): 63 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) 64 | if not os.path.exists(dirname): 65 | os.makedirs(dirname) 66 | 67 | print('Downloading %s from %s...'%(fname, url)) 68 | r = requests.get(url, stream=True) 69 | if r.status_code != 200: 70 | raise RuntimeError("Failed downloading url %s"%url) 71 | total_length = r.headers.get('content-length') 72 | with open(fname, 'wb') as f: 73 | if total_length is None: # no content length header 74 | for chunk in r.iter_content(chunk_size=1024): 75 | if chunk: # filter out keep-alive new chunks 76 | f.write(chunk) 77 | else: 78 | total_length = int(total_length) 79 | for chunk in tqdm(r.iter_content(chunk_size=1024), 80 | total=int(total_length / 1024. + 0.5), 81 | unit='KB', unit_scale=False, dynamic_ncols=True): 82 | f.write(chunk) 83 | 84 | if sha1_hash and not check_sha1(fname, sha1_hash): 85 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 86 | 'The repo may be outdated or download may be incomplete. 
' \ 87 | 'If the "repo_url" is overridden, consider switching to ' \ 88 | 'the default repo.'.format(fname)) 89 | 90 | return fname 91 | -------------------------------------------------------------------------------- /exec_backends/trt_backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import time 5 | import logging 6 | 7 | from .trt_loader import TrtModel 8 | 9 | class Arcface: 10 | 11 | def __init__(self, rec_name: str='/models/trt-engines/arcface_r100_v1/arcface_r100_v1.plan'): 12 | self.rec_model = TrtModel(rec_name) 13 | self.input_shape = None 14 | self.max_batch_size = 1 15 | 16 | # warmup 17 | def prepare(self, ctx_id=0): 18 | logging.info("Warming up ArcFace TensorRT engine...") 19 | self.rec_model.build() 20 | self.input_shape = self.rec_model.input_shapes[0] 21 | self.max_batch_size = self.rec_model.max_batch_size 22 | if self.input_shape[0] == -1: 23 | self.input_shape = (1,) + self.input_shape[1:] 24 | 25 | self.rec_model.run(np.zeros(self.input_shape, np.float32)) 26 | logging.info(f"Engine warmup complete! Expecting input shape: {self.input_shape}. Max batch size: {self.max_batch_size}") 27 | 28 | def get_embedding(self, face_img): 29 | 30 | if not isinstance(face_img, list): 31 | face_img = [face_img] 32 | 33 | for i, img in enumerate(face_img): 34 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 35 | img = np.transpose(img, (2, 0, 1)) 36 | #img = np.expand_dims(img, axis=0) 37 | face_img[i] = img 38 | #assert face_img.shape == self.rec_model.input_shapes[0] 39 | face_img = np.stack(face_img) 40 | embeddings = self.rec_model.run(face_img, deflatten=True)[0] 41 | return embeddings 42 | 43 | 44 | class FaceGenderage: 45 | 46 | def __init__(self, rec_name: str='/models/trt-engines/genderage_v1/genderage_v1.plan'): 47 | self.rec_model = TrtModel(rec_name) 48 | self.input_shape = None 49 | 50 | # warmup 51 | def prepare(self, ctx_id=0): 52 | logging.info("Warming up GenderAge TensorRT engine...") 53 | self.rec_model.build() 54 | self.input_shape = self.rec_model.input_shapes[0] 55 | self.rec_model.run(np.zeros(self.input_shape, np.float32)) 56 | logging.info(f"Engine warmup complete! Expecting input shape: {self.input_shape}") 57 | 58 | def get(self, face_img): 59 | face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) 60 | face_img = np.transpose(face_img, (2, 0, 1)) 61 | face_img = np.expand_dims(face_img, axis=0) 62 | assert face_img.shape == self.rec_model.input_shapes[0] 63 | ret = self.rec_model.run(face_img, deflatten=True)[0] 64 | g = ret[:, 0:2].flatten() 65 | gender = np.argmax(g) 66 | a = ret[:, 2:202].reshape((100, 2)) 67 | a = np.argmax(a, axis=1) 68 | age = int(sum(a)) 69 | return gender, age 70 | 71 | class DetectorInfer: 72 | 73 | def __init__(self, model='/models/trt-engines/centerface/centerface.plan', 74 | output_order=None): 75 | self.rec_model = TrtModel(model) 76 | self.model_name = os.path.basename(model) 77 | self.input_shape = None 78 | self.output_order = output_order 79 | 80 | # warmup 81 | def prepare(self, ctx_id=0): 82 | logging.info(f"Warming up face detector TensorRT engine...") 83 | self.rec_model.build() 84 | self.input_shape = self.rec_model.input_shapes[0] 85 | if not self.output_order: 86 | self.output_order = self.rec_model.out_names 87 | self.rec_model.run(np.zeros(self.input_shape, np.float32)) 88 | logging.info(f"Engine warmup complete! 
Expecting input shape: {self.input_shape}") 89 | 90 | def run(self, input): 91 | net_out = self.rec_model.run(input, deflatten=True, as_dict=True) 92 | net_out = [net_out[e] for e in self.output_order] 93 | return net_out -------------------------------------------------------------------------------- /converters/onnx_to_trt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorrt as trt 3 | import sys 4 | from typing import Tuple, Union 5 | import logging 6 | 7 | # Based on code from NVES_R's response at 8 | # https://forums.developer.nvidia.com/t/segmentation-fault-when-creating-the-trt-builder-in-python-works-fine-with-trtexec/111376 9 | 10 | 11 | TRT_LOGGER = trt.Logger(trt.Logger.WARNING) 12 | EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 13 | 14 | 15 | def _build_engine_onnx(input_onnx: Union[str, bytes], force_fp16: bool = False, max_batch_size: int = 1): 16 | """ 17 | Builds TensorRT engine from provided ONNX file 18 | 19 | :param input_onnx: serialized ONNX model. 20 | :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful. 21 | :param max_batch_size: Define maximum batch size supported by engine. If >1 creates optimization profile. 22 | :return: TensorRT engine 23 | """ 24 | with trt.Builder(TRT_LOGGER) as builder, \ 25 | builder.create_network(EXPLICIT_BATCH) as network, \ 26 | builder.create_builder_config() as config, \ 27 | trt.OnnxParser(network, TRT_LOGGER) as parser: 28 | 29 | if force_fp16 is True: 30 | logging.info('Building TensorRT engine with FP16 support.') 31 | has_fp16 = builder.platform_has_fast_fp16 32 | if not has_fp16: 33 | logging.warning('Builder report no fast FP16 support. Performance drop expected') 34 | config.set_flag(trt.BuilderFlag.FP16) 35 | config.set_flag(trt.BuilderFlag.STRICT_TYPES) 36 | 37 | config.max_workspace_size = 1 << 20 38 | 39 | if not parser.parse(input_onnx): 40 | print('ERROR: Failed to parse the ONNX') 41 | for error in range(parser.num_errors): 42 | print(parser.get_error(error)) 43 | sys.exit(1) 44 | 45 | if max_batch_size != 1: 46 | logging.warning('Batch size !=1 is used. Ensure your inference code supports it.') 47 | profile = builder.create_optimization_profile() 48 | # Get input name and shape for building optimization profile 49 | input = network.get_input(0) 50 | im_size = input.shape[2:] 51 | input_name = input.name 52 | profile.set_shape(input_name, (1, 3) + im_size, (1, 3) + im_size, (max_batch_size, 3) + im_size) 53 | config.add_optimization_profile(profile) 54 | 55 | return builder.build_engine(network, config=config) 56 | 57 | 58 | 59 | def convert_onnx(input_onnx: Union[str, bytes], engine_file_path: str, force_fp16: bool = False, 60 | max_batch_size: int = 1, 61 | im_size: Tuple[int] = None): 62 | ''' 63 | Creates TensorRT engine and serializes it to disk 64 | :param input_onnx: Path to ONNX file on disk or serialized ONNX model. 65 | :param engine_file_path: Path where TensorRT engine should be saved. 66 | :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful. 67 | :param max_batch_size: Define maximum batch size supported by engine. If >1 creates optimization profile. 68 | :param im_size: Required if max_batch_size > 1. Used for creation of optimization profile. 
69 | :return: None 70 | ''' 71 | 72 | onnx_obj = None 73 | if isinstance(input_onnx, str): 74 | with open(input_onnx, "rb") as f: 75 | onnx_obj = f.read() 76 | elif isinstance(input_onnx, bytes): 77 | onnx_obj = input_onnx 78 | 79 | engine = _build_engine_onnx(input_onnx=onnx_obj, 80 | force_fp16=force_fp16, max_batch_size=max_batch_size) 81 | 82 | assert not isinstance(engine, type(None)) 83 | 84 | with open(engine_file_path, "wb") as f: 85 | f.write(engine.serialize()) 86 | -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/export_model.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # coding: utf-8 19 | #pylint: disable-msg=too-many-arguments 20 | 21 | """Exports an MXNet model to the ONNX model format""" 22 | import logging 23 | import numpy as np 24 | 25 | from mxnet.base import string_types 26 | from mxnet import symbol 27 | from .export_onnx import MXNetGraph 28 | from ._export_helper import load_module 29 | 30 | 31 | def export_model(sym, params, input_shape, input_type=np.float32, 32 | onnx_file_path='model.onnx', verbose=False): 33 | """Exports the MXNet model file, passed as a parameter, into ONNX model. 34 | Accepts both symbol,parameter objects as well as json and params filepaths as input. 35 | Operator support and coverage - 36 | https://cwiki.apache.org/confluence/display/MXNET/ONNX+Operator+Coverage 37 | 38 | Parameters 39 | ---------- 40 | sym : str or symbol object 41 | Path to the json file or Symbol object 42 | params : str or symbol object 43 | Path to the params file or params dictionary. (Including both arg_params and aux_params) 44 | input_shape : List of tuple 45 | Input shape of the model e.g [(1,3,224,224)] 46 | input_type : data type 47 | Input data type e.g. np.float32 48 | onnx_file_path : str 49 | Path where to save the generated onnx file 50 | verbose : Boolean 51 | If true will print logs of the model conversion 52 | 53 | Returns 54 | ------- 55 | onnx_file_path : str 56 | Onnx file path 57 | 58 | Notes 59 | ----- 60 | This method is available when you ``import mxnet.contrib.onnx`` 61 | 62 | """ 63 | 64 | try: 65 | from onnx import helper, mapping 66 | except ImportError: 67 | raise ImportError("Onnx and protobuf need to be installed. 
" 68 | + "Instructions to install - https://github.com/onnx/onnx") 69 | 70 | converter = MXNetGraph() 71 | 72 | data_format = np.dtype(input_type) 73 | # if input parameters are strings(file paths), load files and create symbol parameter objects 74 | if isinstance(sym, string_types) and isinstance(params, string_types): 75 | logging.info("Converting json and weight file to sym and params") 76 | sym_obj, params_obj = load_module(sym, params) 77 | onnx_graph = converter.create_onnx_graph_proto(sym_obj, params_obj, input_shape, 78 | mapping.NP_TYPE_TO_TENSOR_TYPE[data_format], 79 | verbose=verbose) 80 | elif isinstance(sym, symbol.Symbol) and isinstance(params, dict): 81 | onnx_graph = converter.create_onnx_graph_proto(sym, params, input_shape, 82 | mapping.NP_TYPE_TO_TENSOR_TYPE[data_format], 83 | verbose=verbose) 84 | else: 85 | raise ValueError("Input sym and params should either be files or objects") 86 | 87 | # Create the model (ModelProto) 88 | onnx_model = helper.make_model(onnx_graph) 89 | 90 | # Save model on disk 91 | with open(onnx_file_path, "wb") as file_handle: 92 | serialized = onnx_model.SerializeToString() 93 | file_handle.write(serialized) 94 | logging.info("Input shape of the model %s ", input_shape) 95 | logging.info("Exported ONNX file %s saved to disk", onnx_file_path) 96 | 97 | return onnx_file_path 98 | -------------------------------------------------------------------------------- /exec_backends/trt_loader.py: -------------------------------------------------------------------------------- 1 | import pycuda.driver as cuda 2 | import pycuda.autoinit 3 | import numpy as np 4 | 5 | import tensorrt as trt 6 | 7 | TRT_LOGGER = trt.Logger() 8 | 9 | # Simple helper data class that's a little nicer to use than a 2-tuple. 10 | class HostDeviceMem(object): 11 | def __init__(self, host_mem, device_mem): 12 | self.host = host_mem 13 | self.device = device_mem 14 | 15 | def __str__(self): 16 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 17 | 18 | def __repr__(self): 19 | return self.__str__() 20 | 21 | # Allocates all buffers required for an engine, i.e. host/device inputs/outputs. 22 | def allocate_buffers(engine): 23 | inputs = [] 24 | outputs = [] 25 | bindings = [] 26 | stream = cuda.Stream() 27 | out_shapes = [] 28 | input_shapes = [] 29 | out_names = [] 30 | max_batch_size = engine.get_profile_shape(0, 0)[2][0] 31 | for binding in engine: 32 | binding_shape = engine.get_binding_shape(binding) 33 | #Fix -1 dimension for proper memory allocation for batch_size > 1 34 | if binding_shape[0] == -1: 35 | binding_shape = (1,) + binding_shape[1:] 36 | size = trt.volume(binding_shape) * max_batch_size 37 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 38 | # Allocate host and device buffers 39 | host_mem = cuda.pagelocked_empty(size, dtype) 40 | device_mem = cuda.mem_alloc(host_mem.nbytes) 41 | # Append the device buffer to device bindings. 42 | bindings.append(int(device_mem)) 43 | # Append to the appropriate list. 44 | if engine.binding_is_input(binding): 45 | inputs.append(HostDeviceMem(host_mem, device_mem)) 46 | input_shapes.append(engine.get_binding_shape(binding)) 47 | else: 48 | outputs.append(HostDeviceMem(host_mem, device_mem)) 49 | #Collect original output shapes and names from engine 50 | out_shapes.append(engine.get_binding_shape(binding)) 51 | out_names.append(binding) 52 | return inputs, outputs, bindings, stream, input_shapes, out_shapes, out_names, max_batch_size 53 | 54 | # This function is generalized for multiple inputs/outputs. 
55 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 56 | def do_inference(context, bindings, inputs, outputs, stream): 57 | # Transfer input data to the GPU. 58 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 59 | # Run inference. 60 | context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) 61 | # Transfer predictions back from the GPU. 62 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 63 | # Synchronize the stream 64 | stream.synchronize() 65 | # Return only the host outputs. 66 | return [out.host for out in outputs] 67 | 68 | class TrtModel(object): 69 | def __init__(self, model): 70 | self.engine_file = model 71 | self.engine = None 72 | self.inputs = None 73 | self.outputs = None 74 | self.bindings = None 75 | self.stream = None 76 | self.context = None 77 | self.input_shapes = None 78 | self.out_shapes = None 79 | self.max_batch_size = 1 80 | 81 | def build(self): 82 | with open(self.engine_file, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime: 83 | self.engine = runtime.deserialize_cuda_engine(f.read()) 84 | self.inputs, self.outputs, self.bindings, self.stream, self.input_shapes, self.out_shapes, self.out_names, self.max_batch_size = allocate_buffers( 85 | self.engine) 86 | 87 | self.context = self.engine.create_execution_context() 88 | self.context.active_optimization_profile = 0 89 | 90 | def run(self, input, deflatten: bool = True, as_dict=False): 91 | # lazy load implementation 92 | if self.engine is None: 93 | self.build() 94 | 95 | input = np.asarray(input) 96 | batch_size = input.shape[0] 97 | allocate_place = np.prod(input.shape) 98 | self.inputs[0].host[:allocate_place] = input.flatten(order='C').astype(np.float32) 99 | self.context.set_binding_shape(0, input.shape) 100 | trt_outputs = do_inference( 101 | self.context, bindings=self.bindings, 102 | inputs=self.inputs, outputs=self.outputs, stream=self.stream) 103 | #Reshape TRT outputs to original shape instead of flattened array 104 | if deflatten: 105 | trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, self.out_shapes)] 106 | if as_dict: 107 | return {name: trt_outputs[i] for i, name in enumerate(self.out_names)} 108 | 109 | return [trt_output[:batch_size] for trt_output in trt_outputs] 110 | 111 | -------------------------------------------------------------------------------- /exec_backends/triton_backend.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import sys 5 | import argparse 6 | import numpy as np 7 | 8 | import tritonclient.http as httpclient 9 | from tritonclient.utils import triton_to_np_dtype 10 | from tritonclient.utils import InferenceServerException 11 | 12 | from numpy.linalg import norm 13 | 14 | 15 | def normalize(embedding): 16 | embedding_norm = norm(embedding) 17 | normed_embedding = embedding / embedding_norm 18 | return normed_embedding 19 | 20 | # Callback function used for async_run() 21 | def completion_callback(input_filenames, user_data, infer_ctx, request_id): 22 | user_data._completed_requests.put((request_id, input_filenames)) 23 | 24 | FLAGS = None 25 | 26 | def parse_model_http(model_metadata, model_config): 27 | """ 28 | Check the configuration of a model to make sure it meets the 29 | requirements for an image classification network (as expected by 30 | this client) 31 | """ 32 | if len(model_metadata['inputs']) != 1: 33 | raise Exception("expecting 1 input, got {}".format( 34 | 
len(model_metadata['inputs']))) 35 | 36 | if len(model_config['input']) != 1: 37 | raise Exception( 38 | "expecting 1 input in model configuration, got {}".format( 39 | len(model_config['input']))) 40 | 41 | input_metadata = model_metadata['inputs'][0] 42 | input_config = model_config['input'][0] 43 | output_names = [e['name'] for e in model_metadata['outputs']] 44 | 45 | max_batch_size = 0 46 | if 'max_batch_size' in model_config: 47 | max_batch_size = model_config['max_batch_size'] 48 | 49 | # Model input must have 3 dims (not counting the batch dimension), 50 | # either CHW or HWC 51 | input_batch_dim = (max_batch_size > 0) 52 | expected_input_dims = 3 + (1 if input_batch_dim else 0) 53 | if len(input_metadata['shape']) != expected_input_dims: 54 | raise Exception( 55 | "expecting input to have {} dimensions, model '{}' input has {}". 56 | format(expected_input_dims, model_metadata['name'], 57 | len(input_metadata['shape']))) 58 | 59 | 60 | if input_config['format'] == "FORMAT_NHWC": 61 | h = input_metadata['shape'][1 if input_batch_dim else 0] 62 | w = input_metadata['shape'][2 if input_batch_dim else 1] 63 | c = input_metadata['shape'][3 if input_batch_dim else 2] 64 | else: 65 | c = input_metadata['shape'][1 if input_batch_dim else 0] 66 | h = input_metadata['shape'][2 if input_batch_dim else 1] 67 | w = input_metadata['shape'][3 if input_batch_dim else 2] 68 | 69 | return (max_batch_size, input_metadata['name'],output_names, c, 70 | h, w, input_config['format'], input_metadata['datatype']) 71 | 72 | url = 'localhost:8001' 73 | model_name = 'arcface_r100_v1' 74 | model_version = '1' 75 | 76 | class Arcface: 77 | 78 | def __init__(self, rec_name = 'arcface_r100_v1', model_version='1',url='localhost:8001'): 79 | self.model_name = 'arcface_r100_v1' 80 | self.model_version = model_version 81 | self.url = url 82 | self.triton_client = httpclient.InferenceServerClient(url=url, concurrency=1) 83 | 84 | 85 | def prepare(self,ctx=0): 86 | concurrency = 1 87 | # Make sure the model matches our requirements, and get some 88 | # properties of the model that we need for preprocessing 89 | try: 90 | model_metadata = self.triton_client.get_model_metadata( 91 | model_name=self.model_name, model_version=self.model_version) 92 | except InferenceServerException as e: 93 | print("failed to retrieve the metadata: " + str(e)) 94 | sys.exit(1) 95 | 96 | try: 97 | model_config = self.triton_client.get_model_config( 98 | model_name=self.model_name, model_version=self.model_version) 99 | except InferenceServerException as e: 100 | print("failed to retrieve the config: " + str(e)) 101 | sys.exit(1) 102 | 103 | self.max_batch_size, self.input_name, self.output_name, self.c, self.h, self.w, self.format, self.dtype = parse_model_http( 104 | model_metadata, model_config) 105 | 106 | 107 | def get_embedding(self,face_img): 108 | face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) 109 | face_img = np.transpose(face_img, (2, 0, 1)) 110 | face_img = np.expand_dims(face_img, axis=0) 111 | face_img = face_img.astype(triton_to_np_dtype(self.dtype)) 112 | inputs = [] 113 | inputs.append(httpclient.InferInput(self.input_name, [1, self.c, self.h,self.w], "FP32")) 114 | inputs[0].set_data_from_numpy(face_img) 115 | 116 | out = self.triton_client.infer(self.model_name, 117 | inputs, 118 | model_version=self.model_version, 119 | outputs=None) 120 | out = [out.as_numpy(e)[0] for e in self.output_name] 121 | #print(output.get_output(self.output_name)['data']) 122 | return out 123 | 
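A minimal usage sketch for the Arcface Triton client above (an illustration under stated assumptions: a Triton Inference Server is already serving arcface_r100_v1 over HTTP at localhost:8001, and face_img is a pre-aligned 112x112 BGR face crop; the file name below is hypothetical):

import cv2
from exec_backends.triton_backend import Arcface, normalize

rec = Arcface(rec_name='arcface_r100_v1', model_version='1', url='localhost:8001')
rec.prepare()                                      # pulls model metadata/config and input layout from Triton
face_img = cv2.imread('aligned_112x112_face.jpg')  # hypothetical pre-aligned BGR crop matching the model input size
emb = rec.get_embedding(face_img)[0]               # one array per model output; ArcFace has a single embedding output
emb = normalize(emb)                               # L2-normalize before cosine-similarity comparison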
-------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import namedtuple 3 | 4 | # Net outputs in correct order expected by postprocessing code. 5 | # TensorRT might change output order for some reasons. 6 | # Also Triton Inference Server may change output order for both 7 | # ONNX and TensorRT backends if automatic configuration is used. 8 | 9 | retina_outputs = ['face_rpn_cls_prob_reshape_stride32', 10 | 'face_rpn_bbox_pred_stride32', 11 | 'face_rpn_landmark_pred_stride32', 12 | 'face_rpn_cls_prob_reshape_stride16', 13 | 'face_rpn_bbox_pred_stride16', 14 | 'face_rpn_landmark_pred_stride16', 15 | 'face_rpn_cls_prob_reshape_stride8', 16 | 'face_rpn_bbox_pred_stride8', 17 | 'face_rpn_landmark_pred_stride8'] 18 | 19 | anticonv_outputs = [ 20 | 'face_rpn_cls_prob_reshape_stride32', 21 | 'face_rpn_bbox_pred_stride32', 22 | 'face_rpn_landmark_pred_stride32', 23 | 'face_rpn_type_prob_reshape_stride32', 24 | 'face_rpn_cls_prob_reshape_stride16', 25 | 'face_rpn_bbox_pred_stride16', 26 | 'face_rpn_landmark_pred_stride16', 27 | 'face_rpn_type_prob_reshape_stride16', 28 | 'face_rpn_cls_prob_reshape_stride8', 29 | 'face_rpn_bbox_pred_stride8', 30 | 'face_rpn_landmark_pred_stride8', 31 | 'face_rpn_type_prob_reshape_stride8' 32 | ] 33 | 34 | centerface_outputs = ['537', '538', '539', '540'] 35 | dbface_outputs = ["hm", "tlrb", "landmark"] 36 | 37 | mxnet_models = { 38 | 'retinaface_mnet025_v0': { 39 | 'symbol': 'mnet.25-symbol.json', 40 | 'params': 'mnet.25-0000.params', 41 | 'shape': (1, 3, 480, 640), 42 | 'outputs': retina_outputs, 43 | 'reshape': True, 44 | 'in_package': False 45 | }, 46 | 'retinaface_mnet025_v1': { 47 | 'symbol': 'mnet10-symbol.json', 48 | 'params': 'mnet10-0000.params', 49 | 'shape': (1, 3, 480, 640), 50 | 'outputs': retina_outputs, 51 | 'reshape': True, 52 | 'in_package': True, 53 | }, 54 | 'retinaface_mnet025_v2': { 55 | 'symbol': 'mnet12-symbol.json', 56 | 'params': 'mnet12-0000.params', 57 | 'shape': (1, 3, 480, 640), 58 | 'outputs': retina_outputs, 59 | 'reshape': True, 60 | 'in_package': True, 61 | }, 62 | 'retinaface_r50_v1': { 63 | 'symbol': 'R50-symbol.json', 64 | 'params': 'R50-0000.params', 65 | 'shape': (1, 3, 480, 640), 66 | 'outputs': retina_outputs, 67 | 'reshape': True, 68 | 'in_package': True 69 | }, 70 | 'mnet_cov2': { 71 | 'symbol': 'mnet_cov2-symbol.json', 72 | 'params': 'mnet_cov2-0000.params', 73 | 'shape': (1, 3, 480, 640), 74 | 'outputs': anticonv_outputs, 75 | 'reshape': True, 76 | 'in_package': False 77 | }, 78 | 'arcface_r100_v1': { 79 | 'symbol': 'model-symbol.json', 80 | 'params': 'model-0000.params', 81 | 'shape': (1, 3, 112, 112), 82 | 'reshape': False, 83 | 'in_package': True 84 | }, 85 | 'genderage_v1': { 86 | 'symbol': 'model-symbol.json', 87 | 'params': 'model-0000.params', 88 | 'shape': (1, 3, 112, 112), 89 | 'reshape': False, 90 | 'in_package': True 91 | }, 92 | 'centerface': { 93 | 'in_package': False, 94 | 'shape': (1, 3, 480, 640), 95 | 'reshape': True, 96 | 'outputs': centerface_outputs, 97 | 'link': 'https://raw.githubusercontent.com/Star-Clouds/CenterFace/master/models/onnx/centerface_bnmerged.onnx' 98 | }, 99 | 100 | 'dbface': { 101 | 'in_package': False, 102 | 'shape': (1, 3, 640, 640), 103 | 'reshape': True, 104 | 'outputs': dbface_outputs 105 | }, 106 | 107 | 'coordinateReg': { 108 | 'symbol': '2d106det-symbol.json', 109 | 'params': '2d106det-0000.params', 110 | 'in_package': False, 
111 | 'shape': (1, 3, 192, 192), 112 | 'reshape': False 113 | }, 114 | 'r100-arcface-msfdrop75': { 115 | 'symbol': 'model-symbol.json', 116 | 'params': 'model-0000.params', 117 | 'in_package': False, 118 | 'shape': (1, 3, 112, 112), 119 | 'reshape': False 120 | }, 121 | 'r50-arcface-msfdrop75': { 122 | 'symbol': 'model-symbol.json', 123 | 'params': 'model-0000.params', 124 | 'in_package': False, 125 | 'shape': (1, 3, 112, 112), 126 | 'reshape': False 127 | }, 128 | 'glint360k_r100FC_1.0': { 129 | 'symbol': 'model-symbol.json', 130 | 'params': 'model-0000.params', 131 | 'in_package': False, 132 | 'shape': (1, 3, 112, 112), 133 | 'reshape': False 134 | }, 135 | 'glint360k_r100FC_0.1': { 136 | 'symbol': 'model-symbol.json', 137 | 'params': 'model-0000.params', 138 | 'in_package': False, 139 | 'shape': (1, 3, 112, 112), 140 | 'reshape': False 141 | } 142 | } 143 | 144 | models_repo = 'https://drive.google.com/drive/folders/109D__GLXHPmiW9tIgsCadTdjcXB0u0vK' 145 | 146 | 147 | class Configs(object): 148 | def __init__(self, models_dir: str = '/models'): 149 | self.models_dir = self.__get_param('MODELS_DIR', models_dir) 150 | self.mxnet_models_dir = os.path.join(self.models_dir, 'mxnet') 151 | self.onnx_models_dir = os.path.join(self.models_dir, 'onnx') 152 | self.trt_engines_dir = os.path.join(self.models_dir, 'trt-engines') 153 | self.mxnet_models = mxnet_models 154 | self.type2path = dict( 155 | mxnet=self.mxnet_models_dir, 156 | onnx=self.onnx_models_dir, 157 | engine=self.trt_engines_dir, 158 | plan=self.trt_engines_dir 159 | ) 160 | 161 | def __get_param(self, ENV, default=None): 162 | return os.environ.get(ENV, default) 163 | 164 | def get_mxnet_model_paths(self, model_name): 165 | symbol_path = os.path.join(self.mxnet_models_dir, model_name, self.mxnet_models[model_name].get('symbol', '')) 166 | param_path = os.path.join(self.mxnet_models_dir, model_name, self.mxnet_models[model_name].get('params', '')) 167 | return symbol_path, param_path 168 | 169 | def in_official_package(self, model_name): 170 | return mxnet_models[model_name]['in_package'] 171 | 172 | def build_model_paths(self, model_name: str, ext: str): 173 | base = self.type2path[ext] 174 | parent = os.path.join(base, model_name) 175 | file = os.path.join(parent, f"{model_name}.{ext}") 176 | return parent, file 177 | 178 | def get_outputs_order(self, model_name): 179 | return self.mxnet_models.get(model_name, {}).get('outputs') 180 | 181 | def get_shape(self, model_name): 182 | return self.mxnet_models.get(model_name, {}).get('shape') 183 | 184 | def get_dl_link(self, model_name): 185 | return self.mxnet_models.get(model_name, {}).get('link') 186 | -------------------------------------------------------------------------------- /converters/insight2onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import json 4 | import shutil 5 | from typing import List 6 | 7 | import onnx 8 | from onnx import checker 9 | import mxnet as mx 10 | import numpy as np 11 | 12 | # Load edited version of mxnet.contrib.onnx 13 | from .mx2onnx_conv import onnx as onnx_mxnet 14 | from .remove_initializer_from_input import remove_initializer_from_input 15 | 16 | print('mxnet version:', mx.__version__) 17 | print('onnx version:', onnx.__version__) 18 | 19 | 20 | 21 | # assert onnx.__version__ == '1.2.1' 22 | 23 | 24 | # Based on 25 | # https://github.com/onnx/models/issues/156#issuecomment-690847276 26 | # And MXNet export _op_translations fixes from 
https://github.com/zheshipinyinMc/arcface_retinaface_mxnet2onnx 27 | 28 | def add_dyn_batch(model_in, model_out): 29 | model = onnx.load(model_in) 30 | d = model.graph.input[0].type.tensor_type.shape.dim 31 | d[0].dim_value = -1 32 | for output in model.graph.output: 33 | d = output.type.tensor_type.shape.dim 34 | d[0].dim_value = -1 35 | onnx.save_model(model, model_out) 36 | 37 | 38 | def mxnet_fixgamma_params(input_param: str, layers: List[str]): 39 | ''' 40 | Replace gamma weights with zeros if fix_gamma is True. 41 | Specific to retinaface_mnet025_v* and genderage_v1 models. 42 | 43 | :param input_param: path to MXnet .param file 44 | :param layers: List of nodes names containg fix_gamma = True attribute 45 | ''' 46 | 47 | net_param = mx.nd.load(input_param) 48 | for layer in layers: 49 | name = f'arg:{layer}' 50 | gamma = net_param[name].asnumpy() 51 | gamma *= 0 52 | gamma += 1 53 | net_param[name] = mx.nd.array(gamma) 54 | return net_param 55 | 56 | 57 | def mxnet_model_fix(input_symbol_path: str, input_params_path: str, rewrite: bool = True): 58 | ''' 59 | Apply retinaface specific fixes, like renaming SoftmaxActivation and fixing gamma values. 60 | 61 | :param input_symbol_path: Path to MXNet .symbol file 62 | :param input_params_path: Path to MXNet .param file 63 | :param rewrite: Write fixed symbol and param at input path 64 | :return: 65 | ''' 66 | 67 | names = [] 68 | fix_gamma_layers = [] 69 | 70 | with open(input_symbol_path, 'r') as _input_symbol: 71 | fixed_sym = json.load(_input_symbol) 72 | for e in fixed_sym['nodes']: 73 | if e['op'] == 'SoftmaxActivation': 74 | e['op'] = 'softmax' 75 | e['attrs'] = {"axis": "1"} 76 | # Fix for "Graph must be in single static assignment (SSA) form" 77 | if e['name'] in names: 78 | e['name'] = f"{e['name']}_1" 79 | names.append(e['name']) 80 | if e.get('attrs', {}).get('fix_gamma') == 'True' and e['name'].endswith('_gamma'): 81 | fix_gamma_layers.append(e['name']) 82 | _input_symbol.close() 83 | 84 | fixed_params = mxnet_fixgamma_params(input_params_path, layers=fix_gamma_layers) 85 | 86 | if rewrite is True: 87 | mx.nd.save(input_params_path, fixed_params) 88 | with open(input_symbol_path, 'w') as sym_temp: 89 | json.dump(fixed_sym, sym_temp, indent=2) 90 | 91 | return fixed_sym, fixed_params 92 | 93 | 94 | def arcface_onnx_fixes(onnx_path: str, rewrite: bool = True): 95 | ''' 96 | Apply fixes specific for InsightFace ArcFace model. 
97 | (BatchNormalization spatial, and PRelu reshape) 98 | 99 | :param onnx_path: Path to ONNX model produced by MXNet export (str) 100 | :param write: Overwrite input model (bool, default: True) 101 | :return: ONNX model object 102 | ''' 103 | 104 | model = onnx.load(onnx_path) 105 | onnx_processed_nodes = [] 106 | onnx_processed_inputs = [] 107 | onnx_processed_outputs = [] 108 | onnx_processed_initializers = [] 109 | 110 | reshape_node = [] 111 | 112 | for ind, node in enumerate(model.graph.node): 113 | if node.op_type == "BatchNormalization": 114 | for attr in node.attribute: 115 | if (attr.name == "spatial"): 116 | attr.i = 1 117 | onnx_processed_nodes.append(node) 118 | 119 | list_new_inp = [] 120 | list_new_init = [] 121 | for name_rs in reshape_node: 122 | new_inp = onnx.helper.make_tensor_value_info( 123 | name=name_rs, 124 | elem_type=onnx.TensorProto.INT64, 125 | shape=[4] 126 | ) 127 | new_init = onnx.helper.make_tensor( 128 | name=name_rs, 129 | data_type=onnx.TensorProto.INT64, 130 | dims=[4], 131 | vals=[1, -1, 1, 1] 132 | ) 133 | 134 | list_new_inp.append(new_inp) 135 | list_new_init.append(new_init) 136 | 137 | for k, inp in enumerate(model.graph.input): 138 | onnx_processed_inputs.extend([inp]) 139 | 140 | for k, outp in enumerate(model.graph.output): 141 | onnx_processed_outputs.extend([outp]) 142 | 143 | for k, init in enumerate(model.graph.initializer): 144 | onnx_processed_initializers.extend([init]) 145 | 146 | graph = onnx.helper.make_graph( 147 | onnx_processed_nodes, 148 | "mxnet_converted_model", 149 | onnx_processed_inputs, 150 | onnx_processed_outputs 151 | ) 152 | 153 | graph.initializer.extend(onnx_processed_initializers) 154 | 155 | # Check graph 156 | checker.check_graph(graph) 157 | 158 | onnx_model = onnx.helper.make_model(graph) 159 | 160 | if rewrite: 161 | with open(onnx_path, "wb") as file_handle: 162 | serialized = onnx_model.SerializeToString() 163 | file_handle.write(serialized) 164 | return onnx_model 165 | 166 | 167 | def convert_insight_model(symbol, params, onnx_path, input_shape=(1, 3, 112, 112)): 168 | 169 | output_dir = os.path.dirname(onnx_path) 170 | 171 | logging.info("Creating intermediate copy of source model...") 172 | 173 | intermediate_symbol = os.path.join(output_dir, 'symbol_fixed-symbol.json') 174 | intermediate_params = os.path.join(output_dir, 'symbol_fixed-0000.params') 175 | shutil.copy2(symbol, intermediate_symbol) 176 | shutil.copy2(params, intermediate_params) 177 | 178 | logging.info("Applying RetinaFace specific fixes to input MXNet model before conversion...") 179 | mxnet_model_fix(intermediate_symbol, intermediate_params, rewrite=True) 180 | 181 | logging.info("Exporting to ONNX...") 182 | onnx_mxnet.export_model(intermediate_symbol, intermediate_params, [input_shape], np.float32, onnx_path) 183 | 184 | logging.info("Applying ArcFace specific fixes to output ONNX") 185 | arcface_onnx_fixes(onnx_path, rewrite=True) 186 | 187 | logging.info("Removing initializer from inputs in ONNX model...") 188 | remove_initializer_from_input(onnx_path, onnx_path) 189 | 190 | logging.info("Removing intermediate *.symbol and *.params") 191 | os.remove(intermediate_symbol) 192 | os.remove(intermediate_params) 193 | -------------------------------------------------------------------------------- /utils/getter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from typing import List 4 | 5 | import onnx 6 | 7 | from insightface.model_zoo import get_model as get_model_orig 
8 | 9 | from face_detectors import * 10 | from face_processors import * 11 | 12 | from converters.insight2onnx import convert_insight_model 13 | from converters.reshape_onnx import reshape, reshape_onnx_input 14 | from converters.remove_initializer_from_input import remove_initializer_from_input 15 | from utils.helpers import prepare_folders 16 | from utils.download import download 17 | from utils.model_store import get_model_file 18 | 19 | from configs import Configs 20 | 21 | from exec_backends import onnxrt_backend as onnx_backend 22 | from exec_backends import triton_backend as triton_backend 23 | 24 | # Since TensorRT and PyCUDA are optional dependencies it might be not available 25 | # try: 26 | from exec_backends import trt_backend 27 | from converters.onnx_to_trt import convert_onnx 28 | # except: 29 | # trt_backend = None 30 | # convert_onnx = None 31 | 32 | # Map model names to corresponding functions 33 | models = { 34 | 'arcface_r100_v1': arcface_r100_v1, 35 | 'r50-arcface-msfdrop75': r50_arcface_msfdrop75, 36 | 'r100-arcface-msfdrop75': r100_arcface_msfdrop75, 37 | 'glint360k_r100FC_1.0': glint360k_r100FC_1_0, 38 | 'glint360k_r100FC_0.1': glint360k_r100FC_0_1, 39 | 'genderage_v1': genderage_v1, 40 | 'retinaface_r50_v1': retinaface_r50_v1, 41 | 'retinaface_mnet025_v1': retinaface_mnet025_v1, 42 | 'retinaface_mnet025_v2': retinaface_mnet025_v2, 43 | } 44 | 45 | 46 | def prepare_backend(model_name, backend_name, im_size: List[int] = None, 47 | max_batch_size: int = 1, 48 | force_fp16: bool = False, 49 | download_model: bool = True, 50 | config: Configs = None): 51 | """ 52 | Check if ONNX, MXNet and TensorRT models exist and download/create them otherwise. 53 | 54 | :param model_name: Name of required model. Must be one of keys in `models` dict. 55 | :param backend_name: Name of inference backend. (onnx, trt) 56 | :param im_size: Desired maximum size of image in W,H form. Will be overridden if model doesn't support reshaping. 57 | :param max_batch_size: Maximum batch size for inference, currently supported for ArcFace model only. 58 | :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful. TensorRT specific. 59 | :param download_model: Download MXNet or ONNX model if it not exist. 
60 | :param config: Configs class instance 61 | :return: ONNX model serialized to string, or path to TensorRT engine 62 | """ 63 | 64 | prepare_folders([config.mxnet_models_dir, config.onnx_models_dir, config.trt_engines_dir]) 65 | 66 | in_package = config.in_official_package(model_name) 67 | reshape_allowed = config.mxnet_models[model_name].get('reshape') 68 | shape = config.get_shape(model_name) 69 | if reshape_allowed is True and im_size is not None: 70 | shape = (1, 3) + tuple(im_size)[::-1] 71 | 72 | mxnet_symbol, mxnet_params = config.get_mxnet_model_paths(model_name) 73 | onnx_dir, onnx_path = config.build_model_paths(model_name, 'onnx') 74 | trt_dir, trt_path = config.build_model_paths(model_name, 'plan') 75 | 76 | if not os.path.exists(onnx_path) and download_model is True: 77 | prepare_folders([onnx_dir]) 78 | if in_package: 79 | print(f"Downloading model: {model_name}...") 80 | get_model_file(model_name, root=config.mxnet_models_dir) 81 | convert_insight_model(mxnet_symbol, mxnet_params, onnx_path, shape) 82 | else: 83 | dl_link = config.get_dl_link(model_name) 84 | if dl_link: 85 | download(config.get_dl_link(model_name), onnx_path) 86 | remove_initializer_from_input(onnx_path, onnx_path) 87 | elif os.path.exists(mxnet_symbol) and os.path.exists(mxnet_params): 88 | convert_insight_model(mxnet_symbol, mxnet_params, onnx_path, shape) 89 | else: 90 | logging.error("You have requested non standard model, but haven't provided download link or " 91 | "MXNet model. Place model to proper folder and change configs.py accordingly.") 92 | 93 | if backend_name == 'onnx': 94 | model = onnx.load(onnx_path) 95 | if reshape_allowed is True: 96 | logging.info(f'Reshaping ONNX inputs to: {shape}') 97 | model = reshape(model, h=im_size[1], w=im_size[0]) 98 | return model.SerializeToString() 99 | 100 | if backend_name == "trt": 101 | if reshape_allowed is True: 102 | trt_path = trt_path.replace('.plan', f'_{shape[3]}_{shape[2]}.plan') 103 | if max_batch_size > 1: 104 | trt_path = trt_path.replace('.plan', f'_batch{max_batch_size}.plan') 105 | if force_fp16 is True: 106 | trt_path = trt_path.replace('.plan', '_fp16.plan') 107 | 108 | if not os.path.exists(trt_path): 109 | prepare_folders([trt_dir]) 110 | if reshape_allowed is True or max_batch_size!=1: 111 | logging.info(f'Reshaping ONNX inputs to: {shape}') 112 | model = onnx.load(onnx_path) 113 | onnx_batch_size = 1 114 | if max_batch_size != 1: 115 | onnx_batch_size = -1 116 | reshaped = reshape(model, n=onnx_batch_size, h=shape[2], w=shape[3]) 117 | temp_onnx_model = reshaped.SerializeToString() 118 | else: 119 | temp_onnx_model = onnx_path 120 | 121 | logging.info(f"Building TRT engine for {model_name}...") 122 | convert_onnx(temp_onnx_model, 123 | engine_file_path=trt_path, 124 | max_batch_size=max_batch_size, 125 | force_fp16=force_fp16) 126 | logging.info('Building TRT engine complete!') 127 | return trt_path 128 | 129 | 130 | def get_model(model_name: str, backend_name: str, im_size: List[int] = None, max_batch_size: int = 1, force_fp16: bool = False, 131 | root_dir: str = "/models", download_model: bool = True, **kwargs): 132 | """ 133 | Returns inference backend instance with loaded model. 134 | 135 | :param model_name: Name of required model. Must be one of keys in `models` dict. 136 | :param backend_name: Name of inference backend. (onnx, mxnet, trt) 137 | :param im_size: Desired maximum size of image in W,H form. Will be overridden if model doesn't support reshaping. 
138 | :param max_batch_size: Maximum batch size for inference, currently supported for ArcFace model only. 139 | :param force_fp16: Force use of FP16 precision, even if device doesn't support it. Be careful. TensorRT specific. 140 | :param root_dir: Root directory where models will be stored. 141 | :param download_model: Download MXNet or ONNX model. Might be disabled if TRT model was already created. 142 | :param kwargs: Placeholder. 143 | :return: Inference backend with loaded model. 144 | """ 145 | 146 | config = Configs(models_dir=root_dir) 147 | 148 | backends = { 149 | 'onnx': onnx_backend, 150 | 'trt': trt_backend, 151 | 'mxnet': 'mxnet', 152 | 'triton': triton_backend 153 | } 154 | 155 | if backend_name not in backends: 156 | logging.error(f"Unknown backend '{backend_name}' specified. Exiting.") 157 | exit(1) 158 | 159 | if model_name not in models: 160 | logging.error(f"Unknown model {model_name} specified." 161 | f" Please select one of the following:\n" 162 | f"{', '.join(list(models.keys()))}") 163 | exit(1) 164 | 165 | # Keep original InsightFace package available for a while for testing purposes. 166 | if backend_name == 'mxnet': 167 | return get_model_orig(model_name, root=config.mxnet_models_dir) 168 | 169 | backend = backends[backend_name] 170 | 171 | model_path = prepare_backend(model_name, backend_name, im_size=im_size, max_batch_size=max_batch_size, config=config, force_fp16=force_fp16,\ 172 | download_model=download_model) 173 | 174 | outputs = config.get_outputs_order(model_name) 175 | model = models[model_name](model_path=model_path, backend=backend, outputs=outputs) 176 | return model 177 | -------------------------------------------------------------------------------- /sample_retinaface_to_trt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import argparse 5 | import time 6 | import numpy as np 7 | from utils.image import ImageData 8 | from detectors.retinaface import * 9 | from exec_backends.trt_loader import TrtModel 10 | 11 | class TRT_Retinaface(object): 12 | def __init__(self, model_path, input_shape = (640, 640), batch_size = 4): 13 | print('[INFO] Create ONNX runtime') 14 | self.model = TrtModel(model_path) 15 | self.input_shape = input_shape 16 | self.rac = 'net3' 17 | self.masks = False 18 | self.batch_size = batch_size 19 | for size in self.input_shape: 20 | if size % 32 != 0: 21 | raise ValueError("Current support only size which is multiple of 32 for compabilities") 22 | 23 | def prepare(self, nms: float = 0.4, **kwargs): 24 | self.nms_threshold = nms 25 | self.landmark_std = 1.0 26 | _ratio = (1.,) 27 | fmc = 3 28 | if self.rac == 'net3': 29 | _ratio = (1.,) 30 | elif self.rac == 'net3l': 31 | _ratio = (1.,) 32 | self.landmark_std = 0.2 33 | else: 34 | assert False, 'rac setting error %s' % self.rac 35 | 36 | if fmc == 3: 37 | self._feat_stride_fpn = [32, 16, 8] 38 | self.anchor_cfg = { 39 | '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 40 | '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 41 | '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 42 | } 43 | self.use_landmarks = True 44 | self.fpn_keys = [] 45 | 46 | for s in self._feat_stride_fpn: 47 | self.fpn_keys.append('stride%s' % s) 48 | 49 | self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(cfg=self.anchor_cfg))) 50 | for k in self._anchors_fpn: 51 | v = 
self._anchors_fpn[k].astype(np.float32) 52 | self._anchors_fpn[k] = v 53 | 54 | self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()])) 55 | # Create anchor 56 | print('Rebuild anchor') 57 | self.anchor_plane_cache = {} 58 | for _idx, s in enumerate(self._feat_stride_fpn): 59 | stride = int(s) 60 | width = int(self.input_shape[0]/stride) 61 | height = int(self.input_shape[1]/stride) 62 | K = width * height 63 | A = self._num_anchors['stride%s' % s] 64 | key = (height, width, stride) 65 | anchors_fpn = self._anchors_fpn['stride%s' % s] 66 | anchors = anchors_plane(height, width, stride, anchors_fpn) 67 | self.anchor_plane_cache[key] = np.tile(anchors.reshape((K * A, 4)), (self.batch_size, 1, 1)) 68 | self.warm_up() 69 | 70 | def warm_up(self): 71 | ''' 72 | Warm up NMS jit 73 | ''' 74 | print('Warming up NMS jit...') 75 | tik = time.time() 76 | image = cv2.imread('test_images/lumia.jpg', cv2.IMREAD_COLOR) 77 | image = ImageData(image, self.input_shape) 78 | image.resize_image(mode='pad') 79 | im = cv2.cvtColor(image.transformed_image, cv2.COLOR_BGR2RGB) 80 | im = np.transpose(im, (2, 0, 1)) 81 | input_blob = np.tile(im, (self.batch_size, 1, 1, 1)).astype(np.float32) 82 | _ = self.detect(input_blob, threshold=0.1) 83 | tok = time.time() 84 | print('Warming up complete, time cost = {}'.format(tok - tik)) 85 | 86 | def detect(self, batch_img, threshold: float = 0.6): 87 | t0 = time.time() 88 | batch_size = len(batch_img) 89 | assert batch_size == self.batch_size, "Model define with batch_size = {}, your input: {}".format(self.batch_size, batch_size) 90 | net_out = self.model.run(batch_img) 91 | # Sort 92 | indices = [4, 0, 1, 5, 2, 3, 6, 7, 8] 93 | sorted_net_out = [net_out[i] for i in indices] 94 | # print('Len net_out: {}'.format(net_out[0].shape)) 95 | t1 = time.time() 96 | print('Infer: ', t1-t0) 97 | # print(net_out[0].shape) 98 | t0 = time.time() 99 | result = self.postprocess_batch(sorted_net_out, threshold, batch_size = batch_size) 100 | t1 = time.time() 101 | print('Post: ', t1-t0) 102 | return result 103 | 104 | def postprocess_batch(self, net_out, threshold, batch_size): 105 | ''' 106 | Post process for batch-inference 107 | ''' 108 | proposals_list_batch = {i : [] for i in range(batch_size)} 109 | scores_list_batch = {i : [] for i in range(batch_size)} 110 | landmarks_list_batch = {i : [] for i in range(batch_size)} 111 | t0 = time.time() 112 | # Foreach FPN layer 113 | for _idx, s in enumerate(self._feat_stride_fpn): 114 | _key = 'stride%s' % s 115 | stride = int(s) 116 | if self.use_landmarks: 117 | idx = _idx * 3 118 | else: 119 | idx = _idx * 2 120 | if self.masks: 121 | idx = _idx * 4 122 | 123 | A = self._num_anchors['stride%s' % s] 124 | 125 | scores_batch = net_out[idx] 126 | scores_batch = scores_batch[:, A:, :, :] 127 | idx += 1 128 | bbox_deltas_batch = net_out[idx] 129 | height, width = bbox_deltas_batch.shape[2], bbox_deltas_batch.shape[3] 130 | 131 | # K = height * width 132 | key = (height, width, stride) 133 | anchors_batch = self.anchor_plane_cache[key] 134 | 135 | scores_batch = clip_pad(scores_batch, (height, width)) 136 | scores_batch = scores_batch.transpose((0, 2, 3, 1)).reshape((batch_size, -1, 1)) 137 | 138 | bbox_deltas_batch = clip_pad(bbox_deltas_batch, (height, width)) 139 | bbox_deltas_batch = bbox_deltas_batch.transpose((0, 2, 3, 1)) 140 | bbox_pred_len = bbox_deltas_batch.shape[3] // A 141 | bbox_deltas_batch = bbox_deltas_batch.reshape((batch_size, -1, bbox_pred_len)) 142 | proposals_batch = 
bbox_pred_batch(anchors_batch, bbox_deltas_batch) 143 | 144 | 145 | # Get proposal 146 | scores_batch = scores_batch.reshape((batch_size, -1)) 147 | order_batch = np.argwhere(scores_batch >= threshold) 148 | # print(_key, order_batch) 149 | # Get landmark 150 | if self.use_landmarks: 151 | idx += 1 152 | landmark_deltas_batch = net_out[idx] 153 | landmark_deltas_batch = clip_pad(landmark_deltas_batch, (height, width)) 154 | landmark_pred_len = landmark_deltas_batch.shape[1] // A 155 | landmark_deltas_batch = landmark_deltas_batch.transpose((0, 2, 3, 1)).reshape((batch_size, -1, 5, landmark_pred_len // 5)) 156 | landmark_deltas_batch *= self.landmark_std 157 | landmarks = landmark_pred_batch(anchors_batch, landmark_deltas_batch) 158 | 159 | # Foreach image 160 | for ib in range(batch_size): 161 | order = [id[1] for id in order_batch if id[0] == ib] 162 | proposals_list_batch[ib].append(proposals_batch[ib, order]) 163 | scores_list_batch[ib].append(scores_batch[ib, order].reshape((-1, 1))) 164 | if self.use_landmarks: 165 | landmarks_list_batch[ib].append(landmarks[ib, order]) 166 | 167 | # Foreach image 168 | list_det = [] 169 | list_landmarks = [] 170 | for ib in range(batch_size): 171 | proposals_list = proposals_list_batch[ib] 172 | scores_list = scores_list_batch[ib] 173 | landmarks_list = landmarks_list_batch[ib] 174 | 175 | proposals = np.vstack(proposals_list) 176 | landmarks = None 177 | if proposals.shape[0] == 0: 178 | if self.use_landmarks: 179 | landmarks = np.zeros((0, 5, 2)) 180 | list_det.append(np.zeros((0, 5))) 181 | list_landmarks.append(landmarks) 182 | continue 183 | 184 | scores = np.vstack(scores_list) 185 | scores_ravel = scores.ravel() 186 | order = scores_ravel.argsort()[::-1] 187 | proposals = proposals[order, :] 188 | scores = scores[order] 189 | 190 | if self.use_landmarks: 191 | landmarks = np.vstack(landmarks_list) 192 | landmarks = landmarks[order].astype(np.float32, copy=False) 193 | 194 | pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False) 195 | keep = nms(pre_det, thresh=self.nms_threshold) 196 | det = np.hstack((pre_det, proposals[:, 4:])) 197 | det = det[keep, :] 198 | if self.use_landmarks: 199 | landmarks = landmarks[keep] 200 | t1 = time.time() 201 | list_det.append(det) 202 | list_landmarks.append(landmarks) 203 | return list_det, list_landmarks 204 | 205 | def read_image(im_path): 206 | image = cv2.imread(im_path, cv2.IMREAD_COLOR) 207 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 208 | image = ImageData(image, (640, 640)) 209 | image.resize_image(mode='pad') 210 | return image 211 | 212 | parser = argparse.ArgumentParser(description='Port Face Detector from ONNX to TensorRT-Runtime.') 213 | parser.add_argument('--model_path', type=str, 214 | help='Path to ONNX model') 215 | parser.add_argument('--batch_size', type=int, default=8, 216 | help='Batch size') 217 | parser.add_argument('--im_width', type=int, default=640, 218 | help='Input image width') 219 | parser.add_argument('--im_height', type=int, default=640, 220 | help='Input image height') 221 | parser.add_argument('--tmp_folder', type=str, default="tmp", 222 | help='Path to save visualize image') 223 | args = parser.parse_args() 224 | 225 | batch_size = args.batch_size 226 | TRT_FILE_PATH = args.model_path 227 | print('[INFO] Load model') 228 | trt_face = TRT_Retinaface(TRT_FILE_PATH, batch_size = batch_size) 229 | trt_face.prepare() 230 | 231 | print('[INFO] Test model') 232 | print('- Preprocess input') 233 | tt0 = time.time() 234 | img_path = 
'test_images/lumia.jpg' 235 | image = read_image(img_path) 236 | inp = np.array([image.transformed_image for i in range(batch_size)], dtype = np.float32) 237 | inp = np.transpose(inp, (0, 3, 1, 2)) 238 | tt1 = time.time() 239 | 240 | print(f"Preparing image took: {tt1 - tt0}") 241 | print('- Perform inference') 242 | tik = time.time() 243 | list_det, list_landmarks = trt_face.detect(inp) 244 | tok = time.time() - tik 245 | 246 | print('- Visualize') 247 | # Visualize 248 | for i in range(1): # Foreach image 249 | vis_im = cv2.cvtColor(image.transformed_image.copy(), cv2.COLOR_RGB2BGR) 250 | for det in list_det[i]: 251 | if det[4] > 0.6: 252 | color = (0, 255, 0) 253 | else: 254 | color = (0, 0, 255) 255 | 256 | pt1 = tuple(map(int, det[0:2])) 257 | pt2 = tuple(map(int, det[2:4])) 258 | cv2.rectangle(vis_im, pt1, pt2, color, 1) 259 | cv2.imwrite(os.path.join(args.tmp_folder, 'res_{}.jpg'.format(i)), vis_im) 260 | print('[INFO] Port to TensorRT completed!') -------------------------------------------------------------------------------- /sample_retinaface_to_onnx.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import onnx 5 | import time 6 | import argparse 7 | import onnxruntime 8 | import numpy as np 9 | from utils.image import ImageData 10 | from detectors.retinaface import * 11 | from converters.insight2onnx import convert_insight_model 12 | 13 | class ONNX_Retinaface(object): 14 | def __init__(self, model_path, input_shape = (640, 640), batch_size = 4): 15 | print('[INFO] Create ONNX runtime') 16 | self.ort_session = onnxruntime.InferenceSession(model_path) 17 | self.input_shape = input_shape 18 | self.rac = 'net3' 19 | self.masks = False 20 | self.batch_size = batch_size 21 | for size in self.input_shape: 22 | if size % 32 != 0: 23 | raise ValueError("Current support only size which is multiple of 32 for compabilities") 24 | 25 | def prepare(self, nms: float = 0.4, **kwargs): 26 | self.nms_threshold = nms 27 | self.landmark_std = 1.0 28 | _ratio = (1.,) 29 | fmc = 3 30 | if self.rac == 'net3': 31 | _ratio = (1.,) 32 | elif self.rac == 'net3l': 33 | _ratio = (1.,) 34 | self.landmark_std = 0.2 35 | else: 36 | assert False, 'rac setting error %s' % self.rac 37 | 38 | if fmc == 3: 39 | self._feat_stride_fpn = [32, 16, 8] 40 | self.anchor_cfg = { 41 | '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 42 | '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 43 | '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 44 | } 45 | self.use_landmarks = True 46 | self.fpn_keys = [] 47 | 48 | for s in self._feat_stride_fpn: 49 | self.fpn_keys.append('stride%s' % s) 50 | 51 | self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(cfg=self.anchor_cfg))) 52 | for k in self._anchors_fpn: 53 | v = self._anchors_fpn[k].astype(np.float32) 54 | self._anchors_fpn[k] = v 55 | 56 | self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()])) 57 | # Create anchor 58 | print('Rebuild anchor') 59 | self.anchor_plane_cache = {} 60 | for _idx, s in enumerate(self._feat_stride_fpn): 61 | stride = int(s) 62 | width = int(self.input_shape[0]/stride) 63 | height = int(self.input_shape[1]/stride) 64 | K = width * height 65 | A = self._num_anchors['stride%s' % s] 66 | key = (height, width, stride) 67 | anchors_fpn = self._anchors_fpn['stride%s' % s] 68 | anchors = 
anchors_plane(height, width, stride, anchors_fpn) 69 | self.anchor_plane_cache[key] = np.tile(anchors.reshape((K * A, 4)), (self.batch_size, 1, 1)) 70 | self.warm_up() 71 | 72 | def warm_up(self): 73 | ''' 74 | Warm up NMS jit 75 | ''' 76 | print('Warming up NMS jit...') 77 | tik = time.time() 78 | image = cv2.imread('test_images/lumia.jpg', cv2.IMREAD_COLOR) 79 | image = ImageData(image, self.input_shape) 80 | image.resize_image(mode='pad') 81 | im = cv2.cvtColor(image.transformed_image, cv2.COLOR_BGR2RGB) 82 | im = np.transpose(im, (2, 0, 1)) 83 | input_blob = np.tile(im, (self.batch_size, 1, 1, 1)).astype(np.float32) 84 | _ = self.detect(input_blob, threshold=0.1) 85 | tok = time.time() 86 | print('Warming up complete, time cost = {}'.format(tok - tik)) 87 | 88 | def detect(self, batch_img, threshold: float = 0.6): 89 | t0 = time.time() 90 | batch_size = len(batch_img) 91 | assert batch_size == self.batch_size, "Model define with batch_size = {}, your input: {}".format(self.batch_size, batch_size) 92 | ort_inputs = {self.ort_session.get_inputs()[0].name: batch_img} 93 | net_out = self.ort_session.run(None, ort_inputs) 94 | # print('Len net_out: {}'.format(net_out[0].shape)) 95 | t1 = time.time() 96 | print('Infer: ', t1-t0) 97 | # print(net_out[0].shape) 98 | t0 = time.time() 99 | result = self.postprocess(net_out, threshold, batch_size = batch_size) 100 | t1 = time.time() 101 | print('Post: ', t1-t0) 102 | return result 103 | 104 | def postprocess(self, net_out, threshold, batch_size): 105 | ''' 106 | Post process for batch-inference 107 | ''' 108 | proposals_list_batch = {i : [] for i in range(batch_size)} 109 | scores_list_batch = {i : [] for i in range(batch_size)} 110 | landmarks_list_batch = {i : [] for i in range(batch_size)} 111 | t0 = time.time() 112 | # Foreach FPN layer 113 | for _idx, s in enumerate(self._feat_stride_fpn): 114 | _key = 'stride%s' % s 115 | stride = int(s) 116 | if self.use_landmarks: 117 | idx = _idx * 3 118 | else: 119 | idx = _idx * 2 120 | if self.masks: 121 | idx = _idx * 4 122 | 123 | A = self._num_anchors['stride%s' % s] 124 | 125 | scores_batch = net_out[idx] 126 | scores_batch = scores_batch[:, A:, :, :] 127 | idx += 1 128 | bbox_deltas_batch = net_out[idx] 129 | height, width = bbox_deltas_batch.shape[2], bbox_deltas_batch.shape[3] 130 | 131 | # K = height * width 132 | key = (height, width, stride) 133 | anchors_batch = self.anchor_plane_cache[key] 134 | 135 | scores_batch = clip_pad(scores_batch, (height, width)) 136 | scores_batch = scores_batch.transpose((0, 2, 3, 1)).reshape((batch_size, -1, 1)) 137 | 138 | bbox_deltas_batch = clip_pad(bbox_deltas_batch, (height, width)) 139 | bbox_deltas_batch = bbox_deltas_batch.transpose((0, 2, 3, 1)) 140 | bbox_pred_len = bbox_deltas_batch.shape[3] // A 141 | bbox_deltas_batch = bbox_deltas_batch.reshape((batch_size, -1, bbox_pred_len)) 142 | proposals_batch = bbox_pred_batch(anchors_batch, bbox_deltas_batch) 143 | 144 | 145 | # Get proposal 146 | scores_batch = scores_batch.reshape((batch_size, -1)) 147 | order_batch = np.argwhere(scores_batch >= threshold) 148 | # print(_key, order_batch) 149 | # Get landmark 150 | if self.use_landmarks: 151 | idx += 1 152 | landmark_deltas_batch = net_out[idx] 153 | landmark_deltas_batch = clip_pad(landmark_deltas_batch, (height, width)) 154 | landmark_pred_len = landmark_deltas_batch.shape[1] // A 155 | landmark_deltas_batch = landmark_deltas_batch.transpose((0, 2, 3, 1)).reshape((batch_size, -1, 5, landmark_pred_len // 5)) 156 | landmark_deltas_batch *= 
self.landmark_std 157 | landmarks = landmark_pred_batch(anchors_batch, landmark_deltas_batch) 158 | 159 | # Foreach image 160 | for ib in range(batch_size): 161 | order = [id[1] for id in order_batch if id[0] == ib] 162 | proposals_list_batch[ib].append(proposals_batch[ib, order]) 163 | scores_list_batch[ib].append(scores_batch[ib, order].reshape((-1, 1))) 164 | if self.use_landmarks: 165 | landmarks_list_batch[ib].append(landmarks[ib, order]) 166 | 167 | # Foreach image 168 | list_det = [] 169 | list_landmarks = [] 170 | for ib in range(batch_size): 171 | proposals_list = proposals_list_batch[ib] 172 | scores_list = scores_list_batch[ib] 173 | landmarks_list = landmarks_list_batch[ib] 174 | 175 | proposals = np.vstack(proposals_list) 176 | landmarks = None 177 | if proposals.shape[0] == 0: 178 | if self.use_landmarks: 179 | landmarks = np.zeros((0, 5, 2)) 180 | list_det.append(np.zeros((0, 5))) 181 | list_landmarks.append(landmarks) 182 | continue 183 | 184 | scores = np.vstack(scores_list) 185 | scores_ravel = scores.ravel() 186 | order = scores_ravel.argsort()[::-1] 187 | proposals = proposals[order, :] 188 | scores = scores[order] 189 | 190 | if self.use_landmarks: 191 | landmarks = np.vstack(landmarks_list) 192 | landmarks = landmarks[order].astype(np.float32, copy=False) 193 | 194 | pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False) 195 | keep = nms(pre_det, thresh=self.nms_threshold) 196 | det = np.hstack((pre_det, proposals[:, 4:])) 197 | det = det[keep, :] 198 | if self.use_landmarks: 199 | landmarks = landmarks[keep] 200 | t1 = time.time() 201 | list_det.append(det) 202 | list_landmarks.append(landmarks) 203 | return list_det, list_landmarks 204 | 205 | def read_image(im_path): 206 | image = cv2.imread(im_path, cv2.IMREAD_COLOR) 207 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 208 | image = ImageData(image, (640, 640)) 209 | image.resize_image(mode='pad') 210 | return image 211 | 212 | parser = argparse.ArgumentParser(description='Port Face Detector from Mxnet to ONNX-Runtime.') 213 | parser.add_argument('--model_symbol', type=str, 214 | help='Path to model symbol') 215 | parser.add_argument('--model_params', type=str, 216 | help='Path to model params') 217 | parser.add_argument('--batch_size', type=int, default=8, 218 | help='Batch size') 219 | parser.add_argument('--im_width', type=int, default=640, 220 | help='Input image width') 221 | parser.add_argument('--im_height', type=int, default=640, 222 | help='Input image height') 223 | parser.add_argument('--tmp_folder', type=str, default="tmp", 224 | help='Path to save visualize image') 225 | parser.add_argument('--onnx_path', type=str, 226 | help='Path to output ONNX model') 227 | args = parser.parse_args() 228 | if not os.path.exists(args.tmp_folder): 229 | os.makedirs(args.tmp_folder) 230 | batch_size = args.batch_size 231 | symbol = args.model_symbol 232 | params = args.model_params 233 | ONNX_FILE_PATH = args.onnx_path 234 | im_width = args.im_width 235 | im_height = args.im_height 236 | print('[INFO] Convert model') 237 | convert_insight_model(symbol, params, ONNX_FILE_PATH, input_shape=(batch_size, 3, im_width, im_height)) 238 | print('[INFO] Test model') 239 | ort_session = onnxruntime.InferenceSession(ONNX_FILE_PATH) 240 | onnx_face = ONNX_Retinaface(ONNX_FILE_PATH, batch_size = batch_size) 241 | onnx_face.prepare() 242 | 243 | print('- Preprocess input') 244 | tt0 = time.time() 245 | img_path = 'test_images/lumia.jpg' 246 | image = read_image(img_path) 247 | inp = 
np.array([image.transformed_image for i in range(batch_size)], dtype = np.float32) 248 | inp = np.transpose(inp, (0, 3, 1, 2)) 249 | tt1 = time.time() 250 | print(f"Preparing image took: {tt1 - tt0}") 251 | 252 | print('- Perform inference') 253 | tik = time.time() 254 | list_det, list_landmarks = onnx_face.detect(inp) 255 | tok = time.time() - tik 256 | 257 | print('- Visualize') 258 | # Visualize 259 | for i in range(1): # Foreach image 260 | vis_im = cv2.cvtColor(image.transformed_image.copy(), cv2.COLOR_RGB2BGR) 261 | for det in list_det[i]: 262 | if det[4] > 0.6: 263 | color = (0, 255, 0) 264 | else: 265 | color = (0, 0, 255) 266 | pt1 = tuple(map(int, det[0:2])) 267 | pt2 = tuple(map(int, det[2:4])) 268 | cv2.rectangle(vis_im, pt1, pt2, color, 1) 269 | cv2.imwrite(os.path.join(args.tmp_folder, 'res_{}.jpg'.format(i)), vis_im) 270 | print('[INFO] Port to ONXX completed! Please read document if u want to port to TensorRT') -------------------------------------------------------------------------------- /converters/mx2onnx_conv/onnx/mx2onnx/export_onnx.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | # Based on 19 | # https://github.com/NVIDIA/mxnet_to_onnx/blob/master/mx2onnx_converter/mx2onnx_converter.py# 20 | # Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 21 | # 22 | # Redistribution and use in source and binary forms, with or without 23 | # modification, are permitted provided that the following conditions 24 | # are met: 25 | # * Redistributions of source code must retain the above copyright 26 | # notice, this list of conditions and the following disclaimer. 27 | # * Redistributions in binary form must reproduce the above copyright 28 | # notice, this list of conditions and the following disclaimer in the 29 | # documentation and/or other materials provided with the distribution. 30 | # * Neither the name of NVIDIA CORPORATION nor the names of its 31 | # contributors may be used to endorse or promote products derived 32 | # from this software without specific prior written permission. 33 | # 34 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 35 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 37 | # PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 38 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 39 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 40 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 41 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 42 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | 46 | # coding: utf-8 47 | # pylint: disable=invalid-name,too-many-locals,no-self-use,too-many-arguments, 48 | # pylint: disable=maybe-no-member,too-many-nested-blocks 49 | """MXNet to ONNX graph converter functions""" 50 | import logging 51 | import json 52 | 53 | from mxnet import ndarray as nd 54 | 55 | 56 | class MXNetGraph(object): 57 | """Class to convert MXNet to ONNX graph""" 58 | registry_ = {} 59 | input_output_maps_ = {} 60 | 61 | def __init__(self): 62 | # topologically sorted nodes 63 | self.nodes = [] 64 | self.input_tensors = [] 65 | self.output_tensors = [] 66 | 67 | @staticmethod 68 | def register(op_name): 69 | """Register operators""" 70 | def wrapper(func): 71 | """Helper function to map functions""" 72 | try: 73 | import onnx as _ 74 | MXNetGraph.registry_[op_name] = func 75 | except ImportError: 76 | pass 77 | return func 78 | 79 | return wrapper 80 | 81 | @staticmethod 82 | def convert_layer(node, **kwargs): 83 | """Convert MXNet layer to ONNX""" 84 | op = str(node["op"]) 85 | if op not in MXNetGraph.registry_: 86 | raise AttributeError("No conversion function registered for op type %s yet." % op) 87 | convert_func = MXNetGraph.registry_[op] 88 | return convert_func(node, **kwargs) 89 | 90 | @staticmethod 91 | def split_params(sym, params): 92 | """Helper function to split params dictionary into args and aux params 93 | 94 | Parameters 95 | ---------- 96 | sym : :class:`~mxnet.symbol.Symbol` 97 | MXNet symbol object 98 | params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray` 99 | Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format 100 | 101 | Returns 102 | ------- 103 | arg_params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray` 104 | Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format 105 | aux_params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray` 106 | Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format 107 | """ 108 | arg_params = {} 109 | aux_params = {} 110 | for args in sym.list_arguments(): 111 | if args in params: 112 | arg_params.update({args: nd.array(params[args])}) 113 | for aux in sym.list_auxiliary_states(): 114 | if aux in params: 115 | aux_params.update({aux: nd.array(params[aux])}) 116 | return arg_params, aux_params 117 | 118 | @staticmethod 119 | def get_outputs(sym, params, in_shape, in_label, verbose=True): 120 | """ Infer output shapes and return dictionary of output name to shape 121 | 122 | :param :class:`~mxnet.symbol.Symbol` sym: symbol to perform infer shape on 123 | :param dic of (str, nd.NDArray) params: 124 | :param list of tuple(int, ...) in_shape: list of all input shapes 125 | :param in_label: name of label typically used in loss that may be left in graph. 
This name is 126 | removed from list of inputs required by symbol 127 | :param verbose: If false, info logging messages are deactivated 128 | :return: dictionary of output name to shape 129 | :rtype: dict of (str, tuple(int, ...)) 130 | """ 131 | # remove any input listed in params from sym.list_inputs() and bind them to the input shapes provided 132 | # by user. Also remove in_label, which is the name of the label symbol that may have been used 133 | # as the label for loss during training. 134 | inputs = {n: tuple(s) for n, s in zip([n for n in sym.list_inputs() if n not in params and n != in_label], 135 | in_shape)} 136 | # Add params and their shape to list of inputs 137 | inputs.update({n: v.shape for n, v in params.items() if n in sym.list_inputs()}) 138 | # Provide input data as well as input params to infer_shape() 139 | _, out_shapes, _ = sym.infer_shape(**inputs) 140 | 141 | out_names = list() 142 | for name in sym.list_outputs(): 143 | if name.endswith('_output'): 144 | out_names.append(name[:-len('_output')]) 145 | else: 146 | if verbose: 147 | logging.info("output '%s' does not end with '_output'", name) 148 | out_names.append(name) 149 | 150 | assert len(out_shapes) == len(out_names) 151 | # bind output shapes with output names 152 | graph_outputs = {n: s for n, s in zip(out_names, out_shapes)} 153 | 154 | return graph_outputs 155 | 156 | @staticmethod 157 | def convert_weights_to_numpy(weights_dict): 158 | """Convert weights to numpy""" 159 | return dict([(k.replace("arg:", "").replace("aux:", ""), v.asnumpy()) 160 | for k, v in weights_dict.items()]) 161 | 162 | def create_onnx_graph_proto(self, sym, params, in_shape, in_type, verbose=False): 163 | """Convert MXNet graph to ONNX graph 164 | 165 | Parameters 166 | ---------- 167 | sym : :class:`~mxnet.symbol.Symbol` 168 | MXNet symbol object 169 | params : dict of ``str`` to :class:`~mxnet.ndarray.NDArray` 170 | Dict of converted parameters stored in ``mxnet.ndarray.NDArray`` format 171 | in_shape : List of tuple 172 | Input shape of the model e.g [(1,3,224,224)] 173 | in_type : data type 174 | Input data type e.g. np.float32 175 | verbose : Boolean 176 | If true will print logs of the model conversion 177 | 178 | Returns 179 | ------- 180 | graph : GraphProto 181 | ONNX graph 182 | """ 183 | try: 184 | from onnx import (checker, helper, NodeProto, ValueInfoProto, TensorProto) 185 | from onnx.helper import make_tensor_value_info 186 | except ImportError: 187 | raise ImportError("Onnx and protobuf need to be installed. " 188 | + "Instructions to install - https://github.com/onnx/onnx") 189 | 190 | # When MXNet model is saved to json file , MXNet adds a node for label. 191 | # The name of this node is, name of the last node + "_label" ( i.e if last node 192 | # name is "Softmax", this node will have a name "Softmax_label". Also, the new node 193 | # will always be second last node in the json graph. 194 | # Deriving the output_label name. 
195 | output_label = sym.get_internals()[len(sym.get_internals()) - 1].name + "_label" 196 | 197 | weights = MXNetGraph.convert_weights_to_numpy(params) 198 | 199 | mx_graph = json.loads(sym.tojson())["nodes"] 200 | 201 | initializer = [] 202 | all_processed_nodes = [] 203 | onnx_processed_nodes = [] 204 | onnx_processed_inputs = [] 205 | onnx_processed_outputs = [] 206 | index_lookup = [] 207 | 208 | # Determine output and internal shapes 209 | graph_outputs = MXNetGraph.get_outputs(sym, params, in_shape, output_label) 210 | graph_shapes = MXNetGraph.get_outputs(sym.get_internals(), params, in_shape, output_label, verbose=False) 211 | 212 | graph_input_idx = 0 213 | for idx, node in enumerate(mx_graph): 214 | op = node["op"] 215 | name = node["name"] 216 | if verbose: 217 | logging.info("Converting idx: %d, op: %s, name: %s", idx, op, name) 218 | 219 | # A node is an input node if its op_name is "null" and is not 220 | # in params dict 221 | if op == "null" and name not in params: 222 | # Handling graph input 223 | 224 | # Skipping output_label node, as this node is not part of graph 225 | # Refer "output_label" assignment above for more details. 226 | if name == output_label: 227 | continue 228 | converted = MXNetGraph.convert_layer( 229 | node, 230 | is_input=True, 231 | mx_graph=mx_graph, 232 | weights=weights, 233 | in_shape=in_shape[graph_input_idx], 234 | in_type=in_type, 235 | proc_nodes=all_processed_nodes, 236 | graph_shapes=graph_shapes, 237 | initializer=initializer, 238 | index_lookup=index_lookup) 239 | graph_input_idx += 1 240 | 241 | else: 242 | # Handling graph layers 243 | converted = MXNetGraph.convert_layer( 244 | node, 245 | is_input=False, 246 | mx_graph=mx_graph, 247 | weights=weights, 248 | in_shape=in_shape, 249 | in_type=in_type, 250 | proc_nodes=all_processed_nodes, 251 | graph_shapes=graph_shapes, 252 | initializer=initializer, 253 | index_lookup=index_lookup, 254 | idx=idx 255 | ) 256 | 257 | if isinstance(converted, list): 258 | # Iterate for all converted nodes 259 | for converted_node in converted: 260 | # If converted node is ValueInfoProto, add it in inputs 261 | if isinstance(converted_node, ValueInfoProto): 262 | onnx_processed_inputs.append(converted_node) 263 | # If converted node is NodeProto, add it in processed nodes list 264 | elif isinstance(converted_node, NodeProto): 265 | onnx_processed_nodes.append(converted_node) 266 | # some operators have multiple outputs, 267 | # therefore, check all output node names 268 | node_names = list(converted_node.output) 269 | for nodename in node_names: 270 | if nodename in graph_outputs: 271 | onnx_processed_outputs.append( 272 | make_tensor_value_info( 273 | name=nodename, 274 | elem_type=in_type, 275 | shape=graph_outputs[nodename] 276 | ) 277 | ) 278 | if verbose: 279 | logging.info("Output node is: %s", nodename) 280 | elif isinstance(converted_node, TensorProto): 281 | raise ValueError("Did not expect TensorProto") 282 | else: 283 | raise ValueError("node is of an unrecognized type: %s" % type(node)) 284 | 285 | all_processed_nodes.append(converted_node) 286 | 287 | if idx > 0: 288 | # Handling extra node added to the graph if the MXNet model was 289 | # saved to json file, 290 | # refer "output_label" initialization above for more details. 291 | # if extra node was added then prev_index to the last node is adjusted. 
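# Note (inferred from the output_label skip earlier in this loop): the synthetic
# "<last-node>_label" input is skipped with 'continue' and never receives an
# index_lookup entry, so when it sits directly before the final node the entry for
# the last real predecessor lives at index_lookup[idx - 2] rather than idx - 1.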
292 | if idx == (len(mx_graph) - 1) and \ 293 | mx_graph[len(mx_graph)-2]["name"] == output_label: 294 | prev_index = index_lookup[idx - 2] 295 | else: 296 | prev_index = index_lookup[idx - 1] 297 | 298 | index_lookup.append(prev_index+len(converted)) 299 | else: 300 | index_lookup.append(len(converted) - 1) 301 | else: 302 | logging.info("Operator converter function should always return a list") 303 | 304 | graph = helper.make_graph( 305 | onnx_processed_nodes, 306 | "mxnet_converted_model", 307 | onnx_processed_inputs, 308 | onnx_processed_outputs 309 | ) 310 | 311 | graph.initializer.extend(initializer) 312 | 313 | checker.check_graph(graph) 314 | return graph 315 | -------------------------------------------------------------------------------- /detectors/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import cv2 4 | import time 5 | import logging 6 | from typing import Union 7 | 8 | from common.nms import nms 9 | from exec_backends.onnxrt_backend import DetectorInfer as DIO 10 | 11 | # Since TensorRT and pycuda are optional dependencies it might be not available 12 | from exec_backends.trt_backend import DetectorInfer as DIT 13 | 14 | 15 | def _whctrs(anchor): 16 | """ 17 | Return width, height, x center, and y center for an anchor (window). 18 | """ 19 | 20 | w = anchor[2] - anchor[0] + 1 21 | h = anchor[3] - anchor[1] + 1 22 | x_ctr = anchor[0] + 0.5 * (w - 1) 23 | y_ctr = anchor[1] + 0.5 * (h - 1) 24 | return w, h, x_ctr, y_ctr 25 | 26 | 27 | def _mkanchors(ws, hs, x_ctr, y_ctr): 28 | """ 29 | Given a vector of widths (ws) and heights (hs) around a center 30 | (x_ctr, y_ctr), output a set of anchors (windows). 31 | """ 32 | 33 | ws = ws[:, np.newaxis] 34 | hs = hs[:, np.newaxis] 35 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 36 | y_ctr - 0.5 * (hs - 1), 37 | x_ctr + 0.5 * (ws - 1), 38 | y_ctr + 0.5 * (hs - 1))) 39 | return anchors 40 | 41 | 42 | # @jit() 43 | def _ratio_enum(anchor, ratios): 44 | """ 45 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 46 | """ 47 | 48 | w, h, x_ctr, y_ctr = _whctrs(anchor) 49 | size = w * h 50 | size_ratios = size / ratios 51 | ws = np.round(np.sqrt(size_ratios)) 52 | hs = np.round(ws * ratios) 53 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 54 | return anchors 55 | 56 | 57 | # @jit() 58 | def _scale_enum(anchor, scales): 59 | """ 60 | Enumerate a set of anchors for each scale wrt an anchor. 
61 | """ 62 | 63 | w, h, x_ctr, y_ctr = _whctrs(anchor) 64 | ws = w * scales 65 | hs = h * scales 66 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 67 | return anchors 68 | 69 | 70 | # @jit() 71 | def anchors_plane(height, width, stride, base_anchors): 72 | """ 73 | Parameters 74 | ---------- 75 | height: height of plane 76 | width: width of plane 77 | stride: stride ot the original image 78 | anchors_base: (A, 4) a base set of anchors 79 | Returns 80 | ------- 81 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 82 | """ 83 | A = base_anchors.shape[0] 84 | all_anchors = np.zeros((height, width, A, 4), dtype=np.float32) 85 | for iw in range(width): 86 | sw = iw * stride 87 | for ih in range(height): 88 | sh = ih * stride 89 | for k in range(A): 90 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 91 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 92 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 93 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 94 | return all_anchors 95 | 96 | 97 | # @jit() 98 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 99 | scales=2 ** np.arange(3, 6), stride=16): 100 | """ 101 | Generate anchor (reference) windows by enumerating aspect ratios X 102 | scales wrt a reference (0, 0, 15, 15) window. 103 | """ 104 | 105 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 106 | ratio_anchors = _ratio_enum(base_anchor, ratios) 107 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 108 | for i in range(ratio_anchors.shape[0])]) 109 | return anchors 110 | 111 | 112 | # @jit() 113 | def generate_anchors_fpn(cfg): 114 | """ 115 | Generate anchor (reference) windows by enumerating aspect ratios X 116 | scales wrt a reference (0, 0, 15, 15) window. 117 | """ 118 | RPN_FEAT_STRIDE = [] 119 | for k in cfg: 120 | RPN_FEAT_STRIDE.append(int(k)) 121 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 122 | anchors = [] 123 | for k in RPN_FEAT_STRIDE: 124 | v = cfg[str(k)] 125 | bs = v['BASE_SIZE'] 126 | __ratios = np.array(v['RATIOS']) 127 | __scales = np.array(v['SCALES']) 128 | stride = int(k) 129 | # print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) 130 | r = generate_anchors(bs, __ratios, __scales, stride) 131 | # print('anchors_fpn', r.shape, file=sys.stderr) 132 | anchors.append(r) 133 | 134 | return anchors 135 | 136 | 137 | def clip_pad(tensor, pad_shape): 138 | """ 139 | Clip boxes of the pad area. 
140 | :param tensor: [n, c, H, W] 141 | :param pad_shape: [h, w] 142 | :return: [n, c, h, w] 143 | """ 144 | H, W = tensor.shape[2:] 145 | h, w = pad_shape 146 | 147 | if h < H or w < W: 148 | tensor = tensor[:, :, :h, :w].copy() 149 | 150 | return tensor 151 | 152 | 153 | def bbox_pred(boxes, box_deltas): 154 | """ 155 | Transform the set of class-agnostic boxes into class-specific boxes 156 | by applying the predicted offsets (box_deltas) 157 | :param boxes: !important [N 4] 158 | :param box_deltas: [N, 4 * num_classes] 159 | :return: [N 4 * num_classes] 160 | """ 161 | if boxes.shape[0] == 0: 162 | return np.zeros((0, box_deltas.shape[1])) 163 | 164 | boxes = boxes.astype(np.float, copy=False) 165 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 166 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 167 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 168 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 169 | 170 | dx = box_deltas[:, 0:1] 171 | dy = box_deltas[:, 1:2] 172 | dw = box_deltas[:, 2:3] 173 | dh = box_deltas[:, 3:4] 174 | 175 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 176 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 177 | pred_w = np.exp(dw) * widths[:, np.newaxis] 178 | pred_h = np.exp(dh) * heights[:, np.newaxis] 179 | 180 | pred_boxes = np.zeros(box_deltas.shape) 181 | # x1 182 | pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0) 183 | # y1 184 | pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0) 185 | # x2 186 | pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0) 187 | # y2 188 | pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 189 | 190 | if box_deltas.shape[1] > 4: 191 | pred_boxes[:, 4:] = box_deltas[:, 4:] 192 | 193 | return pred_boxes 194 | 195 | def bbox_pred_batch(boxes, box_deltas): 196 | """ 197 | Transform the set of class-agnostic boxes into class-specific boxes 198 | by applying the predicted offsets (box_deltas) 199 | :param boxes: !important [B N 4] 200 | :param box_deltas: [B, N, 4 * num_classes] 201 | :return: [B N 4 * num_classes] 202 | """ 203 | batch_size = boxes.shape[0] 204 | if boxes.shape[1] == 0: 205 | return np.zeros((batch_size, 0, box_deltas.shape[2])) 206 | 207 | boxes = boxes.astype(np.float, copy=False) 208 | widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0 209 | heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0 210 | ctr_x = boxes[:, :, 0] + 0.5 * (widths - 1.0) 211 | ctr_y = boxes[:, :, 1] + 0.5 * (heights - 1.0) 212 | 213 | dx = box_deltas[:, :, 0:1] 214 | dy = box_deltas[:, :, 1:2] 215 | dw = box_deltas[:, :, 2:3] 216 | dh = box_deltas[:, :, 3:4] 217 | 218 | pred_ctr_x = dx * widths[:, :, np.newaxis] + ctr_x[:, :, np.newaxis] 219 | pred_ctr_y = dy * heights[:, :, np.newaxis] + ctr_y[:, :, np.newaxis] 220 | pred_w = np.exp(dw) * widths[:, :, np.newaxis] 221 | pred_h = np.exp(dh) * heights[:, :, np.newaxis] 222 | 223 | pred_boxes = np.zeros(box_deltas.shape) 224 | # x1 225 | pred_boxes[:, :, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0) 226 | # y1 227 | pred_boxes[:, :, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0) 228 | # x2 229 | pred_boxes[:, :, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0) 230 | # y2 231 | pred_boxes[:, :, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 232 | 233 | if box_deltas.shape[2] > 4: 234 | pred_boxes[:, :, 4:] = box_deltas[:, :, 4:] 235 | 236 | return pred_boxes 237 | 238 | def landmark_pred(boxes, landmark_deltas): 239 | if boxes.shape[0] == 0: 240 | return np.zeros((0, landmark_deltas.shape[1])) 241 | boxes = boxes.astype(np.float, copy=False) 242 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 243 | 
heights = boxes[:, 3] - boxes[:, 1] + 1.0 244 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 245 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 246 | pred = landmark_deltas.copy() 247 | for i in range(5): 248 | pred[:, i, 0] = landmark_deltas[:, i, 0] * widths + ctr_x 249 | pred[:, i, 1] = landmark_deltas[:, i, 1] * heights + ctr_y 250 | return pred 251 | 252 | def landmark_pred_batch(boxes, landmark_deltas): 253 | ''' 254 | Perform batch-landmark prediction 255 | ''' 256 | batch_size = len(boxes) 257 | if boxes.shape[0] == 0: 258 | return np.zeros((batch_size, 0, landmark_deltas.shape[2])) 259 | boxes = boxes.astype(np.float, copy=False) 260 | widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0 261 | heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0 262 | ctr_x = boxes[:, :, 0] + 0.5 * (widths - 1.0) 263 | ctr_y = boxes[:, :, 1] + 0.5 * (heights - 1.0) 264 | pred = landmark_deltas.copy() 265 | for i in range(5): 266 | pred[:, :, i, 0] = landmark_deltas[:, :, i, 0] * widths + ctr_x 267 | pred[:, :, i, 1] = landmark_deltas[:, :, i, 1] * heights + ctr_y 268 | return pred 269 | 270 | class RetinaFace: 271 | def __init__(self, inference_backend: Union[DIO, DIT], rac='net3l', masks: bool =False, **kwargs): 272 | self.rac = rac 273 | self.masks=masks 274 | self.model = inference_backend 275 | self.input_shape = (1, 3, 480, 640) 276 | 277 | def prepare(self, nms: float = 0.4, **kwargs): 278 | self.model.prepare() 279 | self.input_shape = self.model.input_shape 280 | self.nms_threshold = nms 281 | self.landmark_std = 1.0 282 | 283 | _ratio = (1.,) 284 | fmc = 3 285 | if self.rac == 'net3': 286 | _ratio = (1.,) 287 | elif self.rac == 'net3l': 288 | _ratio = (1.,) 289 | self.landmark_std = 0.2 290 | else: 291 | assert False, 'rac setting error %s' % self.rac 292 | 293 | if fmc == 3: 294 | self._feat_stride_fpn = [32, 16, 8] 295 | self.anchor_cfg = { 296 | '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 297 | '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 298 | '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, 299 | } 300 | 301 | self.use_landmarks = True 302 | self.fpn_keys = [] 303 | 304 | for s in self._feat_stride_fpn: 305 | self.fpn_keys.append('stride%s' % s) 306 | 307 | self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(cfg=self.anchor_cfg))) 308 | for k in self._anchors_fpn: 309 | v = self._anchors_fpn[k].astype(np.float32) 310 | self._anchors_fpn[k] = v 311 | self.anchor_plane_cache = {} 312 | 313 | self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()])) 314 | 315 | def detect(self, im: np.ndarray, threshold: float = 0.6): 316 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 317 | im = np.transpose(im, (2, 0, 1)) 318 | input_blob = np.expand_dims(im, axis=0).astype(np.float32) 319 | t0 = time.time() 320 | net_out = self.model.run(input_blob) 321 | t1 = time.time() 322 | logging.debug(f"Retina inference took: {t1 - t0}") 323 | return self.postprocess(net_out, threshold) 324 | 325 | def postprocess(self, net_out, threshold): 326 | proposals_list = [] 327 | scores_list = [] 328 | mask_scores_list = [] 329 | landmarks_list = [] 330 | t0 = time.time() 331 | for _idx, s in enumerate(self._feat_stride_fpn): 332 | _key = 'stride%s' % s 333 | stride = int(s) 334 | if self.use_landmarks: 335 | idx = _idx * 3 336 | else: 337 | idx = _idx * 2 338 | if self.masks: 339 | idx = _idx * 4 340 | 341 | A = self._num_anchors['stride%s' % s] 342 | 343 | 
scores = net_out[idx] 344 | scores = scores[:, A:, :, :] 345 | idx += 1 346 | bbox_deltas = net_out[idx] 347 | height, width = bbox_deltas.shape[2], bbox_deltas.shape[3] 348 | 349 | K = height * width 350 | key = (height, width, stride) 351 | if key in self.anchor_plane_cache: 352 | anchors = self.anchor_plane_cache[key] 353 | else: 354 | 355 | anchors_fpn = self._anchors_fpn['stride%s' % s] 356 | anchors = anchors_plane(height, width, stride, anchors_fpn) 357 | anchors = anchors.reshape((K * A, 4)) 358 | if len(self.anchor_plane_cache) < 100: 359 | self.anchor_plane_cache[key] = anchors 360 | 361 | scores = clip_pad(scores, (height, width)) 362 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 363 | 364 | bbox_deltas = clip_pad(bbox_deltas, (height, width)) 365 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)) 366 | bbox_pred_len = bbox_deltas.shape[3] // A 367 | bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len)) 368 | 369 | proposals = bbox_pred(anchors, bbox_deltas) 370 | 371 | scores_ravel = scores.ravel() 372 | order = np.where(scores_ravel >= threshold)[0] 373 | proposals = proposals[order, :] 374 | scores = scores[order] 375 | 376 | proposals_list.append(proposals) 377 | scores_list.append(scores) 378 | 379 | if self.masks: 380 | type_scores = net_out[idx + 2] 381 | mask_scores = type_scores[:, A*2:, :, :] 382 | mask_scores = clip_pad(mask_scores,(height, width)) 383 | mask_scores = mask_scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 384 | mask_scores = mask_scores[order] 385 | mask_scores_list.append(mask_scores) 386 | 387 | if self.use_landmarks: 388 | idx += 1 389 | landmark_deltas = net_out[idx] 390 | landmark_deltas = clip_pad(landmark_deltas, (height, width)) 391 | landmark_pred_len = landmark_deltas.shape[1] // A 392 | landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5)) 393 | landmark_deltas *= self.landmark_std 394 | landmarks = landmark_pred(anchors, landmark_deltas) 395 | landmarks = landmarks[order, :] 396 | landmarks_list.append(landmarks) 397 | 398 | proposals = np.vstack(proposals_list) 399 | landmarks = None 400 | if proposals.shape[0] == 0: 401 | if self.use_landmarks: 402 | landmarks = np.zeros((0, 5, 2)) 403 | return np.zeros((0, 5)), landmarks 404 | 405 | scores = np.vstack(scores_list) 406 | 407 | scores_ravel = scores.ravel() 408 | order = scores_ravel.argsort()[::-1] 409 | proposals = proposals[order, :] 410 | scores = scores[order] 411 | 412 | if self.use_landmarks: 413 | landmarks = np.vstack(landmarks_list) 414 | landmarks = landmarks[order].astype(np.float32, copy=False) 415 | if self.masks: 416 | mask_scores = np.vstack(mask_scores_list) 417 | mask_scores = mask_scores[order] 418 | pre_det = np.hstack((proposals[:, 0:4], scores, mask_scores)).astype(np.float32, copy=False) 419 | else: 420 | pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False) 421 | keep = nms(pre_det, thresh=self.nms_threshold) 422 | det = np.hstack((pre_det, proposals[:, 4:])) 423 | det = det[keep, :] 424 | if self.use_landmarks: 425 | landmarks = landmarks[keep] 426 | t1 = time.time() 427 | logging.debug(f"Retina postprocess took: {t1 - t0}") 428 | return det, landmarks 429 | --------------------------------------------------------------------------------
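The anchor configuration built in RetinaFace.prepare() (one aspect ratio, two scales per stride) means generate_anchors_fpn() yields two base anchors per FPN level, which anchors_plane() then tiles over the feature map. A minimal sketch to inspect the resulting shapes, assuming detectors/retinaface.py is importable from the repository root:

import numpy as np
from detectors.retinaface import generate_anchors_fpn, anchors_plane

# Same anchor configuration that RetinaFace.prepare() builds for fmc == 3
anchor_cfg = {
    '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999},
    '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999},
    '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': (1.0,), 'ALLOWED_BORDER': 9999},
}

base_anchors = generate_anchors_fpn(cfg=anchor_cfg)   # list ordered by descending stride: 32, 16, 8
for stride, anchors in zip([32, 16, 8], base_anchors):
    print(stride, anchors.shape)                       # (2, 4) base anchors per level

# Tile the stride-32 anchors over a 15x20 feature map (a 480x640 input divided by 32)
plane = anchors_plane(15, 20, 32, base_anchors[0].astype(np.float32))
print(plane.shape)                                     # (15, 20, 2, 4)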
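bbox_pred() follows the standard Faster R-CNN box parameterisation: the network predicts centre offsets (dx, dy) scaled by the anchor width/height and log-space size changes (dw, dh). A self-contained numpy check of that decoding, independent of the classes above; the numbers are arbitrary and only illustrate the formula:

import numpy as np

anchor = np.array([[0.0, 0.0, 15.0, 15.0]])    # one anchor as [x1, y1, x2, y2]
deltas = np.array([[0.1, -0.2, 0.3, 0.0]])     # predicted dx, dy, dw, dh

w = anchor[:, 2] - anchor[:, 0] + 1.0           # 16
h = anchor[:, 3] - anchor[:, 1] + 1.0           # 16
cx = anchor[:, 0] + 0.5 * (w - 1.0)             # 7.5
cy = anchor[:, 1] + 0.5 * (h - 1.0)             # 7.5

pred_cx = deltas[:, 0] * w + cx                 # 9.1
pred_cy = deltas[:, 1] * h + cy                 # 4.3
pred_w = np.exp(deltas[:, 2]) * w               # ~21.6
pred_h = np.exp(deltas[:, 3]) * h               # 16.0

x1 = pred_cx - 0.5 * (pred_w - 1.0)
y1 = pred_cy - 0.5 * (pred_h - 1.0)
x2 = pred_cx + 0.5 * (pred_w - 1.0)
y2 = pred_cy + 0.5 * (pred_h - 1.0)
print(np.hstack([x1, y1, x2, y2]))              # matches bbox_pred(anchor, deltas)[0]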
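Finally, a hypothetical end-to-end usage sketch for the RetinaFace wrapper above. The constructor arguments of DetectorInfer are an assumption here (a path to an already-exported ONNX model); the real signature lives in exec_backends/onnxrt_backend.py, and face_detectors.py may already provide convenience constructors, so treat this as illustrative rather than canonical. Note that detect() performs no resizing, so the input must be brought to the backend's input shape (640x480 by default) before calling it.

import cv2

from detectors.retinaface import RetinaFace
from exec_backends.onnxrt_backend import DetectorInfer   # ONNX Runtime backend; the TensorRT backend is used analogously

# Assumption: DetectorInfer accepts the ONNX model path; verify against onnxrt_backend.py
backend = DetectorInfer(model='models/retinaface_r50_v1.onnx')   # example path, not shipped with the repo
detector = RetinaFace(inference_backend=backend, rac='net3l')
detector.prepare(nms=0.4)                                        # builds FPN anchors, sets the NMS threshold

# Resize to the default 640x480 model input (width, height) before detection
image = cv2.resize(cv2.imread('test_images/lumia.jpg'), (640, 480))
dets, landmarks = detector.detect(image, threshold=0.6)

# dets: (N, 5) array of [x1, y1, x2, y2, score]; landmarks: (N, 5, 2) facial keypoints
for x1, y1, x2, y2, score in dets:
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite('detections.jpg', image)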