├── README.md ├── yolov8_herizon └── mapper │ ├── 01_check.sh │ ├── 02_preprocess.sh │ ├── 03_build.sh │ ├── 04_inference.sh │ ├── 05_evaluate.sh │ ├── cal_data │ └── test.jpg │ ├── data_preprocess.py │ ├── hb_mapper_checker.log │ ├── hb_mapper_makertbin.log │ ├── inference_image_demo.py │ ├── model │ └── yolov8_relu_80class_ZQ.onnx │ ├── model_output │ ├── yolov8.bin │ └── yolov8_quantized_model.onnx │ ├── preprocess.py │ ├── src_data │ └── test.jpg │ ├── test.jpg │ ├── test_horizon_result.jpg │ └── yolov8_config.yaml ├── yolov8_onnx ├── test.jpg ├── test_onnx_result.jpg ├── yolov8_relu_80class_ZQ.onnx └── yolov8n_onnx_demo_zq.py ├── yolov8_rknn ├── data │ └── test.jpg ├── dataset.txt ├── onnx2rknn_demo_ZQ.py ├── test.jpg ├── test_rknn_result.jpg ├── yolov8_relu_80class_ZQ.onnx └── yolov8_relu_80class_ZQ.rknn └── yolov8_tensorrt ├── onnx2trt_rt7.py ├── tensorRT_inferenc_demo.py ├── test.jpg ├── test_result_tensorRT.jpg ├── yolov8_relu_80class_ZQ.onnx └── yolov8_relu_80class_ZQ.trt /README.md: -------------------------------------------------------------------------------- 1 | # yolov8n_onnx_tensorRT_rknn_horizon_dfl 2 | YOLOv8n object-detection deployment, designed to be easy to port across platforms (ONNX, TensorRT, RKNN, Horizon) and intended as one of the simplest and fastest deployment approaches around. The post-processing is written with C++ deployment in mind, so timing the Python post-processing is of limited value. 3 | 4 | I have published two YOLOv8 detection deployments before; on further reflection there was still room for optimization. The approach in this example reduces deployment difficulty and speeds up model inference (at the cost of slightly more post-processing time). 5 | 6 | For exporting the ONNX model, see [YOLOv8n simulation testing and deployment on Rockchip RKNN and Horizon chips, with low engineering effort and fast inference](https://blog.csdn.net/zhangqian_1/article/details/135523096). 7 | 8 | # Folder structure 9 | 10 | yolov8_onnx: ONNX model, test image, test result, and test demo script 11 | 12 | yolov8_tensorrt: TensorRT model, test image, test result, test demo script, ONNX model, and onnx2tensorRT script (tensorRT-7.2.3.4) 13 | 14 | yolov8_rknn: RKNN model, test (quantization) image, test result, and onnx2rknn conversion/test script 15 | 16 | yolov8_herizon: Horizon model, test (quantization) image, test result, conversion/test scripts, and a script for testing the quantized ONNX model 17 | 18 | # Test results 19 | ![image](https://github.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/blob/main/yolov8_onnx/test_onnx_result.jpg) 20 | 21 | (Note: the test image comes from coco128.) 22 | 23 | Notes: the inference-test preprocessing does not use aspect-ratio-preserving (letterbox) resizing, and the SiLU activation was replaced with ReLU. Because the model was trained on coco128 for only a few iterations, detection quality is modest; it is intended only for verifying the deployment pipeline. 24 | 25 | 26 | # TensorRT latency before and after the optimization 27 | Average latency over 10000 TensorRT inference runs (Tesla V100 GPU, cuda_11.0) 28 | ![image](https://img-blog.csdnimg.cn/direct/d7c5ecb20f80455e8236a798d1ee2534.png) 29 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/01_check.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 3 | # 4 | # The material in this file is confidential and contains trade secrets 5 | # of Horizon Robotics Inc. This is proprietary information owned by 6 | # Horizon Robotics Inc. No part of this work may be disclosed, 7 | # reproduced, copied, transmitted, or used in any way for any purpose, 8 | # without the express written permission of Horizon Robotics Inc. 9 | 10 | set -e -v 11 | cd $(dirname $0) || exit 12 | 13 | model_type="onnx" 14 | onnx_model="./model/yolov8_relu_80class_ZQ.onnx" 15 | output="./yolov8_checker.log" 16 | march="bernoulli2" 17 | 18 | hb_mapper checker --model-type ${model_type} \ 19 | --model ${onnx_model} \ 20 | --output ${output} --march ${march} 21 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/02_preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved.
3 | # 4 | # The material in this file is confidential and contains trade secrets 5 | # of Horizon Robotics Inc. This is proprietary information owned by 6 | # Horizon Robotics Inc. No part of this work may be disclosed, 7 | # reproduced, copied, transmitted, or used in any way for any purpose, 8 | # without the express written permission of Horizon Robotics Inc. 9 | 10 | set -e -v 11 | cd $(dirname $0) || exit 12 | 13 | python3 data_preprocess.py \ 14 | --src_dir ./src_data \ 15 | --dst_dir ./cal_data \ 16 | --pic_ext .rgb \ 17 | --read_mode opencv 18 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/03_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 3 | # 4 | # The material in this file is confidential and contains trade secrets 5 | # of Horizon Robotics Inc. This is proprietary information owned by 6 | # Horizon Robotics Inc. No part of this work may be disclosed, 7 | # reproduced, copied, transmitted, or used in any way for any purpose, 8 | # without the express written permission of Horizon Robotics Inc. 9 | 10 | set -e -v 11 | cd $(dirname $0) 12 | config_file="./yolov8_config.yaml" 13 | model_type="onnx" 14 | # build model 15 | hb_mapper makertbin --config ${config_file} \ 16 | --model-type ${model_type} 17 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/04_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 3 | # 4 | # The material in this file is confidential and contains trade secrets 5 | # of Horizon Robotics Inc. This is proprietary information owned by 6 | # Horizon Robotics Inc. No part of this work may be disclosed, 7 | # reproduced, copied, transmitted, or used in any way for any purpose, 8 | # without the express written permission of Horizon Robotics Inc. 9 | 10 | python3 -u inference_image_demo.py 11 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/05_evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 3 | # 4 | # The material in this file is confidential and contains trade secrets 5 | # of Horizon Robotics Inc. This is proprietary information owned by 6 | # Horizon Robotics Inc. No part of this work may be disclosed, 7 | # reproduced, copied, transmitted, or used in any way for any purpose, 8 | # without the express written permission of Horizon Robotics Inc. 
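# Note: this script hands evaluation off to ../../det_evaluate.py using the COCO val2017
# images/annotations configured below; usage is "sh 05_evaluate.sh [origin|quanti] [image_count]"
# as described in the comments further down. The model paths below appear to be carried over
# from the Horizon yolov5_672x672 sample; the build step in this repo (03_build.sh with
# output_model_file_prefix "yolov8") writes ./model_output/yolov8_quantized_model.onnx,
# so these paths likely need updating before running the evaluation.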
9 | 10 | set -v -e 11 | cd $(dirname $0) || exit 12 | 13 | #for converted quanti model evaluation 14 | quanti_model_file="./model_output/yolov5_672x672_nv12_quantized_model.onnx" 15 | quanti_input_layout="NHWC" 16 | 17 | original_model_file="./model_output/yolov5_672x672_nv12_original_float_model.onnx" 18 | original_input_layout="NCHW" 19 | 20 | if [[ $1 =~ "origin" ]]; then 21 | model=$original_model_file 22 | layout=$original_input_layout 23 | input_offset=128 24 | else 25 | model=$quanti_model_file 26 | layout=$quanti_input_layout 27 | input_offset=128 28 | fi 29 | 30 | image_path="../../../01_common/data/coco/coco_val2017/images/" 31 | anno_path="../../../01_common/data/coco/coco_val2017/annotations/instances_val2017.json" 32 | 33 | 34 | if [ -z $2 ]; then 35 | total_image_number=5000 36 | else 37 | total_image_number=$2 38 | fi 39 | 40 | # ------------------------------------------------------------------------------------------------------------- 41 | # shell command "sh 05_evaluate.sh" runs quanti full evaluation by default 42 | # If quanti model eval is intended, please run the shell via command "sh 05_evaluate.sh quanti" 43 | # If float model eval is intended, please run the shell via command "sh 05_evaluate.sh origin"# 44 | # If quanti model quick eval test is intended, please run the shell via command "sh 05_evaluate.sh quanti 20" 45 | # If float model quick eval test is intended, please run the shell via command "sh 05_evaluate.sh origin 20" 46 | # ------------------------------------------------------------------------------------------------------------- 47 | # quanti model eval 48 | python3 -u ../../det_evaluate.py \ 49 | --model=${model} \ 50 | --image_path=${image_path} \ 51 | --annotation_path=${anno_path} \ 52 | --input_layout=${layout} \ 53 | --total_image_number=${total_image_number} \ 54 | --input_offset ${input_offset} 55 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/cal_data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/cal_data/test.jpg -------------------------------------------------------------------------------- /yolov8_herizon/mapper/data_preprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 2 | # 3 | # The material in this file is confidential and contains trade secrets 4 | # of Horizon Robotics Inc. This is proprietary information owned by 5 | # Horizon Robotics Inc. No part of this work may be disclosed, 6 | # reproduced, copied, transmitted, or used in any way for any purpose, 7 | # without the express written permission of Horizon Robotics Inc. 
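# Note: with the arguments used in 02_preprocess.sh (--src_dir ./src_data --dst_dir ./cal_data
# --pic_ext .rgb --read_mode opencv), each image in ./src_data is read with OpenCV, run through
# calibration_transformers() from preprocess.py, and written to ./cal_data as a raw uint8 .rgb
# tensor for hb_mapper makertbin calibration.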
8 | 9 | import os 10 | import sys 11 | sys.path.append('.') 12 | 13 | import click 14 | import numpy as np 15 | from preprocess import calibration_transformers 16 | import skimage.io 17 | import cv2 18 | 19 | transformers = calibration_transformers() 20 | 21 | sys.path.append("../../../01_common/python/data/") 22 | from dataloader import DataLoader 23 | from dataset import CifarDataset 24 | 25 | regular_process_list = [ 26 | ".rgb", 27 | ".rgbp", 28 | ".bgr", 29 | ".bgrp", 30 | ".yuv", 31 | ".feature", 32 | ".cali", 33 | ] 34 | 35 | 36 | def read_image(src_file, read_mode): 37 | if read_mode == "skimage": 38 | image = skimage.img_as_float(skimage.io.imread(src_file)).astype( 39 | np.float32) 40 | elif read_mode == "opencv": 41 | image = cv2.imread(src_file) 42 | else: 43 | raise ValueError(f"Invalid read mode {read_mode}") 44 | if image.ndim != 3: # expend gray scale image to three channels 45 | image = image[..., np.newaxis] 46 | image = np.concatenate([image, image, image], axis=-1) 47 | return image 48 | 49 | 50 | def regular_preprocess(src_file, transformers, dst_dir, pic_ext, read_mode): 51 | image = [read_image(src_file, read_mode)] 52 | for trans in transformers: 53 | image = trans(image) 54 | 55 | filename = os.path.basename(src_file) 56 | short_name, ext = os.path.splitext(filename) 57 | pic_name = os.path.join(dst_dir, short_name + pic_ext) 58 | print("write:%s" % pic_name) 59 | dtype = np.float32 if dst_dir.endswith("_f32") else np.uint8 60 | image[0].astype(dtype).tofile(pic_name) 61 | 62 | 63 | def cifar_preprocess(src_file, data_loader, dst_dir, pic_ext, cal_img_num): 64 | for i in range(cal_img_num): 65 | image, label = next(data_loader) 66 | filename = os.path.basename(src_file) 67 | pic_name = os.path.join(dst_dir + '/' + str(i) + pic_ext) 68 | print("write:%s" % pic_name) 69 | image[0].astype(np.uint8).tofile(pic_name) 70 | 71 | 72 | @click.command(help=''' 73 | A Tool used to generate preprocess pics for calibration. 
74 | ''') 75 | @click.option('--src_dir', type=str, help='calibration source file') 76 | @click.option('--dst_dir', type=str, help='generated calibration file') 77 | @click.option('--pic_ext', 78 | type=str, 79 | default=".cali", 80 | help='picture extension.') 81 | @click.option('--read_mode', 82 | type=click.Choice(["skimage", "opencv"]), 83 | default="opencv", 84 | help='picture extension.') 85 | @click.option('--cal_img_num', type=int, default=100, help='cali picture num.') 86 | def main(src_dir, dst_dir, pic_ext, read_mode, cal_img_num): 87 | '''A Tool used to generate preprocess pics for calibration.''' 88 | pic_num = 0 89 | os.makedirs(dst_dir, exist_ok=True) 90 | if pic_ext.strip().split('_')[0] in regular_process_list: 91 | print("regular preprocess") 92 | for src_name in sorted(os.listdir(src_dir)): 93 | pic_num += 1 94 | if pic_num > cal_img_num: 95 | break 96 | src_file = os.path.join(src_dir, src_name) 97 | regular_preprocess(src_file, transformers, dst_dir, pic_ext, 98 | read_mode) 99 | elif pic_ext.strip().split('_')[0] == ".cifar": 100 | print("cifar preprocess") 101 | data_loader = DataLoader(CifarDataset(src_dir), transformers, 1) 102 | cifar_preprocess(src_dir, data_loader, dst_dir, pic_ext, cal_img_num) 103 | else: 104 | raise ValueError(f"invalid pic_ext {pic_ext}") 105 | 106 | 107 | if __name__ == '__main__': 108 | main() 109 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/hb_mapper_checker.log: -------------------------------------------------------------------------------- 1 | 2024-01-11 10:21:06,600 file: hb_mapper.py func: hb_mapper line No: 70 Start hb_mapper.... 2 | 2024-01-11 10:21:06,600 file: hb_mapper.py func: hb_mapper line No: 71 log will be stored in /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/hb_mapper_checker.log 3 | 2024-01-11 10:21:06,600 file: hb_mapper.py func: hb_mapper line No: 72 hbdk version 3.27.4 4 | 2024-01-11 10:21:06,601 file: hb_mapper.py func: hb_mapper line No: 73 horizon_nn version 0.13.3 5 | 2024-01-11 10:21:06,601 file: hb_mapper.py func: hb_mapper line No: 74 hb_mapper version 1.6.8a 6 | 2024-01-11 10:21:06,601 file: hb_mapper.py func: hb_mapper line No: 76 parameter [output] is deprecated 7 | 2024-01-11 10:21:06,611 file: helper.py func: helper line No: 124 Model input names: ['data'] 8 | 2024-01-11 10:21:06,611 file: hb_mapper_checker.py func: hb_mapper_checker line No: 104 Model type: onnx 9 | 2024-01-11 10:21:06,611 file: hb_mapper_checker.py func: hb_mapper_checker line No: 105 march: bernoulli2 10 | 2024-01-11 10:21:06,611 file: hb_mapper_checker.py func: hb_mapper_checker line No: 110 input names [] 11 | 2024-01-11 10:21:06,612 file: hb_mapper_checker.py func: hb_mapper_checker line No: 111 input shapes {} 12 | 2024-01-11 10:21:06,612 file: hb_mapper_checker.py func: hb_mapper_checker line No: 117 Begin model checking.... 13 | 2024-01-11 10:21:06,612 file: build.py func: build line No: 36 [Thu Jan 11 10:21:06 2024] Start to Horizon NN Model Convert. 14 | 2024-01-11 10:21:06,613 file: dict_parser.py func: dict_parser line No: 28 The input parameter is not specified, convert with default parameters. 15 | 2024-01-11 10:21:06,613 file: dict_parser.py func: dict_parser line No: 513 The hbdk parameter is not specified, and the submodel will be compiled with the default parameter. 
16 | 2024-01-11 10:21:06,613 file: build.py func: build line No: 143 HorizonNN version: 0.13.3 17 | 2024-01-11 10:21:06,613 file: build.py func: build line No: 147 HBDK version: 3.27.4 18 | 2024-01-11 10:21:06,613 file: build.py func: build line No: 36 [Thu Jan 11 10:21:06 2024] Start to parse the onnx model. 19 | 2024-01-11 10:21:06,656 file: onnx_parser.py func: onnx_parser line No: 146 ONNX model info: 20 | ONNX IR version: 6 21 | Opset version: 11 22 | Input name: data, [1, 3, 640, 640] 23 | 2024-01-11 10:21:06,701 file: build.py func: build line No: 39 [Thu Jan 11 10:21:06 2024] End to parse the onnx model. 24 | 2024-01-11 10:21:06,701 file: build.py func: build line No: 266 Model input names: ['data'] 25 | 2024-01-11 10:21:06,719 file: build.py func: build line No: 537 Saving the original float model: ./.hb_check/original_float_model.onnx. 26 | 2024-01-11 10:21:06,720 file: build.py func: build line No: 36 [Thu Jan 11 10:21:06 2024] Start to optimize the model. 27 | 2024-01-11 10:21:06,900 file: build.py func: build line No: 39 [Thu Jan 11 10:21:06 2024] End to optimize the model. 28 | 2024-01-11 10:21:06,917 file: build.py func: build line No: 548 Saving the optimized model: ./.hb_check/optimized_float_model.onnx. 29 | 2024-01-11 10:21:06,918 file: build.py func: build line No: 36 [Thu Jan 11 10:21:06 2024] Start to calibrate the model. 30 | 2024-01-11 10:21:07,001 file: calibration_data_set.py func: calibration_data_set line No: 67 There are 1 samples in the calibration data set. 31 | 2024-01-11 10:21:07,003 file: max_calibrater.py func: max_calibrater line No: 68 Run calibration model with max method. 32 | 2024-01-11 10:21:07,269 file: build.py func: build line No: 39 [Thu Jan 11 10:21:07 2024] End to calibrate the model. 33 | 2024-01-11 10:21:07,270 file: build.py func: build line No: 36 [Thu Jan 11 10:21:07 2024] Start to quantize the model. 34 | 2024-01-11 10:21:07,786 file: build.py func: build line No: 39 [Thu Jan 11 10:21:07 2024] End to quantize the model. 35 | 2024-01-11 10:21:07,891 file: build.py func: build line No: 562 Saving the quantized model: ./.hb_check/quantized_model.onnx. 36 | 2024-01-11 10:21:07,891 file: build.py func: build line No: 36 [Thu Jan 11 10:21:07 2024] Start to compile the model with march bernoulli2. 37 | 2024-01-11 10:21:07,891 file: dict_parser.py func: dict_parser line No: 518 Parsing the hbdk parameter:{'hbdk_pass_through_params': '--O0'} 38 | 2024-01-11 10:21:08,024 file: hybrid_build.py func: hybrid_build line No: 123 Compile submodel: torch-jit-export_subgraph_0 39 | 2024-01-11 10:21:08,215 file: hbdk_cc.py func: hbdk_cc line No: 119 hbdk-cc parameters:['--O0', '--input-layout', 'NHWC', '--output-layout', 'NCHW'] 40 | 2024-01-11 10:21:08,710 file: build.py func: build line No: 39 [Thu Jan 11 10:21:08 2024] End to compile the model with march bernoulli2. 
41 | 2024-01-11 10:21:08,712 file: node_info.py func: node_info line No: 54 The converted model node information: 42 | ============================================================================================ 43 | Node ON Subgraph Type 44 | --------------------------------------------------------------------------------------------- 45 | Conv_0 BPU id(0) HzSQuantizedConv 46 | Conv_2 BPU id(0) HzSQuantizedConv 47 | Conv_4 BPU id(0) HzSQuantizedConv 48 | Split_6 BPU id(0) Split 49 | Conv_7 BPU id(0) HzSQuantizedConv 50 | Conv_9 BPU id(0) HzSQuantizedConv 51 | UNIT_CONV_FOR_Add_11 BPU id(0) HzSQuantizedConv 52 | UNIT_CONV_FOR_365_0.065100304782391_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 53 | UNIT_CONV_FOR_366_0.065100304782391_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 54 | Concat_12 BPU id(0) Concat 55 | Conv_13 BPU id(0) HzSQuantizedConv 56 | Conv_15 BPU id(0) HzSQuantizedConv 57 | Conv_17 BPU id(0) HzSQuantizedConv 58 | Split_19 BPU id(0) Split 59 | Conv_20 BPU id(0) HzSQuantizedConv 60 | Conv_22 BPU id(0) HzSQuantizedConv 61 | UNIT_CONV_FOR_Add_24 BPU id(0) HzSQuantizedConv 62 | Conv_25 BPU id(0) HzSQuantizedConv 63 | Conv_27 BPU id(0) HzSQuantizedConv 64 | UNIT_CONV_FOR_Add_29 BPU id(0) HzSQuantizedConv 65 | UNIT_CONV_FOR_384_0.038103923201561_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 66 | UNIT_CONV_FOR_385_0.038103923201561_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 67 | UNIT_CONV_FOR_392_0.038103923201561_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 68 | Concat_30 BPU id(0) Concat 69 | Conv_31 BPU id(0) HzSQuantizedConv 70 | Conv_33 BPU id(0) HzSQuantizedConv 71 | Conv_35 BPU id(0) HzSQuantizedConv 72 | Split_37 BPU id(0) Split 73 | Conv_38 BPU id(0) HzSQuantizedConv 74 | Conv_40 BPU id(0) HzSQuantizedConv 75 | UNIT_CONV_FOR_Add_42 BPU id(0) HzSQuantizedConv 76 | Conv_43 BPU id(0) HzSQuantizedConv 77 | Conv_45 BPU id(0) HzSQuantizedConv 78 | UNIT_CONV_FOR_Add_47 BPU id(0) HzSQuantizedConv 79 | UNIT_CONV_FOR_410_0.053578991442919_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 80 | UNIT_CONV_FOR_411_0.053578991442919_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 81 | UNIT_CONV_FOR_418_0.053578991442919_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 82 | Concat_48 BPU id(0) Concat 83 | Conv_49 BPU id(0) HzSQuantizedConv 84 | Conv_51 BPU id(0) HzSQuantizedConv 85 | Conv_53 BPU id(0) HzSQuantizedConv 86 | Split_55 BPU id(0) Split 87 | Conv_56 BPU id(0) HzSQuantizedConv 88 | Conv_58 BPU id(0) HzSQuantizedConv 89 | UNIT_CONV_FOR_Add_60 BPU id(0) HzSQuantizedConv 90 | UNIT_CONV_FOR_436_0.052549000829458_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 91 | UNIT_CONV_FOR_437_0.052549000829458_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 92 | Concat_61 BPU id(0) Concat 93 | Conv_62 BPU id(0) HzSQuantizedConv 94 | Conv_64 BPU id(0) HzSQuantizedConv 95 | MaxPool_66 BPU id(0) HzQuantizedMaxPool 96 | MaxPool_67 BPU id(0) HzQuantizedMaxPool 97 | MaxPool_68 BPU id(0) HzQuantizedMaxPool 98 | Concat_69 BPU id(0) Concat 99 | Conv_70 BPU id(0) HzSQuantizedConv 100 | Resize_73 BPU id(0) HzQuantizedResizeUpsample 101 | Concat_74 BPU id(0) Concat 102 | Conv_75 BPU id(0) HzSQuantizedConv 103 | Split_77 BPU id(0) Split 104 | Conv_78 BPU id(0) HzSQuantizedConv 105 | Conv_80 BPU id(0) HzSQuantizedConv 106 | Concat_82 BPU id(0) Concat 107 | Conv_83 BPU id(0) HzSQuantizedConv 108 | Resize_86 BPU id(0) HzQuantizedResizeUpsample 109 | UNIT_CONV_FOR_403_0.031705468893051_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 110 | Concat_87 BPU id(0) Concat 111 | Conv_88 BPU id(0) HzSQuantizedConv 112 | Split_90 BPU id(0) Split 113 | Conv_91 BPU id(0) 
HzSQuantizedConv 114 | Conv_93 BPU id(0) HzSQuantizedConv 115 | Concat_95 BPU id(0) Concat 116 | Conv_96 BPU id(0) HzSQuantizedConv 117 | Conv_98 BPU id(0) HzSQuantizedConv 118 | Concat_100 BPU id(0) Concat 119 | Conv_101 BPU id(0) HzSQuantizedConv 120 | Split_103 BPU id(0) Split 121 | Conv_104 BPU id(0) HzSQuantizedConv 122 | Conv_106 BPU id(0) HzSQuantizedConv 123 | Concat_108 BPU id(0) Concat 124 | Conv_109 BPU id(0) HzSQuantizedConv 125 | Conv_111 BPU id(0) HzSQuantizedConv 126 | UNIT_CONV_FOR_458_0.032748799771070_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 127 | Concat_113 BPU id(0) Concat 128 | Conv_114 BPU id(0) HzSQuantizedConv 129 | Split_116 BPU id(0) Split 130 | Conv_117 BPU id(0) HzSQuantizedConv 131 | Conv_119 BPU id(0) HzSQuantizedConv 132 | Concat_121 BPU id(0) Concat 133 | Conv_122 BPU id(0) HzSQuantizedConv 134 | Conv_124 BPU id(0) HzSQuantizedConv 135 | Conv_126 BPU id(0) HzSQuantizedConv 136 | Conv_128 BPU id(0) HzSQuantizedConv 137 | Conv_129 BPU id(0) HzSQuantizedConv 138 | Conv_131 BPU id(0) HzSQuantizedConv 139 | Conv_133 BPU id(0) HzSQuantizedConv 140 | Conv_134 BPU id(0) HzSQuantizedConv 141 | Conv_136 BPU id(0) HzSQuantizedConv 142 | Conv_138 BPU id(0) HzSQuantizedConv 143 | Conv_139 BPU id(0) HzSQuantizedConv 144 | Conv_141 BPU id(0) HzSQuantizedConv 145 | Conv_143 BPU id(0) HzSQuantizedConv 146 | Conv_144 BPU id(0) HzSQuantizedConv 147 | Conv_146 BPU id(0) HzSQuantizedConv 148 | Conv_148 BPU id(0) HzSQuantizedConv 149 | Conv_149 BPU id(0) HzSQuantizedConv 150 | Conv_151 BPU id(0) HzSQuantizedConv 151 | Conv_153 BPU id(0) HzSQuantizedConv 152 | 2024-01-11 10:21:08,712 file: build.py func: build line No: 39 [Thu Jan 11 10:21:08 2024] End to Horizon NN Model Convert. 153 | 2024-01-11 10:21:08,716 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2841 ONNX model output num : 6 154 | 2024-01-11 10:21:08,723 file: hb_mapper_checker.py func: hb_mapper_checker line No: 141 End model checking.... 155 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/hb_mapper_makertbin.log: -------------------------------------------------------------------------------- 1 | 2024-01-11 10:21:18,084 file: hb_mapper.py func: hb_mapper line No: 111 Start hb_mapper.... 2 | 2024-01-11 10:21:18,084 file: hb_mapper.py func: hb_mapper line No: 112 log will be stored in /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/hb_mapper_makertbin.log 3 | 2024-01-11 10:21:18,084 file: hb_mapper.py func: hb_mapper line No: 113 hbdk version 3.27.4 4 | 2024-01-11 10:21:18,084 file: hb_mapper.py func: hb_mapper line No: 114 horizon_nn version 0.13.3 5 | 2024-01-11 10:21:18,085 file: hb_mapper.py func: hb_mapper line No: 115 hb_mapper version 1.6.8a 6 | 2024-01-11 10:21:18,085 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 590 Start Model Convert.... 7 | 2024-01-11 10:21:18,098 file: mapper_conf_parser.py func: mapper_conf_parser line No: 811 Using abs path /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/model/yolov8_relu_80class_ZQ.onnx 8 | 2024-01-11 10:21:18,099 file: mapper_conf_parser.py func: mapper_conf_parser line No: 137 validating model_parameters... 
9 | 2024-01-11 10:21:18,110 file: mapper_conf_parser.py func: mapper_conf_parser line No: 811 Using abs path /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/model_output 10 | 2024-01-11 10:21:18,110 file: mapper_conf_parser.py func: mapper_conf_parser line No: 149 validating model_parameters finished 11 | 2024-01-11 10:21:18,111 file: mapper_conf_parser.py func: mapper_conf_parser line No: 153 validating input_parameters... 12 | 2024-01-11 10:21:18,111 file: helper.py func: helper line No: 124 Model input names: ['data'] 13 | 2024-01-11 10:21:18,111 file: mapper_conf_parser.py func: mapper_conf_parser line No: 273 input num is set to 1 according to input_names 14 | 2024-01-11 10:21:18,111 file: mapper_conf_parser.py func: mapper_conf_parser line No: 279 model name missing, using model name from model file: ['data'] 15 | 2024-01-11 10:21:18,111 file: mapper_conf_parser.py func: mapper_conf_parser line No: 325 model input shape missing, using shape from model file: [[1, 3, 640, 640]] 16 | 2024-01-11 10:21:18,112 file: mapper_conf_parser.py func: mapper_conf_parser line No: 164 validating input_parameters finished 17 | 2024-01-11 10:21:18,112 file: mapper_conf_parser.py func: mapper_conf_parser line No: 168 validating calibration_parameters... 18 | 2024-01-11 10:21:18,112 file: mapper_conf_parser.py func: mapper_conf_parser line No: 811 Using abs path /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/cal_data 19 | 2024-01-11 10:21:18,112 file: mapper_conf_parser.py func: mapper_conf_parser line No: 182 validating calibration_parameters finished 20 | 2024-01-11 10:21:18,112 file: mapper_conf_parser.py func: mapper_conf_parser line No: 186 validating custom_op... 21 | 2024-01-11 10:21:18,113 file: mapper_conf_parser.py func: mapper_conf_parser line No: 724 custom_op does not exist, skipped 22 | 2024-01-11 10:21:18,113 file: mapper_conf_parser.py func: mapper_conf_parser line No: 192 validating custom_op finished 23 | 2024-01-11 10:21:18,113 file: mapper_conf_parser.py func: mapper_conf_parser line No: 195 validating compiler_parameters... 
24 | 2024-01-11 10:21:18,113 file: mapper_conf_parser.py func: mapper_conf_parser line No: 201 validating compiler_parameters finished 25 | 2024-01-11 10:21:18,113 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 56 Dump config: 26 | 2024-01-11 10:21:18,113 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 57 calibration_parameters: 27 | cal_data_dir: ./cal_data 28 | calibration_type: default 29 | preprocess_on: true 30 | compiler_parameters: 31 | compile_mode: latency 32 | debug: false 33 | optimize_level: O3 34 | input_parameters: 35 | input_layout_rt: NCHW 36 | input_layout_train: NCHW 37 | input_name: '' 38 | input_shape: '' 39 | input_type_rt: rgb 40 | input_type_train: rgb 41 | mean_value: '' 42 | norm_type: data_scale 43 | scale_value: '0.003921568627451' 44 | model_parameters: 45 | layer_out_dump: false 46 | log_level: debug 47 | march: bernoulli2 48 | onnx_model: /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/model/yolov8_relu_80class_ZQ.onnx 49 | output_model_file_prefix: yolov8 50 | working_dir: model_output 51 | 52 | 2024-01-11 10:21:18,117 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 63 input 'data' : original model shape: [1, 3, 640, 640] 53 | 2024-01-11 10:21:18,117 file: loader.py func: loader line No: 44 ******************************************* 54 | 2024-01-11 10:21:18,117 file: loader.py func: loader line No: 45 First calibration picture name: test.jpg 55 | 2024-01-11 10:21:18,118 file: loader.py func: loader line No: 47 First calibration picture md5: 56 | 2024-01-11 10:21:18,123 file: loader.py func: loader line No: 51 ******************************************* 57 | 2024-01-11 10:21:18,169 file: tool_utils.py func: tool_utils line No: 320 calibration data shape: (1, 3, 640, 640) 58 | 2024-01-11 10:21:18,170 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 579 call build params: 59 | {'march': 'bernoulli2', 'debug_mode': False, 'save_model': True, 'name_prefix': 'yolov8', 'input_dict': {'data': {'input_shape': [1, 3, 640, 640], 'expected_input_type': 'RGB_128', 'original_input_type': 'RGB', 'original_input_layout': 'NCHW', 'scales': array([0.00392157], dtype=float32)}}, 'cali_dict': {'calibration_type': 'default', 'calibration_data': {'data': array([[[[-92.875 , -67.375 , -64. , ..., 85.125 , 60 | 83. , 83. ], 61 | [-88.375 , -64.875 , -64. , ..., 85.625015 , 62 | 83. , 83. ], 63 | [-81.25 , -61.124992 , -64. , ..., 86. , 64 | 83.375 , 83. ], 65 | ..., 66 | [-80. , -87.875 , -91.5 , ..., -34.375 , 67 | -32.749992 , -37.5 ], 68 | [-83.375 , -86.375 , -90.375 , ..., -51.624992 , 69 | -47.375 , -40.875 ], 70 | [-87.875 , -86.875 , -90.875 , ..., -71.125 , 71 | -63.875 , -47.375 ]], 72 | 73 | [[-79.125 , -53.624992 , -53. , ..., 107.125 , 74 | 108. , 108. ], 75 | [-75.625 , -52.124992 , -53. , ..., 107.625 , 76 | 108. , 108. ], 77 | [-70. , -50.25 , -54.124992 , ..., 108. , 78 | 108.375 , 108. ], 79 | ..., 80 | [-67.75 , -75.625 , -79.25 , ..., -17. , 81 | -15.375 , -20.125 ], 82 | [-70.375 , -73.375 , -77.375 , ..., -34.624992 , 83 | -30.375 , -23.875 ], 84 | [-74.875 , -73.875 , -77.875 , ..., -54.124992 , 85 | -46.875 , -30.375 ]], 86 | 87 | [[-68. , -40.5 , -35. , ..., 127. , 88 | 127. , 127. ], 89 | [-64. , -38.5 , -35. , ..., 127. , 90 | 127. , 127. ], 91 | [-58. , -35.875 , -35.749992 , ..., 127. , 92 | 127. , 127. 
], 93 | ..., 94 | [-48.375 , -57.5 , -61.875 , ..., 3.7500153 , 95 | 5.375 , 0.62501526], 96 | [-51.375 , -56.375 , -60.375 , ..., -14.625 , 97 | -10.374992 , -3.8749924 ], 98 | [-55.875 , -56.875 , -60.875 , ..., -34.124992 , 99 | -26.875 , -10.375 ]]]], dtype=float32)}}, 'hbdk_dict': {'hbdk_pass_through_params': '--fast --O3', 'input-source': {'data': 'ddr', '_default_value': 'ddr'}}, 'node_dict': {}} 100 | 2024-01-11 10:21:18,187 file: build.py func: build line No: 36 [Thu Jan 11 10:21:18 2024] Start to Horizon NN Model Convert. 101 | 2024-01-11 10:21:18,188 file: dict_parser.py func: dict_parser line No: 32 Parsing the input parameter:{'data': {'input_shape': [1, 3, 640, 640], 'expected_input_type': 'RGB_128', 'original_input_type': 'RGB', 'original_input_layout': 'NCHW', 'scales': array([0.00392157], dtype=float32)}} 102 | 2024-01-11 10:21:18,188 file: build.py func: build line No: 237 Parsing the calibration parameter 103 | 2024-01-11 10:21:18,188 file: dict_parser.py func: dict_parser line No: 518 Parsing the hbdk parameter:{'hbdk_pass_through_params': '--fast --O3', 'input-source': {'data': 'ddr', '_default_value': 'ddr'}} 104 | 2024-01-11 10:21:18,188 file: build.py func: build line No: 143 HorizonNN version: 0.13.3 105 | 2024-01-11 10:21:18,188 file: build.py func: build line No: 147 HBDK version: 3.27.4 106 | 2024-01-11 10:21:18,188 file: build.py func: build line No: 36 [Thu Jan 11 10:21:18 2024] Start to parse the onnx model. 107 | 2024-01-11 10:21:18,229 file: onnx_parser.py func: onnx_parser line No: 146 ONNX model info: 108 | ONNX IR version: 6 109 | Opset version: 11 110 | Input name: data, [1, 3, 640, 640] 111 | 2024-01-11 10:21:18,279 file: build.py func: build line No: 39 [Thu Jan 11 10:21:18 2024] End to parse the onnx model. 112 | 2024-01-11 10:21:18,279 file: build.py func: build line No: 266 Model input names: ['data'] 113 | 2024-01-11 10:21:18,280 file: dict_parser.py func: dict_parser line No: 288 Create a preprocessing operator for input_name data with means=None, std=[254.99998492], original_input_layout=NCHW, color convert from 'RGB' to 'RGB'. 114 | 2024-01-11 10:21:18,328 file: build.py func: build line No: 537 Saving the original float model: yolov8_original_float_model.onnx. 115 | 2024-01-11 10:21:18,329 file: build.py func: build line No: 36 [Thu Jan 11 10:21:18 2024] Start to optimize the model. 116 | 2024-01-11 10:21:18,513 file: build.py func: build line No: 39 [Thu Jan 11 10:21:18 2024] End to optimize the model. 117 | 2024-01-11 10:21:18,532 file: build.py func: build line No: 548 Saving the optimized model: yolov8_optimized_float_model.onnx. 118 | 2024-01-11 10:21:18,532 file: build.py func: build line No: 36 [Thu Jan 11 10:21:18 2024] Start to calibrate the model. 119 | 2024-01-11 10:21:18,616 file: calibration_data_set.py func: calibration_data_set line No: 67 There are 1 samples in the calibration data set. 120 | 2024-01-11 10:21:18,618 file: default_calibrater.py func: default_calibrater line No: 145 Run calibration model with default calibration method. 121 | 2024-01-11 10:21:27,056 file: default_calibrater.py func: default_calibrater line No: 169 Select max-percentile:percentile=0.99995 method. 122 | 2024-01-11 10:21:27,071 file: build.py func: build line No: 39 [Thu Jan 11 10:21:27 2024] End to calibrate the model. 123 | 2024-01-11 10:21:27,072 file: build.py func: build line No: 36 [Thu Jan 11 10:21:27 2024] Start to quantize the model. 
124 | 2024-01-11 10:21:29,972 file: build.py func: build line No: 39 [Thu Jan 11 10:21:29 2024] End to quantize the model. 125 | 2024-01-11 10:21:30,064 file: build.py func: build line No: 562 Saving the quantized model: yolov8_quantized_model.onnx. 126 | 2024-01-11 10:21:30,065 file: build.py func: build line No: 36 [Thu Jan 11 10:21:30 2024] Start to compile the model with march bernoulli2. 127 | 2024-01-11 10:21:30,199 file: hybrid_build.py func: hybrid_build line No: 123 Compile submodel: torch-jit-export_subgraph_0 128 | 2024-01-11 10:21:30,381 file: hbdk_cc.py func: hbdk_cc line No: 119 hbdk-cc parameters:['--fast', '--O3', '--input-layout', 'NHWC', '--output-layout', 'NCHW', '--input-source', 'ddr'] 129 | 2024-01-11 10:21:43,761 file: tool_utils.py func: tool_utils line No: 293 consumed time 13.3507 130 | 2024-01-11 10:21:43,934 file: build.py func: build line No: 39 [Thu Jan 11 10:21:43 2024] End to compile the model with march bernoulli2. 131 | 2024-01-11 10:21:43,937 file: node_info.py func: node_info line No: 54 The converted model node information: 132 | ============================================================================================================================================ 133 | Node ON Subgraph Type Cosine Similarity Threshold 134 | -------------------------------------------------------------------------------------------------------------------------------------------- 135 | HZ_PREPROCESS_FOR_data BPU id(0) HzSQuantizedPreprocess 1.000012 127.000000 136 | Conv_0 BPU id(0) HzSQuantizedConv 0.999544 1.000000 137 | Conv_2 BPU id(0) HzSQuantizedConv 0.999552 12.763289 138 | Conv_4 BPU id(0) HzSQuantizedConv 0.999467 11.783392 139 | Split_6 BPU id(0) Split 0.999342 9.035983 140 | Conv_7 BPU id(0) HzSQuantizedConv 0.999538 9.035983 141 | Conv_9 BPU id(0) HzSQuantizedConv 0.999436 7.003490 142 | UNIT_CONV_FOR_Add_11 BPU id(0) HzSQuantizedConv 0.999531 9.035983 143 | UNIT_CONV_FOR_365_0.075266055762768_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 144 | UNIT_CONV_FOR_366_0.075266055762768_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 145 | Concat_12 BPU id(0) Concat 0.999461 9.035983 146 | Conv_13 BPU id(0) HzSQuantizedConv 0.999274 9.558789 147 | Conv_15 BPU id(0) HzSQuantizedConv 0.999484 8.257295 148 | Conv_17 BPU id(0) HzSQuantizedConv 0.999387 8.837413 149 | Split_19 BPU id(0) Split 0.999313 6.006452 150 | Conv_20 BPU id(0) HzSQuantizedConv 0.999547 6.006452 151 | Conv_22 BPU id(0) HzSQuantizedConv 0.999518 4.893088 152 | UNIT_CONV_FOR_Add_24 BPU id(0) HzSQuantizedConv 0.999544 6.006452 153 | Conv_25 BPU id(0) HzSQuantizedConv 0.999244 6.582415 154 | Conv_27 BPU id(0) HzSQuantizedConv 0.999078 4.284927 155 | UNIT_CONV_FOR_Add_29 BPU id(0) HzSQuantizedConv 0.999474 6.582415 156 | UNIT_CONV_FOR_384_0.060171827673912_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 157 | UNIT_CONV_FOR_385_0.060171827673912_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 158 | UNIT_CONV_FOR_392_0.060171827673912_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 159 | Concat_30 BPU id(0) Concat 0.999439 6.006452 160 | Conv_31 BPU id(0) HzSQuantizedConv 0.999210 7.641822 161 | Conv_33 BPU id(0) HzSQuantizedConv 0.999573 5.585854 162 | Conv_35 BPU id(0) HzSQuantizedConv 0.999497 5.087503 163 | Split_37 BPU id(0) Split 0.999419 4.461800 164 | Conv_38 BPU id(0) HzSQuantizedConv 0.999506 4.461800 165 | Conv_40 BPU id(0) HzSQuantizedConv 0.999528 6.545473 166 | UNIT_CONV_FOR_Add_42 BPU id(0) HzSQuantizedConv 0.999604 4.461800 167 | Conv_43 BPU id(0) HzSQuantizedConv 0.999374 6.330410 168 | Conv_45 BPU id(0) 
HzSQuantizedConv 0.999329 5.536530 169 | UNIT_CONV_FOR_Add_47 BPU id(0) HzSQuantizedConv 0.999567 6.330410 170 | UNIT_CONV_FOR_410_0.062840834259987_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 171 | UNIT_CONV_FOR_411_0.062840834259987_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 172 | UNIT_CONV_FOR_418_0.062840834259987_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 173 | Concat_48 BPU id(0) Concat 0.999519 4.461800 174 | Conv_49 BPU id(0) HzSQuantizedConv 0.999330 7.980786 175 | Conv_51 BPU id(0) HzSQuantizedConv 0.999609 5.351624 176 | Conv_53 BPU id(0) HzSQuantizedConv 0.999579 5.986984 177 | Split_55 BPU id(0) Split 0.999567 5.149291 178 | Conv_56 BPU id(0) HzSQuantizedConv 0.999633 5.149291 179 | Conv_58 BPU id(0) HzSQuantizedConv 0.999663 4.403012 180 | UNIT_CONV_FOR_Add_60 BPU id(0) HzSQuantizedConv 0.999663 5.149291 181 | UNIT_CONV_FOR_436_0.047307271510363_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 182 | UNIT_CONV_FOR_437_0.047307271510363_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 183 | Concat_61 BPU id(0) Concat 0.999594 5.149291 184 | Conv_62 BPU id(0) HzSQuantizedConv 0.999554 6.008023 185 | Conv_64 BPU id(0) HzSQuantizedConv 0.999462 4.405106 186 | MaxPool_66 BPU id(0) HzQuantizedMaxPool 0.999744 5.395553 187 | MaxPool_67 BPU id(0) HzQuantizedMaxPool 0.999820 5.395553 188 | MaxPool_68 BPU id(0) HzQuantizedMaxPool 0.999860 5.395553 189 | Concat_69 BPU id(0) Concat 0.999795 5.395553 190 | Conv_70 BPU id(0) HzSQuantizedConv 0.999512 5.395553 191 | Resize_73 BPU id(0) HzQuantizedResizeUpsample 0.999514 4.942308 192 | UNIT_CONV_FOR_429_0.038915812969208_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 193 | Concat_74 BPU id(0) Concat 0.999450 4.942308 194 | Conv_75 BPU id(0) HzSQuantizedConv 0.999291 4.942308 195 | Split_77 BPU id(0) Split 0.999275 4.619433 196 | Conv_78 BPU id(0) HzSQuantizedConv 0.999279 4.619433 197 | Conv_80 BPU id(0) HzSQuantizedConv 0.999327 4.529199 198 | UNIT_CONV_FOR_468_0.037767473608255_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 199 | UNIT_CONV_FOR_469_0.037767473608255_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 200 | Concat_82 BPU id(0) Concat 0.999277 4.619433 201 | Conv_83 BPU id(0) HzSQuantizedConv 0.998956 4.796469 202 | Resize_86 BPU id(0) HzQuantizedResizeUpsample 0.998948 5.176179 203 | UNIT_CONV_FOR_403_0.040757317095995_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 204 | Concat_87 BPU id(0) Concat 0.999026 5.176179 205 | Conv_88 BPU id(0) HzSQuantizedConv 0.998809 5.176179 206 | Split_90 BPU id(0) Split 0.998627 4.068692 207 | Conv_91 BPU id(0) HzSQuantizedConv 0.998702 4.068692 208 | Conv_93 BPU id(0) HzSQuantizedConv 0.998439 4.666387 209 | UNIT_CONV_FOR_489_0.034733634442091_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 210 | UNIT_CONV_FOR_490_0.034733634442091_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 211 | Concat_95 BPU id(0) Concat 0.998644 4.068692 212 | Conv_96 BPU id(0) HzSQuantizedConv 0.998172 4.411171 213 | Conv_98 BPU id(0) HzSQuantizedConv 0.998492 4.938739 214 | UNIT_CONV_FOR_479_0.039904728531837_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 215 | Concat_100 BPU id(0) Concat 0.998779 5.067901 216 | Conv_101 BPU id(0) HzSQuantizedConv 0.998518 5.067901 217 | Split_103 BPU id(0) Split 0.998540 4.719296 218 | Conv_104 BPU id(0) HzSQuantizedConv 0.998600 4.719296 219 | Conv_106 BPU id(0) HzSQuantizedConv 0.998648 4.189234 220 | UNIT_CONV_FOR_508_0.036984007805586_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 221 | UNIT_CONV_FOR_509_0.036984007805586_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 222 | Concat_108 BPU id(0) Concat 0.998555 4.719296 223 | Conv_109 BPU id(0) 
HzSQuantizedConv 0.998193 4.696969 224 | Conv_111 BPU id(0) HzSQuantizedConv 0.998624 4.688850 225 | UNIT_CONV_FOR_458_0.033800829201937_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 226 | Concat_113 BPU id(0) Concat 0.999246 4.292706 227 | Conv_114 BPU id(0) HzSQuantizedConv 0.998952 4.292706 228 | Split_116 BPU id(0) Split 0.998916 4.191041 229 | Conv_117 BPU id(0) HzSQuantizedConv 0.999077 4.191041 230 | Conv_119 BPU id(0) HzSQuantizedConv 0.999117 4.719520 231 | UNIT_CONV_FOR_527_0.043455522507429_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 232 | UNIT_CONV_FOR_528_0.043455522507429_TO_FUSE_SCALE BPU id(0) HzSQuantizedConv 233 | Concat_121 BPU id(0) Concat 0.998989 4.191041 234 | Conv_122 BPU id(0) HzSQuantizedConv 0.998745 5.518851 235 | Conv_124 BPU id(0) HzSQuantizedConv 0.997957 4.938739 236 | Conv_126 BPU id(0) HzSQuantizedConv 0.998296 5.577137 237 | Conv_128 BPU id(0) HzSQuantizedConv 0.998942 7.808588 238 | Conv_129 BPU id(0) HzSQuantizedConv 0.998178 4.938739 239 | Conv_131 BPU id(0) HzSQuantizedConv 0.997692 4.597701 240 | Conv_133 BPU id(0) HzSQuantizedConv 0.999980 7.064566 241 | Conv_134 BPU id(0) HzSQuantizedConv 0.998697 4.688850 242 | Conv_136 BPU id(0) HzSQuantizedConv 0.998738 5.012026 243 | Conv_138 BPU id(0) HzSQuantizedConv 0.999400 9.166838 244 | Conv_139 BPU id(0) HzSQuantizedConv 0.998162 4.688850 245 | Conv_141 BPU id(0) HzSQuantizedConv 0.997885 4.587978 246 | Conv_143 BPU id(0) HzSQuantizedConv 0.999991 5.935476 247 | Conv_144 BPU id(0) HzSQuantizedConv 0.999312 5.626285 248 | Conv_146 BPU id(0) HzSQuantizedConv 0.999458 5.504189 249 | Conv_148 BPU id(0) HzSQuantizedConv 0.999676 10.797597 250 | Conv_149 BPU id(0) HzSQuantizedConv 0.998840 5.626285 251 | Conv_151 BPU id(0) HzSQuantizedConv 0.998835 5.892544 252 | Conv_153 BPU id(0) HzSQuantizedConv 0.999989 8.530767 253 | 2024-01-11 10:21:43,937 file: build.py func: build line No: 621 The quantify model output: 254 | =========================================================================== 255 | Node Cosine Similarity L1 Distance L2 Distance Chebyshev Distance 256 | --------------------------------------------------------------------------- 257 | Conv_128 0.998942 0.070749 0.000160 1.376420 258 | Conv_133 0.999980 0.051573 0.000101 0.788261 259 | Conv_138 0.999400 0.037944 0.000176 0.724239 260 | Conv_143 0.999991 0.037361 0.000145 0.481236 261 | Conv_148 0.999676 0.034722 0.000314 0.367694 262 | Conv_153 0.999989 0.033531 0.000259 0.482755 263 | 2024-01-11 10:21:43,938 file: build.py func: build line No: 39 [Thu Jan 11 10:21:43 2024] End to Horizon NN Model Convert. 264 | 2024-01-11 10:21:43,980 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 641 start convert to *.bin file.... 
265 | 2024-01-11 10:21:44,010 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2841 ONNX model output num : 6 266 | 2024-01-11 10:21:44,011 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2717 ############# model deps info ############# 267 | 2024-01-11 10:21:44,011 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2719 hb_mapper version : 1.6.8a 268 | 2024-01-11 10:21:44,011 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2722 hbdk version : 3.27.4 269 | 2024-01-11 10:21:44,011 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2724 hbdk runtime version: 3.13.27 270 | 2024-01-11 10:21:44,011 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2727 horizon_nn version : 0.13.3 271 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2730 ############# model_parameters info ############# 272 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2736 onnx_model : /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/model/yolov8_relu_80class_ZQ.onnx 273 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2737 BPU march : bernoulli2 274 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2738 layer_out_dump : False 275 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2739 log_level : DEBUG 276 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2740 working dir : /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/model_output 277 | 2024-01-11 10:21:44,012 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2741 output_model_file_prefix: yolov8 278 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2748 ############# input_parameters info ############# 279 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2765 ------------------------------------------ 280 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2767 ---------input info : data --------- 281 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2768 input_name : data 282 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2769 input_type_rt : rgb 283 | 2024-01-11 10:21:44,013 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2772 input_space&range : regular 284 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2773 input_layout_rt : NCHW 285 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2774 input_type_train : rgb 286 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2775 input_layout_train : NCHW 287 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2776 norm_type : data_scale 288 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2777 input_shape : 1x3x640x640 289 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2783 scale_value : 0.003921568627451, 290 | 2024-01-11 10:21:44,014 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2785 cal_data_dir : /convert_model/horizon_model_convert_sample/04_detection/03_yolov8_NoDFL/mapper/cal_data 291 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2786 ---------input info : data end ------- 292 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2787 
------------------------------------------ 293 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2789 ############# calibration_parameters info ############# 294 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2790 preprocess_on : True 295 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2791 calibration_type: : default 296 | 2024-01-11 10:21:44,015 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2815 ############# compiler_parameters info ############# 297 | 2024-01-11 10:21:44,016 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2819 hbdk_pass_through_params: --fast --O3 298 | 2024-01-11 10:21:44,016 file: onnx2horizonrt.py func: onnx2horizonrt line No: 2819 input-source : {'data': 'ddr', '_default_value': 'ddr'} 299 | 2024-01-11 10:21:44,018 file: layout_util.py func: layout_util line No: 13 set_featuremap_layout start 300 | 2024-01-11 10:21:44,020 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 768 Convert to runtime bin file sucessfully! 301 | 2024-01-11 10:21:44,020 file: hb_mapper_makertbin.py func: hb_mapper_makertbin line No: 769 End Model Convert 302 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/inference_image_demo.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from horizon_tc_ui import HB_ONNXRuntime 3 | from horizon_tc_ui.utils.tool_utils import init_root_logger 4 | from math import exp 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 10 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 11 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 12 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 13 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 14 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 15 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 16 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 17 | 'hair drier', 'toothbrush'] 18 | 19 | meshgrid = [] 20 | 21 | class_num = len(CLASSES) 22 | headNum = 3 23 | strides = [8, 16, 32] 24 | mapSize = [[80, 80], [40, 40], [20, 20]] 25 | nmsThresh = 0.45 26 | objectThresh = 0.35 27 | 28 | input_imgH = 640 29 | input_imgW = 640 30 | 31 | 32 | class DetectBox: 33 | def __init__(self, classId, score, xmin, ymin, xmax, ymax): 34 | self.classId = classId 35 | self.score = score 36 | self.xmin = xmin 37 | self.ymin = ymin 38 | self.xmax = xmax 39 | self.ymax = ymax 40 | 41 | 42 | def GenerateMeshgrid(): 43 | for index in range(headNum): 44 | for i in range(mapSize[index][0]): 45 | for j in range(mapSize[index][1]): 46 | meshgrid.append(j + 0.5) 47 | meshgrid.append(i + 0.5) 48 | 49 | 50 | def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2): 51 | xmin = max(xmin1, xmin2) 52 | ymin = max(ymin1, ymin2) 53 | xmax = min(xmax1, xmax2) 54 | ymax = min(ymax1, ymax2) 55 | 56 | innerWidth = xmax - xmin 57 | innerHeight = ymax - ymin 58 | 59 | innerWidth = innerWidth if innerWidth > 0 else 0 60 | 
innerHeight = innerHeight if innerHeight > 0 else 0 61 | 62 | innerArea = innerWidth * innerHeight 63 | 64 | area1 = (xmax1 - xmin1) * (ymax1 - ymin1) 65 | area2 = (xmax2 - xmin2) * (ymax2 - ymin2) 66 | 67 | total = area1 + area2 - innerArea 68 | 69 | return innerArea / total 70 | 71 | 72 | def NMS(detectResult): 73 | predBoxs = [] 74 | 75 | sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True) 76 | 77 | for i in range(len(sort_detectboxs)): 78 | xmin1 = sort_detectboxs[i].xmin 79 | ymin1 = sort_detectboxs[i].ymin 80 | xmax1 = sort_detectboxs[i].xmax 81 | ymax1 = sort_detectboxs[i].ymax 82 | classId = sort_detectboxs[i].classId 83 | 84 | if sort_detectboxs[i].classId != -1: 85 | predBoxs.append(sort_detectboxs[i]) 86 | for j in range(i + 1, len(sort_detectboxs), 1): 87 | if classId == sort_detectboxs[j].classId: 88 | xmin2 = sort_detectboxs[j].xmin 89 | ymin2 = sort_detectboxs[j].ymin 90 | xmax2 = sort_detectboxs[j].xmax 91 | ymax2 = sort_detectboxs[j].ymax 92 | iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2) 93 | if iou > nmsThresh: 94 | sort_detectboxs[j].classId = -1 95 | return predBoxs 96 | 97 | 98 | def sigmoid(x): 99 | return 1 / (1 + exp(-x)) 100 | 101 | 102 | def postprocess(out, img_h, img_w): 103 | print('postprocess ... ') 104 | 105 | detectResult = [] 106 | output = [] 107 | for i in range(len(out)): 108 | print(out[i].shape) 109 | output.append(out[i].reshape((-1))) 110 | 111 | scale_h = img_h / input_imgH 112 | scale_w = img_w / input_imgW 113 | 114 | gridIndex = -2 115 | cls_index = 0 116 | cls_max = 0 117 | 118 | for index in range(headNum): 119 | reg = output[index * 2 + 0] 120 | cls = output[index * 2 + 1] 121 | 122 | for h in range(mapSize[index][0]): 123 | for w in range(mapSize[index][1]): 124 | gridIndex += 2 125 | 126 | if 1 == class_num: 127 | cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 128 | cls_index = 0 129 | else: 130 | for cl in range(class_num): 131 | cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] 132 | if 0 == cl: 133 | cls_max = cls_val 134 | cls_index = cl 135 | else: 136 | if cls_val > cls_max: 137 | cls_max = cls_val 138 | cls_index = cl 139 | cls_max = sigmoid(cls_max) 140 | 141 | if cls_max > objectThresh: 142 | regdfl = [] 143 | for lc in range(4): 144 | sfsum = 0 145 | locval = 0 146 | for df in range(16): 147 | temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 148 | reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp 149 | sfsum += temp 150 | 151 | for df in range(16): 152 | sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum 153 | locval += sfval * df 154 | regdfl.append(locval) 155 | 156 | x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index] 157 | y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index] 158 | x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index] 159 | y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index] 160 | 161 | xmin = x1 * scale_w 162 | ymin = y1 * scale_h 163 | xmax = x2 * scale_w 164 | ymax = y2 * scale_h 165 | 166 | xmin = xmin if xmin > 0 else 0 167 | ymin = ymin if ymin > 0 else 0 168 | xmax = xmax if xmax < img_w else img_w 169 | ymax = ymax if ymax < img_h else img_h 170 | 171 | box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax) 172 | detectResult.append(box) 173 | # NMS 174 | print('detectResult:', 
len(detectResult)) 175 | predBox = NMS(detectResult) 176 | 177 | return predBox 178 | 179 | 180 | def preprocess(src_image): 181 | src_image = cv2.cvtColor(src_image, cv2.COLOR_BGR2RGB) 182 | img = cv2.resize(src_image, (input_imgW, input_imgH)) 183 | return img 184 | 185 | 186 | def inference(model_path, image_path, input_layout, input_offset): 187 | # init_root_logger("inference.log", console_level=logging.INFO, file_level=logging.DEBUG) 188 | 189 | sess = HB_ONNXRuntime(model_file=model_path) 190 | sess.set_dim_param(0, 0, '?') 191 | 192 | if input_layout is None: 193 | logging.warning(f"input_layout not provided. Using {sess.layout[0]}") 194 | input_layout = sess.layout[0] 195 | 196 | origimg = cv2.imread(image_path) 197 | img_h, img_w = origimg.shape[:2] 198 | image_data = preprocess(origimg) 199 | 200 | # image_data = image_data.transpose((2, 0, 1)) 201 | image_data = np.expand_dims(image_data, axis=0) 202 | 203 | input_name = sess.input_names[0] 204 | output_name = sess.output_names 205 | output = sess.run(output_name, {input_name: image_data}, input_offset=input_offset) 206 | 207 | print('inference finished, output len is:', len(output)) 208 | 209 | out = [] 210 | for i in range(len(output)): 211 | out.append(output[i]) 212 | 213 | predbox = postprocess(out, img_h, img_w) 214 | 215 | print('detect object num is:', len(predbox)) 216 | 217 | for i in range(len(predbox)): 218 | xmin = int(predbox[i].xmin) 219 | ymin = int(predbox[i].ymin) 220 | xmax = int(predbox[i].xmax) 221 | ymax = int(predbox[i].ymax) 222 | classId = predbox[i].classId 223 | score = predbox[i].score 224 | 225 | cv2.rectangle(origimg, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) 226 | ptext = (xmin, ymin) 227 | title = CLASSES[classId] + "%.2f" % score 228 | cv2.putText(origimg, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) 229 | 230 | cv2.imwrite('./test_horizon_result.jpg', origimg) 231 | # cv2.imshow("test", origimg) 232 | # cv2.waitKey(0) 233 | 234 | 235 | if __name__ == '__main__': 236 | print('This main ... 
') 237 | GenerateMeshgrid() 238 | 239 | model_path = './model_output/yolov8_quantized_model.onnx' 240 | image_path = './test.jpg' 241 | input_layout = 'NHWC' 242 | input_offset = 128 243 | 244 | inference(model_path, image_path, input_layout, input_offset) 245 | 246 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/model/yolov8_relu_80class_ZQ.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/model/yolov8_relu_80class_ZQ.onnx -------------------------------------------------------------------------------- /yolov8_herizon/mapper/model_output/yolov8.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/model_output/yolov8.bin -------------------------------------------------------------------------------- /yolov8_herizon/mapper/model_output/yolov8_quantized_model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/model_output/yolov8_quantized_model.onnx -------------------------------------------------------------------------------- /yolov8_herizon/mapper/preprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Horizon Robotics.All Rights Reserved. 2 | # 3 | # The material in this file is confidential and contains trade secrets 4 | # of Horizon Robotics Inc. This is proprietary information owned by 5 | # Horizon Robotics Inc. No part of this work may be disclosed, 6 | # reproduced, copied, transmitted, or used in any way for any purpose, 7 | # without the express written permission of Horizon Robotics Inc. 
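# Note: calibration_transformers() below pad-resizes to 672 x 672, which looks inherited from the
# Horizon yolov5 sample; the yolov8 model converted in this repo expects a 1x3x640x640 input
# (see yolov8_config.yaml and hb_mapper_makertbin.log, where the calibration data shape is
# (1, 3, 640, 640)), so target_size here would need to be (640, 640) if the calibration data
# were regenerated with these transformers.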
8 | 9 | import sys 10 | sys.path.append("../../../01_common/python/data/") 11 | from transformer import * 12 | from dataloader import * 13 | 14 | 15 | def calibration_transformers(): 16 | """ 17 | step: 18 | 1、pad resize to 672 * 672 19 | 2、NHWC to NCHW 20 | 3、bgr to rgb 21 | """ 22 | transformers = [ 23 | PadResizeTransformer(target_size=(672, 672)), 24 | HWC2CHWTransformer(), 25 | BGR2RGBTransformer(data_format="CHW"), 26 | ] 27 | return transformers 28 | 29 | 30 | def infer_transformers(input_shape, input_layout="NHWC"): 31 | """ 32 | step: 33 | 1、pad resize to target_size(input_shape) 34 | 2、bgr to rgb 35 | 3、rgb to nv12 36 | 3、nv12 to yuv444 37 | :param input_shape: input shape(target size) 38 | :param input_layout: NCHW / NHWC 39 | """ 40 | transformers = [ 41 | PadResizeTransformer(target_size=input_shape), 42 | BGR2RGBTransformer(data_format="HWC"), 43 | RGB2NV12Transformer(data_format="HWC"), 44 | NV12ToYUV444Transformer(target_size=input_shape, 45 | yuv444_output_layout=input_layout[1:]), 46 | ] 47 | return transformers 48 | 49 | 50 | def infer_image_preprocess(image_file, input_layout, input_shape): 51 | """ 52 | image for single image inference 53 | note: imread_mode [skimage / opencv] 54 | opencv read image as 8-bit unsigned integers BGR in range [0, 255] 55 | skimage read image as float32 RGB in range [0, 1] 56 | make sure to use the same imread_mode as the model training 57 | :param image_file: image file 58 | :param input_layout: NCHW / NHWC 59 | :param input_shape: input shape(target size) 60 | :return: origin image, processed image (uint8, 0-255) 61 | """ 62 | transformers = infer_transformers(input_shape, input_layout) 63 | origin_image, processed_image = SingleImageDataLoaderWithOrigin( 64 | transformers, image_file, imread_mode="opencv") 65 | return origin_image, processed_image 66 | 67 | 68 | def eval_image_preprocess(image_path, annotation_path, input_shape, 69 | input_layout): 70 | """ 71 | image for full scale evaluation 72 | note: imread_mode [skimage / opencv] 73 | opencv read image as 8-bit unsigned integers BGR in range [0, 255] 74 | skimage read image as float32 RGB in range [0, 1] 75 | make sure to use the same imread_mode as the model training 76 | :param image_path: image path 77 | :param annotation_path: annotation path 78 | :param input_shape: input shape(target size) 79 | :param input_layout: input layout 80 | :return: data loader 81 | """ 82 | transformers = infer_transformers(input_shape, input_layout) 83 | data_loader = COCODataLoader(transformers, 84 | image_path, 85 | annotation_path, 86 | imread_mode='opencv') 87 | 88 | return data_loader 89 | -------------------------------------------------------------------------------- /yolov8_herizon/mapper/src_data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/src_data/test.jpg -------------------------------------------------------------------------------- /yolov8_herizon/mapper/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/test.jpg -------------------------------------------------------------------------------- /yolov8_herizon/mapper/test_horizon_result.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_herizon/mapper/test_horizon_result.jpg -------------------------------------------------------------------------------- /yolov8_herizon/mapper/yolov8_config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Horizon Robotics.All Rights Reserved. 2 | # 3 | # The material in this file is confidential and contains trade secrets 4 | # of Horizon Robotics Inc. This is proprietary information owned by 5 | # Horizon Robotics Inc. No part of this work may be disclosed, 6 | # reproduced, copied, transmitted, or used in any way for any purpose, 7 | # without the express written permission of Horizon Robotics Inc. 8 | 9 | # 模型转化相关的参数 10 | # ------------------------------------ 11 | # model conversion related parameters 12 | model_parameters: 13 | # Onnx浮点网络数据模型文件 14 | # ----------------------------------------------------------- 15 | # the model file of floating-point ONNX neural network data 16 | onnx_model: './model/yolov8_relu_80class_ZQ.onnx' 17 | 18 | # 适用BPU架构 19 | # -------------------------------- 20 | # the applicable BPU architecture 21 | march: "bernoulli2" 22 | 23 | # 指定模型转换过程中是否输出各层的中间结果,如果为True,则输出所有层的中间输出结果, 24 | # -------------------------------------------------------------------------------------- 25 | # specifies whether or not to dump the intermediate results of all layers in conversion 26 | # if set to True, then the intermediate results of all layers shall be dumped 27 | layer_out_dump: False 28 | 29 | # 日志文件的输出控制参数, 30 | # debug输出模型转换的详细信息 31 | # info只输出关键信息 32 | # warn输出警告和错误级别以上的信息 33 | # ---------------------------------------------------------------------------------------- 34 | # output control parameter of log file(s), 35 | # if set to 'debug', then details of model conversion will be dumped 36 | # if set to 'info', then only important imformation will be dumped 37 | # if set to 'warn', then information ranked higher than 'warn' and 'error' will be dumped 38 | log_level: 'debug' 39 | 40 | # 模型转换输出的结果的存放目录 41 | # ----------------------------------------------------------- 42 | # the directory in which model conversion results are stored 43 | working_dir: 'model_output' 44 | 45 | # 模型转换输出的用于上板执行的模型文件的名称前缀 46 | # ----------------------------------------------------------------------------------------- 47 | # model conversion generated name prefix of those model files used for dev board execution 48 | output_model_file_prefix: 'yolov8' 49 | 50 | # 模型输入相关参数, 若输入多个节点, 则应使用';'进行分隔, 使用默认缺省设置则写None 51 | # -------------------------------------------------------------------------- 52 | # model input related parameters, 53 | # please use ";" to seperate when inputting multiple nodes, 54 | # please use None for default setting 55 | input_parameters: 56 | 57 | # (选填) 模型输入的节点名称, 此名称应与模型文件中的名称一致, 否则会报错, 不填则会使用模型文件中的节点名称 58 | # -------------------------------------------------------------------------------------------------------- 59 | # (Optional) node name of model input, 60 | # it shall be the same as the name of model file, otherwise an error will be reported, 61 | # the node name of model file will be used when left blank 62 | input_name: "" 63 | 64 | # 网络实际执行时,输入给网络的数据格式,包括 nv12/rgb/bgr/yuv444/gray/featuremap, 65 | # ------------------------------------------------------------------------------------------ 66 | # 
the data formats to be passed into neural network when actually performing neural network 67 | # available options: nv12/rgb/bgr/yuv444/gray/featuremap, 68 | input_type_rt: 'rgb' 69 | 70 | # 网络实际执行时输入的数据排布, 可选值为 NHWC/NCHW 71 | # 若input_type_rt配置为nv12,则此处参数不需要配置 72 | # ------------------------------------------------------------------ 73 | # the data layout formats to be passed into neural network when actually performing neural network, available options: NHWC/NCHW 74 | # If input_type_rt is configured as nv12, then this parameter does not need to be configured 75 | input_layout_rt: 'NCHW' 76 | 77 | # 网络训练时输入的数据格式,可选的值为rgb/bgr/gray/featuremap/yuv444 78 | # -------------------------------------------------------------------- 79 | # the data formats in network training 80 | # available options: rgb/bgr/gray/featuremap/yuv444 81 | input_type_train: 'rgb' 82 | 83 | # 网络训练时输入的数据排布, 可选值为 NHWC/NCHW 84 | # ------------------------------------------------------------------ 85 | # the data layout in network training, available options: NHWC/NCHW 86 | input_layout_train: 'NCHW' 87 | 88 | # (选填) 模型网络的输入大小, 以'x'分隔, 不填则会使用模型文件中的网络输入大小,否则会覆盖模型文件中输入大小 89 | # ------------------------------------------------------------------------------------------- 90 | # (Optional)the input size of model network, seperated by 'x' 91 | # note that the network input size of model file will be used if left blank 92 | # otherwise it will overwrite the input size of model file 93 | input_shape: '' 94 | 95 | # 网络实际执行时,输入给网络的batch_size, 默认值为1 96 | # --------------------------------------------------------------------- 97 | # the data batch_size to be passed into neural network when actually performing neural network, default value: 1 98 | #input_batch: 1 99 | 100 | # 网络输入的预处理方法,主要有以下几种: 101 | # no_preprocess 不做任何操作 102 | # data_mean 减去通道均值mean_value 103 | # data_scale 对图像像素乘以data_scale系数 104 | # data_mean_and_scale 减去通道均值后再乘以scale系数 105 | # ------------------------------------------------------------------------------------------- 106 | # preprocessing methods of network input, available options: 107 | # 'no_preprocess' indicates that no preprocess will be made 108 | # 'data_mean' indicates that to minus the channel mean, i.e. mean_value 109 | # 'data_scale' indicates that image pixels to multiply data_scale ratio 110 | # 'data_mean_and_scale' indicates that to multiply scale ratio after channel mean is minused 111 | norm_type: 'data_scale' 112 | 113 | # 图像减去的均值, 如果是通道均值,value之间必须用空格分隔 114 | # -------------------------------------------------------------------------- 115 | # the mean value minused by image 116 | # note that values must be seperated by space if channel mean value is used 117 | mean_value: '' 118 | 119 | # 图像预处理缩放比例,如果是通道缩放比例,value之间必须用空格分隔 120 | # --------------------------------------------------------------------------- 121 | # scale value of image preprocess 122 | # note that values must be seperated by space if channel scale value is used 123 | scale_value: 0.003921568627451 124 | 125 | # 模型量化相关参数 126 | # ----------------------------- 127 | # model calibration parameters 128 | calibration_parameters: 129 | 130 | # 模型量化的参考图像的存放目录,图片格式支持Jpeg、Bmp等格式,输入的图片 131 | # 应该是使用的典型场景,一般是从测试集中选择20~100张图片,另外输入 132 | # 的图片要覆盖典型场景,不要是偏僻场景,如过曝光、饱和、模糊、纯黑、纯白等图片 133 | # 若有多个输入节点, 则应使用';'进行分隔 134 | # ------------------------------------------------------------------------------------------------- 135 | # the directory where reference images of model quantization are stored 136 | # image formats include JPEG, BMP etc. 
137 | # should be classic application scenarios, usually 20~100 images are picked out from test datasets 138 | # in addition, note that input images should cover typical scenarios 139 | # and try to avoid those overexposed, oversaturated, vague, 140 | # pure blank or pure white images 141 | # use ';' to seperate when there are multiple input nodes 142 | cal_data_dir: './cal_data' 143 | 144 | # 如果输入的图片文件尺寸和模型训练的尺寸不一致时,并且preprocess_on为true, 145 | # 则将采用默认预处理方法(skimage resize), 146 | # 将输入图片缩放或者裁减到指定尺寸,否则,需要用户提前把图片处理为训练时的尺寸 147 | # --------------------------------------------------------------------------------- 148 | # In case the size of input image file is different from that of in model training 149 | # and that preprocess_on is set to True, 150 | # shall the default preprocess method(skimage resize) be used 151 | # i.e., to resize or crop input image into specified size 152 | # otherwise user must keep image size as that of in training in advance 153 | preprocess_on: True 154 | 155 | # 模型量化的算法类型,支持kl、max、default、load,通常采用default即可满足要求, 若为QAT导出的模型, 则应选择load 156 | # ---------------------------------------------------------------------------------- 157 | # types of model quantization algorithms, usually default will meet the need 158 | # available options:kl, max, default and load 159 | # if converted model is quanti model exported from QAT , then choose `load` 160 | calibration_type: 'default' 161 | 162 | # 编译器相关参数 163 | # ---------------------------- 164 | # compiler related parameters 165 | compiler_parameters: 166 | 167 | # 编译策略,支持bandwidth和latency两种优化模式; 168 | # bandwidth以优化ddr的访问带宽为目标; 169 | # latency以优化推理时间为目标 170 | # ------------------------------------------------------------------------------------------- 171 | # compilation strategy, there are 2 available optimization modes: 'bandwidth' and 'lantency' 172 | # the 'bandwidth' mode aims to optimize ddr access bandwidth 173 | # while the 'lantency' mode aims to optimize inference duration 174 | compile_mode: 'latency' 175 | 176 | # 设置debug为True将打开编译器的debug模式,能够输出性能仿真的相关信息,如帧率、DDR带宽占用等 177 | # ----------------------------------------------------------------------------------- 178 | # the compiler's debug mode will be enabled by setting to True 179 | # this will dump performance simulation related information 180 | # such as: frame rate, DDR bandwidth usage etc. 
181 | debug: False 182 | 183 | # 编译模型指定核数,不指定默认编译单核模型, 若编译双核模型,将下边注释打开即可 184 | # ------------------------------------------------------------------------------------- 185 | # specifies number of cores to be used in model compilation 186 | # as default, single core is used as this value left blank 187 | # please delete the "# " below to enable dual-core mode when compiling dual-core model 188 | # core_num: 2 189 | 190 | # 优化等级可选范围为O0~O3 191 | # O0不做任何优化, 编译速度最快,优化程度最低, 192 | # O1-O3随着优化等级提高,预期编译后的模型的执行速度会更快,但是所需编译时间也会变长。 193 | # 推荐用O2做最快验证 194 | # ---------------------------------------------------------------------------------------------------------- 195 | # optimization level ranges between O0~O3 196 | # O0 indicates that no optimization will be made 197 | # the faster the compilation, the lower optimization level will be 198 | # O1-O3: as optimization levels increase gradually, model execution, after compilation, shall become faster 199 | # while compilation will be prolonged 200 | # it is recommended to use O2 for fastest verification 201 | optimize_level: 'O3' 202 | -------------------------------------------------------------------------------- /yolov8_onnx/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_onnx/test.jpg -------------------------------------------------------------------------------- /yolov8_onnx/test_onnx_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_onnx/test_onnx_result.jpg -------------------------------------------------------------------------------- /yolov8_onnx/yolov8_relu_80class_ZQ.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_onnx/yolov8_relu_80class_ZQ.onnx -------------------------------------------------------------------------------- /yolov8_onnx/yolov8n_onnx_demo_zq.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | import argparse 4 | import os 5 | import sys 6 | import os.path as osp 7 | import cv2 8 | import torch 9 | import numpy as np 10 | import onnxruntime as ort 11 | from math import exp 12 | 13 | ROOT = os.getcwd() 14 | if str(ROOT) not in sys.path: 15 | sys.path.append(str(ROOT)) 16 | 17 | 18 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 19 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 20 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 21 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 22 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 23 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 24 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 25 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 
'scissors', 'teddy bear', 26 | 'hair drier', 'toothbrush'] 27 | 28 | meshgrid = [] 29 | 30 | class_num = len(CLASSES) 31 | headNum = 3 32 | strides = [8, 16, 32] 33 | mapSize = [[80, 80], [40, 40], [20, 20]] 34 | nmsThresh = 0.45 35 | objectThresh = 0.5 36 | 37 | input_imgH = 640 38 | input_imgW = 640 39 | 40 | 41 | class DetectBox: 42 | def __init__(self, classId, score, xmin, ymin, xmax, ymax): 43 | self.classId = classId 44 | self.score = score 45 | self.xmin = xmin 46 | self.ymin = ymin 47 | self.xmax = xmax 48 | self.ymax = ymax 49 | 50 | 51 | def GenerateMeshgrid(): 52 | for index in range(headNum): 53 | for i in range(mapSize[index][0]): 54 | for j in range(mapSize[index][1]): 55 | meshgrid.append(j + 0.5) 56 | meshgrid.append(i + 0.5) 57 | 58 | 59 | def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2): 60 | xmin = max(xmin1, xmin2) 61 | ymin = max(ymin1, ymin2) 62 | xmax = min(xmax1, xmax2) 63 | ymax = min(ymax1, ymax2) 64 | 65 | innerWidth = xmax - xmin 66 | innerHeight = ymax - ymin 67 | 68 | innerWidth = innerWidth if innerWidth > 0 else 0 69 | innerHeight = innerHeight if innerHeight > 0 else 0 70 | 71 | innerArea = innerWidth * innerHeight 72 | 73 | area1 = (xmax1 - xmin1) * (ymax1 - ymin1) 74 | area2 = (xmax2 - xmin2) * (ymax2 - ymin2) 75 | 76 | total = area1 + area2 - innerArea 77 | 78 | return innerArea / total 79 | 80 | 81 | def NMS(detectResult): 82 | predBoxs = [] 83 | 84 | sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True) 85 | 86 | for i in range(len(sort_detectboxs)): 87 | xmin1 = sort_detectboxs[i].xmin 88 | ymin1 = sort_detectboxs[i].ymin 89 | xmax1 = sort_detectboxs[i].xmax 90 | ymax1 = sort_detectboxs[i].ymax 91 | classId = sort_detectboxs[i].classId 92 | 93 | if sort_detectboxs[i].classId != -1: 94 | predBoxs.append(sort_detectboxs[i]) 95 | for j in range(i + 1, len(sort_detectboxs), 1): 96 | if classId == sort_detectboxs[j].classId: 97 | xmin2 = sort_detectboxs[j].xmin 98 | ymin2 = sort_detectboxs[j].ymin 99 | xmax2 = sort_detectboxs[j].xmax 100 | ymax2 = sort_detectboxs[j].ymax 101 | iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2) 102 | if iou > nmsThresh: 103 | sort_detectboxs[j].classId = -1 104 | return predBoxs 105 | 106 | 107 | def sigmoid(x): 108 | return 1 / (1 + exp(-x)) 109 | 110 | 111 | def postprocess(out, img_h, img_w): 112 | print('postprocess ... 
') 113 | 114 | detectResult = [] 115 | output = [] 116 | for i in range(len(out)): 117 | print(out[i].shape) 118 | output.append(out[i].reshape((-1))) 119 | 120 | scale_h = img_h / input_imgH 121 | scale_w = img_w / input_imgW 122 | 123 | gridIndex = -2 124 | cls_index = 0 125 | cls_max = 0 126 | 127 | for index in range(headNum): 128 | reg = output[index * 2 + 0] 129 | cls = output[index * 2 + 1] 130 | 131 | for h in range(mapSize[index][0]): 132 | for w in range(mapSize[index][1]): 133 | gridIndex += 2 134 | 135 | if 1 == class_num: 136 | cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 137 | cls_index = 0 138 | else: 139 | for cl in range(class_num): 140 | cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] 141 | if 0 == cl: 142 | cls_max = cls_val 143 | cls_index = cl 144 | else: 145 | if cls_val > cls_max: 146 | cls_max = cls_val 147 | cls_index = cl 148 | cls_max = sigmoid(cls_max) 149 | 150 | if cls_max > objectThresh: 151 | regdfl = [] 152 | for lc in range(4): 153 | sfsum = 0 154 | locval = 0 155 | for df in range(16): 156 | temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 157 | reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp 158 | sfsum += temp 159 | 160 | for df in range(16): 161 | sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum 162 | locval += sfval * df 163 | regdfl.append(locval) 164 | 165 | x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index] 166 | y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index] 167 | x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index] 168 | y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index] 169 | 170 | xmin = x1 * scale_w 171 | ymin = y1 * scale_h 172 | xmax = x2 * scale_w 173 | ymax = y2 * scale_h 174 | 175 | xmin = xmin if xmin > 0 else 0 176 | ymin = ymin if ymin > 0 else 0 177 | xmax = xmax if xmax < img_w else img_w 178 | ymax = ymax if ymax < img_h else img_h 179 | 180 | box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax) 181 | detectResult.append(box) 182 | # NMS 183 | print('detectResult:', len(detectResult)) 184 | predBox = NMS(detectResult) 185 | 186 | return predBox 187 | 188 | 189 | def precess_image(img_src, resize_w, resize_h): 190 | image = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR) 191 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 192 | image = image.astype(np.float32) 193 | image /= 255.0 194 | 195 | return image 196 | 197 | 198 | def detect(img_path): 199 | 200 | orig = cv2.imread(img_path) 201 | img_h, img_w = orig.shape[:2] 202 | image = precess_image(orig, input_imgW, input_imgH) 203 | 204 | image = image.transpose((2, 0, 1)) 205 | image = np.expand_dims(image, axis=0) 206 | 207 | # image = np.ones((1, 3, 384, 640), dtype=np.float32) 208 | # print(image.shape) 209 | 210 | ort_session = ort.InferenceSession('./yolov8_relu_80class_ZQ.onnx') 211 | pred_results = (ort_session.run(None, {'data': image})) 212 | 213 | out = [] 214 | for i in range(len(pred_results)): 215 | out.append(pred_results[i]) 216 | predbox = postprocess(out, img_h, img_w) 217 | 218 | print('obj num is :', len(predbox)) 219 | 220 | for i in range(len(predbox)): 221 | xmin = int(predbox[i].xmin) 222 | ymin = int(predbox[i].ymin) 223 | xmax = int(predbox[i].xmax) 224 | ymax = int(predbox[i].ymax) 225 | classId = predbox[i].classId 226 | score = predbox[i].score 
227 | 228 | cv2.rectangle(orig, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) 229 | ptext = (xmin, ymin) 230 | title = CLASSES[classId] + "%.2f" % score 231 | cv2.putText(orig, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) 232 | 233 | cv2.imwrite('./test_onnx_result.jpg', orig) 234 | 235 | 236 | if __name__ == '__main__': 237 | print('This is main ....') 238 | GenerateMeshgrid() 239 | img_path = './test.jpg' 240 | detect(img_path) 241 | -------------------------------------------------------------------------------- /yolov8_rknn/data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_rknn/data/test.jpg -------------------------------------------------------------------------------- /yolov8_rknn/dataset.txt: -------------------------------------------------------------------------------- 1 | ./data/test.jpg 2 | -------------------------------------------------------------------------------- /yolov8_rknn/onnx2rknn_demo_ZQ.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import traceback 4 | import time 5 | import sys 6 | import numpy as np 7 | import cv2 8 | from rknn.api import RKNN 9 | from math import exp 10 | 11 | ONNX_MODEL = './yolov8_relu_80class_ZQ.onnx' 12 | RKNN_MODEL = './yolov8_relu_80class_ZQ.rknn' 13 | DATASET = './dataset.txt' 14 | 15 | QUANTIZE_ON = True 16 | 17 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 18 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 19 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 20 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 21 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 22 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 23 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 24 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 25 | 'hair drier', 'toothbrush'] 26 | 27 | meshgrid = [] 28 | 29 | class_num = len(CLASSES) 30 | headNum = 3 31 | strides = [8, 16, 32] 32 | mapSize = [[80, 80], [40, 40], [20, 20]] 33 | nmsThresh = 0.5 34 | objectThresh = 0.5 35 | 36 | input_imgH = 640 37 | input_imgW = 640 38 | 39 | 40 | class DetectBox: 41 | def __init__(self, classId, score, xmin, ymin, xmax, ymax): 42 | self.classId = classId 43 | self.score = score 44 | self.xmin = xmin 45 | self.ymin = ymin 46 | self.xmax = xmax 47 | self.ymax = ymax 48 | 49 | def GenerateMeshgrid(): 50 | for index in range(headNum): 51 | for i in range(mapSize[index][0]): 52 | for j in range(mapSize[index][1]): 53 | meshgrid.append(j + 0.5) 54 | meshgrid.append(i + 0.5) 55 | 56 | 57 | def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2): 58 | xmin = max(xmin1, xmin2) 59 | ymin = max(ymin1, ymin2) 60 | xmax = min(xmax1, xmax2) 61 | ymax = min(ymax1, ymax2) 62 | 63 | innerWidth = xmax - xmin 64 | innerHeight = ymax - ymin 65 | 66 | innerWidth = innerWidth if innerWidth > 0 else 0 67 | innerHeight = innerHeight if innerHeight > 0 else 0 68 
| 69 | innerArea = innerWidth * innerHeight 70 | 71 | area1 = (xmax1 - xmin1) * (ymax1 - ymin1) 72 | area2 = (xmax2 - xmin2) * (ymax2 - ymin2) 73 | 74 | total = area1 + area2 - innerArea 75 | 76 | return innerArea / total 77 | 78 | 79 | def NMS(detectResult): 80 | predBoxs = [] 81 | 82 | sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True) 83 | 84 | for i in range(len(sort_detectboxs)): 85 | xmin1 = sort_detectboxs[i].xmin 86 | ymin1 = sort_detectboxs[i].ymin 87 | xmax1 = sort_detectboxs[i].xmax 88 | ymax1 = sort_detectboxs[i].ymax 89 | classId = sort_detectboxs[i].classId 90 | 91 | if sort_detectboxs[i].classId != -1: 92 | predBoxs.append(sort_detectboxs[i]) 93 | for j in range(i + 1, len(sort_detectboxs), 1): 94 | if classId == sort_detectboxs[j].classId: 95 | xmin2 = sort_detectboxs[j].xmin 96 | ymin2 = sort_detectboxs[j].ymin 97 | xmax2 = sort_detectboxs[j].xmax 98 | ymax2 = sort_detectboxs[j].ymax 99 | iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2) 100 | if iou > nmsThresh: 101 | sort_detectboxs[j].classId = -1 102 | return predBoxs 103 | 104 | 105 | def sigmoid(x): 106 | return 1 / (1 + exp(-x)) 107 | 108 | 109 | def postprocess(out, img_h, img_w): 110 | print('postprocess ... ') 111 | 112 | detectResult = [] 113 | output = [] 114 | for i in range(len(out)): 115 | print(out[i].shape) 116 | output.append(out[i].reshape((-1))) 117 | 118 | scale_h = img_h / input_imgH 119 | scale_w = img_w / input_imgW 120 | 121 | gridIndex = -2 122 | cls_index = 0 123 | cls_max = 0 124 | 125 | for index in range(headNum): 126 | reg = output[index * 2 + 0] 127 | cls = output[index * 2 + 1] 128 | 129 | for h in range(mapSize[index][0]): 130 | for w in range(mapSize[index][1]): 131 | gridIndex += 2 132 | 133 | if 1 == class_num: 134 | cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 135 | cls_index = 0 136 | else: 137 | for cl in range(class_num): 138 | cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] 139 | if 0 == cl: 140 | cls_max = cls_val 141 | cls_index = cl 142 | else: 143 | if cls_val > cls_max: 144 | cls_max = cls_val 145 | cls_index = cl 146 | cls_max = sigmoid(cls_max) 147 | 148 | if cls_max > objectThresh: 149 | regdfl = [] 150 | for lc in range(4): 151 | sfsum = 0 152 | locval = 0 153 | for df in range(16): 154 | temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 155 | reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp 156 | sfsum += temp 157 | 158 | for df in range(16): 159 | sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum 160 | locval += sfval * df 161 | regdfl.append(locval) 162 | 163 | x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index] 164 | y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index] 165 | x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index] 166 | y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index] 167 | 168 | xmin = x1 * scale_w 169 | ymin = y1 * scale_h 170 | xmax = x2 * scale_w 171 | ymax = y2 * scale_h 172 | 173 | xmin = xmin if xmin > 0 else 0 174 | ymin = ymin if ymin > 0 else 0 175 | xmax = xmax if xmax < img_w else img_w 176 | ymax = ymax if ymax < img_h else img_h 177 | 178 | box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax) 179 | detectResult.append(box) 180 | # NMS 181 | print('detectResult:', len(detectResult)) 182 | predBox = NMS(detectResult) 183 | 184 
| return predBox 185 | 186 | 187 | def export_rknn_inference(img): 188 | # Create RKNN object 189 | rknn = RKNN(verbose=False) 190 | 191 | # pre-process config 192 | print('--> Config model') 193 | rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], quantized_algorithm='normal', quantized_method='channel', target_platform='rk3588') 194 | print('done') 195 | 196 | # Load ONNX model 197 | print('--> Loading model') 198 | ret = rknn.load_onnx(model=ONNX_MODEL, outputs=['cls1', 'reg1', 'cls2', 'reg2', 'cls3', 'reg3']) 199 | if ret != 0: 200 | print('Load model failed!') 201 | exit(ret) 202 | print('done') 203 | 204 | # Build model 205 | print('--> Building model') 206 | ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1) 207 | if ret != 0: 208 | print('Build model failed!') 209 | exit(ret) 210 | print('done') 211 | 212 | # Export RKNN model 213 | print('--> Export rknn model') 214 | ret = rknn.export_rknn(RKNN_MODEL) 215 | if ret != 0: 216 | print('Export rknn model failed!') 217 | exit(ret) 218 | print('done') 219 | 220 | # Init runtime environment 221 | print('--> Init runtime environment') 222 | ret = rknn.init_runtime() 223 | # ret = rknn.init_runtime(target='rk3566') 224 | if ret != 0: 225 | print('Init runtime environment failed!') 226 | exit(ret) 227 | print('done') 228 | 229 | # Inference 230 | print('--> Running model') 231 | outputs = rknn.inference(inputs=[img]) 232 | rknn.release() 233 | print('done') 234 | 235 | return outputs 236 | 237 | 238 | if __name__ == '__main__': 239 | print('This is main ...') 240 | GenerateMeshgrid() 241 | 242 | img_path = './test.jpg' 243 | orig_img = cv2.imread(img_path) 244 | img_h, img_w = orig_img.shape[:2] 245 | 246 | 247 | origimg = cv2.resize(orig_img, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR) 248 | origimg = cv2.cvtColor(origimg, cv2.COLOR_BGR2RGB) 249 | 250 | img = np.expand_dims(origimg, 0) 251 | 252 | outputs = export_rknn_inference(img) 253 | 254 | out = [] 255 | for i in range(len(outputs)): 256 | out.append(outputs[i]) 257 | 258 | predbox = postprocess(out, img_h, img_w) 259 | 260 | print(len(predbox)) 261 | 262 | for i in range(len(predbox)): 263 | xmin = int(predbox[i].xmin) 264 | ymin = int(predbox[i].ymin) 265 | xmax = int(predbox[i].xmax) 266 | ymax = int(predbox[i].ymax) 267 | classId = predbox[i].classId 268 | score = predbox[i].score 269 | 270 | cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) 271 | ptext = (xmin, ymin) 272 | title = CLASSES[classId] + ":%.2f" % (score) 273 | cv2.putText(orig_img, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) 274 | 275 | cv2.imwrite('./test_rknn_result.jpg', orig_img) 276 | # cv2.imshow("test", origimg) 277 | # cv2.waitKey(0) 278 | 279 | -------------------------------------------------------------------------------- /yolov8_rknn/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_rknn/test.jpg -------------------------------------------------------------------------------- /yolov8_rknn/test_rknn_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_rknn/test_rknn_result.jpg -------------------------------------------------------------------------------- 
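Note: onnx2rknn_demo_ZQ.py above builds yolov8_relu_80class_ZQ.rknn and then runs it through the toolkit's PC-side simulator (`rknn.init_runtime()` with no target). Running the exported model on a board normally goes through Rockchip's lite runtime instead. The sketch below is an illustration under that assumption (the `RKNNLite` class and its calls come from `rknn_toolkit_lite2`, not from this repo), and it reuses `postprocess()` from `onnx2rknn_demo_ZQ.py`.

```python
# On-board inference sketch -- not part of this repo. Assumes rknn_toolkit_lite2
# is installed on the device and reuses postprocess() from onnx2rknn_demo_ZQ.py.
import cv2
import numpy as np
from rknnlite.api import RKNNLite  # assumption: Rockchip toolkit-lite2 API

RKNN_MODEL = './yolov8_relu_80class_ZQ.rknn'
INPUT_W, INPUT_H = 640, 640  # same input size as the conversion demo


def run_on_board(img_path):
    rknn_lite = RKNNLite()
    if rknn_lite.load_rknn(RKNN_MODEL) != 0:
        raise RuntimeError('load_rknn failed')
    if rknn_lite.init_runtime() != 0:
        raise RuntimeError('init_runtime failed')

    orig_img = cv2.imread(img_path)
    img_h, img_w = orig_img.shape[:2]

    # Same preprocessing as the conversion demo: resize to 640x640 and BGR -> RGB.
    # Mean/std normalisation is folded into the model via rknn.config(), so the
    # input stays raw uint8.
    img = cv2.resize(orig_img, (INPUT_W, INPUT_H), interpolation=cv2.INTER_LINEAR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(img, 0)

    outputs = rknn_lite.inference(inputs=[img])
    rknn_lite.release()

    # Same DFL decode + NMS as the simulator run in onnx2rknn_demo_ZQ.py.
    return postprocess(list(outputs), img_h, img_w)
```

Thresholds and class names are unchanged, so detections from this path should match the simulator output up to quantisation noise.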
/yolov8_rknn/yolov8_relu_80class_ZQ.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_rknn/yolov8_relu_80class_ZQ.onnx -------------------------------------------------------------------------------- /yolov8_rknn/yolov8_relu_80class_ZQ.rknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_rknn/yolov8_relu_80class_ZQ.rknn -------------------------------------------------------------------------------- /yolov8_tensorrt/onnx2trt_rt7.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | 3 | G_LOGGER = trt.Logger() 4 | 5 | batch_size = 1 6 | imput_h = 640 7 | imput_w = 640 8 | 9 | 10 | def get_engine(onnx_model_name, trt_model_name): 11 | explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 12 | with trt.Builder(G_LOGGER) as builder, builder.create_network(explicit_batch) as network, trt.OnnxParser(network, 13 | G_LOGGER) as parser: 14 | builder.max_batch_size = batch_size 15 | builder.max_workspace_size = 2 << 30 16 | print('Loading ONNX file from path {}...'.format(onnx_model_name)) 17 | with open(onnx_model_name, 'rb') as model: 18 | print('Beginning ONNX file parsing') 19 | if not parser.parse(model.read()): 20 | for error in range(parser.num_errors): 21 | print(parser.get_error(error)) 22 | 23 | print('Completed parsing of ONNX file') 24 | print('Building an engine from file {}; this may take a while...'.format(onnx_model_name)) 25 | 26 | #### 27 | # builder.int8_mode = True 28 | # builder.int8_calibrator = calib 29 | builder.fp16_mode = True 30 | #### 31 | 32 | print("num layers:", network.num_layers) 33 | # last_layer = network.get_layer(network.num_layers - 1) 34 | # if not last_layer.get_output(0): 35 | # network.mark_output(network.get_layer(network.num_layers - 1).get_output(0))//有的模型需要,有的模型在转onnx的之后已经指定了,就不需要这行 36 | 37 | network.get_input(0).shape = [batch_size, 3, imput_h, imput_w] 38 | engine = builder.build_cuda_engine(network) 39 | print("engine:", engine) 40 | print("Completed creating Engine") 41 | with open(trt_model_name, "wb") as f: 42 | f.write(engine.serialize()) 43 | return engine 44 | 45 | 46 | def main(): 47 | onnx_file_path = './yolov8_relu_80class_ZQ.onnx' 48 | engine_file_path = './yolov8_relu_80class_ZQ.trt' 49 | 50 | engine = get_engine(onnx_file_path, engine_file_path) 51 | 52 | 53 | if __name__ == '__main__': 54 | print("This is main ...") 55 | main() 56 | -------------------------------------------------------------------------------- /yolov8_tensorrt/tensorRT_inferenc_demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorrt as trt 4 | import pycuda.driver as cuda 5 | import pycuda.autoinit 6 | from math import exp 7 | from math import sqrt 8 | import time 9 | 10 | TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) 11 | 12 | 13 | CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 14 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 15 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 16 | 
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 17 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 18 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 19 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 20 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 21 | 'hair drier', 'toothbrush'] 22 | 23 | meshgrid = [] 24 | 25 | class_num = len(CLASSES) 26 | headNum = 3 27 | strides = [8, 16, 32] 28 | mapSize = [[80, 80], [40, 40], [20, 20]] 29 | nmsThresh = 0.45 30 | objectThresh = 0.35 31 | 32 | input_imgH = 640 33 | input_imgW = 640 34 | 35 | # Simple helper data class that's a little nicer to use than a 2-tuple. 36 | class HostDeviceMem(object): 37 | def __init__(self, host_mem, device_mem): 38 | self.host = host_mem 39 | self.device = device_mem 40 | 41 | def __str__(self): 42 | return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device) 43 | 44 | def __repr__(self): 45 | return self.__str__() 46 | 47 | 48 | def allocate_buffers(engine): 49 | inputs = [] 50 | outputs = [] 51 | bindings = [] 52 | stream = cuda.Stream() 53 | for binding in engine: 54 | size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size 55 | dtype = trt.nptype(engine.get_binding_dtype(binding)) 56 | # Allocate host and device buffers 57 | host_mem = cuda.pagelocked_empty(size, dtype) 58 | device_mem = cuda.mem_alloc(host_mem.nbytes) 59 | # Append the device buffer to device bindings. 60 | bindings.append(int(device_mem)) 61 | # Append to the appropriate list. 62 | if engine.binding_is_input(binding): 63 | inputs.append(HostDeviceMem(host_mem, device_mem)) 64 | else: 65 | outputs.append(HostDeviceMem(host_mem, device_mem)) 66 | return inputs, outputs, bindings, stream 67 | 68 | 69 | def get_engine_from_bin(engine_file_path): 70 | print('Reading engine from file {}'.format(engine_file_path)) 71 | with open(engine_file_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime: 72 | return runtime.deserialize_cuda_engine(f.read()) 73 | 74 | 75 | # This function is generalized for multiple inputs/outputs. 76 | # inputs and outputs are expected to be lists of HostDeviceMem objects. 77 | def do_inference(context, bindings, inputs, outputs, stream, batch_size=1): 78 | # Transfer input data to the GPU. 79 | [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs] 80 | # Run inference. 81 | context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle) 82 | # Transfer predictions back from the GPU. 83 | [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs] 84 | # Synchronize the stream 85 | stream.synchronize() 86 | # Return only the host outputs. 
87 | return [out.host for out in outputs] 88 | 89 | 90 | class DetectBox: 91 | def __init__(self, classId, score, xmin, ymin, xmax, ymax): 92 | self.classId = classId 93 | self.score = score 94 | self.xmin = xmin 95 | self.ymin = ymin 96 | self.xmax = xmax 97 | self.ymax = ymax 98 | 99 | 100 | def GenerateMeshgrid(): 101 | for index in range(headNum): 102 | for i in range(mapSize[index][0]): 103 | for j in range(mapSize[index][1]): 104 | meshgrid.append(j + 0.5) 105 | meshgrid.append(i + 0.5) 106 | 107 | 108 | def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2): 109 | xmin = max(xmin1, xmin2) 110 | ymin = max(ymin1, ymin2) 111 | xmax = min(xmax1, xmax2) 112 | ymax = min(ymax1, ymax2) 113 | 114 | innerWidth = xmax - xmin 115 | innerHeight = ymax - ymin 116 | 117 | innerWidth = innerWidth if innerWidth > 0 else 0 118 | innerHeight = innerHeight if innerHeight > 0 else 0 119 | 120 | innerArea = innerWidth * innerHeight 121 | 122 | area1 = (xmax1 - xmin1) * (ymax1 - ymin1) 123 | area2 = (xmax2 - xmin2) * (ymax2 - ymin2) 124 | 125 | total = area1 + area2 - innerArea 126 | 127 | return innerArea / total 128 | 129 | 130 | def NMS(detectResult): 131 | predBoxs = [] 132 | 133 | sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True) 134 | 135 | for i in range(len(sort_detectboxs)): 136 | xmin1 = sort_detectboxs[i].xmin 137 | ymin1 = sort_detectboxs[i].ymin 138 | xmax1 = sort_detectboxs[i].xmax 139 | ymax1 = sort_detectboxs[i].ymax 140 | classId = sort_detectboxs[i].classId 141 | 142 | if sort_detectboxs[i].classId != -1: 143 | predBoxs.append(sort_detectboxs[i]) 144 | for j in range(i + 1, len(sort_detectboxs), 1): 145 | # if classId == sort_detectboxs[j].classId: 146 | xmin2 = sort_detectboxs[j].xmin 147 | ymin2 = sort_detectboxs[j].ymin 148 | xmax2 = sort_detectboxs[j].xmax 149 | ymax2 = sort_detectboxs[j].ymax 150 | iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2) 151 | if iou > nmsThresh: 152 | sort_detectboxs[j].classId = -1 153 | return predBoxs 154 | 155 | def sigmoid(x): 156 | return 1 / (1 + exp(-x)) 157 | 158 | 159 | def postprocess(out, img_h, img_w): 160 | print('postprocess ... 
') 161 | 162 | detectResult = [] 163 | output = [] 164 | for i in range(len(out)): 165 | print(out[i].shape) 166 | output.append(out[i].reshape((-1))) 167 | 168 | scale_h = img_h / input_imgH 169 | scale_w = img_w / input_imgW 170 | 171 | gridIndex = -2 172 | cls_index = 0 173 | cls_max = 0 174 | 175 | for index in range(headNum): 176 | reg = output[index * 2 + 0] 177 | cls = output[index * 2 + 1] 178 | 179 | for h in range(mapSize[index][0]): 180 | for w in range(mapSize[index][1]): 181 | gridIndex += 2 182 | 183 | if 1 == class_num: 184 | cls_max = sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 185 | cls_index = 0 186 | else: 187 | for cl in range(class_num): 188 | cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] 189 | if 0 == cl: 190 | cls_max = cls_val 191 | cls_index = cl 192 | else: 193 | if cls_val > cls_max: 194 | cls_max = cls_val 195 | cls_index = cl 196 | cls_max = sigmoid(cls_max) 197 | 198 | if cls_max > objectThresh: 199 | regdfl = [] 200 | for lc in range(4): 201 | sfsum = 0 202 | locval = 0 203 | for df in range(16): 204 | temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) 205 | reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp 206 | sfsum += temp 207 | 208 | for df in range(16): 209 | sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum 210 | locval += sfval * df 211 | regdfl.append(locval) 212 | 213 | x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index] 214 | y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index] 215 | x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index] 216 | y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index] 217 | 218 | xmin = x1 * scale_w 219 | ymin = y1 * scale_h 220 | xmax = x2 * scale_w 221 | ymax = y2 * scale_h 222 | 223 | xmin = xmin if xmin > 0 else 0 224 | ymin = ymin if ymin > 0 else 0 225 | xmax = xmax if xmax < img_w else img_w 226 | ymax = ymax if ymax < img_h else img_h 227 | 228 | box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax) 229 | detectResult.append(box) 230 | # NMS 231 | print('detectResult:', len(detectResult)) 232 | predBox = NMS(detectResult) 233 | 234 | return predBox 235 | 236 | 237 | def preprocess(src): 238 | img = cv2.resize(src, (input_imgW, input_imgH)).astype(np.float32) 239 | img = img * 0.00392156 240 | img = img.transpose(2, 0, 1) 241 | img_input = img.copy() 242 | return img_input 243 | 244 | 245 | def main(): 246 | engine_file_path = 'yolov8_relu_80class_ZQ.trt' 247 | input_image_path = 'test.jpg' 248 | 249 | orig_image = cv2.imread(input_image_path) 250 | orig = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 251 | img_h, img_w = orig.shape[:2] 252 | image = preprocess(orig) 253 | 254 | with get_engine_from_bin(engine_file_path) as engine, engine.create_execution_context() as context: 255 | inputs, outputs, bindings, stream = allocate_buffers(engine) 256 | 257 | inputs[0].host = image 258 | t1 = time.time() 259 | trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream, batch_size=1) 260 | t2 = time.time() 261 | print('run tiems time:', (t2 - t1)) 262 | 263 | print('outputs heads num: ', len(trt_outputs)) 264 | 265 | out = [] 266 | for i in range(len(trt_outputs)): 267 | out.append(trt_outputs[i]) 268 | 269 | predbox = postprocess(out, img_h, img_w) 270 | 271 | print(len(predbox)) 272 | 273 | for i in 
range(len(predbox)): 274 | xmin = int(predbox[i].xmin) 275 | ymin = int(predbox[i].ymin) 276 | xmax = int(predbox[i].xmax) 277 | ymax = int(predbox[i].ymax) 278 | classId = predbox[i].classId 279 | score = predbox[i].score 280 | 281 | cv2.rectangle(orig_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) 282 | ptext = (xmin, ymin) 283 | title = CLASSES[classId] + "%.2f" % score 284 | cv2.putText(orig_image, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA) 285 | 286 | cv2.imwrite('./test_result_tensorRT.jpg', orig_image) 287 | # cv2.imshow("test", orig_image) 288 | # cv2.waitKey(0) 289 | 290 | 291 | if __name__ == '__main__': 292 | print('This is main ...') 293 | GenerateMeshgrid() 294 | main() 295 | -------------------------------------------------------------------------------- /yolov8_tensorrt/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_tensorrt/test.jpg -------------------------------------------------------------------------------- /yolov8_tensorrt/test_result_tensorRT.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_tensorrt/test_result_tensorRT.jpg -------------------------------------------------------------------------------- /yolov8_tensorrt/yolov8_relu_80class_ZQ.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_tensorrt/yolov8_relu_80class_ZQ.onnx -------------------------------------------------------------------------------- /yolov8_tensorrt/yolov8_relu_80class_ZQ.trt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cqu20160901/yolov8n_onnx_tensorRT_rknn_horizon_dfl/f9ee9a8d34c9bb30efbe2bd433a0663d3cb9c41c/yolov8_tensorrt/yolov8_relu_80class_ZQ.trt --------------------------------------------------------------------------------
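A note on the post-processing shared by the demos above: each head's 64 regression channels are 4 box sides times 16 DFL bins. Per anchor, a softmax over the 16 bins gives a distribution whose expectation is the distance (in grid units) from the anchor centre to that box edge, and multiplying by the head's stride converts it to input-image pixels; the scripts write this as explicit per-anchor loops. For readers who prefer a vectorised view, here is a minimal numpy sketch of the same decode for one head. The array names and shapes are illustrative assumptions, not code from the repo.

```python
import numpy as np

def dfl_decode(reg, stride):
    """reg: (64, H, W) raw regression output of one head (4 sides x 16 DFL bins).
    Returns (H*W, 4) distances [left, top, right, bottom] in input-image pixels."""
    _, h, w = reg.shape
    logits = reg.reshape(4, 16, h * w)                   # (side, bin, anchor)
    logits = logits - logits.max(axis=1, keepdims=True)  # numerically stable softmax
    prob = np.exp(logits)
    prob /= prob.sum(axis=1, keepdims=True)
    bins = np.arange(16, dtype=np.float32).reshape(1, 16, 1)
    dist = (prob * bins).sum(axis=1)                     # expectation over bins, (4, H*W)
    return dist.T * stride

# Usage for the stride-8 head of a 640x640 input (anchor centres match GenerateMeshgrid()):
# dist = dfl_decode(reg8.reshape(64, 80, 80), 8)
# cy, cx = np.mgrid[0:80, 0:80] + 0.5
# cx, cy = cx.reshape(-1) * 8, cy.reshape(-1) * 8
# boxes = np.stack([cx - dist[:, 0], cy - dist[:, 1],
#                   cx + dist[:, 2], cy + dist[:, 3]], axis=1)  # xyxy at 640x640 scale
```

The class branch is handled separately in the demos (sigmoid on the best class logit, then score thresholding and per-class NMS); only the box decode is shown here.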