├── README.md ├── desk.jpg ├── imagenet_21k_class_names.txt ├── main.cpp └── main.py /README.md: -------------------------------------------------------------------------------- 1 | # Detic-onnxrun-cpp-py 2 | 使用ONNXRuntime部署Detic检测2万1千种类别的物体，包含C++和Python两个版本的程序 3 | 4 | 训练源码在https://github.com/facebookresearch/Detic 5 | 6 | onnx文件在百度云盘，下载 7 | 链接：https://pan.baidu.com/s/1VXOvlSy7RTTwbsItipI2sw?pwd=c1n2 8 | 提取码：c1n2 9 | -------------------------------------------------------------------------------- /desk.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpc203/Detic-onnxrun-cpp-py/f7c7606aee010e0912e99cfce152d501d1a7c889/desk.jpg -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | //#include ///nvidia-cuda加速 8 | #include 9 | 10 | using namespace cv; 11 | using namespace std; 12 | using namespace Ort; 13 | 14 | typedef struct BoxInfo 15 | { 16 | int xmin; 17 | int ymin; 18 | int xmax; 19 | int ymax; 20 | float score; 21 | string name; 22 | } BoxInfo; 23 | 24 | class Detic 25 | { 26 | public: 27 | Detic(string modelpath); 28 | vector detect(Mat cv_image); 29 | private: 30 | void preprocess(Mat srcimg); 31 | vector input_image_; 32 | int inpWidth; 33 | int inpHeight; 34 | vector class_names; 35 | const int max_size = 800; 36 | 37 | //存储初始化获得的可执行网络 38 | Env env = Env(ORT_LOGGING_LEVEL_ERROR, "Detic"); 39 | Ort::Session *ort_session = nullptr; 40 | SessionOptions sessionOptions = SessionOptions(); 41 | vector input_names; 42 | vector output_names; 43 | vector> input_node_dims; // >=1 outputs 44 | vector> output_node_dims; // >=1 outputs 45 | }; 46 | 47 | Detic::Detic(string model_path) 48 | { 49 | //OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0); ///nvidia-cuda加速 50 | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC); 51 | std::wstring widestr = std::wstring(model_path.begin(), model_path.end()); ///如果在windows系统就这么写 52 | ort_session = new Session(env, widestr.c_str(), sessionOptions); ///如果在windows系统就这么写 53 | ///ort_session = new Session(env, model_path.c_str(), sessionOptions); ///如果在linux系统，就这么写 54 | 55 | size_t numInputNodes = ort_session->GetInputCount(); 56 | size_t numOutputNodes = ort_session->GetOutputCount(); 57 | AllocatorWithDefaultOptions allocator; 58 | for (int i = 0; i < numInputNodes; i++) 59 | { 60 | input_names.push_back(ort_session->GetInputName(i, allocator)); 61 | Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i); 62 | auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo(); 63 | auto input_dims = input_tensor_info.GetShape(); 64 | input_node_dims.push_back(input_dims); 65 | } 66 | for (int i = 0; i < numOutputNodes; i++) 67 | { 68 | output_names.push_back(ort_session->GetOutputName(i, allocator)); 69 | Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i); 70 | auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo(); 71 | auto output_dims = output_tensor_info.GetShape(); 72 | output_node_dims.push_back(output_dims); 73 | } 74 | 75 | ifstream ifs("imagenet_21k_class_names.txt"); 76 | string line; 77 | while (getline(ifs, line)) 78 | { 79 | this->class_names.push_back(line); ///你可以用随机数给每个类别分配RGB值 80 | } 81 | } 82 | 83 | void Detic::preprocess(Mat srcimg) 84 | { 85 | Mat dstimg; 86 | cvtColor(srcimg, dstimg, COLOR_BGR2RGB); 87 | int im_h = srcimg.rows; 88 | int im_w = srcimg.cols; 89 | float oh, ow, scale; 90 | if (im_h < im_w) 91 | { 92 | scale = (float)max_size / (float)im_h; 93 | oh = max_size; 94 | ow = scale * (float)im_w; 95 | } 96 | else 97 | { 98 | scale = (float)max_size / (float)im_h; 99 | oh = scale * (float)im_h; 100 | ow = max_size; 101 | } 102 | float max_hw = std::max(oh, ow); 103 | if (max_hw > max_size) 104 | { 105 | scale = (float)max_size / max_hw; 106 | oh *= scale; 107 | ow *= scale; 108 | } 109 | 110 | resize(dstimg, dstimg, Size(int(ow + 0.5), int(oh + 0.5)), INTER_LINEAR); 111 | this->inpHeight = dstimg.rows; 112 | this->inpWidth = dstimg.cols; 113 | this->input_image_.resize(this->inpWidth * this->inpHeight * dstimg.channels()); 114 | int k = 0; 115 | for (int c = 0; c < 3; c++) 116 | { 117 | for (int i = 0; i < this->inpHeight; i++) 118 | { 119 | for (int j = 0; j < this->inpWidth; j++) 120 | { 121 | float pix = dstimg.ptr(i)[j * 3 + c]; 122 | this->input_image_[k] = pix; 123 | k++; 124 | } 125 | } 126 | } 127 | } 128 | 129 | vector Detic::detect(Mat srcimg) 130 | { 131 | int im_h = srcimg.rows; 132 | int im_w = srcimg.cols; 133 | this->preprocess(srcimg); 134 | array input_shape_{ 1, 3, this->inpHeight, this->inpWidth }; 135 | 136 | auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 137 | Value input_tensor_ = Value::CreateTensor(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size()); 138 | 139 | // 开始推理 140 | vector ort_outputs = ort_session->Run(RunOptions{ nullptr }, &input_names[0], &input_tensor_, 1, output_names.data(), output_names.size()); 141 | 142 | const float *pred_boxes = ort_outputs[0].GetTensorMutableData(); 143 | const float *scores = ort_outputs[1].GetTensorMutableData(); 144 | const int *pred_classes = ort_outputs[2].GetTensorMutableData(); 145 | //const float *pred_masks = ort_outputs[3].GetTensorMutableData(); 146 | 147 | int num_box = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape()[0]; 148 | const float scale_x = float(im_w) / float(inpWidth); 149 | const float scale_y = float(im_h) / float(inpHeight); 150 | vector preds; 151 | for (int i = 0; i < num_box; i++) 152 | { 153 | float xmin = pred_boxes[i * 4] * scale_x; 154 | float ymin = pred_boxes[i * 4 + 1] * scale_y; 155 | float xmax = pred_boxes[i * 4 + 2] * scale_x; 156 | float ymax = pred_boxes[i * 4 + 3] * scale_y; 157 | xmin = std::min(std::max(xmin, 0.f), float(im_w)); 158 | ymin = std::min(std::max(ymin, 0.f), float(im_h)); 159 | xmax = std::min(std::max(xmax, 0.f), float(im_w)); 160 | ymax = std::min(std::max(ymax, 0.f), float(im_h)); 161 | 162 | const float threshold = 0; 163 | const float width = xmax - xmin; 164 | const float height = ymax - ymin; 165 | if (width > threshold && height > threshold) 166 | { 167 | preds.push_back({ int(xmin), int(ymin), int(xmax), int(ymax), scores[i], class_names[pred_classes[i]] }); 168 | } 169 | } 170 | return preds; 171 | } 172 | 173 | int main() 174 | { 175 | Detic mynet("weights/Detic_C2_R50_640_4x_in21k.onnx"); 176 | string imgpath = "desk.jpg"; 177 | Mat srcimg = imread(imgpath); 178 | vector preds = mynet.detect(srcimg); 179 | for (size_t i = 0; i < preds.size(); ++i) 180 | { 181 | rectangle(srcimg, Point(preds[i].xmin, preds[i].ymin), Point(preds[i].xmax, preds[i].ymax), Scalar(0, 0, 255), 2); 182 | string label = format("%.2f", preds[i].score); 183 | label = preds[i].name + " :" + label; 184 | putText(srcimg, label, Point(preds[i].xmin, preds[i].ymin - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 1); 185 | } 186 | 187 | //imwrite("result.jpg", srcimg); 188 | static const string kWinName = "Deep learning object detection in ONNXRuntime"; 189 | namedWindow(kWinName, WINDOW_NORMAL); 190 | imshow(kWinName, srcimg); 191 | waitKey(0); 192 | destroyAllWindows(); 193 | } 194 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import numpy as np 4 | import onnxruntime as ort 5 | 6 | 7 | class Detic(): 8 | def __init__(self, modelpath, detection_width=800, confThreshold=0.8): 9 | # net = cv2.dnn.readNet(modelpath) 10 | so = ort.SessionOptions() 11 | so.log_severity_level = 3 12 | self.session = ort.InferenceSession(modelpath, so) 13 | model_inputs = self.session.get_inputs() 14 | self.input_name = model_inputs[0].name 15 | self.max_size = detection_width 16 | self.confThreshold = confThreshold 17 | self.class_names = list(map(lambda x: x.strip(), open('imagenet_21k_class_names.txt').readlines())) 18 | self.assigned_colors = np.random.randint(0,high=256, size=(len(self.class_names), 3)).tolist() 19 | 20 | def preprocess(self, srcimg): 21 | im_h, im_w, _ = srcimg.shape 22 | dstimg = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB) 23 | if im_h < im_w: 24 | scale = self.max_size / im_h 25 | oh, ow = self.max_size, scale * im_w 26 | else: 27 | scale = self.max_size / im_w 28 | oh, ow = scale * im_h, self.max_size 29 | 30 | max_hw = max(oh, ow) 31 | if max_hw > self.max_size: 32 | scale = self.max_size / max_hw 33 | oh *= scale 34 | ow *= scale 35 | ow = int(ow + 0.5) 36 | oh = int(oh + 0.5) 37 | dstimg = cv2.resize(dstimg, (ow, oh)) 38 | return dstimg 39 | 40 | def post_processing(self, pred_boxes, scores, pred_classes, pred_masks, im_hw, pred_hw): 41 | scale_x, scale_y = (im_hw[1] / pred_hw[1], im_hw[0] / pred_hw[0]) 42 | 43 | pred_boxes[:, 0::2] *= scale_x 44 | pred_boxes[:, 1::2] *= scale_y 45 | pred_boxes[:, [0, 2]] = np.clip(pred_boxes[:, [0, 2]], 0, im_hw[1]) 46 | pred_boxes[:, [1, 3]] = np.clip(pred_boxes[:, [1, 3]], 0, im_hw[0]) 47 | 48 | threshold = 0 49 | widths = pred_boxes[:, 2] - pred_boxes[:, 0] 50 | heights = pred_boxes[:, 3] - pred_boxes[:, 1] 51 | keep = (widths > threshold) & (heights > threshold) 52 | 53 | pred_boxes = pred_boxes[keep] 54 | scores = scores[keep] 55 | pred_classes = pred_classes[keep] 56 | pred_masks = pred_masks[keep] 57 | 58 | # mask_threshold = 0.5 59 | # pred_masks = paste_masks_in_image( 60 | # pred_masks[:, 0, :, :], pred_boxes, 61 | # (im_hw[0], im_hw[1]), mask_threshold 62 | # ) 63 | 64 | pred = { 65 | 'pred_boxes': pred_boxes, 66 | 'scores': scores, 67 | 'pred_classes': pred_classes, 68 | 'pred_masks': pred_masks, 69 | } 70 | return pred 71 | 72 | def draw_predictions(self, img, predictions): 73 | height, width = img.shape[:2] 74 | default_font_size = int(max(np.sqrt(height * width) // 90, 10)) 75 | boxes = predictions["pred_boxes"].astype(np.int64) 76 | scores = predictions["scores"] 77 | classes_id = predictions["pred_classes"].tolist() 78 | # masks = predictions["pred_masks"].astype(np.uint8) 79 | num_instances = len(boxes) 80 | print('detect', num_instances, 'instances') 81 | for i in range(num_instances): 82 | x0, y0, x1, y1 = boxes[i] 83 | color = self.assigned_colors[classes_id[i]] 84 | cv2.rectangle(img, (x0, y0), (x1, y1), color=color,thickness=default_font_size // 4) 85 | text = "{} {:.0f}%".format(self.class_names[classes_id[i]], round(scores[i],2) * 100) 86 | cv2.putText(img, text, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=1, lineType=cv2.LINE_AA) 87 | return img 88 | 89 | def detect(self, srcimg): 90 | im_h, im_w = srcimg.shape[:2] 91 | dstimg = self.preprocess(srcimg) 92 | pred_hw = dstimg.shape[:2] 93 | input_image = np.expand_dims(dstimg.transpose(2, 0, 1), axis=0).astype(np.float32) 94 | 95 | # Inference 96 | pred_boxes, scores, pred_classes, pred_masks = self.session.run(None, {self.input_name: input_image}) 97 | preds = self.post_processing(pred_boxes, scores, pred_classes, pred_masks, (im_h, im_w), pred_hw) 98 | return preds 99 | 100 | 101 | if __name__ == '__main__': 102 | parser = argparse.ArgumentParser() 103 | parser.add_argument("--imgpath", type=str, default='desk.jpg', help="image path") 104 | parser.add_argument("--confThreshold", default=0.5, type=float, help='class confidence') 105 | parser.add_argument("--modelpath", type=str, default='weights/Detic_C2_R50_640_4x_in21k.onnx', help="onnxmodel path") 106 | args = parser.parse_args() 107 | 108 | mynet = Detic(args.modelpath, confThreshold=args.confThreshold) 109 | srcimg = cv2.imread(args.imgpath) 110 | preds = mynet.detect(srcimg) 111 | srcimg = mynet.draw_predictions(srcimg, preds) 112 | 113 | # cv2.imwrite('result.jpg', srcimg) 114 | winName = 'Deep learning Detic in ONNXRuntime' 115 | cv2.namedWindow(winName, cv2.WINDOW_NORMAL) 116 | cv2.imshow(winName, srcimg) 117 | cv2.waitKey(0) 118 | cv2.destroyAllWindows() 119 | --------------------------------------------------------------------------------