├── AIDetector_pytorch.py ├── BaseDetector.py ├── LICENSE ├── README.md ├── deep_sort ├── configs │ └── deep_sort.yaml ├── deep_sort │ ├── README.md │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── .gitkeep │ │ ├── evaluate.py │ │ ├── feature_extractor.py │ │ ├── model.py │ │ ├── original_model.py │ │ ├── test.py │ │ ├── train.jpg │ │ └── train.py │ ├── deep_sort.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── demo.py ├── demo ├── MegEngine │ ├── cpp │ │ ├── README.md │ │ ├── build.sh │ │ └── yolox.cpp │ └── python │ │ ├── README.md │ │ ├── build.py │ │ ├── coco_classes.py │ │ ├── convert_weights.py │ │ ├── demo.py │ │ ├── dump.py │ │ ├── models │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── network_blocks.py │ │ ├── yolo_fpn.py │ │ ├── yolo_head.py │ │ ├── yolo_pafpn.py │ │ └── yolox.py │ │ ├── process.py │ │ └── visualize.py ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py ├── OpenVINO │ ├── README.md │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── yolox_openvino.cpp │ └── python │ │ ├── README.md │ │ └── openvino_inference.py ├── TensorRT │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── logging.h │ │ └── yolox.cpp │ └── python │ │ └── README.md └── ncnn │ ├── android │ ├── README.md │ ├── app │ │ ├── build.gradle │ │ └── src │ │ │ └── main │ │ │ ├── AndroidManifest.xml │ │ │ ├── assets │ │ │ └── yolox.param │ │ │ ├── java │ │ │ └── com │ │ │ │ └── megvii │ │ │ │ └── yoloXncnn │ │ │ │ ├── MainActivity.java │ │ │ │ └── yoloXncnn.java │ │ │ ├── jni │ │ │ ├── CMakeLists.txt │ │ │ └── yoloXncnn_jni.cpp │ │ │ └── res │ │ │ ├── layout │ │ │ └── main.xml │ │ │ └── values │ │ │ └── strings.xml │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ └── settings.gradle │ └── cpp │ ├── README.md │ └── yolox.cpp ├── exps ├── default │ ├── nano.py │ ├── yolov3.py │ ├── yolox_l.py │ ├── yolox_m.py │ ├── yolox_s.py │ ├── yolox_tiny.py │ └── yolox_x.py └── example │ ├── custom │ ├── nano.py │ └── yolox_s.py │ └── yolox_voc │ └── yolox_voc_s.py ├── requirements.txt ├── tools ├── demo.py ├── eval.py ├── export_onnx.py ├── train.py └── trt.py ├── tracker.py └── yolox ├── __init__.py ├── core ├── __init__.py ├── launch.py └── trainer.py ├── data ├── __init__.py ├── data_augment.py ├── data_prefetcher.py ├── dataloading.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── coco_classes.py │ ├── datasets_wrapper.py │ ├── mosaicdetection.py │ ├── voc.py │ └── voc_classes.py └── samplers.py ├── evaluators ├── __init__.py ├── coco_evaluator.py ├── voc_eval.py └── voc_evaluator.py ├── exp ├── __init__.py ├── base_exp.py ├── build.py └── yolox_base.py ├── layers ├── __init__.py ├── csrc │ ├── cocoeval │ │ ├── cocoeval.cpp │ │ └── cocoeval.h │ └── vision.cpp └── fast_coco_eval_api.py ├── models ├── __init__.py ├── darknet.py ├── losses.py ├── network_blocks.py ├── yolo_fpn.py ├── yolo_head.py ├── yolo_pafpn.py └── yolox.py └── utils ├── __init__.py ├── allreduce_norm.py ├── boxes.py ├── checkpoint.py ├── demo_utils.py ├── dist.py ├── ema.py ├── logger.py ├── lr_scheduler.py ├── metric.py ├── model_utils.py ├── setup_env.py └── visualize.py 
/AIDetector_pytorch.py: -------------------------------------------------------------------------------- 1 | from yolox.utils.boxes import postprocess 2 | from yolox.data.data_augment import preproc 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | from BaseDetector import baseDet 7 | import os 8 | from yolox.utils import fuse_model 9 | from yolox.data.datasets import COCO_CLASSES 10 | 11 | 12 | def select_device(device='', batch_size=None): 13 | # device = 'cpu' or '0' or '0,1,2,3' 14 | cpu = device.lower() == 'cpu' 15 | if cpu: 16 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 17 | elif device: # non-cpu device requested 18 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 19 | # check availability 20 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' 21 | 22 | cuda = not cpu and torch.cuda.is_available() 23 | if cuda: 24 | devices = device.split(',') if device else range(torch.cuda.device_count()) # i.e. 0,1,6,7 25 | n = len(devices) # device count 26 | if n > 1 and batch_size: # check batch_size is divisible by device_count 27 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 28 | 29 | return torch.device('cuda:0' if cuda else 'cpu') 30 | 31 | 32 | class Detector(baseDet): 33 | 34 | def __init__(self): 35 | super(Detector, self).__init__() 36 | 37 | self.build_config() 38 | self.mdepth = 0.33 39 | self.mwidth = 0.50 40 | self.confthre=0.01 41 | self.nmsthre=0.65 42 | self.test_size=(640, 640) 43 | self.rgb_means = (0.485, 0.456, 0.406) 44 | self.std = (0.229, 0.224, 0.225) 45 | self.init_model() 46 | 47 | def init_model(self): 48 | 49 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 50 | 51 | def init_yolo(M): 52 | for m in M.modules(): 53 | if isinstance(m, nn.BatchNorm2d): 54 | m.eps = 1e-3 55 | m.momentum = 0.03 56 | 57 | if getattr(self, "model", None) is None: 58 | in_channels = [256, 512, 1024] 59 | backbone = YOLOPAFPN(self.mdepth, self.mwidth, in_channels=in_channels) 60 | head = YOLOXHead(80, self.mwidth, in_channels=in_channels) 61 | model = YOLOX(backbone, head) 62 | 63 | model.apply(init_yolo) 64 | model.head.initialize_biases(1e-2) 65 | self.weights = 'weights/yolox_s.pth' 66 | self.device = '0' if torch.cuda.is_available() else 'cpu' 67 | self.device = select_device(self.device) 68 | ckpt = torch.load(self.weights) 69 | # load the model state dict 70 | model.load_state_dict(ckpt["model"]) 71 | model.to(self.device).eval() 72 | model = fuse_model(model) 73 | self.m = model 74 | 75 | self.names = COCO_CLASSES 76 | self.num_classes = len(self.names) 77 | 78 | def preprocess(self, img): 79 | 80 | img_info = {"id": 0} 81 | img_info["file_name"] = None 82 | height, width = img.shape[:2] 83 | img_info["height"] = height 84 | img_info["width"] = width 85 | img_info["raw_img"] = img 86 | 87 | img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) 88 | img_info["ratio"] = ratio 89 | img = torch.from_numpy(img).unsqueeze(0) 90 | if torch.cuda.is_available(): 91 | img = img.cuda() 92 | 93 | return img_info, img 94 | 95 | def detect(self, im): 96 | 97 | img_info, img = self.preprocess(im) 98 | 99 | outputs = self.m(img) 100 | outputs = postprocess( 101 | outputs, self.num_classes, self.confthre, self.nmsthre 102 | )[0] 103 | pred_boxes = [] 104 | ratio = img_info["ratio"] 105 | img = img_info["raw_img"] 106 | 107 | boxes = outputs[:, 0:4] 108 | 109 | # preprocessing: resize 110 | boxes /= ratio 111 | 112 | 
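# note: each row returned by postprocess() is [x1, y1, x2, y2, obj_conf, class_conf, class_pred];
# the boxes above have just been scaled back to the original image by dividing by the preproc ratio.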
cls_ids = outputs[:, 6] 113 | scores = outputs[:, 4] * outputs[:, 5] 114 | 115 | for i in range(len(boxes)): 116 | box = boxes[i].cpu() 117 | lbl = self.names[int(cls_ids[i])] 118 | conf = scores[i] 119 | if conf < self.confthre: 120 | continue 121 | x1 = int(box[0]) 122 | y1 = int(box[1]) 123 | x2 = int(box[2]) 124 | y2 = int(box[3]) 125 | pred_boxes.append( 126 | (x1, y1, x2, y2, lbl, conf)) 127 | 128 | return im, pred_boxes 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | det = Detector() 134 | -------------------------------------------------------------------------------- /BaseDetector.py: -------------------------------------------------------------------------------- 1 | from tracker import update_tracker 2 | import cv2 3 | 4 | 5 | class baseDet(object): 6 | 7 | def __init__(self): 8 | 9 | self.stride = 1 10 | 11 | def build_config(self): 12 | 13 | self.faceTracker = {} 14 | self.faceClasses = {} 15 | self.faceLocation1 = {} 16 | self.faceLocation2 = {} 17 | self.frameCounter = 0 18 | self.currentCarID = 0 19 | self.recorded = [] 20 | 21 | self.font = cv2.FONT_HERSHEY_SIMPLEX 22 | 23 | def feedCap(self, im): 24 | 25 | retDict = { 26 | 'frame': None, 27 | 'faces': None, 28 | 'list_of_ids': None, 29 | 'face_bboxes': [] 30 | } 31 | self.frameCounter += 1 32 | 33 | im, faces, face_bboxes = update_tracker(self, im) 34 | 35 | retDict['frame'] = im 36 | retDict['faces'] = faces 37 | retDict['face_bboxes'] = face_bboxes 38 | 39 | return retDict 40 | 41 | def init_model(self): 42 | raise EOFError("Undefined model type.") 43 | 44 | def preprocess(self): 45 | raise EOFError("Undefined model type.") 46 | 47 | def detect(self): 48 | raise EOFError("Undefined model type.") 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 项目简介: 2 | 使用YOLOX+Deepsort实现车辆行人追踪和计数,代码封装成一个Detector类,更容易嵌入到自己的项目中。 3 | 4 | 代码地址(欢迎star): 5 | 6 | [https://github.com/Sharpiless/yolox-deepsort/](https://github.com/Sharpiless/yolox-deepsort/) 7 | 8 | 最终效果: 9 | 10 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/7768e8e4cf0a4bbf97bb10ab56ea028c.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDkzNjg4OQ==,size_16,color_FFFFFF,t_70) 11 | 12 | # 运行demo: 13 | 14 | ```bash 15 | python demo.py 16 | ``` 17 | 18 | # 下载预训练模型: 19 | 20 | |Model |size |mAPtest
0.5:0.95 | Speed V100 (ms) | Params (M) |FLOPs
(G)| weights | 21 | | ------ |:---: | :---: |:---: |:---: | :---: | :----: | 22 | |[YOLOX-s](./exps/default/yolox_s.py) |640 |39.6 |9.8 |9.0 | 26.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) | 23 | |[YOLOX-m](./exps/default/yolox_m.py) |640 |46.4 |12.3 |25.3 |73.8| [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERMTP7VFqrVBrXKMU7Vl4TcBQs0SUeCT7kvc-JdIbej4tQ?e=1MDo9y)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.pth) | 24 | |[YOLOX-l](./exps/default/yolox_l.py) |640 |50.0 |14.5 |54.2| 155.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EWA8w_IEOzBKvuueBqfaZh0BeoG5sVzR-XYbOJO4YlOkRw?e=wHWOBE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.pth) | 25 | |[YOLOX-x](./exps/default/yolox_x.py) |640 |**51.2** | 17.3 |99.1 |281.9 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) | 26 | |[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.4 | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) | 27 | 28 | 下载 yolox_s.pth 放到 weights 文件夹下 29 | 30 | 下载 [https://github.com/Sharpiless/Yolov5-Deepsort/blob/main/deep_sort/deep_sort/deep/checkpoint/ckpt.t7](https://github.com/Sharpiless/Yolov5-Deepsort/blob/main/deep_sort/deep_sort/deep/checkpoint/ckpt.t7) 放到 deep_sort/deep_sort/deep/checkpoint 文件夹下 31 | 32 | # 训练自己的模型: 33 | 34 | 35 | 训练好后放到 weights 文件夹下 36 | 37 | # 调用接口: 38 | 39 | ## 创建检测器: 40 | 41 | ```python 42 | from AIDetector_pytorch import Detector 43 | 44 | det = Detector() 45 | ``` 46 | 47 | ## 调用检测接口: 48 | 49 | ```python 50 | result = det.feedCap(im) 51 | ``` 52 | 53 | 其中 im 为 BGR 图像 54 | 55 | 返回的 result 是字典,result['frame'] 返回可视化后的图像 56 | 57 | # 联系作者: 58 | 59 | > B站:[https://space.bilibili.com/470550823](https://space.bilibili.com/470550823) 60 | 61 | > CSDN:[https://blog.csdn.net/weixin_44936889](https://blog.csdn.net/weixin_44936889) 62 | 63 | > AI Studio:[https://aistudio.baidu.com/aistudio/personalcenter/thirdview/67156](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/67156) 64 | 65 | > Github:[https://github.com/Sharpiless](https://github.com/Sharpiless) 66 | 67 | -------------------------------------------------------------------------------- /deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "deep_sort/deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. 
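The `deep_sort.yaml` shown above collects the DeepSort hyper-parameters (appearance-distance gate, confidence floor, NMS overlap, maximum track age, etc.). A minimal sketch of how they are typically wired into a tracker, assuming the repo's `get_config` helper from `deep_sort/utils/parser.py` and the `build_tracker` factory defined in the `__init__.py` that follows; this snippet is illustrative, not code taken from the repository:

```python
import torch

from deep_sort.utils.parser import get_config   # YamlParser-based helper (shown later in this dump)
from deep_sort.deep_sort import build_tracker    # factory defined in the next file

cfg = get_config("deep_sort/configs/deep_sort.yaml")              # exposes cfg.DEEPSORT.MAX_DIST, ...
tracker = build_tracker(cfg, use_cuda=torch.cuda.is_available())  # -> DeepSort instance

# per frame: boxes as (cx, cy, w, h), detector confidences, class labels, and the original BGR image
# outputs = tracker.update(bbox_xywh, confidences, clss, ori_img)
# -> list of (x1, y1, x2, y2, cls, track_id) for confirmed, recently updated tracks
```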
-------------------------------------------------------------------------------- /deep_sort/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | logger = logging.getLogger("root.tracker") 16 | logger.info("Loading weights from {}... Done!".format(model_path)) 17 | self.net.to(self.device) 18 | self.size = (64, 128) 19 | self.norm = transforms.Compose([ 20 | transforms.ToTensor(), 21 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 22 | ]) 23 | 24 | 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. 
normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 39 | return im_batch 40 | 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | 56 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | 
return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/test.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 | torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict, strict=False) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from 
.sort.tracker import Tracker 9 | 10 | 11 | __all__ = ['DeepSort'] 12 | 13 | 14 | class DeepSort(object): 15 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | 19 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 20 | 21 | max_cosine_distance = max_dist 22 | nn_budget = 100 23 | metric = NearestNeighborDistanceMetric( 24 | "cosine", max_cosine_distance, nn_budget) 25 | self.tracker = Tracker( 26 | metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 27 | 28 | def update(self, bbox_xywh, confidences, clss, ori_img): 29 | self.height, self.width = ori_img.shape[:2] 30 | # generate detections 31 | features = self._get_features(bbox_xywh, ori_img) 32 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 33 | detections = [Detection(bbox_tlwh[i], clss[i], conf, features[i]) for i, conf in enumerate( 34 | confidences) if conf > self.min_confidence] 35 | # update tracker 36 | self.tracker.predict() 37 | self.tracker.update(detections) 38 | 39 | # output bbox identities 40 | outputs = [] 41 | for track in self.tracker.tracks: 42 | if not track.is_confirmed() or track.time_since_update > 1: 43 | continue 44 | box = track.to_tlwh() 45 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 46 | outputs.append((x1, y1, x2, y2, track.cls_, track.track_id)) 47 | return outputs 48 | 49 | @staticmethod 50 | def _xywh_to_tlwh(bbox_xywh): 51 | if isinstance(bbox_xywh, np.ndarray): 52 | bbox_tlwh = bbox_xywh.copy() 53 | elif isinstance(bbox_xywh, torch.Tensor): 54 | bbox_tlwh = bbox_xywh.clone() 55 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2]/2. 56 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3]/2. 57 | return bbox_tlwh 58 | 59 | def _xywh_to_xyxy(self, bbox_xywh): 60 | x, y, w, h = bbox_xywh 61 | x1 = max(int(x-w/2), 0) 62 | x2 = min(int(x+w/2), self.width-1) 63 | y1 = max(int(y-h/2), 0) 64 | y2 = min(int(y+h/2), self.height-1) 65 | return x1, y1, x2, y2 66 | 67 | def _tlwh_to_xyxy(self, bbox_tlwh): 68 | """ 69 | TODO: 70 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 71 | Thanks JieChen91@github.com for reporting this bug! 
72 | """ 73 | x, y, w, h = bbox_tlwh 74 | x1 = max(int(x), 0) 75 | x2 = min(int(x+w), self.width-1) 76 | y1 = max(int(y), 0) 77 | y2 = min(int(y+h), self.height-1) 78 | return x1, y1, x2, y2 79 | 80 | def _xyxy_to_tlwh(self, bbox_xyxy): 81 | x1, y1, x2, y2 = bbox_xyxy 82 | 83 | t = x1 84 | l = y1 85 | w = int(x2-x1) 86 | h = int(y2-y1) 87 | return t, l, w, h 88 | 89 | def _get_features(self, bbox_xywh, ori_img): 90 | im_crops = [] 91 | for box in bbox_xywh: 92 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 93 | im = ori_img[y1:y2, x1:x2] 94 | im_crops.append(im) 95 | if im_crops: 96 | features = self.extractor(im_crops) 97 | else: 98 | features = np.array([]) 99 | return features 100 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | 7 | def __init__(self, tlwh, cls_, confidence, feature): 8 | self.tlwh = np.asarray(tlwh, dtype=np.float) 9 | self.cls_ = cls_ 10 | self.confidence = float(confidence) 11 | self.feature = np.asarray(feature, dtype=np.float32) 12 | 13 | def to_tlbr(self): 14 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 15 | `(top left, bottom right)`. 16 | """ 17 | ret = self.tlwh.copy() 18 | ret[2:] += ret[:2] 19 | return ret 20 | 21 | def to_xyah(self): 22 | """Convert bounding box to format `(center x, center y, aspect ratio, 23 | height)`, where the aspect ratio is `width / height`. 24 | """ 25 | ret = self.tlwh.copy() 26 | ret[:2] += ret[2:] / 2 27 | ret[2] /= ret[3] 28 | return ret 29 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 
24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
63 | 64 | """ 65 | 66 | def __init__(self, mean, cls_, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.cls_ = cls_ 70 | self.covariance = covariance 71 | self.track_id = track_id 72 | self.hits = 1 73 | self.age = 1 74 | self.time_since_update = 0 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | self.cls_ = detection.cls_ 143 | 144 | self.hits += 1 145 | self.time_since_update = 0 146 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 147 | self.state = TrackState.Confirmed 148 | 149 | def mark_missed(self): 150 | """Mark this track as missed (no association at the current time step). 151 | """ 152 | if self.state == TrackState.Tentative: 153 | self.state = TrackState.Deleted 154 | elif self.time_since_update > self._max_age: 155 | self.state = TrackState.Deleted 156 | 157 | def is_tentative(self): 158 | """Returns True if this track is tentative (unconfirmed). 159 | """ 160 | return self.state == TrackState.Tentative 161 | 162 | def is_confirmed(self): 163 | """Returns True if this track is confirmed.""" 164 | return self.state == TrackState.Confirmed 165 | 166 | def is_deleted(self): 167 | """Returns True if this track is dead and should be deleted.""" 168 | return self.state == TrackState.Deleted 169 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . 
import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | 12 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 13 | self.metric = metric 14 | self.max_iou_distance = max_iou_distance 15 | self.max_age = max_age 16 | self.n_init = n_init 17 | 18 | self.kf = kalman_filter.KalmanFilter() 19 | self.tracks = [] 20 | self._next_id = 1 21 | 22 | def predict(self): 23 | """Propagate track state distributions one time step forward. 24 | 25 | This function should be called once every time step, before `update`. 26 | """ 27 | for track in self.tracks: 28 | track.predict(self.kf) 29 | 30 | def update(self, detections): 31 | """Perform measurement update and track management. 32 | 33 | Parameters 34 | ---------- 35 | detections : List[deep_sort.detection.Detection] 36 | A list of detections at the current time step. 37 | 38 | """ 39 | # Run matching cascade. 40 | matches, unmatched_tracks, unmatched_detections = \ 41 | self._match(detections) 42 | 43 | # Update track set. 44 | for track_idx, detection_idx in matches: 45 | self.tracks[track_idx].update( 46 | self.kf, detections[detection_idx]) 47 | for track_idx in unmatched_tracks: 48 | self.tracks[track_idx].mark_missed() 49 | for detection_idx in unmatched_detections: 50 | self._initiate_track(detections[detection_idx]) 51 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 52 | 53 | # Update distance metric. 54 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 55 | features, targets = [], [] 56 | for track in self.tracks: 57 | if not track.is_confirmed(): 58 | continue 59 | features += track.features 60 | targets += [track.track_id for _ in track.features] 61 | track.features = [] 62 | self.metric.partial_fit( 63 | np.asarray(features), np.asarray(targets), active_targets) 64 | 65 | def _match(self, detections): 66 | 67 | def gated_metric(tracks, dets, track_indices, detection_indices): 68 | features = np.array([dets[i].feature for i in detection_indices]) 69 | targets = np.array([tracks[i].track_id for i in track_indices]) 70 | cost_matrix = self.metric.distance(features, targets) 71 | cost_matrix = linear_assignment.gate_cost_matrix( 72 | self.kf, cost_matrix, tracks, dets, track_indices, 73 | detection_indices) 74 | 75 | return cost_matrix 76 | 77 | # Split track set into confirmed and unconfirmed tracks. 78 | confirmed_tracks = [ 79 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 80 | unconfirmed_tracks = [ 81 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 82 | 83 | # Associate confirmed tracks using appearance features. 84 | matches_a, unmatched_tracks_a, unmatched_detections = \ 85 | linear_assignment.matching_cascade( 86 | gated_metric, self.metric.matching_threshold, self.max_age, 87 | self.tracks, detections, confirmed_tracks) 88 | 89 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
90 | iou_track_candidates = unconfirmed_tracks + [ 91 | k for k in unmatched_tracks_a if 92 | self.tracks[k].time_since_update == 1] 93 | unmatched_tracks_a = [ 94 | k for k in unmatched_tracks_a if 95 | self.tracks[k].time_since_update != 1] 96 | matches_b, unmatched_tracks_b, unmatched_detections = \ 97 | linear_assignment.min_cost_matching( 98 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 99 | detections, iou_track_candidates, unmatched_detections) 100 | matches = matches_a + matches_b 101 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 102 | return matches, unmatched_tracks, unmatched_detections 103 | 104 | def _initiate_track(self, detection): 105 | mean, covariance = self.kf.initiate(detection.to_xyah()) 106 | self.tracks.append(Track( 107 | mean, detection.cls_, covariance, self._next_id, self.n_init, self.max_age, 108 | detection.feature)) 109 | self._next_id += 1 110 | -------------------------------------------------------------------------------- /deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- /deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, 
seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | 
save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | class YamlParser(edict): 6 | """ 7 | This is yaml parser based on EasyDict. 
8 | """ 9 | def __init__(self, cfg_dict=None, config_file=None): 10 | if cfg_dict is None: 11 | cfg_dict = {} 12 | 13 | if config_file is not None: 14 | assert(os.path.isfile(config_file)) 15 | with open(config_file, 'r') as fo: 16 | cfg_dict.update(yaml.load(fo.read())) 17 | 18 | super(YamlParser, self).__init__(cfg_dict) 19 | 20 | 21 | def merge_from_file(self, config_file): 22 | with open(config_file, 'r') as fo: 23 | self.update(yaml.load(fo.read())) 24 | 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from AIDetector_pytorch import Detector 2 | import imutils 3 | import cv2 4 | 5 | def main(): 6 | 7 | name = 'demo' 8 | 9 | det = Detector() 10 | cap = cv2.VideoCapture('E:/视频/行人监控/test01.mp4') 11 | fps = int(cap.get(5)) 12 | print('fps:', fps) 13 | t = int(1000/fps) 14 | 15 | videoWriter = None 16 | 17 | while True: 18 | 19 | # try: 20 | _, im = cap.read() 21 | if im is None: 22 | break 23 | 24 | result = det.feedCap(im) 25 | result = result['frame'] 26 | result = imutils.resize(result, height=500) 27 | if videoWriter is None: 28 | fourcc = cv2.VideoWriter_fourcc( 29 | 'm', 'p', '4', 'v') # opencv3.0 30 | videoWriter = cv2.VideoWriter( 31 | 'result.mp4', fourcc, fps, (result.shape[1], result.shape[0])) 32 | 33 | videoWriter.write(result) 34 | cv2.imshow(name, result) 35 | cv2.waitKey(t) 36 | 37 | if cv2.getWindowProperty(name, cv2.WND_PROP_AUTOSIZE) < 1: 38 | # 点x退出 39 | break 40 | # except Exception as e: 41 | # print(e) 42 | # break 43 | 44 | cap.release() 45 | videoWriter.release() 46 | cv2.destroyAllWindows() 47 | 48 | if __name__ == '__main__': 49 | 50 | main() -------------------------------------------------------------------------------- /demo/MegEngine/cpp/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [ -z $CXX ];then 5 | echo "please export you c++ toolchain to CXX" 6 | echo "for example:" 7 | echo "build for host: export CXX=g++" 8 | echo "cross build for aarch64-android(always locate in NDK): export CXX=aarch64-linux-android21-clang++" 9 | echo 
"cross build for aarch64-linux: export CXX=aarch64-linux-gnu-g++" 10 | exit -1 11 | fi 12 | 13 | if [ -z $MGE_INSTALL_PATH ];then 14 | echo "please refsi ./README.md to init MGE_INSTALL_PATH env" 15 | exit -1 16 | fi 17 | 18 | if [ -z $OPENCV_INSTALL_INCLUDE_PATH ];then 19 | echo "please refs ./README.md to init OPENCV_INSTALL_INCLUDE_PATH env" 20 | exit -1 21 | fi 22 | 23 | if [ -z $OPENCV_INSTALL_LIB_PATH ];then 24 | echo "please refs ./README.md to init OPENCV_INSTALL_LIB_PATH env" 25 | exit -1 26 | fi 27 | 28 | INCLUDE_FLAG="-I$MGE_INSTALL_PATH/include -I$OPENCV_INSTALL_INCLUDE_PATH" 29 | LINK_FLAG="-L$MGE_INSTALL_PATH/lib/ -lmegengine -L$OPENCV_INSTALL_LIB_PATH -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs" 30 | BUILD_FLAG="-static-libstdc++ -O3 -pie -fPIE -g" 31 | 32 | if [[ $CXX =~ "android" ]]; then 33 | LINK_FLAG="${LINK_FLAG} -llog -lz" 34 | fi 35 | 36 | echo "CXX: $CXX" 37 | echo "MGE_INSTALL_PATH: $MGE_INSTALL_PATH" 38 | echo "INCLUDE_FLAG: $INCLUDE_FLAG" 39 | echo "LINK_FLAG: $LINK_FLAG" 40 | echo "BUILD_FLAG: $BUILD_FLAG" 41 | 42 | echo "[" > compile_commands.json 43 | echo "{" >> compile_commands.json 44 | echo "\"directory\": \"$PWD\"," >> compile_commands.json 45 | echo "\"command\": \"$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG}\"," >> compile_commands.json 46 | echo "\"file\": \"$PWD/yolox.cpp\"," >> compile_commands.json 47 | echo "}," >> compile_commands.json 48 | echo "]" >> compile_commands.json 49 | $CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG} ${BUILD_FLAG} 50 | 51 | echo "build success, output file: yolox" 52 | if [[ $CXX =~ "android" ]]; then 53 | echo "try command to run:" 54 | echo "adb push/scp $MGE_INSTALL_PATH/lib/libmegengine.so android_phone" 55 | echo "adb push/scp $OPENCV_INSTALL_LIB_PATH/*.so android_phone" 56 | echo "adb push/scp ./yolox yolox_s.mge android_phone" 57 | echo "adb push/scp ../../../assets/dog.jpg android_phone" 58 | echo "adb/ssh to android_phone, then run: LD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread " 59 | else 60 | echo "try command to run: LD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread " 61 | fi 62 | -------------------------------------------------------------------------------- /demo/MegEngine/python/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-Python-MegEngine 2 | 3 | Python version of YOLOX object detection base on [MegEngine](https://github.com/MegEngine/MegEngine). 4 | 5 | ## Tutorial 6 | 7 | ### Step1: install requirements 8 | 9 | ``` 10 | python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html 11 | ``` 12 | 13 | ### Step2: convert checkpoint weights from torch's path file 14 | 15 | ``` 16 | python3 convert_weights.py -w yolox_s.pth -o yolox_s_mge.pkl 17 | ``` 18 | 19 | ### Step3: run demo 20 | 21 | This part is the same as torch's python demo, but no need to specify device. 22 | 23 | ``` 24 | python3 demo.py image -n yolox-s -c yolox_s_mge.pkl --path ../../../assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result 25 | ``` 26 | 27 | ### [Optional]Step4: dump model for cpp inference 28 | 29 | > **Note**: result model is dumped with `optimize_for_inference` and `enable_fuse_conv_bias_nonlinearity`. 
30 | 31 | ``` 32 | python3 dump.py -n yolox-s -c yolox_s_mge.pkl --dump_path yolox_s.mge 33 | ``` 34 | -------------------------------------------------------------------------------- /demo/MegEngine/python/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import megengine as mge 5 | import megengine.module as M 6 | from megengine import jit 7 | 8 | from models.yolo_fpn import YOLOFPN 9 | from models.yolo_head import YOLOXHead 10 | from models.yolo_pafpn import YOLOPAFPN 11 | from models.yolox import YOLOX 12 | 13 | 14 | def build_yolox(name="yolox-s"): 15 | num_classes = 80 16 | 17 | # value meaning: depth, width 18 | param_dict = { 19 | "yolox-nano": (0.33, 0.25), 20 | "yolox-tiny": (0.33, 0.375), 21 | "yolox-s": (0.33, 0.50), 22 | "yolox-m": (0.67, 0.75), 23 | "yolox-l": (1.0, 1.0), 24 | "yolox-x": (1.33, 1.25), 25 | } 26 | if name == "yolov3": 27 | depth = 1.0 28 | width = 1.0 29 | backbone = YOLOFPN() 30 | head = YOLOXHead(num_classes, width, in_channels=[128, 256, 512], act="lrelu") 31 | model = YOLOX(backbone, head) 32 | else: 33 | assert name in param_dict 34 | kwargs = {} 35 | depth, width = param_dict[name] 36 | if name == "yolox-nano": 37 | kwargs["depthwise"] = True 38 | in_channels = [256, 512, 1024] 39 | backbone = YOLOPAFPN(depth, width, in_channels=in_channels, **kwargs) 40 | head = YOLOXHead(num_classes, width, in_channels=in_channels, **kwargs) 41 | model = YOLOX(backbone, head) 42 | 43 | for m in model.modules(): 44 | if isinstance(m, M.BatchNorm2d): 45 | m.eps = 1e-3 46 | 47 | return model 48 | 49 | 50 | def build_and_load(weight_file, name="yolox-s"): 51 | model = build_yolox(name) 52 | model_weights = mge.load(weight_file) 53 | model.load_state_dict(model_weights, strict=False) 54 | return model 55 | -------------------------------------------------------------------------------- /demo/MegEngine/python/coco_classes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | COCO_CLASSES = ( 6 | "person", 7 | "bicycle", 8 | "car", 9 | "motorcycle", 10 | "airplane", 11 | "bus", 12 | "train", 13 | "truck", 14 | "boat", 15 | "traffic light", 16 | "fire hydrant", 17 | "stop sign", 18 | "parking meter", 19 | "bench", 20 | "bird", 21 | "cat", 22 | "dog", 23 | "horse", 24 | "sheep", 25 | "cow", 26 | "elephant", 27 | "bear", 28 | "zebra", 29 | "giraffe", 30 | "backpack", 31 | "umbrella", 32 | "handbag", 33 | "tie", 34 | "suitcase", 35 | "frisbee", 36 | "skis", 37 | "snowboard", 38 | "sports ball", 39 | "kite", 40 | "baseball bat", 41 | "baseball glove", 42 | "skateboard", 43 | "surfboard", 44 | "tennis racket", 45 | "bottle", 46 | "wine glass", 47 | "cup", 48 | "fork", 49 | "knife", 50 | "spoon", 51 | "bowl", 52 | "banana", 53 | "apple", 54 | "sandwich", 55 | "orange", 56 | "broccoli", 57 | "carrot", 58 | "hot dog", 59 | "pizza", 60 | "donut", 61 | "cake", 62 | "chair", 63 | "couch", 64 | "potted plant", 65 | "bed", 66 | "dining table", 67 | "toilet", 68 | "tv", 69 | "laptop", 70 | "mouse", 71 | "remote", 72 | "keyboard", 73 | "cell phone", 74 | "microwave", 75 | "oven", 76 | "toaster", 77 | "sink", 78 | "refrigerator", 79 | "book", 80 | "clock", 81 | "vase", 82 | "scissors", 83 | "teddy bear", 84 | "hair drier", 85 | "toothbrush", 86 | ) 87 | -------------------------------------------------------------------------------- /demo/MegEngine/python/convert_weights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | import argparse 4 | from collections import OrderedDict 5 | 6 | import megengine as mge 7 | import torch 8 | 9 | 10 | def make_parser(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("-w", "--weights", type=str, help="path of weight file") 13 | parser.add_argument( 14 | "-o", 15 | "--output", 16 | default="weight_mge.pkl", 17 | type=str, 18 | help="path of weight file", 19 | ) 20 | return parser 21 | 22 | 23 | def numpy_weights(weight_file): 24 | torch_weights = torch.load(weight_file, map_location="cpu") 25 | if "model" in torch_weights: 26 | torch_weights = torch_weights["model"] 27 | new_dict = OrderedDict() 28 | for k, v in torch_weights.items(): 29 | new_dict[k] = v.cpu().numpy() 30 | return new_dict 31 | 32 | 33 | def map_weights(weight_file, output_file): 34 | torch_weights = numpy_weights(weight_file) 35 | 36 | new_dict = OrderedDict() 37 | for k, v in torch_weights.items(): 38 | if "num_batches_tracked" in k: 39 | print("drop: {}".format(k)) 40 | continue 41 | if k.endswith("bias"): 42 | print("bias key: {}".format(k)) 43 | v = v.reshape(1, -1, 1, 1) 44 | new_dict[k] = v 45 | elif "dconv" in k and "conv.weight" in k: 46 | print("depthwise conv key: {}".format(k)) 47 | cout, cin, k1, k2 = v.shape 48 | v = v.reshape(cout, 1, cin, k1, k2) 49 | new_dict[k] = v 50 | else: 51 | new_dict[k] = v 52 | 53 | mge.save(new_dict, output_file) 54 | print("save weights to {}".format(output_file)) 55 | 56 | 57 | def main(): 58 | parser = make_parser() 59 | args = parser.parse_args() 60 | map_weights(args.weights, args.output) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /demo/MegEngine/python/dump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
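# What this script does: build_and_load() rebuilds the requested YOLOX variant and loads
# the converted MegEngine weights, the head's decode_in_inference flag is switched off so
# the traced graph returns raw per-level predictions, and jit.trace + dump() serialize a
# static graph (with conv/bias/nonlinearity fusion enabled) for the C++ demo to load.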
4 | 5 | import argparse 6 | 7 | import megengine as mge 8 | import numpy as np 9 | from megengine import jit 10 | 11 | from build import build_and_load 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX Demo Dump") 16 | parser.add_argument("-n", "--name", type=str, default="yolox-s", help="model name") 17 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") 18 | parser.add_argument( 19 | "--dump_path", default="model.mge", help="path to save the dumped model" 20 | ) 21 | return parser 22 | 23 | 24 | def dump_static_graph(model, graph_name="model.mge"): 25 | model.eval() 26 | model.head.decode_in_inference = False 27 | 28 | data = mge.Tensor(np.random.random((1, 3, 640, 640))) 29 | 30 | @jit.trace(capture_as_const=True) 31 | def pred_func(data): 32 | outputs = model(data) 33 | return outputs 34 | 35 | pred_func(data) 36 | pred_func.dump( 37 | graph_name, 38 | arg_names=["data"], 39 | optimize_for_inference=True, 40 | enable_fuse_conv_bias_nonlinearity=True, 41 | ) 42 | 43 | 44 | def main(args): 45 | model = build_and_load(args.ckpt, name=args.name) 46 | dump_static_graph(model, args.dump_path) 47 | 48 | 49 | if __name__ == "__main__": 50 | args = make_parser().parse_args() 51 | main(args) 52 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .yolo_fpn import YOLOFPN 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | from .yolox import YOLOX 10 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.functional as F 6 | import megengine.module as M 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv, UpSample 10 | 11 | 12 | class YOLOFPN(M.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, depth=53, in_features=["dark3", "dark4", "dark5"], 19 | ): 20 | super().__init__() 21 | 22 | self.backbone = Darknet(depth) 23 | self.in_features = in_features 24 | 25 | # out 1 26 | self.out1_cbl = self._make_cbl(512, 256, 1) 27 | self.out1 = self._make_embedding([256, 512], 512 + 256) 28 | 29 | # out 2 30 | self.out2_cbl = self._make_cbl(256, 128, 1) 31 | self.out2 = self._make_embedding([128, 256], 256 + 128) 32 | 33 | # upsample 34 | self.upsample = UpSample(scale_factor=2, mode="bilinear") 35 | 36 | def _make_cbl(self, _in, _out, ks): 37 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 38 | 39 | def _make_embedding(self, filters_list, in_filters): 40 | m = M.Sequential( 41 | *[ 42 | self._make_cbl(in_filters, filters_list[0], 1), 43 | self._make_cbl(filters_list[0], filters_list[1], 3), 44 | 45 | self._make_cbl(filters_list[1], filters_list[0], 1), 46 | 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def forward(self, inputs): 54 | """ 55 | Args: 56 | inputs (Tensor): input image. 
57 | 58 | Returns: 59 | Tuple[Tensor]: FPN output features.. 60 | """ 61 | # backbone 62 | out_features = self.backbone(inputs) 63 | x2, x1, x0 = [out_features[f] for f in self.in_features] 64 | 65 | # yolo branch 1 66 | x1_in = self.out1_cbl(x0) 67 | x1_in = self.upsample(x1_in) 68 | x1_in = F.concat([x1_in, x1], 1) 69 | out_dark4 = self.out1(x1_in) 70 | 71 | # yolo branch 2 72 | x2_in = self.out2_cbl(out_dark4) 73 | x2_in = self.upsample(x2_in) 74 | x2_in = F.concat([x2_in, x2], 1) 75 | out_dark3 = self.out2(x2_in) 76 | 77 | outputs = (out_dark3, out_dark4, x0) 78 | return outputs 79 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolo_pafpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.module as M 6 | import megengine.functional as F 7 | 8 | from .darknet import CSPDarknet 9 | from .network_blocks import BaseConv, CSPLayer, DWConv, UpSample 10 | 11 | 12 | class YOLOPAFPN(M.Module): 13 | """ 14 | YOLOv3 model. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"), 19 | in_channels=[256, 512, 1024], depthwise=False, act="silu", 20 | ): 21 | super().__init__() 22 | self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) 23 | self.in_features = in_features 24 | self.in_channels = in_channels 25 | Conv = DWConv if depthwise else BaseConv 26 | 27 | self.upsample = UpSample(scale_factor=2, mode="bilinear") 28 | self.lateral_conv0 = BaseConv( 29 | int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act 30 | ) 31 | self.C3_p4 = CSPLayer( 32 | int(2 * in_channels[1] * width), 33 | int(in_channels[1] * width), 34 | round(3 * depth), 35 | False, 36 | depthwise=depthwise, 37 | act=act, 38 | ) # cat 39 | 40 | self.reduce_conv1 = BaseConv( 41 | int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act 42 | ) 43 | self.C3_p3 = CSPLayer( 44 | int(2 * in_channels[0] * width), 45 | int(in_channels[0] * width), 46 | round(3 * depth), 47 | False, 48 | depthwise=depthwise, 49 | act=act, 50 | ) 51 | 52 | # bottom-up conv 53 | self.bu_conv2 = Conv( 54 | int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act 55 | ) 56 | self.C3_n3 = CSPLayer( 57 | int(2 * in_channels[0] * width), 58 | int(in_channels[1] * width), 59 | round(3 * depth), 60 | False, 61 | depthwise=depthwise, 62 | act=act, 63 | ) 64 | 65 | # bottom-up conv 66 | self.bu_conv1 = Conv( 67 | int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act 68 | ) 69 | self.C3_n4 = CSPLayer( 70 | int(2 * in_channels[1] * width), 71 | int(in_channels[2] * width), 72 | round(3 * depth), 73 | False, 74 | depthwise=depthwise, 75 | act=act, 76 | ) 77 | 78 | def forward(self, input): 79 | """ 80 | Args: 81 | inputs: input images. 82 | 83 | Returns: 84 | Tuple[Tensor]: FPN feature. 
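            The three maps are ordered (pan_out2, pan_out1, pan_out0), i.e. strides
            8, 16 and 32 of the input resolution (see the shape comments below).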
85 | """ 86 | 87 | # backbone 88 | out_features = self.backbone(input) 89 | features = [out_features[f] for f in self.in_features] 90 | [x2, x1, x0] = features 91 | 92 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 93 | f_out0 = self.upsample(fpn_out0) # 512/16 94 | f_out0 = F.concat([f_out0, x1], 1) # 512->1024/16 95 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 96 | 97 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 98 | f_out1 = self.upsample(fpn_out1) # 256/8 99 | f_out1 = F.concat([f_out1, x2], 1) # 256->512/8 100 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 101 | 102 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 103 | p_out1 = F.concat([p_out1, fpn_out1], 1) # 256->512/16 104 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 105 | 106 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 107 | p_out0 = F.concat([p_out0, fpn_out0], 1) # 512->1024/32 108 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 109 | 110 | outputs = (pan_out2, pan_out1, pan_out0) 111 | return outputs 112 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.module as M 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(M.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | assert not self.training 32 | outputs = self.head(fpn_outs) 33 | 34 | return outputs 35 | -------------------------------------------------------------------------------- /demo/MegEngine/python/process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
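# Helper functions shared by the MegEngine demo:
# - preprocess(): letterbox-resizes the image onto a 114-padded canvas of input_size,
#   flips BGR -> RGB, scales to [0, 1], applies mean/std normalization and returns the
#   resize ratio needed later to map boxes back to the original image.
# - postprocess(): converts (cx, cy, w, h) predictions to corner form, keeps boxes with
#   obj_conf * class_conf above conf_thre and runs per-image NMS with nms_thre.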
4 | 5 | import cv2 6 | import megengine.functional as F 7 | import numpy as np 8 | 9 | __all__ = [ 10 | "preprocess", 11 | "postprocess", 12 | ] 13 | 14 | 15 | def preprocess(image, input_size, mean, std, swap=(2, 0, 1)): 16 | if len(image.shape) == 3: 17 | padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0 18 | else: 19 | padded_img = np.ones(input_size) * 114.0 20 | img = np.array(image) 21 | r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) 22 | resized_img = cv2.resize( 23 | img, 24 | (int(img.shape[1] * r), int(img.shape[0] * r)), 25 | interpolation=cv2.INTER_LINEAR, 26 | ).astype(np.float32) 27 | padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img 28 | image = padded_img 29 | 30 | image = image.astype(np.float32) 31 | image = image[:, :, ::-1] 32 | image /= 255.0 33 | if mean is not None: 34 | image -= mean 35 | if std is not None: 36 | image /= std 37 | image = image.transpose(swap) 38 | image = np.ascontiguousarray(image, dtype=np.float32) 39 | return image, r 40 | 41 | 42 | def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): 43 | box_corner = F.zeros_like(prediction) 44 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 45 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 46 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 47 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 48 | prediction[:, :, :4] = box_corner[:, :, :4] 49 | 50 | output = [None for _ in range(len(prediction))] 51 | for i, image_pred in enumerate(prediction): 52 | 53 | # If none are remaining => process next image 54 | if not image_pred.shape[0]: 55 | continue 56 | # Get score and class with highest confidence 57 | class_conf = F.max(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True) 58 | class_pred = F.argmax(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True) 59 | 60 | class_conf_squeeze = F.squeeze(class_conf) 61 | conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre 62 | detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1) 63 | detections = detections[conf_mask] 64 | if not detections.shape[0]: 65 | continue 66 | 67 | nms_out_index = F.vision.nms( 68 | detections[:, :4], detections[:, 4] * detections[:, 5], nms_thre, 69 | ) 70 | detections = detections[nms_out_index] 71 | if output[i] is None: 72 | output[i] = detections 73 | else: 74 | output[i] = F.concat((output[i], detections)) 75 | 76 | return output 77 | -------------------------------------------------------------------------------- /demo/MegEngine/python/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
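# Drawing helper for the demos: vis() expects boxes as (x1, y1, x2, y2) pixel coordinates
# on the original image, scores in [0, 1] and integer class ids indexing into class_names;
# detections below `conf` are skipped, the rest get a per-class color from _COLORS and a
# "label:score%" caption.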
4 | 5 | import cv2 6 | import numpy as np 7 | 8 | __all__ = ["vis"] 9 | 10 | 11 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): 12 | 13 | for i in range(len(boxes)): 14 | box = boxes[i] 15 | cls_id = int(cls_ids[i]) 16 | score = scores[i] 17 | if score < conf: 18 | continue 19 | x0 = int(box[0]) 20 | y0 = int(box[1]) 21 | x1 = int(box[2]) 22 | y1 = int(box[3]) 23 | 24 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() 25 | text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) 26 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) 27 | font = cv2.FONT_HERSHEY_SIMPLEX 28 | 29 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] 30 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) 31 | 32 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() 33 | cv2.rectangle( 34 | img, 35 | (x0, y0 + 1), 36 | (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), 37 | txt_bk_color, 38 | -1 39 | ) 40 | cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) 41 | 42 | return img 43 | 44 | 45 | _COLORS = np.array( 46 | [ 47 | 0.000, 0.447, 0.741, 48 | 0.850, 0.325, 0.098, 49 | 0.929, 0.694, 0.125, 50 | 0.494, 0.184, 0.556, 51 | 0.466, 0.674, 0.188, 52 | 0.301, 0.745, 0.933, 53 | 0.635, 0.078, 0.184, 54 | 0.300, 0.300, 0.300, 55 | 0.600, 0.600, 0.600, 56 | 1.000, 0.000, 0.000, 57 | 1.000, 0.500, 0.000, 58 | 0.749, 0.749, 0.000, 59 | 0.000, 1.000, 0.000, 60 | 0.000, 0.000, 1.000, 61 | 0.667, 0.000, 1.000, 62 | 0.333, 0.333, 0.000, 63 | 0.333, 0.667, 0.000, 64 | 0.333, 1.000, 0.000, 65 | 0.667, 0.333, 0.000, 66 | 0.667, 0.667, 0.000, 67 | 0.667, 1.000, 0.000, 68 | 1.000, 0.333, 0.000, 69 | 1.000, 0.667, 0.000, 70 | 1.000, 1.000, 0.000, 71 | 0.000, 0.333, 0.500, 72 | 0.000, 0.667, 0.500, 73 | 0.000, 1.000, 0.500, 74 | 0.333, 0.000, 0.500, 75 | 0.333, 0.333, 0.500, 76 | 0.333, 0.667, 0.500, 77 | 0.333, 1.000, 0.500, 78 | 0.667, 0.000, 0.500, 79 | 0.667, 0.333, 0.500, 80 | 0.667, 0.667, 0.500, 81 | 0.667, 1.000, 0.500, 82 | 1.000, 0.000, 0.500, 83 | 1.000, 0.333, 0.500, 84 | 1.000, 0.667, 0.500, 85 | 1.000, 1.000, 0.500, 86 | 0.000, 0.333, 1.000, 87 | 0.000, 0.667, 1.000, 88 | 0.000, 1.000, 1.000, 89 | 0.333, 0.000, 1.000, 90 | 0.333, 0.333, 1.000, 91 | 0.333, 0.667, 1.000, 92 | 0.333, 1.000, 1.000, 93 | 0.667, 0.000, 1.000, 94 | 0.667, 0.333, 1.000, 95 | 0.667, 0.667, 1.000, 96 | 0.667, 1.000, 1.000, 97 | 1.000, 0.000, 1.000, 98 | 1.000, 0.333, 1.000, 99 | 1.000, 0.667, 1.000, 100 | 0.333, 0.000, 0.000, 101 | 0.500, 0.000, 0.000, 102 | 0.667, 0.000, 0.000, 103 | 0.833, 0.000, 0.000, 104 | 1.000, 0.000, 0.000, 105 | 0.000, 0.167, 0.000, 106 | 0.000, 0.333, 0.000, 107 | 0.000, 0.500, 0.000, 108 | 0.000, 0.667, 0.000, 109 | 0.000, 0.833, 0.000, 110 | 0.000, 1.000, 0.000, 111 | 0.000, 0.000, 0.167, 112 | 0.000, 0.000, 0.333, 113 | 0.000, 0.000, 0.500, 114 | 0.000, 0.000, 0.667, 115 | 0.000, 0.000, 0.833, 116 | 0.000, 0.000, 1.000, 117 | 0.000, 0.000, 0.000, 118 | 0.143, 0.143, 0.143, 119 | 0.286, 0.286, 0.286, 120 | 0.429, 0.429, 0.429, 121 | 0.571, 0.571, 0.571, 122 | 0.714, 0.714, 0.714, 123 | 0.857, 0.857, 0.857, 124 | 0.000, 0.447, 0.741, 125 | 0.314, 0.717, 0.741, 126 | 0.50, 0.5, 0 127 | ] 128 | ).astype(np.float32).reshape(-1, 3) 129 | -------------------------------------------------------------------------------- /demo/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## YOLOX-ONNXRuntime in Python 2 | 3 | This doc introduces how to convert your 
pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion. 4 | 5 | ### Download ONNX models. 6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | YOLOX-Nano | 0.91M | 1.08 | 416x416 | 25.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EfAGwvevU-lNhW5OqFAyHbwBJdI_7EaKu5yU04fgF5BU7w?e=gvq4hf)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.onnx) | 9 | | YOLOX-Tiny | 5.06M | 6.45 | 416x416 |32.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ET64VPoEV8FAm5YBiEj5JXwBVn_KYHM38iJQ_lpcK2slYw?e=uuJ7Ii)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.onnx) | 10 | | YOLOX-S | 9.0M | 26.8 | 640x640 |39.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/Ec0L1d1x2UtIpbfiahgxhtgBZVjb1NCXbotO8SCOdMqpQQ?e=siyIsK)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.onnx) | 11 | | YOLOX-M | 25.3M | 73.8 | 640x640 |46.4 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERUKlQe-nlxBoTKPy1ynbxsBmAZ_h-VBEV-nnfPdzUIkZQ?e=hyQQtl)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.onnx) | 12 | | YOLOX-L | 54.2M | 155.6 | 640x640 |50.0 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ET5w926jCA5GlVfg9ixB4KEBiW0HYl7SzaHNRaRG9dYO_A?e=ISmCYX)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.onnx) | 13 | | YOLOX-Darknet53| 63.72M | 185.3 | 640x640 |47.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESArloSW-MlPlLuemLh9zKkBdovgweKbfu4zkvzKAp7pPQ?e=f81Ikw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.onnx) | 14 | | YOLOX-X | 99.1M | 281.9 | 640x640 |51.2 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERjqoeMJlFdGuM3tQfXQmhABmGHlIHydWCwhlugeWLE9AA)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox.onnx) | 15 | 16 | 17 | ### Convert Your Model to ONNX 18 | 19 | First, you should move to by: 20 | ```shell 21 | cd 22 | ``` 23 | Then, you can: 24 | 25 | 1. Convert a standard YOLOX model by -n: 26 | ```shell 27 | python3 tools/export_onnx.py --output-name yolox_s.onnx -n yolox-s -c yolox_s.pth 28 | ``` 29 | Notes: 30 | * -n: specify a model name. The model name must be one of the [yolox-s,m,l,x and yolox-nane, yolox-tiny, yolov3] 31 | * -c: the model you have trained 32 | * -o: opset version, default 11. **However, if you will further convert your onnx model to [OpenVINO](../OpenVINO/), please specify the opset version to 10.** 33 | * --no-onnxsim: disable onnxsim 34 | * To customize an input shape for onnx model, modify the following code in tools/export.py: 35 | 36 | ```python 37 | dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1]) 38 | ``` 39 | 40 | 2. Convert a standard YOLOX model by -f. When using -f, the above command is equivalent to: 41 | 42 | ```shell 43 | python3 tools/export_onnx.py --output-name yolox_s.onnx -f exps/default/yolox_s.py -c yolox_s.pth 44 | ``` 45 | 46 | 3. 
To convert your customized model, please use -f: 47 | 48 | ```shell 49 | python3 tools/export_onnx.py --output-name your_yolox.onnx -f exps/your_dir/your_yolox.py -c your_yolox.pth 50 | ``` 51 | 52 | ### ONNXRuntime Demo 53 | 54 | Step1. 55 | ```shell 56 | cd /demo/ONNXRuntime 57 | ``` 58 | 59 | Step2. 60 | ```shell 61 | python3 onnx_inference.py -m -i -o -s 0.3 --input_shape 640,640 62 | ``` 63 | Notes: 64 | * -m: your converted onnx model 65 | * -i: input_image 66 | * -s: score threshold for visualization. 67 | * --input_shape: should be consistent with the shape you used for onnx convertion. 68 | -------------------------------------------------------------------------------- /demo/ONNXRuntime/onnx_inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import argparse 6 | import os 7 | 8 | import cv2 9 | import numpy as np 10 | 11 | import onnxruntime 12 | 13 | from yolox.data.data_augment import preproc as preprocess 14 | from yolox.data.datasets import COCO_CLASSES 15 | from yolox.utils import mkdir, multiclass_nms, demo_postprocess, vis 16 | 17 | 18 | def make_parser(): 19 | parser = argparse.ArgumentParser("onnxruntime inference sample") 20 | parser.add_argument( 21 | "-m", 22 | "--model", 23 | type=str, 24 | default="yolox.onnx", 25 | help="Input your onnx model.", 26 | ) 27 | parser.add_argument( 28 | "-i", 29 | "--image_path", 30 | type=str, 31 | default='test_image.png', 32 | help="Path to your input image.", 33 | ) 34 | parser.add_argument( 35 | "-o", 36 | "--output_dir", 37 | type=str, 38 | default='demo_output', 39 | help="Path to your output directory.", 40 | ) 41 | parser.add_argument( 42 | "-s", 43 | "--score_thr", 44 | type=float, 45 | default=0.3, 46 | help="Score threshould to filter the result.", 47 | ) 48 | parser.add_argument( 49 | "--input_shape", 50 | type=str, 51 | default="640,640", 52 | help="Specify an input shape for inference.", 53 | ) 54 | parser.add_argument( 55 | "--with_p6", 56 | action="store_true", 57 | help="Whether your model uses p6 in FPN/PAN.", 58 | ) 59 | return parser 60 | 61 | 62 | if __name__ == '__main__': 63 | args = make_parser().parse_args() 64 | 65 | input_shape = tuple(map(int, args.input_shape.split(','))) 66 | origin_img = cv2.imread(args.image_path) 67 | mean = (0.485, 0.456, 0.406) 68 | std = (0.229, 0.224, 0.225) 69 | img, ratio = preprocess(origin_img, input_shape, mean, std) 70 | 71 | session = onnxruntime.InferenceSession(args.model) 72 | 73 | ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]} 74 | output = session.run(None, ort_inputs) 75 | predictions = demo_postprocess(output[0], input_shape, p6=args.with_p6)[0] 76 | 77 | boxes = predictions[:, :4] 78 | scores = predictions[:, 4:5] * predictions[:, 5:] 79 | 80 | boxes_xyxy = np.ones_like(boxes) 81 | boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2. 82 | boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2. 83 | boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2. 84 | boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2. 
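    # The raw predictions are (cx, cy, w, h) on the resized network input; the four lines
    # above convert them to corner (x1, y1, x2, y2) form and the division by `ratio` below
    # maps them back to the original image scale used for drawing.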
85 | boxes_xyxy /= ratio 86 | dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1) 87 | if dets is not None: 88 | final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5] 89 | origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds, 90 | conf=args.score_thr, class_names=COCO_CLASSES) 91 | 92 | mkdir(args.output_dir) 93 | output_path = os.path.join(args.output_dir, args.image_path.split("/")[-1]) 94 | cv2.imwrite(output_path, origin_img) 95 | -------------------------------------------------------------------------------- /demo/OpenVINO/README.md: -------------------------------------------------------------------------------- 1 | ## YOLOX for OpenVINO 2 | 3 | * [C++ Demo](./cpp) 4 | * [Python Demo](./python) -------------------------------------------------------------------------------- /demo/OpenVINO/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.1) 2 | set(CMAKE_CXX_STANDARD 14) 3 | 4 | project(yolox_openvino_demo) 5 | 6 | find_package(OpenCV REQUIRED) 7 | find_package(InferenceEngine REQUIRED) 8 | find_package(ngraph REQUIRED) 9 | 10 | include_directories( 11 | ${OpenCV_INCLUDE_DIRS} 12 | ${CMAKE_CURRENT_SOURCE_DIR} 13 | ${CMAKE_CURRENT_BINARY_DIR} 14 | ) 15 | 16 | add_executable(yolox_openvino yolox_openvino.cpp) 17 | 18 | target_link_libraries( 19 | yolox_openvino 20 | ${InferenceEngine_LIBRARIES} 21 | ${NGRAPH_LIBRARIES} 22 | ${OpenCV_LIBS} 23 | ) -------------------------------------------------------------------------------- /demo/OpenVINO/cpp/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-OpenVINO in C++ 2 | 3 | This toturial includes a C++ demo for OpenVINO, as well as some converted models. 4 | 5 | ### Download OpenVINO models. 
6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | [YOLOX-Nano](../../../exps/nano.py) | 0.91M | 1.08 | 416x416 | 25.3 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EeWY57o5wQZFtXYd1KJw6Z8B4vxZru649XxQHYIFgio3Qw?e=ZS81ce)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano_openvino.tar.gz) | 9 | | [YOLOX-Tiny](../../../exps/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |31.7 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ETfvOoCXdVZNinoSpKA_sEYBIQVqfjjF5_M6VvHRnLVcsA?e=STL1pi)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_openvino.tar.gz) | 10 | | [YOLOX-S](../../../exps/yolox_s.py) | 9.0M | 26.8 | 640x640 |39.6 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EXUjf3PQnbBLrxNrXPueqaIBzVZOrYQOnJpLK1Fytj5ssA?e=GK0LOM)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_openvino.tar.gz) | 11 | | [YOLOX-M](../../../exps/yolox_m.py) | 25.3M | 73.8 | 640x640 |46.4 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EcoT1BPpeRpLvE_4c441zn8BVNCQ2naxDH3rho7WqdlgLQ?e=95VaM9)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m_openvino.tar.gz) | 12 | | [YOLOX-L](../../../exps/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.0 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZvmn-YLRuVPh0GAP_w3xHMB2VGvrKqQXyK_Cv5yi_DXUg?e=YRh6Eq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l_openvino.tar.gz) | 13 | | [YOLOX-Darknet53](../../../exps/yolov3.py) | 63.72M | 185.3 | 640x640 |47.3 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EQP8LSroikFHuwX0jFRetmcBOCDWSFmylHxolV7ezUPXGw?e=bEw5iq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53_openvino.tar.gz) | 14 | | [YOLOX-X](../../../exps/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.2 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZFPnLqiD-xIlt7rcZYDjQgB4YXE9wnq1qaSXQwJrsKbdg?e=83nwEz)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x_openvino.tar.gz) | 15 | 16 | ## Install OpenVINO Toolkit 17 | 18 | Please visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details. 19 | 20 | ## Set up the Environment 21 | 22 | ### For Linux 23 | 24 | **Option1. Set up the environment tempororally. You need to run this command everytime you start a new shell window.** 25 | 26 | ```shell 27 | source /opt/intel/openvino_2021/bin/setupvars.sh 28 | ``` 29 | 30 | **Option2. Set up the environment permenantly.** 31 | 32 | *Step1.* For Linux: 33 | ```shell 34 | vim ~/.bashrc 35 | ``` 36 | 37 | *Step2.* Add the following line into your file: 38 | 39 | ```shell 40 | source /opt/intel/openvino_2021/bin/setupvars.sh 41 | ``` 42 | 43 | *Step3.* Save and exit the file, then run: 44 | 45 | ```shell 46 | source ~/.bashrc 47 | ``` 48 | 49 | 50 | ## Convert model 51 | 52 | 1. Export ONNX model 53 | 54 | Please refer to the [ONNX toturial](../../ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.** 55 | 56 | 2. 
Convert ONNX to OpenVINO 57 | 58 | ``` shell 59 | cd /openvino_2021/deployment_tools/model_optimizer 60 | ``` 61 | 62 | Install requirements for convert tool 63 | 64 | ```shell 65 | sudo ./install_prerequisites/install_prerequisites_onnx.sh 66 | ``` 67 | 68 | Then convert model. 69 | ```shell 70 | python3 mo.py --input_model --input_shape [--data_type FP16] 71 | ``` 72 | For example: 73 | ```shell 74 | python3 mo.py --input_model yolox.onnx --input_shape (1,3,640,640) --data_type FP16 75 | ``` 76 | 77 | ## Build 78 | 79 | ### Linux 80 | ```shell 81 | source /opt/intel/openvino_2021/bin/setupvars.sh 82 | mkdir build 83 | cd build 84 | cmake .. 85 | make 86 | ``` 87 | 88 | ## Demo 89 | 90 | ### c++ 91 | 92 | ```shell 93 | ./yolox_openvino 94 | ``` 95 | -------------------------------------------------------------------------------- /demo/OpenVINO/python/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-OpenVINO in Python 2 | 3 | This toturial includes a Python demo for OpenVINO, as well as some converted models. 4 | 5 | ### Download OpenVINO models. 6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | [YOLOX-Nano](../../../exps/default/nano.py) | 0.91M | 1.08 | 416x416 | 25.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EeWY57o5wQZFtXYd1KJw6Z8B4vxZru649XxQHYIFgio3Qw?e=ZS81ce)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano_openvino.tar.gz) | 9 | | [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |31.7 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ETfvOoCXdVZNinoSpKA_sEYBIQVqfjjF5_M6VvHRnLVcsA?e=STL1pi)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_openvino.tar.gz) | 10 | | [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |39.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EXUjf3PQnbBLrxNrXPueqaIBzVZOrYQOnJpLK1Fytj5ssA?e=GK0LOM)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_openvino.tar.gz) | 11 | | [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |46.4 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EcoT1BPpeRpLvE_4c441zn8BVNCQ2naxDH3rho7WqdlgLQ?e=95VaM9)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m_openvino.tar.gz) | 12 | | [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.0 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZvmn-YLRuVPh0GAP_w3xHMB2VGvrKqQXyK_Cv5yi_DXUg?e=YRh6Eq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l_openvino.tar.gz) | 13 | | [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |47.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EQP8LSroikFHuwX0jFRetmcBOCDWSFmylHxolV7ezUPXGw?e=bEw5iq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53_openvino.tar.gz) | 14 | | [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.2 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZFPnLqiD-xIlt7rcZYDjQgB4YXE9wnq1qaSXQwJrsKbdg?e=83nwEz)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x_openvino.tar.gz) | 15 | 
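Each archive above should unpack to an OpenVINO IR (an `.xml` topology plus a `.bin` weights file). If you want to sanity-check a downloaded model once the toolkit is installed (see the next section), a minimal sketch with the 2021-era Inference Engine Python API looks like this; the file names are placeholders for whichever model you unpacked:

```python
from openvino.inference_engine import IECore

ie = IECore()
# paths below are placeholders: point them at the .xml/.bin pair you extracted
net = ie.read_network(model="yolox_s.xml", weights="yolox_s.bin")
exec_net = ie.load_network(network=net, device_name="CPU")
print("inputs:", list(net.input_info.keys()))
print("outputs:", list(net.outputs.keys()))
```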
16 | ## Install OpenVINO Toolkit
17 | 
18 | Please visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details.
19 | 
20 | ## Set up the Environment
21 | 
22 | ### For Linux
23 | 
24 | **Option1. Set up the environment temporarily. You need to run this command every time you start a new shell window.**
25 | 
26 | ```shell
27 | source /opt/intel/openvino_2021/bin/setupvars.sh
28 | ```
29 | 
30 | **Option2. Set up the environment permanently.**
31 | 
32 | *Step1.* For Linux:
33 | ```shell
34 | vim ~/.bashrc
35 | ```
36 | 
37 | *Step2.* Add the following line into your file:
38 | 
39 | ```shell
40 | source /opt/intel/openvino_2021/bin/setupvars.sh
41 | ```
42 | 
43 | *Step3.* Save and exit the file, then run:
44 | 
45 | ```shell
46 | source ~/.bashrc
47 | ```
48 | 
49 | 
50 | ## Convert model
51 | 
52 | 1. Export ONNX model
53 | 
54 | Please refer to the [ONNX tutorial](../../ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.**
55 | 
56 | 2. Convert ONNX to OpenVINO
57 | 
58 | ``` shell
59 | cd /openvino_2021/deployment_tools/model_optimizer
60 | ```
61 | 
62 | Install requirements for convert tool
63 | 
64 | ```shell
65 | sudo ./install_prerequisites/install_prerequisites_onnx.sh
66 | ```
67 | 
68 | Then convert model.
69 | ```shell
70 | python3 mo.py --input_model --input_shape [--data_type FP16]
71 | ```
72 | For example:
73 | ```shell
74 | python3 mo.py --input_model yolox.onnx --input_shape [1,3,640,640] --data_type FP16 --output_dir converted_output
75 | ```
76 | 
77 | ## Demo
78 | 
79 | ### python
80 | 
81 | ```shell
82 | python openvino_inference.py -m -i 
83 | ```
84 | or
85 | ```shell
86 | python openvino_inference.py -m -i -o -s -d 
87 | ```
88 | 
89 | 
-------------------------------------------------------------------------------- /demo/TensorRT/cpp/CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | 
3 | project(yolox)
4 | 
5 | add_definitions(-std=c++11)
6 | 
7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8 | set(CMAKE_CXX_STANDARD 11)
9 | set(CMAKE_BUILD_TYPE Debug)
10 | 
11 | find_package(CUDA REQUIRED)
12 | 
13 | include_directories(${PROJECT_SOURCE_DIR}/include)
14 | # include and link dirs of cuda and tensorrt, you need to adapt them if yours are different
15 | # cuda
16 | include_directories(/data/cuda/cuda-10.2/cuda/include)
17 | link_directories(/data/cuda/cuda-10.2/cuda/lib64)
18 | # cudnn
19 | include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include)
20 | link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64)
21 | # tensorrt
22 | include_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/include)
23 | link_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/lib)
24 | 
25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
26 | 
27 | find_package(OpenCV)
28 | include_directories(${OpenCV_INCLUDE_DIRS})
29 | 
30 | add_executable(yolox ${PROJECT_SOURCE_DIR}/yolox.cpp)
31 | target_link_libraries(yolox nvinfer)
32 | target_link_libraries(yolox cudart)
33 | target_link_libraries(yolox ${OpenCV_LIBS})
34 | 
35 | add_definitions(-O2 -pthread)
36 | 
37 | 
-------------------------------------------------------------------------------- /demo/TensorRT/cpp/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-TensorRT in C++
2 | 
3 | As YOLOX models are easy to convert to tensorrt using [torch2trt 
gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt),
4 | our C++ demo does not include model conversion or construction like other tensorrt demos.
5 | 
6 | 
7 | ## Step 1: Prepare serialized engine file
8 | 
9 | Follow the trt [python demo README](../python/README.md) to convert and save the serialized engine file.
10 | 
11 | Check the 'model_trt.engine' file generated from Step 1, which will be automatically saved at the current demo dir.
12 | 
13 | 
14 | ## Step 2: build the demo
15 | 
16 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) to install TensorRT.
17 | 
18 | Install opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+).
19 | 
20 | build the demo:
21 | 
22 | ```shell
23 | mkdir build
24 | cd build
25 | cmake ..
26 | make
27 | ```
28 | 
29 | Then run the demo:
30 | 
31 | ```shell
32 | ./yolox ../model_trt.engine -i ../../../../assets/dog.jpg
33 | ```
34 | 
35 | or
36 | 
37 | ```shell
38 | ./yolox -i 
39 | ```
40 | 
-------------------------------------------------------------------------------- /demo/TensorRT/python/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-TensorRT in Python
2 | 
3 | This tutorial includes a Python demo for TensorRT.
4 | 
5 | ## Install TensorRT Toolkit
6 | 
7 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT and torch2trt.
8 | 
9 | ## Convert model
10 | 
11 | YOLOX models can be easily converted to TensorRT models using torch2trt.
12 | 
13 | If you want to convert our model, use the flag -n to specify a model name:
14 | ```shell
15 | python tools/trt.py -n -c 
16 | ```
17 | For example:
18 | ```shell
19 | python tools/trt.py -n yolox-s -c your_ckpt.pth
20 | ```
21 | can be: yolox-nano, yolox-tiny, yolox-s, yolox-m, yolox-l, yolox-x.
22 | 
23 | If you want to convert your customized model, use the flag -f to specify your exp file:
24 | ```shell
25 | python tools/trt.py -f -c 
26 | ```
27 | For example:
28 | ```shell
29 | python tools/trt.py -f /path/to/your/yolox/exps/yolox_s.py -c your_ckpt.pth
30 | ```
31 | *yolox_s.py* can be any exp file modified by you.
32 | 
33 | The converted model and the serialized engine file (for C++ demo) will be saved in your experiment output dir.
34 | 
35 | ## Demo
36 | 
37 | The TensorRT python demo is merged into our pytorch demo file, so you can run the pytorch demo command with ```--trt```.
38 | 
39 | ```shell
40 | python tools/demo.py image -n yolox-s --trt --save_result
41 | ```
42 | or
43 | ```shell
44 | python tools/demo.py image -f exps/default/yolox_s.py --trt --save_result
45 | ```
46 | 
47 | 
-------------------------------------------------------------------------------- /demo/ncnn/android/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-Android-ncnn
2 | 
3 | Android app of YOLOX object detection based on [ncnn](https://github.com/Tencent/ncnn)
4 | 
5 | 
6 | ## Tutorial
7 | 
8 | ### Step1
9 | 
10 | Download ncnn-android-vulkan.zip from [releases of ncnn](https://github.com/Tencent/ncnn/releases). This repo uses
11 | [20210525 release](https://github.com/Tencent/ncnn/releases/download/20210525/ncnn-20210525-android-vulkan.zip) for building.
12 | 
13 | ### Step2
14 | 
15 | After downloading, please extract your zip file. 
Then, there are two ways to finish this step: 16 | * put your extracted directory into **app/src/main/jni** 17 | * change the **ncnn_DIR** path in **app/src/main/jni/CMakeLists.txt** to your extracted directory 18 | 19 | ### Step3 20 | Download example param and bin file from [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESXBH_GSSmFMszWJ6YG2VkQB5cWDfqVWXgk0D996jH0rpQ?e=qzEqUh) or [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz). Unzip the file to **app/src/main/assets**. 21 | 22 | ### Step4 23 | Open this project with Android Studio, build it and enjoy! 24 | 25 | ## Reference 26 | 27 | * [ncnn-android-yolov5](https://github.com/nihui/ncnn-android-yolov5) 28 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | 3 | android { 4 | compileSdkVersion 24 5 | buildToolsVersion "29.0.2" 6 | 7 | defaultConfig { 8 | applicationId "com.megvii.yoloXncnn" 9 | archivesBaseName = "$applicationId" 10 | 11 | ndk { 12 | moduleName "ncnn" 13 | abiFilters "armeabi-v7a", "arm64-v8a" 14 | } 15 | minSdkVersion 24 16 | } 17 | 18 | externalNativeBuild { 19 | cmake { 20 | version "3.10.2" 21 | path file('src/main/jni/CMakeLists.txt') 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/yoloXncnn.java: -------------------------------------------------------------------------------- 1 | // Copyright (C) Megvii, Inc. and its affiliates. All rights reserved. 2 | 3 | package com.megvii.yoloXncnn; 4 | 5 | import android.content.res.AssetManager; 6 | import android.graphics.Bitmap; 7 | 8 | public class YOLOXncnn 9 | { 10 | public native boolean Init(AssetManager mgr); 11 | 12 | public class Obj 13 | { 14 | public float x; 15 | public float y; 16 | public float w; 17 | public float h; 18 | public String label; 19 | public float prob; 20 | } 21 | 22 | public native Obj[] Detect(Bitmap bitmap, boolean use_gpu); 23 | 24 | static { 25 | System.loadLibrary("yoloXncnn"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/jni/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(yoloXncnn) 2 | 3 | cmake_minimum_required(VERSION 3.4.1) 4 | 5 | set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20210525-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn) 6 | find_package(ncnn REQUIRED) 7 | 8 | add_library(yoloXncnn SHARED yoloXncnn_jni.cpp) 9 | 10 | target_link_libraries(yoloXncnn 11 | ncnn 12 | 13 | jnigraphics 14 | ) 15 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 11 | 12 |