├── AIDetector_pytorch.py ├── BaseDetector.py ├── LICENSE ├── README.md ├── deep_sort ├── configs │ └── deep_sort.yaml ├── deep_sort │ ├── README.md │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── .gitkeep │ │ ├── evaluate.py │ │ ├── feature_extractor.py │ │ ├── model.py │ │ ├── original_model.py │ │ ├── test.py │ │ ├── train.jpg │ │ └── train.py │ ├── deep_sort.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── demo.py ├── demo ├── MegEngine │ ├── cpp │ │ ├── README.md │ │ ├── build.sh │ │ └── yolox.cpp │ └── python │ │ ├── README.md │ │ ├── build.py │ │ ├── coco_classes.py │ │ ├── convert_weights.py │ │ ├── demo.py │ │ ├── dump.py │ │ ├── models │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── network_blocks.py │ │ ├── yolo_fpn.py │ │ ├── yolo_head.py │ │ ├── yolo_pafpn.py │ │ └── yolox.py │ │ ├── process.py │ │ └── visualize.py ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py ├── OpenVINO │ ├── README.md │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── yolox_openvino.cpp │ └── python │ │ ├── README.md │ │ └── openvino_inference.py ├── TensorRT │ ├── cpp │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── logging.h │ │ └── yolox.cpp │ └── python │ │ └── README.md └── ncnn │ ├── android │ ├── README.md │ ├── app │ │ ├── build.gradle │ │ └── src │ │ │ └── main │ │ │ ├── AndroidManifest.xml │ │ │ ├── assets │ │ │ └── yolox.param │ │ │ ├── java │ │ │ └── com │ │ │ │ └── megvii │ │ │ │ └── yoloXncnn │ │ │ │ ├── MainActivity.java │ │ │ │ └── yoloXncnn.java │ │ │ ├── jni │ │ │ ├── CMakeLists.txt │ │ │ └── yoloXncnn_jni.cpp │ │ │ └── res │ │ │ ├── layout │ │ │ └── main.xml │ │ │ └── values │ │ │ └── strings.xml │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ └── settings.gradle │ └── cpp │ ├── README.md │ └── yolox.cpp ├── exps ├── default │ ├── nano.py │ ├── yolov3.py │ ├── yolox_l.py │ ├── yolox_m.py │ ├── yolox_s.py │ ├── yolox_tiny.py │ └── yolox_x.py └── example │ ├── custom │ ├── nano.py │ └── yolox_s.py │ └── yolox_voc │ └── yolox_voc_s.py ├── requirements.txt ├── tools ├── demo.py ├── eval.py ├── export_onnx.py ├── train.py └── trt.py ├── tracker.py └── yolox ├── __init__.py ├── core ├── __init__.py ├── launch.py └── trainer.py ├── data ├── __init__.py ├── data_augment.py ├── data_prefetcher.py ├── dataloading.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── coco_classes.py │ ├── datasets_wrapper.py │ ├── mosaicdetection.py │ ├── voc.py │ └── voc_classes.py └── samplers.py ├── evaluators ├── __init__.py ├── coco_evaluator.py ├── voc_eval.py └── voc_evaluator.py ├── exp ├── __init__.py ├── base_exp.py ├── build.py └── yolox_base.py ├── layers ├── __init__.py ├── csrc │ ├── cocoeval │ │ ├── cocoeval.cpp │ │ └── cocoeval.h │ └── vision.cpp └── fast_coco_eval_api.py ├── models ├── __init__.py ├── darknet.py ├── losses.py ├── network_blocks.py ├── yolo_fpn.py ├── yolo_head.py ├── yolo_pafpn.py └── yolox.py └── utils ├── __init__.py ├── allreduce_norm.py ├── boxes.py ├── checkpoint.py ├── demo_utils.py ├── dist.py ├── ema.py ├── logger.py ├── lr_scheduler.py ├── metric.py ├── model_utils.py ├── setup_env.py └── visualize.py 
/AIDetector_pytorch.py: -------------------------------------------------------------------------------- 1 | from yolox.utils.boxes import postprocess 2 | from yolox.data.data_augment import preproc 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | from BaseDetector import baseDet 7 | import os 8 | from yolox.utils import fuse_model 9 | from yolox.data.datasets import COCO_CLASSES 10 | 11 | 12 | def select_device(device='', batch_size=None): 13 | # device = 'cpu' or '0' or '0,1,2,3' 14 | cpu = device.lower() == 'cpu' 15 | if cpu: 16 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 17 | elif device: # non-cpu device requested 18 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 19 | # check availability 20 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' 21 | 22 | cuda = not cpu and torch.cuda.is_available() 23 | if cuda: 24 | devices = device.split(',') if device else range(torch.cuda.device_count()) # i.e. 0,1,6,7 25 | n = len(devices) # device count 26 | if n > 1 and batch_size: # check batch_size is divisible by device_count 27 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 28 | 29 | return torch.device('cuda:0' if cuda else 'cpu') 30 | 31 | 32 | class Detector(baseDet): 33 | 34 | def __init__(self): 35 | super(Detector, self).__init__() 36 | 37 | self.build_config() 38 | self.mdepth = 0.33 39 | self.mwidth = 0.50 40 | self.confthre=0.01 41 | self.nmsthre=0.65 42 | self.test_size=(640, 640) 43 | self.rgb_means = (0.485, 0.456, 0.406) 44 | self.std = (0.229, 0.224, 0.225) 45 | self.init_model() 46 | 47 | def init_model(self): 48 | 49 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 50 | 51 | def init_yolo(M): 52 | for m in M.modules(): 53 | if isinstance(m, nn.BatchNorm2d): 54 | m.eps = 1e-3 55 | m.momentum = 0.03 56 | 57 | if getattr(self, "model", None) is None: 58 | in_channels = [256, 512, 1024] 59 | backbone = YOLOPAFPN(self.mdepth, self.mwidth, in_channels=in_channels) 60 | head = YOLOXHead(80, self.mwidth, in_channels=in_channels) 61 | model = YOLOX(backbone, head) 62 | 63 | model.apply(init_yolo) 64 | model.head.initialize_biases(1e-2) 65 | self.weights = 'weights/yolox_s.pth' 66 | self.device = '0' if torch.cuda.is_available() else 'cpu' 67 | self.device = select_device(self.device) 68 | ckpt = torch.load(self.weights) 69 | # load the model state dict 70 | model.load_state_dict(ckpt["model"]) 71 | model.to(self.device).eval() 72 | model = fuse_model(model) 73 | self.m = model 74 | 75 | self.names = COCO_CLASSES 76 | self.num_classes = len(self.names) 77 | 78 | def preprocess(self, img): 79 | 80 | img_info = {"id": 0} 81 | img_info["file_name"] = None 82 | height, width = img.shape[:2] 83 | img_info["height"] = height 84 | img_info["width"] = width 85 | img_info["raw_img"] = img 86 | 87 | img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) 88 | img_info["ratio"] = ratio 89 | img = torch.from_numpy(img).unsqueeze(0) 90 | if torch.cuda.is_available(): 91 | img = img.cuda() 92 | 93 | return img_info, img 94 | 95 | def detect(self, im): 96 | 97 | img_info, img = self.preprocess(im) 98 | 99 | outputs = self.m(img) 100 | outputs = postprocess( 101 | outputs, self.num_classes, self.confthre, self.nmsthre 102 | )[0] 103 | pred_boxes = [] 104 | ratio = img_info["ratio"] 105 | img = img_info["raw_img"] 106 | 107 | boxes = outputs[:, 0:4] 108 | 109 | # preprocessing: resize 110 | boxes /= ratio 111 | 112 | 
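# note: each row returned by postprocess() is [x1, y1, x2, y2, obj_conf, class_conf, class_pred];
# the boxes above have just been scaled back to the original image by dividing by the preproc ratio.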
cls_ids = outputs[:, 6] 113 | scores = outputs[:, 4] * outputs[:, 5] 114 | 115 | for i in range(len(boxes)): 116 | box = boxes[i].cpu() 117 | lbl = self.names[int(cls_ids[i])] 118 | conf = scores[i] 119 | if conf < self.confthre: 120 | continue 121 | x1 = int(box[0]) 122 | y1 = int(box[1]) 123 | x2 = int(box[2]) 124 | y2 = int(box[3]) 125 | pred_boxes.append( 126 | (x1, y1, x2, y2, lbl, conf)) 127 | 128 | return im, pred_boxes 129 | 130 | 131 | if __name__ == '__main__': 132 | 133 | det = Detector() 134 | -------------------------------------------------------------------------------- /BaseDetector.py: -------------------------------------------------------------------------------- 1 | from tracker import update_tracker 2 | import cv2 3 | 4 | 5 | class baseDet(object): 6 | 7 | def __init__(self): 8 | 9 | self.stride = 1 10 | 11 | def build_config(self): 12 | 13 | self.faceTracker = {} 14 | self.faceClasses = {} 15 | self.faceLocation1 = {} 16 | self.faceLocation2 = {} 17 | self.frameCounter = 0 18 | self.currentCarID = 0 19 | self.recorded = [] 20 | 21 | self.font = cv2.FONT_HERSHEY_SIMPLEX 22 | 23 | def feedCap(self, im): 24 | 25 | retDict = { 26 | 'frame': None, 27 | 'faces': None, 28 | 'list_of_ids': None, 29 | 'face_bboxes': [] 30 | } 31 | self.frameCounter += 1 32 | 33 | im, faces, face_bboxes = update_tracker(self, im) 34 | 35 | retDict['frame'] = im 36 | retDict['faces'] = faces 37 | retDict['face_bboxes'] = face_bboxes 38 | 39 | return retDict 40 | 41 | def init_model(self): 42 | raise EOFError("Undefined model type.") 43 | 44 | def preprocess(self): 45 | raise EOFError("Undefined model type.") 46 | 47 | def detect(self): 48 | raise EOFError("Undefined model type.") 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 项目简介: 2 | 使用YOLOX+Deepsort实现车辆行人追踪和计数,代码封装成一个Detector类,更容易嵌入到自己的项目中。 3 | 4 | 代码地址(欢迎star): 5 | 6 | [https://github.com/Sharpiless/yolox-deepsort/](https://github.com/Sharpiless/yolox-deepsort/) 7 | 8 | 最终效果: 9 | 10 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/7768e8e4cf0a4bbf97bb10ab56ea028c.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NDkzNjg4OQ==,size_16,color_FFFFFF,t_70) 11 | 12 | # 运行demo: 13 | 14 | ```bash 15 | python demo.py 16 | ``` 17 | 18 | # 下载预训练模型: 19 | 20 | |Model |size |mAPtest
0.5:0.95 | Speed V100 (ms) | Params (M) |FLOPs
(G)| weights | 21 | | ------ |:---: | :---: |:---: |:---: | :---: | :----: | 22 | |[YOLOX-s](./exps/default/yolox_s.py) |640 |39.6 |9.8 |9.0 | 26.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) | 23 | |[YOLOX-m](./exps/default/yolox_m.py) |640 |46.4 |12.3 |25.3 |73.8| [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERMTP7VFqrVBrXKMU7Vl4TcBQs0SUeCT7kvc-JdIbej4tQ?e=1MDo9y)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.pth) | 24 | |[YOLOX-l](./exps/default/yolox_l.py) |640 |50.0 |14.5 |54.2| 155.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EWA8w_IEOzBKvuueBqfaZh0BeoG5sVzR-XYbOJO4YlOkRw?e=wHWOBE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.pth) | 25 | |[YOLOX-x](./exps/default/yolox_x.py) |640 |**51.2** | 17.3 |99.1 |281.9 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) | 26 | |[YOLOX-Darknet53](./exps/default/yolov3.py) |640 | 47.4 | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) | 27 | 28 | 下载 yolox_s.pth 放到 weights 文件夹下 29 | 30 | 下载 [https://github.com/Sharpiless/Yolov5-Deepsort/blob/main/deep_sort/deep_sort/deep/checkpoint/ckpt.t7](https://github.com/Sharpiless/Yolov5-Deepsort/blob/main/deep_sort/deep_sort/deep/checkpoint/ckpt.t7) 放到 deep_sort/deep_sort/deep/checkpoint 文件夹下 31 | 32 | # 训练自己的模型: 33 | 34 | 35 | 训练好后放到 weights 文件夹下 36 | 37 | # 调用接口: 38 | 39 | ## 创建检测器: 40 | 41 | ```python 42 | from AIDetector_pytorch import Detector 43 | 44 | det = Detector() 45 | ``` 46 | 47 | ## 调用检测接口: 48 | 49 | ```python 50 | result = det.feedCap(im) 51 | ``` 52 | 53 | 其中 im 为 BGR 图像 54 | 55 | 返回的 result 是字典,result['frame'] 返回可视化后的图像 56 | 57 | # 联系作者: 58 | 59 | > B站:[https://space.bilibili.com/470550823](https://space.bilibili.com/470550823) 60 | 61 | > CSDN:[https://blog.csdn.net/weixin_44936889](https://blog.csdn.net/weixin_44936889) 62 | 63 | > AI Studio:[https://aistudio.baidu.com/aistudio/personalcenter/thirdview/67156](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/67156) 64 | 65 | > Github:[https://github.com/Sharpiless](https://github.com/Sharpiless) 66 | 67 | -------------------------------------------------------------------------------- /deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "deep_sort/deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. 
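The `deep_sort.yaml` shown above collects the DeepSort hyper-parameters (appearance-distance gate, confidence floor, NMS overlap, maximum track age, etc.). A minimal sketch of how they are typically wired into a tracker, assuming the repo's `get_config` helper from `deep_sort/utils/parser.py` and the `build_tracker` factory defined in the `__init__.py` that follows; this snippet is illustrative, not code taken from the repository:

```python
import torch

from deep_sort.utils.parser import get_config   # YamlParser-based helper (shown later in this dump)
from deep_sort.deep_sort import build_tracker    # factory defined in the next file

cfg = get_config("deep_sort/configs/deep_sort.yaml")              # exposes cfg.DEEPSORT.MAX_DIST, ...
tracker = build_tracker(cfg, use_cuda=torch.cuda.is_available())  # -> DeepSort instance

# per frame: boxes as (cx, cy, w, h), detector confidences, class labels, and the original BGR image
# outputs = tracker.update(bbox_xywh, confidences, clss, ori_img)
# -> list of (x1, y1, x2, y2, cls, track_id) for confirmed, recently updated tracks
```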
-------------------------------------------------------------------------------- /deep_sort/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | logger = logging.getLogger("root.tracker") 16 | logger.info("Loading weights from {}... Done!".format(model_path)) 17 | self.net.to(self.device) 18 | self.size = (64, 128) 19 | self.norm = transforms.Compose([ 20 | transforms.ToTensor(), 21 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 22 | ]) 23 | 24 | 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. 
normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 39 | return im_batch 40 | 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | 56 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | 
return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/test.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 | torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict, strict=False) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deep_sort/deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from 
.sort.tracker import Tracker 9 | 10 | 11 | __all__ = ['DeepSort'] 12 | 13 | 14 | class DeepSort(object): 15 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | 19 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 20 | 21 | max_cosine_distance = max_dist 22 | nn_budget = 100 23 | metric = NearestNeighborDistanceMetric( 24 | "cosine", max_cosine_distance, nn_budget) 25 | self.tracker = Tracker( 26 | metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 27 | 28 | def update(self, bbox_xywh, confidences, clss, ori_img): 29 | self.height, self.width = ori_img.shape[:2] 30 | # generate detections 31 | features = self._get_features(bbox_xywh, ori_img) 32 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 33 | detections = [Detection(bbox_tlwh[i], clss[i], conf, features[i]) for i, conf in enumerate( 34 | confidences) if conf > self.min_confidence] 35 | # update tracker 36 | self.tracker.predict() 37 | self.tracker.update(detections) 38 | 39 | # output bbox identities 40 | outputs = [] 41 | for track in self.tracker.tracks: 42 | if not track.is_confirmed() or track.time_since_update > 1: 43 | continue 44 | box = track.to_tlwh() 45 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 46 | outputs.append((x1, y1, x2, y2, track.cls_, track.track_id)) 47 | return outputs 48 | 49 | @staticmethod 50 | def _xywh_to_tlwh(bbox_xywh): 51 | if isinstance(bbox_xywh, np.ndarray): 52 | bbox_tlwh = bbox_xywh.copy() 53 | elif isinstance(bbox_xywh, torch.Tensor): 54 | bbox_tlwh = bbox_xywh.clone() 55 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2]/2. 56 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3]/2. 57 | return bbox_tlwh 58 | 59 | def _xywh_to_xyxy(self, bbox_xywh): 60 | x, y, w, h = bbox_xywh 61 | x1 = max(int(x-w/2), 0) 62 | x2 = min(int(x+w/2), self.width-1) 63 | y1 = max(int(y-h/2), 0) 64 | y2 = min(int(y+h/2), self.height-1) 65 | return x1, y1, x2, y2 66 | 67 | def _tlwh_to_xyxy(self, bbox_tlwh): 68 | """ 69 | TODO: 70 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 71 | Thanks JieChen91@github.com for reporting this bug! 
72 | """ 73 | x, y, w, h = bbox_tlwh 74 | x1 = max(int(x), 0) 75 | x2 = min(int(x+w), self.width-1) 76 | y1 = max(int(y), 0) 77 | y2 = min(int(y+h), self.height-1) 78 | return x1, y1, x2, y2 79 | 80 | def _xyxy_to_tlwh(self, bbox_xyxy): 81 | x1, y1, x2, y2 = bbox_xyxy 82 | 83 | t = x1 84 | l = y1 85 | w = int(x2-x1) 86 | h = int(y2-y1) 87 | return t, l, w, h 88 | 89 | def _get_features(self, bbox_xywh, ori_img): 90 | im_crops = [] 91 | for box in bbox_xywh: 92 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 93 | im = ori_img[y1:y2, x1:x2] 94 | im_crops.append(im) 95 | if im_crops: 96 | features = self.extractor(im_crops) 97 | else: 98 | features = np.array([]) 99 | return features 100 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | 7 | def __init__(self, tlwh, cls_, confidence, feature): 8 | self.tlwh = np.asarray(tlwh, dtype=np.float) 9 | self.cls_ = cls_ 10 | self.confidence = float(confidence) 11 | self.feature = np.asarray(feature, dtype=np.float32) 12 | 13 | def to_tlbr(self): 14 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 15 | `(top left, bottom right)`. 16 | """ 17 | ret = self.tlwh.copy() 18 | ret[2:] += ret[:2] 19 | return ret 20 | 21 | def to_xyah(self): 22 | """Convert bounding box to format `(center x, center y, aspect ratio, 23 | height)`, where the aspect ratio is `width / height`. 24 | """ 25 | ret = self.tlwh.copy() 26 | ret[:2] += ret[2:] / 2 27 | ret[2] /= ret[3] 28 | return ret 29 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 
24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
63 | 64 | """ 65 | 66 | def __init__(self, mean, cls_, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.cls_ = cls_ 70 | self.covariance = covariance 71 | self.track_id = track_id 72 | self.hits = 1 73 | self.age = 1 74 | self.time_since_update = 0 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | self.cls_ = detection.cls_ 143 | 144 | self.hits += 1 145 | self.time_since_update = 0 146 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 147 | self.state = TrackState.Confirmed 148 | 149 | def mark_missed(self): 150 | """Mark this track as missed (no association at the current time step). 151 | """ 152 | if self.state == TrackState.Tentative: 153 | self.state = TrackState.Deleted 154 | elif self.time_since_update > self._max_age: 155 | self.state = TrackState.Deleted 156 | 157 | def is_tentative(self): 158 | """Returns True if this track is tentative (unconfirmed). 159 | """ 160 | return self.state == TrackState.Tentative 161 | 162 | def is_confirmed(self): 163 | """Returns True if this track is confirmed.""" 164 | return self.state == TrackState.Confirmed 165 | 166 | def is_deleted(self): 167 | """Returns True if this track is dead and should be deleted.""" 168 | return self.state == TrackState.Deleted 169 | -------------------------------------------------------------------------------- /deep_sort/deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . 
import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | 12 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 13 | self.metric = metric 14 | self.max_iou_distance = max_iou_distance 15 | self.max_age = max_age 16 | self.n_init = n_init 17 | 18 | self.kf = kalman_filter.KalmanFilter() 19 | self.tracks = [] 20 | self._next_id = 1 21 | 22 | def predict(self): 23 | """Propagate track state distributions one time step forward. 24 | 25 | This function should be called once every time step, before `update`. 26 | """ 27 | for track in self.tracks: 28 | track.predict(self.kf) 29 | 30 | def update(self, detections): 31 | """Perform measurement update and track management. 32 | 33 | Parameters 34 | ---------- 35 | detections : List[deep_sort.detection.Detection] 36 | A list of detections at the current time step. 37 | 38 | """ 39 | # Run matching cascade. 40 | matches, unmatched_tracks, unmatched_detections = \ 41 | self._match(detections) 42 | 43 | # Update track set. 44 | for track_idx, detection_idx in matches: 45 | self.tracks[track_idx].update( 46 | self.kf, detections[detection_idx]) 47 | for track_idx in unmatched_tracks: 48 | self.tracks[track_idx].mark_missed() 49 | for detection_idx in unmatched_detections: 50 | self._initiate_track(detections[detection_idx]) 51 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 52 | 53 | # Update distance metric. 54 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 55 | features, targets = [], [] 56 | for track in self.tracks: 57 | if not track.is_confirmed(): 58 | continue 59 | features += track.features 60 | targets += [track.track_id for _ in track.features] 61 | track.features = [] 62 | self.metric.partial_fit( 63 | np.asarray(features), np.asarray(targets), active_targets) 64 | 65 | def _match(self, detections): 66 | 67 | def gated_metric(tracks, dets, track_indices, detection_indices): 68 | features = np.array([dets[i].feature for i in detection_indices]) 69 | targets = np.array([tracks[i].track_id for i in track_indices]) 70 | cost_matrix = self.metric.distance(features, targets) 71 | cost_matrix = linear_assignment.gate_cost_matrix( 72 | self.kf, cost_matrix, tracks, dets, track_indices, 73 | detection_indices) 74 | 75 | return cost_matrix 76 | 77 | # Split track set into confirmed and unconfirmed tracks. 78 | confirmed_tracks = [ 79 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 80 | unconfirmed_tracks = [ 81 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 82 | 83 | # Associate confirmed tracks using appearance features. 84 | matches_a, unmatched_tracks_a, unmatched_detections = \ 85 | linear_assignment.matching_cascade( 86 | gated_metric, self.metric.matching_threshold, self.max_age, 87 | self.tracks, detections, confirmed_tracks) 88 | 89 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
90 | iou_track_candidates = unconfirmed_tracks + [ 91 | k for k in unmatched_tracks_a if 92 | self.tracks[k].time_since_update == 1] 93 | unmatched_tracks_a = [ 94 | k for k in unmatched_tracks_a if 95 | self.tracks[k].time_since_update != 1] 96 | matches_b, unmatched_tracks_b, unmatched_detections = \ 97 | linear_assignment.min_cost_matching( 98 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 99 | detections, iou_track_candidates, unmatched_detections) 100 | matches = matches_a + matches_b 101 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 102 | return matches, unmatched_tracks, unmatched_detections 103 | 104 | def _initiate_track(self, detection): 105 | mean, covariance = self.kf.initiate(detection.to_xyah()) 106 | self.tracks.append(Track( 107 | mean, detection.cls_, covariance, self._next_id, self.n_init, self.max_age, 108 | detection.feature)) 109 | self._next_id += 1 110 | -------------------------------------------------------------------------------- /deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sharpiless/yolox-deepsort/4bb35dd50b2112026bdda34078e65de22afb5a8b/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- /deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, 
seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | 
save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | class YamlParser(edict): 6 | """ 7 | This is yaml parser based on EasyDict. 
8 | """ 9 | def __init__(self, cfg_dict=None, config_file=None): 10 | if cfg_dict is None: 11 | cfg_dict = {} 12 | 13 | if config_file is not None: 14 | assert(os.path.isfile(config_file)) 15 | with open(config_file, 'r') as fo: 16 | cfg_dict.update(yaml.load(fo.read())) 17 | 18 | super(YamlParser, self).__init__(cfg_dict) 19 | 20 | 21 | def merge_from_file(self, config_file): 22 | with open(config_file, 'r') as fo: 23 | self.update(yaml.load(fo.read())) 24 | 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from AIDetector_pytorch import Detector 2 | import imutils 3 | import cv2 4 | 5 | def main(): 6 | 7 | name = 'demo' 8 | 9 | det = Detector() 10 | cap = cv2.VideoCapture('E:/视频/行人监控/test01.mp4') 11 | fps = int(cap.get(5)) 12 | print('fps:', fps) 13 | t = int(1000/fps) 14 | 15 | videoWriter = None 16 | 17 | while True: 18 | 19 | # try: 20 | _, im = cap.read() 21 | if im is None: 22 | break 23 | 24 | result = det.feedCap(im) 25 | result = result['frame'] 26 | result = imutils.resize(result, height=500) 27 | if videoWriter is None: 28 | fourcc = cv2.VideoWriter_fourcc( 29 | 'm', 'p', '4', 'v') # opencv3.0 30 | videoWriter = cv2.VideoWriter( 31 | 'result.mp4', fourcc, fps, (result.shape[1], result.shape[0])) 32 | 33 | videoWriter.write(result) 34 | cv2.imshow(name, result) 35 | cv2.waitKey(t) 36 | 37 | if cv2.getWindowProperty(name, cv2.WND_PROP_AUTOSIZE) < 1: 38 | # 点x退出 39 | break 40 | # except Exception as e: 41 | # print(e) 42 | # break 43 | 44 | cap.release() 45 | videoWriter.release() 46 | cv2.destroyAllWindows() 47 | 48 | if __name__ == '__main__': 49 | 50 | main() -------------------------------------------------------------------------------- /demo/MegEngine/cpp/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [ -z $CXX ];then 5 | echo "please export you c++ toolchain to CXX" 6 | echo "for example:" 7 | echo "build for host: export CXX=g++" 8 | echo "cross build for aarch64-android(always locate in NDK): export CXX=aarch64-linux-android21-clang++" 9 | echo 
"cross build for aarch64-linux: export CXX=aarch64-linux-gnu-g++" 10 | exit -1 11 | fi 12 | 13 | if [ -z $MGE_INSTALL_PATH ];then 14 | echo "please refsi ./README.md to init MGE_INSTALL_PATH env" 15 | exit -1 16 | fi 17 | 18 | if [ -z $OPENCV_INSTALL_INCLUDE_PATH ];then 19 | echo "please refs ./README.md to init OPENCV_INSTALL_INCLUDE_PATH env" 20 | exit -1 21 | fi 22 | 23 | if [ -z $OPENCV_INSTALL_LIB_PATH ];then 24 | echo "please refs ./README.md to init OPENCV_INSTALL_LIB_PATH env" 25 | exit -1 26 | fi 27 | 28 | INCLUDE_FLAG="-I$MGE_INSTALL_PATH/include -I$OPENCV_INSTALL_INCLUDE_PATH" 29 | LINK_FLAG="-L$MGE_INSTALL_PATH/lib/ -lmegengine -L$OPENCV_INSTALL_LIB_PATH -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs" 30 | BUILD_FLAG="-static-libstdc++ -O3 -pie -fPIE -g" 31 | 32 | if [[ $CXX =~ "android" ]]; then 33 | LINK_FLAG="${LINK_FLAG} -llog -lz" 34 | fi 35 | 36 | echo "CXX: $CXX" 37 | echo "MGE_INSTALL_PATH: $MGE_INSTALL_PATH" 38 | echo "INCLUDE_FLAG: $INCLUDE_FLAG" 39 | echo "LINK_FLAG: $LINK_FLAG" 40 | echo "BUILD_FLAG: $BUILD_FLAG" 41 | 42 | echo "[" > compile_commands.json 43 | echo "{" >> compile_commands.json 44 | echo "\"directory\": \"$PWD\"," >> compile_commands.json 45 | echo "\"command\": \"$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG}\"," >> compile_commands.json 46 | echo "\"file\": \"$PWD/yolox.cpp\"," >> compile_commands.json 47 | echo "}," >> compile_commands.json 48 | echo "]" >> compile_commands.json 49 | $CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG} ${BUILD_FLAG} 50 | 51 | echo "build success, output file: yolox" 52 | if [[ $CXX =~ "android" ]]; then 53 | echo "try command to run:" 54 | echo "adb push/scp $MGE_INSTALL_PATH/lib/libmegengine.so android_phone" 55 | echo "adb push/scp $OPENCV_INSTALL_LIB_PATH/*.so android_phone" 56 | echo "adb push/scp ./yolox yolox_s.mge android_phone" 57 | echo "adb push/scp ../../../assets/dog.jpg android_phone" 58 | echo "adb/ssh to android_phone, then run: LD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread " 59 | else 60 | echo "try command to run: LD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread " 61 | fi 62 | -------------------------------------------------------------------------------- /demo/MegEngine/python/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-Python-MegEngine 2 | 3 | Python version of YOLOX object detection base on [MegEngine](https://github.com/MegEngine/MegEngine). 4 | 5 | ## Tutorial 6 | 7 | ### Step1: install requirements 8 | 9 | ``` 10 | python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html 11 | ``` 12 | 13 | ### Step2: convert checkpoint weights from torch's path file 14 | 15 | ``` 16 | python3 convert_weights.py -w yolox_s.pth -o yolox_s_mge.pkl 17 | ``` 18 | 19 | ### Step3: run demo 20 | 21 | This part is the same as torch's python demo, but no need to specify device. 22 | 23 | ``` 24 | python3 demo.py image -n yolox-s -c yolox_s_mge.pkl --path ../../../assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result 25 | ``` 26 | 27 | ### [Optional]Step4: dump model for cpp inference 28 | 29 | > **Note**: result model is dumped with `optimize_for_inference` and `enable_fuse_conv_bias_nonlinearity`. 
30 | 31 | ``` 32 | python3 dump.py -n yolox-s -c yolox_s_mge.pkl --dump_path yolox_s.mge 33 | ``` 34 | -------------------------------------------------------------------------------- /demo/MegEngine/python/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import megengine as mge 5 | import megengine.module as M 6 | from megengine import jit 7 | 8 | from models.yolo_fpn import YOLOFPN 9 | from models.yolo_head import YOLOXHead 10 | from models.yolo_pafpn import YOLOPAFPN 11 | from models.yolox import YOLOX 12 | 13 | 14 | def build_yolox(name="yolox-s"): 15 | num_classes = 80 16 | 17 | # value meaning: depth, width 18 | param_dict = { 19 | "yolox-nano": (0.33, 0.25), 20 | "yolox-tiny": (0.33, 0.375), 21 | "yolox-s": (0.33, 0.50), 22 | "yolox-m": (0.67, 0.75), 23 | "yolox-l": (1.0, 1.0), 24 | "yolox-x": (1.33, 1.25), 25 | } 26 | if name == "yolov3": 27 | depth = 1.0 28 | width = 1.0 29 | backbone = YOLOFPN() 30 | head = YOLOXHead(num_classes, width, in_channels=[128, 256, 512], act="lrelu") 31 | model = YOLOX(backbone, head) 32 | else: 33 | assert name in param_dict 34 | kwargs = {} 35 | depth, width = param_dict[name] 36 | if name == "yolox-nano": 37 | kwargs["depthwise"] = True 38 | in_channels = [256, 512, 1024] 39 | backbone = YOLOPAFPN(depth, width, in_channels=in_channels, **kwargs) 40 | head = YOLOXHead(num_classes, width, in_channels=in_channels, **kwargs) 41 | model = YOLOX(backbone, head) 42 | 43 | for m in model.modules(): 44 | if isinstance(m, M.BatchNorm2d): 45 | m.eps = 1e-3 46 | 47 | return model 48 | 49 | 50 | def build_and_load(weight_file, name="yolox-s"): 51 | model = build_yolox(name) 52 | model_weights = mge.load(weight_file) 53 | model.load_state_dict(model_weights, strict=False) 54 | return model 55 | -------------------------------------------------------------------------------- /demo/MegEngine/python/coco_classes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
4 | 5 | COCO_CLASSES = ( 6 | "person", 7 | "bicycle", 8 | "car", 9 | "motorcycle", 10 | "airplane", 11 | "bus", 12 | "train", 13 | "truck", 14 | "boat", 15 | "traffic light", 16 | "fire hydrant", 17 | "stop sign", 18 | "parking meter", 19 | "bench", 20 | "bird", 21 | "cat", 22 | "dog", 23 | "horse", 24 | "sheep", 25 | "cow", 26 | "elephant", 27 | "bear", 28 | "zebra", 29 | "giraffe", 30 | "backpack", 31 | "umbrella", 32 | "handbag", 33 | "tie", 34 | "suitcase", 35 | "frisbee", 36 | "skis", 37 | "snowboard", 38 | "sports ball", 39 | "kite", 40 | "baseball bat", 41 | "baseball glove", 42 | "skateboard", 43 | "surfboard", 44 | "tennis racket", 45 | "bottle", 46 | "wine glass", 47 | "cup", 48 | "fork", 49 | "knife", 50 | "spoon", 51 | "bowl", 52 | "banana", 53 | "apple", 54 | "sandwich", 55 | "orange", 56 | "broccoli", 57 | "carrot", 58 | "hot dog", 59 | "pizza", 60 | "donut", 61 | "cake", 62 | "chair", 63 | "couch", 64 | "potted plant", 65 | "bed", 66 | "dining table", 67 | "toilet", 68 | "tv", 69 | "laptop", 70 | "mouse", 71 | "remote", 72 | "keyboard", 73 | "cell phone", 74 | "microwave", 75 | "oven", 76 | "toaster", 77 | "sink", 78 | "refrigerator", 79 | "book", 80 | "clock", 81 | "vase", 82 | "scissors", 83 | "teddy bear", 84 | "hair drier", 85 | "toothbrush", 86 | ) 87 | -------------------------------------------------------------------------------- /demo/MegEngine/python/convert_weights.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | import argparse 4 | from collections import OrderedDict 5 | 6 | import megengine as mge 7 | import torch 8 | 9 | 10 | def make_parser(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("-w", "--weights", type=str, help="path of weight file") 13 | parser.add_argument( 14 | "-o", 15 | "--output", 16 | default="weight_mge.pkl", 17 | type=str, 18 | help="path of weight file", 19 | ) 20 | return parser 21 | 22 | 23 | def numpy_weights(weight_file): 24 | torch_weights = torch.load(weight_file, map_location="cpu") 25 | if "model" in torch_weights: 26 | torch_weights = torch_weights["model"] 27 | new_dict = OrderedDict() 28 | for k, v in torch_weights.items(): 29 | new_dict[k] = v.cpu().numpy() 30 | return new_dict 31 | 32 | 33 | def map_weights(weight_file, output_file): 34 | torch_weights = numpy_weights(weight_file) 35 | 36 | new_dict = OrderedDict() 37 | for k, v in torch_weights.items(): 38 | if "num_batches_tracked" in k: 39 | print("drop: {}".format(k)) 40 | continue 41 | if k.endswith("bias"): 42 | print("bias key: {}".format(k)) 43 | v = v.reshape(1, -1, 1, 1) 44 | new_dict[k] = v 45 | elif "dconv" in k and "conv.weight" in k: 46 | print("depthwise conv key: {}".format(k)) 47 | cout, cin, k1, k2 = v.shape 48 | v = v.reshape(cout, 1, cin, k1, k2) 49 | new_dict[k] = v 50 | else: 51 | new_dict[k] = v 52 | 53 | mge.save(new_dict, output_file) 54 | print("save weights to {}".format(output_file)) 55 | 56 | 57 | def main(): 58 | parser = make_parser() 59 | args = parser.parse_args() 60 | map_weights(args.weights, args.output) 61 | 62 | 63 | if __name__ == "__main__": 64 | main() 65 | -------------------------------------------------------------------------------- /demo/MegEngine/python/dump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
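# What this script does: build_and_load() rebuilds the requested YOLOX variant and loads
# the converted MegEngine weights, the head's decode_in_inference flag is switched off so
# the traced graph returns raw per-level predictions, and jit.trace + dump() serialize a
# static graph (with conv/bias/nonlinearity fusion enabled) for the C++ demo to load.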
4 | 5 | import argparse 6 | 7 | import megengine as mge 8 | import numpy as np 9 | from megengine import jit 10 | 11 | from build import build_and_load 12 | 13 | 14 | def make_parser(): 15 | parser = argparse.ArgumentParser("YOLOX Demo Dump") 16 | parser.add_argument("-n", "--name", type=str, default="yolox-s", help="model name") 17 | parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") 18 | parser.add_argument( 19 | "--dump_path", default="model.mge", help="path to save the dumped model" 20 | ) 21 | return parser 22 | 23 | 24 | def dump_static_graph(model, graph_name="model.mge"): 25 | model.eval() 26 | model.head.decode_in_inference = False 27 | 28 | data = mge.Tensor(np.random.random((1, 3, 640, 640))) 29 | 30 | @jit.trace(capture_as_const=True) 31 | def pred_func(data): 32 | outputs = model(data) 33 | return outputs 34 | 35 | pred_func(data) 36 | pred_func.dump( 37 | graph_name, 38 | arg_names=["data"], 39 | optimize_for_inference=True, 40 | enable_fuse_conv_bias_nonlinearity=True, 41 | ) 42 | 43 | 44 | def main(args): 45 | model = build_and_load(args.ckpt, name=args.name) 46 | dump_static_graph(model, args.dump_path) 47 | 48 | 49 | if __name__ == "__main__": 50 | args = make_parser().parse_args() 51 | main(args) 52 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .yolo_fpn import YOLOFPN 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | from .yolox import YOLOX 10 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.functional as F 6 | import megengine.module as M 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv, UpSample 10 | 11 | 12 | class YOLOFPN(M.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, depth=53, in_features=["dark3", "dark4", "dark5"], 19 | ): 20 | super().__init__() 21 | 22 | self.backbone = Darknet(depth) 23 | self.in_features = in_features 24 | 25 | # out 1 26 | self.out1_cbl = self._make_cbl(512, 256, 1) 27 | self.out1 = self._make_embedding([256, 512], 512 + 256) 28 | 29 | # out 2 30 | self.out2_cbl = self._make_cbl(256, 128, 1) 31 | self.out2 = self._make_embedding([128, 256], 256 + 128) 32 | 33 | # upsample 34 | self.upsample = UpSample(scale_factor=2, mode="bilinear") 35 | 36 | def _make_cbl(self, _in, _out, ks): 37 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 38 | 39 | def _make_embedding(self, filters_list, in_filters): 40 | m = M.Sequential( 41 | *[ 42 | self._make_cbl(in_filters, filters_list[0], 1), 43 | self._make_cbl(filters_list[0], filters_list[1], 3), 44 | 45 | self._make_cbl(filters_list[1], filters_list[0], 1), 46 | 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def forward(self, inputs): 54 | """ 55 | Args: 56 | inputs (Tensor): input image. 
57 | 58 | Returns: 59 | Tuple[Tensor]: FPN output features.. 60 | """ 61 | # backbone 62 | out_features = self.backbone(inputs) 63 | x2, x1, x0 = [out_features[f] for f in self.in_features] 64 | 65 | # yolo branch 1 66 | x1_in = self.out1_cbl(x0) 67 | x1_in = self.upsample(x1_in) 68 | x1_in = F.concat([x1_in, x1], 1) 69 | out_dark4 = self.out1(x1_in) 70 | 71 | # yolo branch 2 72 | x2_in = self.out2_cbl(out_dark4) 73 | x2_in = self.upsample(x2_in) 74 | x2_in = F.concat([x2_in, x2], 1) 75 | out_dark3 = self.out2(x2_in) 76 | 77 | outputs = (out_dark3, out_dark4, x0) 78 | return outputs 79 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolo_pafpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.module as M 6 | import megengine.functional as F 7 | 8 | from .darknet import CSPDarknet 9 | from .network_blocks import BaseConv, CSPLayer, DWConv, UpSample 10 | 11 | 12 | class YOLOPAFPN(M.Module): 13 | """ 14 | YOLOv3 model. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"), 19 | in_channels=[256, 512, 1024], depthwise=False, act="silu", 20 | ): 21 | super().__init__() 22 | self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) 23 | self.in_features = in_features 24 | self.in_channels = in_channels 25 | Conv = DWConv if depthwise else BaseConv 26 | 27 | self.upsample = UpSample(scale_factor=2, mode="bilinear") 28 | self.lateral_conv0 = BaseConv( 29 | int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act 30 | ) 31 | self.C3_p4 = CSPLayer( 32 | int(2 * in_channels[1] * width), 33 | int(in_channels[1] * width), 34 | round(3 * depth), 35 | False, 36 | depthwise=depthwise, 37 | act=act, 38 | ) # cat 39 | 40 | self.reduce_conv1 = BaseConv( 41 | int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act 42 | ) 43 | self.C3_p3 = CSPLayer( 44 | int(2 * in_channels[0] * width), 45 | int(in_channels[0] * width), 46 | round(3 * depth), 47 | False, 48 | depthwise=depthwise, 49 | act=act, 50 | ) 51 | 52 | # bottom-up conv 53 | self.bu_conv2 = Conv( 54 | int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act 55 | ) 56 | self.C3_n3 = CSPLayer( 57 | int(2 * in_channels[0] * width), 58 | int(in_channels[1] * width), 59 | round(3 * depth), 60 | False, 61 | depthwise=depthwise, 62 | act=act, 63 | ) 64 | 65 | # bottom-up conv 66 | self.bu_conv1 = Conv( 67 | int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act 68 | ) 69 | self.C3_n4 = CSPLayer( 70 | int(2 * in_channels[1] * width), 71 | int(in_channels[2] * width), 72 | round(3 * depth), 73 | False, 74 | depthwise=depthwise, 75 | act=act, 76 | ) 77 | 78 | def forward(self, input): 79 | """ 80 | Args: 81 | inputs: input images. 82 | 83 | Returns: 84 | Tuple[Tensor]: FPN feature. 
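            The three maps are ordered (pan_out2, pan_out1, pan_out0), i.e. strides
            8, 16 and 32 of the input resolution (see the shape comments below).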
85 | """ 86 | 87 | # backbone 88 | out_features = self.backbone(input) 89 | features = [out_features[f] for f in self.in_features] 90 | [x2, x1, x0] = features 91 | 92 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 93 | f_out0 = self.upsample(fpn_out0) # 512/16 94 | f_out0 = F.concat([f_out0, x1], 1) # 512->1024/16 95 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 96 | 97 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 98 | f_out1 = self.upsample(fpn_out1) # 256/8 99 | f_out1 = F.concat([f_out1, x2], 1) # 256->512/8 100 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 101 | 102 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 103 | p_out1 = F.concat([p_out1, fpn_out1], 1) # 256->512/16 104 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 105 | 106 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 107 | p_out0 = F.concat([p_out0, fpn_out0], 1) # 512->1024/32 108 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 109 | 110 | outputs = (pan_out2, pan_out1, pan_out0) 111 | return outputs 112 | -------------------------------------------------------------------------------- /demo/MegEngine/python/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import megengine.module as M 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | 11 | class YOLOX(M.Module): 12 | """ 13 | YOLOX model module. The module list is defined by create_yolov3_modules function. 14 | The network returns loss values from three YOLO layers during training 15 | and detection results during test. 16 | """ 17 | 18 | def __init__(self, backbone=None, head=None): 19 | super().__init__() 20 | if backbone is None: 21 | backbone = YOLOPAFPN() 22 | if head is None: 23 | head = YOLOXHead(80) 24 | 25 | self.backbone = backbone 26 | self.head = head 27 | 28 | def forward(self, x): 29 | # fpn output content features of [dark3, dark4, dark5] 30 | fpn_outs = self.backbone(x) 31 | assert not self.training 32 | outputs = self.head(fpn_outs) 33 | 34 | return outputs 35 | -------------------------------------------------------------------------------- /demo/MegEngine/python/process.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
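# Helper functions shared by the MegEngine demo:
# - preprocess(): letterbox-resizes the image onto a 114-padded canvas of input_size,
#   flips BGR -> RGB, scales to [0, 1], applies mean/std normalization and returns the
#   resize ratio needed later to map boxes back to the original image.
# - postprocess(): converts (cx, cy, w, h) predictions to corner form, keeps boxes with
#   obj_conf * class_conf above conf_thre and runs per-image NMS with nms_thre.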
4 | 5 | import cv2 6 | import megengine.functional as F 7 | import numpy as np 8 | 9 | __all__ = [ 10 | "preprocess", 11 | "postprocess", 12 | ] 13 | 14 | 15 | def preprocess(image, input_size, mean, std, swap=(2, 0, 1)): 16 | if len(image.shape) == 3: 17 | padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0 18 | else: 19 | padded_img = np.ones(input_size) * 114.0 20 | img = np.array(image) 21 | r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) 22 | resized_img = cv2.resize( 23 | img, 24 | (int(img.shape[1] * r), int(img.shape[0] * r)), 25 | interpolation=cv2.INTER_LINEAR, 26 | ).astype(np.float32) 27 | padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img 28 | image = padded_img 29 | 30 | image = image.astype(np.float32) 31 | image = image[:, :, ::-1] 32 | image /= 255.0 33 | if mean is not None: 34 | image -= mean 35 | if std is not None: 36 | image /= std 37 | image = image.transpose(swap) 38 | image = np.ascontiguousarray(image, dtype=np.float32) 39 | return image, r 40 | 41 | 42 | def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): 43 | box_corner = F.zeros_like(prediction) 44 | box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 45 | box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 46 | box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 47 | box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 48 | prediction[:, :, :4] = box_corner[:, :, :4] 49 | 50 | output = [None for _ in range(len(prediction))] 51 | for i, image_pred in enumerate(prediction): 52 | 53 | # If none are remaining => process next image 54 | if not image_pred.shape[0]: 55 | continue 56 | # Get score and class with highest confidence 57 | class_conf = F.max(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True) 58 | class_pred = F.argmax(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True) 59 | 60 | class_conf_squeeze = F.squeeze(class_conf) 61 | conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre 62 | detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1) 63 | detections = detections[conf_mask] 64 | if not detections.shape[0]: 65 | continue 66 | 67 | nms_out_index = F.vision.nms( 68 | detections[:, :4], detections[:, 4] * detections[:, 5], nms_thre, 69 | ) 70 | detections = detections[nms_out_index] 71 | if output[i] is None: 72 | output[i] = detections 73 | else: 74 | output[i] = F.concat((output[i], detections)) 75 | 76 | return output 77 | -------------------------------------------------------------------------------- /demo/MegEngine/python/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
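# Drawing helper for the demos: vis() expects boxes as (x1, y1, x2, y2) pixel coordinates
# on the original image, scores in [0, 1] and integer class ids indexing into class_names;
# detections below `conf` are skipped, the rest get a per-class color from _COLORS and a
# "label:score%" caption.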
4 | 5 | import cv2 6 | import numpy as np 7 | 8 | __all__ = ["vis"] 9 | 10 | 11 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): 12 | 13 | for i in range(len(boxes)): 14 | box = boxes[i] 15 | cls_id = int(cls_ids[i]) 16 | score = scores[i] 17 | if score < conf: 18 | continue 19 | x0 = int(box[0]) 20 | y0 = int(box[1]) 21 | x1 = int(box[2]) 22 | y1 = int(box[3]) 23 | 24 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() 25 | text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) 26 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) 27 | font = cv2.FONT_HERSHEY_SIMPLEX 28 | 29 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] 30 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) 31 | 32 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() 33 | cv2.rectangle( 34 | img, 35 | (x0, y0 + 1), 36 | (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), 37 | txt_bk_color, 38 | -1 39 | ) 40 | cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) 41 | 42 | return img 43 | 44 | 45 | _COLORS = np.array( 46 | [ 47 | 0.000, 0.447, 0.741, 48 | 0.850, 0.325, 0.098, 49 | 0.929, 0.694, 0.125, 50 | 0.494, 0.184, 0.556, 51 | 0.466, 0.674, 0.188, 52 | 0.301, 0.745, 0.933, 53 | 0.635, 0.078, 0.184, 54 | 0.300, 0.300, 0.300, 55 | 0.600, 0.600, 0.600, 56 | 1.000, 0.000, 0.000, 57 | 1.000, 0.500, 0.000, 58 | 0.749, 0.749, 0.000, 59 | 0.000, 1.000, 0.000, 60 | 0.000, 0.000, 1.000, 61 | 0.667, 0.000, 1.000, 62 | 0.333, 0.333, 0.000, 63 | 0.333, 0.667, 0.000, 64 | 0.333, 1.000, 0.000, 65 | 0.667, 0.333, 0.000, 66 | 0.667, 0.667, 0.000, 67 | 0.667, 1.000, 0.000, 68 | 1.000, 0.333, 0.000, 69 | 1.000, 0.667, 0.000, 70 | 1.000, 1.000, 0.000, 71 | 0.000, 0.333, 0.500, 72 | 0.000, 0.667, 0.500, 73 | 0.000, 1.000, 0.500, 74 | 0.333, 0.000, 0.500, 75 | 0.333, 0.333, 0.500, 76 | 0.333, 0.667, 0.500, 77 | 0.333, 1.000, 0.500, 78 | 0.667, 0.000, 0.500, 79 | 0.667, 0.333, 0.500, 80 | 0.667, 0.667, 0.500, 81 | 0.667, 1.000, 0.500, 82 | 1.000, 0.000, 0.500, 83 | 1.000, 0.333, 0.500, 84 | 1.000, 0.667, 0.500, 85 | 1.000, 1.000, 0.500, 86 | 0.000, 0.333, 1.000, 87 | 0.000, 0.667, 1.000, 88 | 0.000, 1.000, 1.000, 89 | 0.333, 0.000, 1.000, 90 | 0.333, 0.333, 1.000, 91 | 0.333, 0.667, 1.000, 92 | 0.333, 1.000, 1.000, 93 | 0.667, 0.000, 1.000, 94 | 0.667, 0.333, 1.000, 95 | 0.667, 0.667, 1.000, 96 | 0.667, 1.000, 1.000, 97 | 1.000, 0.000, 1.000, 98 | 1.000, 0.333, 1.000, 99 | 1.000, 0.667, 1.000, 100 | 0.333, 0.000, 0.000, 101 | 0.500, 0.000, 0.000, 102 | 0.667, 0.000, 0.000, 103 | 0.833, 0.000, 0.000, 104 | 1.000, 0.000, 0.000, 105 | 0.000, 0.167, 0.000, 106 | 0.000, 0.333, 0.000, 107 | 0.000, 0.500, 0.000, 108 | 0.000, 0.667, 0.000, 109 | 0.000, 0.833, 0.000, 110 | 0.000, 1.000, 0.000, 111 | 0.000, 0.000, 0.167, 112 | 0.000, 0.000, 0.333, 113 | 0.000, 0.000, 0.500, 114 | 0.000, 0.000, 0.667, 115 | 0.000, 0.000, 0.833, 116 | 0.000, 0.000, 1.000, 117 | 0.000, 0.000, 0.000, 118 | 0.143, 0.143, 0.143, 119 | 0.286, 0.286, 0.286, 120 | 0.429, 0.429, 0.429, 121 | 0.571, 0.571, 0.571, 122 | 0.714, 0.714, 0.714, 123 | 0.857, 0.857, 0.857, 124 | 0.000, 0.447, 0.741, 125 | 0.314, 0.717, 0.741, 126 | 0.50, 0.5, 0 127 | ] 128 | ).astype(np.float32).reshape(-1, 3) 129 | -------------------------------------------------------------------------------- /demo/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## YOLOX-ONNXRuntime in Python 2 | 3 | This doc introduces how to convert your 
pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion. 4 | 5 | ### Download ONNX models. 6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | YOLOX-Nano | 0.91M | 1.08 | 416x416 | 25.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EfAGwvevU-lNhW5OqFAyHbwBJdI_7EaKu5yU04fgF5BU7w?e=gvq4hf)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.onnx) | 9 | | YOLOX-Tiny | 5.06M | 6.45 | 416x416 |32.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ET64VPoEV8FAm5YBiEj5JXwBVn_KYHM38iJQ_lpcK2slYw?e=uuJ7Ii)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.onnx) | 10 | | YOLOX-S | 9.0M | 26.8 | 640x640 |39.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/Ec0L1d1x2UtIpbfiahgxhtgBZVjb1NCXbotO8SCOdMqpQQ?e=siyIsK)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.onnx) | 11 | | YOLOX-M | 25.3M | 73.8 | 640x640 |46.4 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERUKlQe-nlxBoTKPy1ynbxsBmAZ_h-VBEV-nnfPdzUIkZQ?e=hyQQtl)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.onnx) | 12 | | YOLOX-L | 54.2M | 155.6 | 640x640 |50.0 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ET5w926jCA5GlVfg9ixB4KEBiW0HYl7SzaHNRaRG9dYO_A?e=ISmCYX)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.onnx) | 13 | | YOLOX-Darknet53| 63.72M | 185.3 | 640x640 |47.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESArloSW-MlPlLuemLh9zKkBdovgweKbfu4zkvzKAp7pPQ?e=f81Ikw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.onnx) | 14 | | YOLOX-X | 99.1M | 281.9 | 640x640 |51.2 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERjqoeMJlFdGuM3tQfXQmhABmGHlIHydWCwhlugeWLE9AA)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox.onnx) | 15 | 16 | 17 | ### Convert Your Model to ONNX 18 | 19 | First, you should move to by: 20 | ```shell 21 | cd 22 | ``` 23 | Then, you can: 24 | 25 | 1. Convert a standard YOLOX model by -n: 26 | ```shell 27 | python3 tools/export_onnx.py --output-name yolox_s.onnx -n yolox-s -c yolox_s.pth 28 | ``` 29 | Notes: 30 | * -n: specify a model name. The model name must be one of the [yolox-s,m,l,x and yolox-nane, yolox-tiny, yolov3] 31 | * -c: the model you have trained 32 | * -o: opset version, default 11. **However, if you will further convert your onnx model to [OpenVINO](../OpenVINO/), please specify the opset version to 10.** 33 | * --no-onnxsim: disable onnxsim 34 | * To customize an input shape for onnx model, modify the following code in tools/export.py: 35 | 36 | ```python 37 | dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1]) 38 | ``` 39 | 40 | 2. Convert a standard YOLOX model by -f. When using -f, the above command is equivalent to: 41 | 42 | ```shell 43 | python3 tools/export_onnx.py --output-name yolox_s.onnx -f exps/default/yolox_s.py -c yolox_s.pth 44 | ``` 45 | 46 | 3. 
To convert your customized model, please use -f: 47 | 48 | ```shell 49 | python3 tools/export_onnx.py --output-name your_yolox.onnx -f exps/your_dir/your_yolox.py -c your_yolox.pth 50 | ``` 51 | 52 | ### ONNXRuntime Demo 53 | 54 | Step1. 55 | ```shell 56 | cd /demo/ONNXRuntime 57 | ``` 58 | 59 | Step2. 60 | ```shell 61 | python3 onnx_inference.py -m -i -o -s 0.3 --input_shape 640,640 62 | ``` 63 | Notes: 64 | * -m: your converted onnx model 65 | * -i: input_image 66 | * -s: score threshold for visualization. 67 | * --input_shape: should be consistent with the shape you used for onnx convertion. 68 | -------------------------------------------------------------------------------- /demo/ONNXRuntime/onnx_inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import argparse 6 | import os 7 | 8 | import cv2 9 | import numpy as np 10 | 11 | import onnxruntime 12 | 13 | from yolox.data.data_augment import preproc as preprocess 14 | from yolox.data.datasets import COCO_CLASSES 15 | from yolox.utils import mkdir, multiclass_nms, demo_postprocess, vis 16 | 17 | 18 | def make_parser(): 19 | parser = argparse.ArgumentParser("onnxruntime inference sample") 20 | parser.add_argument( 21 | "-m", 22 | "--model", 23 | type=str, 24 | default="yolox.onnx", 25 | help="Input your onnx model.", 26 | ) 27 | parser.add_argument( 28 | "-i", 29 | "--image_path", 30 | type=str, 31 | default='test_image.png', 32 | help="Path to your input image.", 33 | ) 34 | parser.add_argument( 35 | "-o", 36 | "--output_dir", 37 | type=str, 38 | default='demo_output', 39 | help="Path to your output directory.", 40 | ) 41 | parser.add_argument( 42 | "-s", 43 | "--score_thr", 44 | type=float, 45 | default=0.3, 46 | help="Score threshould to filter the result.", 47 | ) 48 | parser.add_argument( 49 | "--input_shape", 50 | type=str, 51 | default="640,640", 52 | help="Specify an input shape for inference.", 53 | ) 54 | parser.add_argument( 55 | "--with_p6", 56 | action="store_true", 57 | help="Whether your model uses p6 in FPN/PAN.", 58 | ) 59 | return parser 60 | 61 | 62 | if __name__ == '__main__': 63 | args = make_parser().parse_args() 64 | 65 | input_shape = tuple(map(int, args.input_shape.split(','))) 66 | origin_img = cv2.imread(args.image_path) 67 | mean = (0.485, 0.456, 0.406) 68 | std = (0.229, 0.224, 0.225) 69 | img, ratio = preprocess(origin_img, input_shape, mean, std) 70 | 71 | session = onnxruntime.InferenceSession(args.model) 72 | 73 | ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]} 74 | output = session.run(None, ort_inputs) 75 | predictions = demo_postprocess(output[0], input_shape, p6=args.with_p6)[0] 76 | 77 | boxes = predictions[:, :4] 78 | scores = predictions[:, 4:5] * predictions[:, 5:] 79 | 80 | boxes_xyxy = np.ones_like(boxes) 81 | boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2. 82 | boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2. 83 | boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2. 84 | boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2. 
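    # The raw predictions are (cx, cy, w, h) on the resized network input; the four lines
    # above convert them to corner (x1, y1, x2, y2) form and the division by `ratio` below
    # maps them back to the original image scale used for drawing.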
85 | boxes_xyxy /= ratio 86 | dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1) 87 | if dets is not None: 88 | final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5] 89 | origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds, 90 | conf=args.score_thr, class_names=COCO_CLASSES) 91 | 92 | mkdir(args.output_dir) 93 | output_path = os.path.join(args.output_dir, args.image_path.split("/")[-1]) 94 | cv2.imwrite(output_path, origin_img) 95 | -------------------------------------------------------------------------------- /demo/OpenVINO/README.md: -------------------------------------------------------------------------------- 1 | ## YOLOX for OpenVINO 2 | 3 | * [C++ Demo](./cpp) 4 | * [Python Demo](./python) -------------------------------------------------------------------------------- /demo/OpenVINO/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.1) 2 | set(CMAKE_CXX_STANDARD 14) 3 | 4 | project(yolox_openvino_demo) 5 | 6 | find_package(OpenCV REQUIRED) 7 | find_package(InferenceEngine REQUIRED) 8 | find_package(ngraph REQUIRED) 9 | 10 | include_directories( 11 | ${OpenCV_INCLUDE_DIRS} 12 | ${CMAKE_CURRENT_SOURCE_DIR} 13 | ${CMAKE_CURRENT_BINARY_DIR} 14 | ) 15 | 16 | add_executable(yolox_openvino yolox_openvino.cpp) 17 | 18 | target_link_libraries( 19 | yolox_openvino 20 | ${InferenceEngine_LIBRARIES} 21 | ${NGRAPH_LIBRARIES} 22 | ${OpenCV_LIBS} 23 | ) -------------------------------------------------------------------------------- /demo/OpenVINO/cpp/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-OpenVINO in C++ 2 | 3 | This toturial includes a C++ demo for OpenVINO, as well as some converted models. 4 | 5 | ### Download OpenVINO models. 
6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | [YOLOX-Nano](../../../exps/nano.py) | 0.91M | 1.08 | 416x416 | 25.3 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EeWY57o5wQZFtXYd1KJw6Z8B4vxZru649XxQHYIFgio3Qw?e=ZS81ce)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano_openvino.tar.gz) | 9 | | [YOLOX-Tiny](../../../exps/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |31.7 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ETfvOoCXdVZNinoSpKA_sEYBIQVqfjjF5_M6VvHRnLVcsA?e=STL1pi)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_openvino.tar.gz) | 10 | | [YOLOX-S](../../../exps/yolox_s.py) | 9.0M | 26.8 | 640x640 |39.6 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EXUjf3PQnbBLrxNrXPueqaIBzVZOrYQOnJpLK1Fytj5ssA?e=GK0LOM)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_openvino.tar.gz) | 11 | | [YOLOX-M](../../../exps/yolox_m.py) | 25.3M | 73.8 | 640x640 |46.4 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EcoT1BPpeRpLvE_4c441zn8BVNCQ2naxDH3rho7WqdlgLQ?e=95VaM9)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m_openvino.tar.gz) | 12 | | [YOLOX-L](../../../exps/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.0 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZvmn-YLRuVPh0GAP_w3xHMB2VGvrKqQXyK_Cv5yi_DXUg?e=YRh6Eq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l_openvino.tar.gz) | 13 | | [YOLOX-Darknet53](../../../exps/yolov3.py) | 63.72M | 185.3 | 640x640 |47.3 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EQP8LSroikFHuwX0jFRetmcBOCDWSFmylHxolV7ezUPXGw?e=bEw5iq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53_openvino.tar.gz) | 14 | | [YOLOX-X](../../../exps/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.2 | [Download](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZFPnLqiD-xIlt7rcZYDjQgB4YXE9wnq1qaSXQwJrsKbdg?e=83nwEz)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x_openvino.tar.gz) | 15 | 16 | ## Install OpenVINO Toolkit 17 | 18 | Please visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details. 19 | 20 | ## Set up the Environment 21 | 22 | ### For Linux 23 | 24 | **Option1. Set up the environment tempororally. You need to run this command everytime you start a new shell window.** 25 | 26 | ```shell 27 | source /opt/intel/openvino_2021/bin/setupvars.sh 28 | ``` 29 | 30 | **Option2. Set up the environment permenantly.** 31 | 32 | *Step1.* For Linux: 33 | ```shell 34 | vim ~/.bashrc 35 | ``` 36 | 37 | *Step2.* Add the following line into your file: 38 | 39 | ```shell 40 | source /opt/intel/openvino_2021/bin/setupvars.sh 41 | ``` 42 | 43 | *Step3.* Save and exit the file, then run: 44 | 45 | ```shell 46 | source ~/.bashrc 47 | ``` 48 | 49 | 50 | ## Convert model 51 | 52 | 1. Export ONNX model 53 | 54 | Please refer to the [ONNX toturial](../../ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.** 55 | 56 | 2. 
Convert ONNX to OpenVINO 57 | 58 | ``` shell 59 | cd /openvino_2021/deployment_tools/model_optimizer 60 | ``` 61 | 62 | Install requirements for convert tool 63 | 64 | ```shell 65 | sudo ./install_prerequisites/install_prerequisites_onnx.sh 66 | ``` 67 | 68 | Then convert model. 69 | ```shell 70 | python3 mo.py --input_model --input_shape [--data_type FP16] 71 | ``` 72 | For example: 73 | ```shell 74 | python3 mo.py --input_model yolox.onnx --input_shape (1,3,640,640) --data_type FP16 75 | ``` 76 | 77 | ## Build 78 | 79 | ### Linux 80 | ```shell 81 | source /opt/intel/openvino_2021/bin/setupvars.sh 82 | mkdir build 83 | cd build 84 | cmake .. 85 | make 86 | ``` 87 | 88 | ## Demo 89 | 90 | ### c++ 91 | 92 | ```shell 93 | ./yolox_openvino 94 | ``` 95 | -------------------------------------------------------------------------------- /demo/OpenVINO/python/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX-OpenVINO in Python 2 | 3 | This toturial includes a Python demo for OpenVINO, as well as some converted models. 4 | 5 | ### Download OpenVINO models. 6 | | Model | Parameters | GFLOPs | Test Size | mAP | Weights | 7 | |:------| :----: | :----: | :---: | :---: | :---: | 8 | | [YOLOX-Nano](../../../exps/default/nano.py) | 0.91M | 1.08 | 416x416 | 25.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EeWY57o5wQZFtXYd1KJw6Z8B4vxZru649XxQHYIFgio3Qw?e=ZS81ce)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano_openvino.tar.gz) | 9 | | [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M | 6.45 | 416x416 |31.7 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ETfvOoCXdVZNinoSpKA_sEYBIQVqfjjF5_M6VvHRnLVcsA?e=STL1pi)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_openvino.tar.gz) | 10 | | [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |39.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EXUjf3PQnbBLrxNrXPueqaIBzVZOrYQOnJpLK1Fytj5ssA?e=GK0LOM)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_openvino.tar.gz) | 11 | | [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |46.4 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EcoT1BPpeRpLvE_4c441zn8BVNCQ2naxDH3rho7WqdlgLQ?e=95VaM9)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m_openvino.tar.gz) | 12 | | [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.0 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZvmn-YLRuVPh0GAP_w3xHMB2VGvrKqQXyK_Cv5yi_DXUg?e=YRh6Eq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l_openvino.tar.gz) | 13 | | [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |47.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EQP8LSroikFHuwX0jFRetmcBOCDWSFmylHxolV7ezUPXGw?e=bEw5iq)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53_openvino.tar.gz) | 14 | | [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.2 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZFPnLqiD-xIlt7rcZYDjQgB4YXE9wnq1qaSXQwJrsKbdg?e=83nwEz)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x_openvino.tar.gz) | 15 | 
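Each archive above should unpack to an OpenVINO IR (an `.xml` topology plus a `.bin` weights file). If you want to sanity-check a downloaded model once the toolkit is installed (see the next section), a minimal sketch with the 2021-era Inference Engine Python API looks like this; the file names are placeholders for whichever model you unpacked:

```python
from openvino.inference_engine import IECore

ie = IECore()
# paths below are placeholders: point them at the .xml/.bin pair you extracted
net = ie.read_network(model="yolox_s.xml", weights="yolox_s.bin")
exec_net = ie.load_network(network=net, device_name="CPU")
print("inputs:", list(net.input_info.keys()))
print("outputs:", list(net.outputs.keys()))
```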
16 | ## Install OpenVINO Toolkit
17 | 
18 | Please visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details.
19 | 
20 | ## Set up the Environment
21 | 
22 | ### For Linux
23 | 
24 | **Option1. Set up the environment temporarily. You need to run this command every time you start a new shell window.**
25 | 
26 | ```shell
27 | source /opt/intel/openvino_2021/bin/setupvars.sh
28 | ```
29 | 
30 | **Option2. Set up the environment permanently.**
31 | 
32 | *Step1.* For Linux:
33 | ```shell
34 | vim ~/.bashrc
35 | ```
36 | 
37 | *Step2.* Add the following line into your file:
38 | 
39 | ```shell
40 | source /opt/intel/openvino_2021/bin/setupvars.sh
41 | ```
42 | 
43 | *Step3.* Save and exit the file, then run:
44 | 
45 | ```shell
46 | source ~/.bashrc
47 | ```
48 | 
49 | 
50 | ## Convert model
51 | 
52 | 1. Export ONNX model
53 | 
54 | Please refer to the [ONNX tutorial](../../ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.**
55 | 
56 | 2. Convert ONNX to OpenVINO
57 | 
58 | ``` shell
59 | cd /openvino_2021/deployment_tools/model_optimizer
60 | ```
61 | 
62 | Install requirements for convert tool
63 | 
64 | ```shell
65 | sudo ./install_prerequisites/install_prerequisites_onnx.sh
66 | ```
67 | 
68 | Then convert model.
69 | ```shell
70 | python3 mo.py --input_model --input_shape [--data_type FP16]
71 | ```
72 | For example:
73 | ```shell
74 | python3 mo.py --input_model yolox.onnx --input_shape [1,3,640,640] --data_type FP16 --output_dir converted_output
75 | ```
76 | 
77 | ## Demo
78 | 
79 | ### python
80 | 
81 | ```shell
82 | python openvino_inference.py -m -i 
83 | ```
84 | or
85 | ```shell
86 | python openvino_inference.py -m -i -o -s -d 
87 | ```
88 | 
89 | 
-------------------------------------------------------------------------------- /demo/TensorRT/cpp/CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | 
3 | project(yolox)
4 | 
5 | add_definitions(-std=c++11)
6 | 
7 | option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8 | set(CMAKE_CXX_STANDARD 11)
9 | set(CMAKE_BUILD_TYPE Debug)
10 | 
11 | find_package(CUDA REQUIRED)
12 | 
13 | include_directories(${PROJECT_SOURCE_DIR}/include)
14 | # include and link dirs of cuda and tensorrt, you need to adapt them if yours are different
15 | # cuda
16 | include_directories(/data/cuda/cuda-10.2/cuda/include)
17 | link_directories(/data/cuda/cuda-10.2/cuda/lib64)
18 | # cudnn
19 | include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include)
20 | link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64)
21 | # tensorrt
22 | include_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/include)
23 | link_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/lib)
24 | 
25 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
26 | 
27 | find_package(OpenCV)
28 | include_directories(${OpenCV_INCLUDE_DIRS})
29 | 
30 | add_executable(yolox ${PROJECT_SOURCE_DIR}/yolox.cpp)
31 | target_link_libraries(yolox nvinfer)
32 | target_link_libraries(yolox cudart)
33 | target_link_libraries(yolox ${OpenCV_LIBS})
34 | 
35 | add_definitions(-O2 -pthread)
36 | 
37 | 
-------------------------------------------------------------------------------- /demo/TensorRT/cpp/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-TensorRT in C++
2 | 
3 | As YOLOX models are easy to convert to tensorrt using [torch2trt 
gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt),
4 | our C++ demo does not include model conversion or construction like other tensorrt demos.
5 | 
6 | 
7 | ## Step 1: Prepare serialized engine file
8 | 
9 | Follow the trt [python demo README](../python/README.md) to convert and save the serialized engine file.
10 | 
11 | Check the 'model_trt.engine' file generated from Step 1, which will be automatically saved at the current demo dir.
12 | 
13 | 
14 | ## Step 2: build the demo
15 | 
16 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) to install TensorRT.
17 | 
18 | Install opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+).
19 | 
20 | build the demo:
21 | 
22 | ```shell
23 | mkdir build
24 | cd build
25 | cmake ..
26 | make
27 | ```
28 | 
29 | Then run the demo:
30 | 
31 | ```shell
32 | ./yolox ../model_trt.engine -i ../../../../assets/dog.jpg
33 | ```
34 | 
35 | or
36 | 
37 | ```shell
38 | ./yolox -i 
39 | ```
40 | 
-------------------------------------------------------------------------------- /demo/TensorRT/python/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-TensorRT in Python
2 | 
3 | This tutorial includes a Python demo for TensorRT.
4 | 
5 | ## Install TensorRT Toolkit
6 | 
7 | Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT and torch2trt.
8 | 
9 | ## Convert model
10 | 
11 | YOLOX models can be easily converted to TensorRT models using torch2trt.
12 | 
13 | If you want to convert our model, use the flag -n to specify a model name:
14 | ```shell
15 | python tools/trt.py -n -c 
16 | ```
17 | For example:
18 | ```shell
19 | python tools/trt.py -n yolox-s -c your_ckpt.pth
20 | ```
21 | can be: yolox-nano, yolox-tiny, yolox-s, yolox-m, yolox-l, yolox-x.
22 | 
23 | If you want to convert your customized model, use the flag -f to specify your exp file:
24 | ```shell
25 | python tools/trt.py -f -c 
26 | ```
27 | For example:
28 | ```shell
29 | python tools/trt.py -f /path/to/your/yolox/exps/yolox_s.py -c your_ckpt.pth
30 | ```
31 | *yolox_s.py* can be any exp file modified by you.
32 | 
33 | The converted model and the serialized engine file (for C++ demo) will be saved in your experiment output dir.
34 | 
35 | ## Demo
36 | 
37 | The TensorRT python demo is merged into our pytorch demo file, so you can run the pytorch demo command with ```--trt```.
38 | 
39 | ```shell
40 | python tools/demo.py image -n yolox-s --trt --save_result
41 | ```
42 | or
43 | ```shell
44 | python tools/demo.py image -f exps/default/yolox_s.py --trt --save_result
45 | ```
46 | 
47 | 
-------------------------------------------------------------------------------- /demo/ncnn/android/README.md: --------------------------------------------------------------------------------
1 | # YOLOX-Android-ncnn
2 | 
3 | Android app of YOLOX object detection based on [ncnn](https://github.com/Tencent/ncnn)
4 | 
5 | 
6 | ## Tutorial
7 | 
8 | ### Step1
9 | 
10 | Download ncnn-android-vulkan.zip from [releases of ncnn](https://github.com/Tencent/ncnn/releases). This repo uses
11 | [20210525 release](https://github.com/Tencent/ncnn/releases/download/20210525/ncnn-20210525-android-vulkan.zip) for building.
12 | 
13 | ### Step2
14 | 
15 | After downloading, please extract your zip file. 
Then, there are two ways to finish this step: 16 | * put your extracted directory into **app/src/main/jni** 17 | * change the **ncnn_DIR** path in **app/src/main/jni/CMakeLists.txt** to your extracted directory 18 | 19 | ### Step3 20 | Download example param and bin file from [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESXBH_GSSmFMszWJ6YG2VkQB5cWDfqVWXgk0D996jH0rpQ?e=qzEqUh) or [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz). Unzip the file to **app/src/main/assets**. 21 | 22 | ### Step4 23 | Open this project with Android Studio, build it and enjoy! 24 | 25 | ## Reference 26 | 27 | * [ncnn-android-yolov5](https://github.com/nihui/ncnn-android-yolov5) 28 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | 3 | android { 4 | compileSdkVersion 24 5 | buildToolsVersion "29.0.2" 6 | 7 | defaultConfig { 8 | applicationId "com.megvii.yoloXncnn" 9 | archivesBaseName = "$applicationId" 10 | 11 | ndk { 12 | moduleName "ncnn" 13 | abiFilters "armeabi-v7a", "arm64-v8a" 14 | } 15 | minSdkVersion 24 16 | } 17 | 18 | externalNativeBuild { 19 | cmake { 20 | version "3.10.2" 21 | path file('src/main/jni/CMakeLists.txt') 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/yoloXncnn.java: -------------------------------------------------------------------------------- 1 | // Copyright (C) Megvii, Inc. and its affiliates. All rights reserved. 2 | 3 | package com.megvii.yoloXncnn; 4 | 5 | import android.content.res.AssetManager; 6 | import android.graphics.Bitmap; 7 | 8 | public class YOLOXncnn 9 | { 10 | public native boolean Init(AssetManager mgr); 11 | 12 | public class Obj 13 | { 14 | public float x; 15 | public float y; 16 | public float w; 17 | public float h; 18 | public String label; 19 | public float prob; 20 | } 21 | 22 | public native Obj[] Detect(Bitmap bitmap, boolean use_gpu); 23 | 24 | static { 25 | System.loadLibrary("yoloXncnn"); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/jni/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(yoloXncnn) 2 | 3 | cmake_minimum_required(VERSION 3.4.1) 4 | 5 | set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20210525-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn) 6 | find_package(ncnn REQUIRED) 7 | 8 | add_library(yoloXncnn SHARED yoloXncnn_jni.cpp) 9 | 10 | target_link_libraries(yoloXncnn 11 | ncnn 12 | 13 | jnigraphics 14 | ) 15 | -------------------------------------------------------------------------------- /demo/ncnn/android/app/src/main/res/layout/main.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 11 | 12 |